From f45ec8b39dd6b6f67a64fbf57332cdf7bc9a9d7c Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 26 Jun 2022 22:21:25 +0500 Subject: [PATCH 1/8] Remove special case from tests --- tests/documents/sample_1_full.txt | 2 ++ tests/documents/sample_2_full.txt | 2 ++ tests/xmlrs_reader_tests.rs | 28 ++++------------------------ 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/tests/documents/sample_1_full.txt b/tests/documents/sample_1_full.txt index 91b53b5f..d31b2011 100644 --- a/tests/documents/sample_1_full.txt +++ b/tests/documents/sample_1_full.txt @@ -1,4 +1,6 @@ StartDocument(1.0, utf-8) +Characters( +) StartElement(project [name="project-name"]) Characters( ) diff --git a/tests/documents/sample_2_full.txt b/tests/documents/sample_2_full.txt index 88f3e7ec..9b409e0b 100644 --- a/tests/documents/sample_2_full.txt +++ b/tests/documents/sample_2_full.txt @@ -1,4 +1,6 @@ StartDocument(1.0, utf-8) +Characters( +) StartElement({urn:example:namespace}p:data) Characters( ) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index a23bb03c..937a0066 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -355,21 +355,15 @@ fn default_namespace_applies_to_end_elem() { } #[track_caller] -fn test(input: &str, output: &str, is_short: bool) { - test_bytes(input.as_bytes(), output.as_bytes(), is_short); +fn test(input: &str, output: &str, trim: bool) { + test_bytes(input.as_bytes(), output.as_bytes(), trim); } #[track_caller] -fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { - // Normalize newlines on Windows to just \n, which is what the reader and - // writer use. 
- // let input = input.replace("\r\n", "\n"); - // let input = input.as_bytes(); - // let output = output.replace("\r\n", "\n"); - // let output = output.as_bytes(); +fn test_bytes(input: &[u8], output: &[u8], trim: bool) { let mut reader = Reader::from_reader(input); reader - .trim_text(is_short) + .trim_text(trim) .check_comments(true) .expand_empty_elements(false); @@ -402,20 +396,6 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { } panic!("Unexpected event: {}", line); } - - if !is_short && line.starts_with("StartDocument") { - // advance next Characters(empty space) ... - if let Ok(Event::Text(ref e)) = reader.read_event(&mut Vec::new()) { - if e.iter().any(|b| match *b { - b' ' | b'\r' | b'\n' | b'\t' => false, - _ => true, - }) { - panic!("Reader expects empty Text event after a StartDocument"); - } - } else { - panic!("Reader expects empty Text event after a StartDocument"); - } - } } } From 5056603a1b2eed5ced05cb3892bf7efefb558264 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 26 Jun 2022 23:16:57 +0500 Subject: [PATCH 2/8] Use correct encoding when generating event information for comparison Also, Text events was incorrectly unescaped before decoding instead of correct decode, then unescape --- tests/xmlrs_reader_tests.rs | 55 +++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 937a0066..35ce90f3 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -1,3 +1,4 @@ +use quick_xml::escape::unescape; use quick_xml::events::{BytesStart, Event}; use quick_xml::name::{QName, ResolveResult}; use quick_xml::{Decoder, Reader, Result}; @@ -403,21 +404,25 @@ fn namespace_name(n: ResolveResult, name: QName, decoder: Decoder) -> String { let name = decoder.decode(name.as_ref()).unwrap(); match n { // Produces string '{namespace}prefixed_name' - ResolveResult::Bound(n) => format!("{{{}}}{}", 
from_utf8(n.as_ref()).unwrap(), name), + ResolveResult::Bound(n) => format!("{{{}}}{}", decoder.decode(n.as_ref()).unwrap(), name), _ => name.to_string(), } } -fn make_attrs(e: &BytesStart) -> ::std::result::Result { +fn make_attrs(e: &BytesStart, decoder: Decoder) -> ::std::result::Result { let mut atts = Vec::new(); for a in e.attributes() { match a { Ok(a) => { if a.key.as_namespace_binding().is_none() { + let key = decoder.decode(a.key.as_ref()).unwrap(); + let value = decoder.decode(a.value.as_ref()).unwrap(); + let unescaped_value = unescape(value.as_bytes()).unwrap(); atts.push(format!( "{}=\"{}\"", - from_utf8(a.key.as_ref()).unwrap(), - from_utf8(&*a.unescaped_value().unwrap()).unwrap() + key, + // unescape does not change validity of an UTF-8 string + from_utf8(&*unescaped_value).unwrap() )); } } @@ -430,43 +435,45 @@ fn make_attrs(e: &BytesStart) -> ::std::result::Result { fn xmlrs_display(opt_event: Result<(ResolveResult, Event)>, decoder: Decoder) -> String { match opt_event { Ok((_, Event::StartText(_))) => "StartText".to_string(), - Ok((n, Event::Start(ref e))) => { + Ok((n, Event::Start(e))) => { let name = namespace_name(n, e.name(), decoder); - match make_attrs(e) { - Ok(ref attrs) if attrs.is_empty() => format!("StartElement({})", &name), - Ok(ref attrs) => format!("StartElement({} [{}])", &name, &attrs), + match make_attrs(&e, decoder) { + Ok(attrs) if attrs.is_empty() => format!("StartElement({})", &name), + Ok(attrs) => format!("StartElement({} [{}])", &name, &attrs), Err(e) => format!("StartElement({}, attr-error: {})", &name, &e), } } - Ok((n, Event::Empty(ref e))) => { + Ok((n, Event::Empty(e))) => { let name = namespace_name(n, e.name(), decoder); - match make_attrs(e) { - Ok(ref attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), - Ok(ref attrs) => format!("EmptyElement({} [{}])", &name, &attrs), + match make_attrs(&e, decoder) { + Ok(attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), + Ok(attrs) => 
format!("EmptyElement({} [{}])", &name, &attrs), Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), } } - Ok((n, Event::End(ref e))) => { + Ok((n, Event::End(e))) => { let name = namespace_name(n, e.name(), decoder); format!("EndElement({})", name) } - Ok((_, Event::Comment(ref e))) => format!("Comment({})", from_utf8(e).unwrap()), - Ok((_, Event::CData(ref e))) => format!("CData({})", from_utf8(e).unwrap()), - Ok((_, Event::Text(ref e))) => match e.unescaped() { - Ok(c) => format!("Characters({})", decoder.decode(c.as_ref()).unwrap()), - Err(ref err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), + Ok((_, Event::Comment(e))) => format!("Comment({})", decoder.decode(&e).unwrap()), + Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()), + Ok((_, Event::Text(e))) => match unescape(decoder.decode(&e).unwrap().as_bytes()) { + Ok(c) => format!("Characters({})", from_utf8(c.as_ref()).unwrap()), + Err(err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), }, - Ok((_, Event::Decl(ref e))) => { + Ok((_, Event::Decl(e))) => { let version_cow = e.version().unwrap(); - let version = from_utf8(version_cow.as_ref()).unwrap(); + let version = decoder.decode(version_cow.as_ref()).unwrap(); let encoding_cow = e.encoding().unwrap().unwrap(); - let encoding = from_utf8(encoding_cow.as_ref()).unwrap(); + let encoding = decoder.decode(encoding_cow.as_ref()).unwrap(); format!("StartDocument({}, {})", version, encoding) } Ok((_, Event::Eof)) => format!("EndDocument"), - Ok((_, Event::PI(ref e))) => format!("ProcessingInstruction(PI={})", from_utf8(e).unwrap()), - Err(ref e) => format!("Error: {}", e), - Ok((_, Event::DocType(ref e))) => format!("DocType({})", from_utf8(e).unwrap()), + Ok((_, Event::PI(e))) => { + format!("ProcessingInstruction(PI={})", decoder.decode(&e).unwrap()) + } + Ok((_, Event::DocType(e))) => format!("DocType({})", decoder.decode(&e).unwrap()), + Err(e) => format!("Error: {}", e), } } From 
b4f7a387ceb5a2ea6808b2acab93c1aa7a0ff708 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 2 Jul 2022 18:40:11 +0500 Subject: [PATCH 3/8] Add a reference to #118 in changelog, forgotten in PR --- Changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog.md b/Changelog.md index 60b2b965..46b9f5ca 100644 --- a/Changelog.md +++ b/Changelog.md @@ -107,6 +107,7 @@ [#8]: https://github.com/Mingun/fast-xml/pull/8 [#9]: https://github.com/Mingun/fast-xml/pull/9 +[#118]: https://github.com/tafia/quick-xml/issues/118 [#180]: https://github.com/tafia/quick-xml/issues/180 [#191]: https://github.com/tafia/quick-xml/issues/191 [#324]: https://github.com/tafia/quick-xml/issues/324 From 4b0c1a73efff43bed94af36952faf893216310bf Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 3 Apr 2022 02:49:11 +0500 Subject: [PATCH 4/8] Use track_caller attribute on the util functions to get a correct line number when test fail --- tests/serde-migrated.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/serde-migrated.rs b/tests/serde-migrated.rs index 5dfc7b87..4fdcb9b2 100644 --- a/tests/serde-migrated.rs +++ b/tests/serde-migrated.rs @@ -14,6 +14,7 @@ struct Simple { d: Option, } +#[track_caller] fn test_parse_ok<'a, T: std::fmt::Debug>(errors: &[(&'a str, T)]) where T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>, @@ -37,6 +38,7 @@ where } } +#[track_caller] fn test_parse_err<'a, T>(errors: &[&'a str]) where T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>, From 246ef0bd3b72acfdbf63b8c9a788eb12f784e089 Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 27 Jun 2022 22:26:47 +0500 Subject: [PATCH 5/8] Rename methods of `Reader` to make them more descriptive: - read_event => read_event_into - read_to_end => read_to_end_into - read_text => read_text_into - read_event_unbuffered => read_event - read_to_end_unbuffered => read_to_end - read_event_buffered => read_event_impl --- Changelog.md | 12 ++++- README.md | 6 +-- 
benches/macrobenches.rs | 2 +- benches/microbenches.rs | 22 ++++---- compare/benches/bench.rs | 2 +- examples/custom_entities.rs | 2 +- examples/nested_readers.rs | 4 +- examples/read_texts.rs | 4 +- fuzz/fuzz_targets/fuzz_target_1.rs | 2 +- src/de/mod.rs | 8 +-- src/events/mod.rs | 17 ++++--- src/lib.rs | 4 +- src/reader.rs | 80 +++++++++++++++--------------- src/writer.rs | 2 +- tests/test.rs | 61 +++++++++++------------ tests/unit_tests.rs | 54 ++++++++++---------- 16 files changed, 145 insertions(+), 137 deletions(-) diff --git a/Changelog.md b/Changelog.md index 46b9f5ca..bea6287b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -96,13 +96,22 @@ - [#403]: Remove deprecated `quick_xml::de::from_bytes` and `Deserializer::from_borrowing_reader` +- [#412]: Rename methods of `Reader`: + |Old Name |New Name + |-------------------------|--------------------------------------------------- + |`read_event` |`read_event_into` + |`read_to_end` |`read_to_end_into` + |`read_text` |`read_text_into` + |`read_event_unbuffered` |`read_event` + |`read_to_end_unbuffered` |`read_to_end` + ### New Tests - [#9]: Added tests for incorrect nested tags in input - [#387]: Added a bunch of tests for sequences deserialization - [#393]: Added more tests for namespace resolver - [#393]: Added tests for reserved names (started with "xml"i) -- see -- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events generated for corresponding inputs +- [#363]: Add tests for `Reader::read_event_impl` to ensure that proper events generated for corresponding inputs - [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text [#8]: https://github.com/Mingun/fast-xml/pull/8 @@ -118,6 +127,7 @@ [#395]: https://github.com/tafia/quick-xml/pull/395 [#403]: https://github.com/tafia/quick-xml/pull/403 [#407]: https://github.com/tafia/quick-xml/pull/407 +[#412]: https://github.com/tafia/quick-xml/pull/412 ## 0.23.0 -- 2022-05-08 diff --git a/README.md 
b/README.md index 1cd30bb2..2e437c09 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ let mut buf = Vec::new(); loop { // NOTE: this is the generic case when we don't know about the input BufRead. // when the input is a &str or a &[u8], we don't actually need to use another - // buffer, we could directly call `reader.read_event_unbuffered()` - match reader.read_event(&mut buf) { + // buffer, we could directly call `reader.read_event()` + match reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) => { match e.name() { b"tag1" => println!("attributes values: {:?}", @@ -77,7 +77,7 @@ reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { // crates a new element ... alternatively we could reuse `e` by calling diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs index 14f28cb9..1a00abed 100644 --- a/benches/macrobenches.rs +++ b/benches/macrobenches.rs @@ -21,7 +21,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); fn parse_document(doc: &[u8]) -> XmlResult<()> { let mut r = Reader::from_reader(doc); loop { - match r.read_event_unbuffered()? { + match r.read_event()? 
{ Event::Start(e) | Event::Empty(e) => { for attr in e.attributes() { criterion::black_box(attr?.unescaped_value()?); diff --git a/benches/microbenches.rs b/benches/microbenches.rs index 9d701c05..d89e3e77 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -34,7 +34,7 @@ fn read_event(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Eof) => break, _ => (), @@ -57,7 +57,7 @@ fn read_event(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Eof) => break, _ => (), @@ -137,7 +137,7 @@ fn bytes_text_unescaped(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut nbtxt = criterion::black_box(0); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), Ok(Event::Eof) => break, @@ -175,7 +175,7 @@ fn bytes_text_unescaped(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut nbtxt = criterion::black_box(0); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), Ok(Event::Eof) => break, @@ -215,7 +215,7 @@ fn one_event(c: &mut Criterion) { let mut r = Reader::from_reader(src.as_ref()); let mut nbtxt = criterion::black_box(0); r.check_end_names(false).check_comments(false); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::StartText(e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; @@ -235,7 +235,7 @@ fn 
one_event(c: &mut Criterion) { r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; @@ -255,7 +255,7 @@ fn one_event(c: &mut Criterion) { r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(), something_else => panic!("Did not expect {:?}", something_else), }; @@ -275,7 +275,7 @@ fn one_event(c: &mut Criterion) { r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::CData(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; @@ -298,7 +298,7 @@ fn attributes(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Empty(e)) => { for attr in e.attributes() { let _attr = attr.unwrap(); @@ -321,7 +321,7 @@ fn attributes(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Empty(e)) => { for attr in e.attributes().with_checks(false) { let _attr = attr.unwrap(); @@ -344,7 +344,7 @@ fn attributes(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Empty(e)) if e.name() == QName(b"player") => { for name in ["num", "status", "avg"] { if let Some(_attr) = e.try_get_attribute(name).unwrap() { diff --git a/compare/benches/bench.rs b/compare/benches/bench.rs index 4a398636..4dfb5a4f 100644 --- a/compare/benches/bench.rs +++ 
b/compare/benches/bench.rs @@ -18,7 +18,7 @@ fn low_level_comparison(c: &mut Criterion) { let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Eof) => break, _ => (), diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index 1212f343..02165faf 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -31,7 +31,7 @@ fn main() -> Result<(), Box> { let entity_re = Regex::new(r#""#)?; loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::DocType(ref e)) => { for cap in entity_re.captures_iter(&e) { custom_entities.insert(cap[1].to_vec(), cap[2].to_vec()); diff --git a/examples/nested_readers.rs b/examples/nested_readers.rs index 2b348da8..1e5b9c71 100644 --- a/examples/nested_readers.rs +++ b/examples/nested_readers.rs @@ -20,7 +20,7 @@ fn main() -> Result<(), quick_xml::Error> { let mut reader = Reader::from_file("tests/documents/document.xml")?; let mut found_tables = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Event::Start(element) => match element.name().as_ref() { b"w:tbl" => { count += 1; @@ -33,7 +33,7 @@ fn main() -> Result<(), quick_xml::Error> { let mut row_index = 0; loop { skip_buf.clear(); - match reader.read_event(&mut skip_buf)? { + match reader.read_event_into(&mut skip_buf)? 
{ Event::Start(element) => match element.name().as_ref() { b"w:tr" => { stats.rows.push(vec![]); diff --git a/examples/read_texts.rs b/examples/read_texts.rs index dc6f9bd1..7b158be7 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -12,11 +12,11 @@ fn main() { let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => { txt.push( reader - .read_text(b"tag2", &mut Vec::new()) + .read_text_into(b"tag2", &mut Vec::new()) .expect("Cannot decode text value"), ); println!("{:?}", txt); diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index c6265753..50352856 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -11,7 +11,7 @@ fuzz_target!(|data: &[u8]| { let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))=> { if e.unescaped().is_err() { break; diff --git a/src/de/mod.rs b/src/de/mod.rs index 462ec3b1..85949d8a 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -949,7 +949,7 @@ pub struct IoReader { impl<'i, R: BufRead> XmlRead<'i> for IoReader { fn next(&mut self) -> Result, DeError> { let event = loop { - let e = self.reader.read_event(&mut self.buf)?; + let e = self.reader.read_event_into(&mut self.buf)?; match e { //TODO: Probably not the best idea treat StartText as usual text // Usually this event will represent a BOM @@ -971,7 +971,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader { } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { - match self.reader.read_to_end(name, &mut self.buf) { + match self.reader.read_to_end_into(name, &mut self.buf) { Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof), other => Ok(other?), } @@ -993,7 +993,7 @@ pub struct SliceReader<'de> { impl<'de> 
XmlRead<'de> for SliceReader<'de> { fn next(&mut self) -> Result, DeError> { loop { - let e = self.reader.read_event_unbuffered()?; + let e = self.reader.read_event()?; match e { //TODO: Probably not the best idea treat StartText as usual text // Usually this event will represent a BOM @@ -1011,7 +1011,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> { } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { - match self.reader.read_to_end_unbuffered(name) { + match self.reader.read_to_end(name) { Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof), other => Ok(other?), } diff --git a/src/events/mod.rs b/src/events/mod.rs index b04e458e..f80c4093 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -17,7 +17,7 @@ //! //! # Reading //! When reading a XML stream, the events are emitted by -//! [`Reader::read_event`]. You must listen +//! [`Reader::read_event_into`]. You must listen //! for the different types of events you are interested in. //! //! See [`Reader`] for further information. @@ -29,10 +29,8 @@ //! //! See [`Writer`] for further information. //! -//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event -//! [`Reader`]: ../reader/struct.Reader.html -//! [`Writer`]: ../writer/struct.Writer.html -//! [`Event`]: enum.Event.html +//! [`Writer`]: crate::writer::Writer +//! [`Event`]: crate::events::Event pub mod attributes; @@ -928,7 +926,7 @@ impl<'a> Deref for BytesCData<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Event emitted by [`Reader::read_event`]. +/// Event emitted by [`Reader::read_event_into`]. #[derive(Clone, Debug, Eq, PartialEq)] pub enum Event<'a> { /// Text that appeared before the first opening tag or an [XML declaration]. 
@@ -956,7 +954,7 @@ pub enum Event<'a> { /// let mut reader = Reader::from_bytes(xml); /// let mut events_processed = 0; /// loop { - /// match reader.read_event_unbuffered() { + /// match reader.read_event() { /// Ok(Event::StartText(e)) => { /// assert_eq!(events_processed, 0); /// // Content contains BOM @@ -1066,7 +1064,10 @@ mod test { let mut buf = Vec::new(); let mut parsed_local_names = Vec::new(); loop { - match rdr.read_event(&mut buf).expect("unable to read xml event") { + match rdr + .read_event_into(&mut buf) + .expect("unable to read xml event") + { Event::Start(ref e) => parsed_local_names.push( from_utf8(e.local_name().as_ref()) .expect("unable to build str from local_name") diff --git a/src/lib.rs b/src/lib.rs index 378488bb..cebc401d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,7 +44,7 @@ //! //! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) //! loop { -//! match reader.read_event(&mut buf) { +//! match reader.read_event_into(&mut buf) { //! // for triggering namespaced events, use this instead: //! // match reader.read_namespaced_event(&mut buf) { //! Ok(Event::Start(ref e)) => { @@ -86,7 +86,7 @@ //! let mut writer = Writer::new(Cursor::new(Vec::new())); //! let mut buf = Vec::new(); //! loop { -//! match reader.read_event(&mut buf) { +//! match reader.read_event_into(&mut buf) { //! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => { //! //! // crates a new element ... 
alternatively we could reuse `e` by calling diff --git a/src/reader.rs b/src/reader.rs index a1909255..a812ef16 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -125,7 +125,7 @@ impl EncodingRef { /// let mut txt = Vec::new(); /// let mut buf = Vec::new(); /// loop { -/// match reader.read_event(&mut buf) { +/// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => { /// match e.name().as_ref() { /// b"tag1" => println!("attributes values: {:?}", @@ -362,7 +362,7 @@ impl Reader { /// } /// /// loop { - /// match reader.read_event(&mut buf) { + /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => match e.name().as_ref() { /// b"tag1" | b"tag2" => (), /// tag => { @@ -494,7 +494,7 @@ impl Reader { /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { - /// match reader.read_event(&mut buf) { + /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), @@ -507,8 +507,8 @@ impl Reader { /// println!("Text events: {:?}", txt); /// ``` #[inline] - pub fn read_event<'b>(&mut self, buf: &'b mut Vec) -> Result> { - self.read_event_buffered(buf) + pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { + self.read_event_impl(buf) } /// Reads the next event and resolves its namespace (if applicable). 
@@ -569,7 +569,7 @@ impl Reader { self.ns_resolver.pop(namespace_buffer); } self.pending_pop = false; - match self.read_event(buf) { + match self.read_event_into(buf) { Ok(Event::Eof) => Ok((ResolveResult::Unbound, Event::Eof)), Ok(Event::Start(e)) => { self.ns_resolver.push(&e, namespace_buffer); @@ -610,11 +610,11 @@ impl Reader { /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name - pub fn read_to_end>(&mut self, end: K, buf: &mut Vec) -> Result<()> { + pub fn read_to_end_into>(&mut self, end: K, buf: &mut Vec) -> Result<()> { let mut depth = 0; let end = end.as_ref(); loop { - match self.read_event(buf) { + match self.read_event_into(buf) { Ok(Event::End(ref e)) if e.name().as_ref() == end => { if depth == 0 { return Ok(()); @@ -656,9 +656,9 @@ impl Reader { /// /// let expected = ["", ""]; /// for &content in expected.iter() { - /// match xml.read_event(&mut Vec::new()) { + /// match xml.read_event_into(&mut Vec::new()) { /// Ok(Event::Start(ref e)) => { - /// assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).unwrap(), content); + /// assert_eq!(&xml.read_text_into(e.name(), &mut Vec::new()).unwrap(), content); /// }, /// e => panic!("Expecting Start event, found {:?}", e), /// } @@ -667,15 +667,15 @@ impl Reader { /// /// [`Text`]: events/enum.Event.html#variant.Text /// [`End`]: events/enum.Event.html#variant.End - pub fn read_text>(&mut self, end: K, buf: &mut Vec) -> Result { - let s = match self.read_event(buf) { + pub fn read_text_into>(&mut self, end: K, buf: &mut Vec) -> Result { + let s = match self.read_event_into(buf) { Ok(Event::Text(e)) => e.unescape_and_decode(self), Ok(Event::End(ref e)) if e.name().as_ref() == end.as_ref() => return Ok("".to_string()), Err(e) => return Err(e), Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), _ => return Err(Error::TextNotFound), }; - self.read_to_end(end, buf)?; + self.read_to_end_into(end, buf)?; s } } @@ -685,7 +685,7 
@@ impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader. - fn read_event_buffered<'i, B>(&mut self, buf: B) -> Result> + fn read_event_impl<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { @@ -718,7 +718,7 @@ impl Reader { // If we already at the `<` symbol, do not try to return an empty Text event if self.reader.skip_one(b'<', &mut self.buf_position)? { - return self.read_event_buffered(buf); + return self.read_event_impl(buf); } match self @@ -968,18 +968,18 @@ impl<'a> Reader<&'a [u8]> { /// Read an event that borrows from the input rather than a buffer. #[inline] - pub fn read_event_unbuffered(&mut self) -> Result> { - self.read_event_buffered(()) + pub fn read_event(&mut self) -> Result> { + self.read_event_impl(()) } /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name - pub fn read_to_end_unbuffered>(&mut self, end: K) -> Result<()> { + pub fn read_to_end>(&mut self, end: K) -> Result<()> { let mut depth = 0; let end = end.as_ref(); loop { - match self.read_event_unbuffered() { + match self.read_event() { Ok(Event::End(ref e)) if e.name().as_ref() == end => { if depth == 0 { return Ok(()); @@ -2405,7 +2405,7 @@ mod test { } /// Ensures, that no empty `Text` events are generated - mod read_event_buffered { + mod read_event_impl { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; @@ -2415,7 +2415,7 @@ mod test { let mut reader = Reader::from_str("bom"); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::StartText(BytesText::from_escaped(b"bom".as_ref()).into()) ); } @@ -2425,7 +2425,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + 
reader.read_event_impl($buf).unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::borrowed(b"xml ", 3))) ); } @@ -2435,7 +2435,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::DocType(BytesText::from_escaped(b"x".as_ref())) ); } @@ -2445,7 +2445,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::PI(BytesText::from_escaped(b"xml-stylesheet".as_ref())) ); } @@ -2455,7 +2455,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::Start(BytesStart::borrowed_name(b"tag")) ); } @@ -2468,7 +2468,7 @@ mod test { reader.check_end_names(false); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::End(BytesEnd::borrowed(b"tag")) ); } @@ -2478,7 +2478,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::Empty(BytesStart::borrowed_name(b"tag")) ); } @@ -2489,12 +2489,12 @@ mod test { let mut reader = Reader::from_str("text"); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::Empty(BytesStart::borrowed_name(b"tag")) ); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::Text(BytesText::from_escaped(b"text".as_ref())) ); } @@ -2504,7 +2504,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::CData(BytesCData::from_str("")) ); } @@ -2514,7 +2514,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + 
reader.read_event_impl($buf).unwrap(), Event::Comment(BytesText::from_escaped(b"".as_ref())) ); } @@ -2524,7 +2524,7 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_buffered($buf).unwrap(), + reader.read_event_impl($buf).unwrap(), Event::Eof ); } @@ -2547,13 +2547,13 @@ mod test { let mut reader = Reader::from_bytes(b"\xFF\xFE"); assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_buffered($buf).unwrap(); + reader.read_event_impl($buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - reader.read_event_buffered($buf).unwrap(); + reader.read_event_impl($buf).unwrap(); assert_eq!(reader.decoder().encoding(), WINDOWS_1251); - assert_eq!(reader.read_event_buffered($buf).unwrap(), Event::Eof); + assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); } /// Checks that encoding is changed by XML declaration, but only once @@ -2562,13 +2562,13 @@ mod test { let mut reader = Reader::from_bytes(b""); assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_buffered($buf).unwrap(); + reader.read_event_impl($buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - reader.read_event_buffered($buf).unwrap(); + reader.read_event_impl($buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - assert_eq!(reader.read_event_buffered($buf).unwrap(), Event::Eof); + assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); } } @@ -2579,10 +2579,10 @@ mod test { let mut reader = Reader::from_str(""); assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_buffered($buf).unwrap(); + reader.read_event_impl($buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_8); - assert_eq!(reader.read_event_buffered($buf).unwrap(), Event::Eof); + assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); } } }; diff --git a/src/writer.rs b/src/writer.rs index d6fcf960..16e3dc81 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -22,7 +22,7 @@ use std::io::Write; /// let mut 
writer = Writer::new(Cursor::new(Vec::new())); /// let mut buf = Vec::new(); /// loop { -/// match reader.read_event(&mut buf) { +/// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => { /// /// // crates a new element ... alternatively we could reuse `e` by calling diff --git a/tests/test.rs b/tests/test.rs index 36ed5df9..36338cf5 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -14,7 +14,7 @@ fn test_sample() { let mut r = Reader::from_reader(src); let mut count = 0; loop { - match r.read_event(&mut buf).unwrap() { + match r.read_event_into(&mut buf).unwrap() { Start(_) => count += 1, Decl(e) => println!("{:?}", e.version()), Eof => break, @@ -31,7 +31,7 @@ fn test_attributes_empty() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Empty(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -60,7 +60,7 @@ fn test_attribute_equal() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Empty(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -83,7 +83,7 @@ fn test_comment_starting_with_gt() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Comment(e)) => { assert_eq!(e.as_ref(), b">"); break; @@ -102,7 +102,7 @@ fn test_koi8_r_encoding() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); loop { - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Text(e)) => { e.unescape_and_decode(&r).unwrap(); } @@ -121,7 +121,7 @@ fn fuzz_53() { let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; loop { - match reader.read_event(&mut buf) { + match 
reader.read_event_into(&mut buf) { Ok(quick_xml::events::Event::Eof) | Err(..) => break, _ => buf.clear(), } @@ -137,7 +137,7 @@ fn test_issue94() { reader.trim_text(true); let mut buf = vec![]; loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(quick_xml::events::Event::Eof) | Err(..) => break, _ => buf.clear(), } @@ -154,7 +154,7 @@ fn fuzz_101() { let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Start(ref e)) | Ok(Empty(ref e)) => { for a in e.attributes() { if a.ok().map_or(true, |a| a.unescaped_value().is_err()) { @@ -178,14 +178,11 @@ fn fuzz_101() { fn test_no_trim() { let mut reader = Reader::from_str(" text "); - assert!(matches!( - reader.read_event_unbuffered().unwrap(), - StartText(_) - )); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Start(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Text(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), End(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Text(_))); + assert!(matches!(reader.read_event().unwrap(), StartText(_))); + assert!(matches!(reader.read_event().unwrap(), Start(_))); + assert!(matches!(reader.read_event().unwrap(), Text(_))); + assert!(matches!(reader.read_event().unwrap(), End(_))); + assert!(matches!(reader.read_event().unwrap(), Text(_))); } #[test] @@ -193,13 +190,10 @@ fn test_trim_end() { let mut reader = Reader::from_str(" text "); reader.trim_text_end(true); - assert!(matches!( - reader.read_event_unbuffered().unwrap(), - StartText(_) - )); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Start(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Text(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), End(_))); + assert!(matches!(reader.read_event().unwrap(), StartText(_))); + assert!(matches!(reader.read_event().unwrap(), Start(_))); 
+ assert!(matches!(reader.read_event().unwrap(), Text(_))); + assert!(matches!(reader.read_event().unwrap(), End(_))); } #[test] @@ -207,9 +201,9 @@ fn test_trim() { let mut reader = Reader::from_str(" text "); reader.trim_text(true); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Start(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), Text(_))); - assert!(matches!(reader.read_event_unbuffered().unwrap(), End(_))); + assert!(matches!(reader.read_event().unwrap(), Start(_))); + assert!(matches!(reader.read_event().unwrap(), Text(_))); + assert!(matches!(reader.read_event().unwrap(), End(_))); } #[test] @@ -218,15 +212,18 @@ fn test_clone_reader() { reader.trim_text(true); let mut buf = Vec::new(); - assert!(matches!(reader.read_event(&mut buf).unwrap(), Start(_))); + assert!(matches!( + reader.read_event_into(&mut buf).unwrap(), + Start(_) + )); let mut cloned = reader.clone(); - assert!(matches!(reader.read_event(&mut buf).unwrap(), Text(_))); - assert!(matches!(reader.read_event(&mut buf).unwrap(), End(_))); + assert!(matches!(reader.read_event_into(&mut buf).unwrap(), Text(_))); + assert!(matches!(reader.read_event_into(&mut buf).unwrap(), End(_))); - assert!(matches!(cloned.read_event(&mut buf).unwrap(), Text(_))); - assert!(matches!(cloned.read_event(&mut buf).unwrap(), End(_))); + assert!(matches!(cloned.read_event_into(&mut buf).unwrap(), Text(_))); + assert!(matches!(cloned.read_event_into(&mut buf).unwrap(), End(_))); } #[cfg(feature = "serialize")] @@ -925,7 +922,7 @@ fn test_issue299() -> Result<(), Error> { "#; let mut reader = Reader::from_str(xml); loop { - match reader.read_event_unbuffered()? { + match reader.read_event()? { Start(e) | Empty(e) => { let attr_count = match e.name().as_ref() { b"MICEX_DOC" => 1, diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 39af252d..6971cfef 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -12,7 +12,7 @@ use pretty_assertions::assert_eq; macro_rules! 
next_eq_name { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + match $r.read_event_into(&mut buf).unwrap() { $t(ref e) if e.name().as_ref() == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -28,7 +28,7 @@ macro_rules! next_eq_name { macro_rules! next_eq_content { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + match $r.read_event_into(&mut buf).unwrap() { $t(ref e) if e.as_ref() == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -130,7 +130,7 @@ fn test_xml_decl() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf).unwrap() { + match r.read_event_into(&mut buf).unwrap() { Decl(ref e) => { match e.version() { Ok(v) => assert_eq!( @@ -208,7 +208,7 @@ fn test_writer() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -227,7 +227,7 @@ fn test_writer_borrow() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` } @@ -250,7 +250,7 @@ fn test_writer_indent() -> Result<()> { let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -276,7 +276,7 @@ fn test_writer_indent_cdata() -> Result<()> { let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? 
{ + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -302,7 +302,7 @@ fn test_write_empty_element_attrs() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -324,7 +324,7 @@ fn test_write_attrs() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - let event = match reader.read_event(&mut buf)? { + let event = match reader.read_event_into(&mut buf)? { Eof => break, Start(elem) => { let mut attrs = elem.attributes().collect::>>()?; @@ -430,7 +430,7 @@ fn test_buf_position_err_end_element() { r.trim_text(true).check_end_names(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag Err(e) => panic!( "expecting buf_pos = 2, found {}, err: {:?}", @@ -450,7 +450,7 @@ fn test_buf_position_err_comment() { assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { // error at char 4: no closing --> tag found Err(e) => assert_eq!( r.buffer_position(), @@ -469,11 +469,11 @@ fn test_buf_position_err_comment_2_buf() { r.trim_text(true).check_end_names(true); let mut buf = Vec::new(); - let _ = r.read_event(&mut buf).unwrap(); + let _ = r.read_event_into(&mut buf).unwrap(); assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { // error at char 4: no closing --> tag found Err(e) => assert_eq!( r.buffer_position(), @@ -495,7 +495,7 @@ fn test_buf_position_err_comment_trim_text() { assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut 
buf) { // error at char 7: no closing --> tag found Err(e) => assert_eq!( r.buffer_position(), @@ -514,7 +514,7 @@ fn test_escaped_content() { r.trim_text(true); next_eq!(r, Start, b"a"); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Text(e)) => { assert_eq!( &*e, @@ -562,7 +562,7 @@ fn test_read_write_roundtrip_results_in_identity() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -589,7 +589,7 @@ fn test_read_write_roundtrip() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -616,7 +616,7 @@ fn test_read_write_roundtrip_escape() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? { Eof => break, Text(e) => { let t = e.escaped(); @@ -649,7 +649,7 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf)? { + match reader.read_event_into(&mut buf)? 
{ Eof => break, Text(e) => { let t = e.unescape_and_decode(&reader).unwrap(); @@ -671,7 +671,7 @@ fn test_closing_bracket_in_single_quote_attr() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -700,7 +700,7 @@ fn test_closing_bracket_in_double_quote_attr() { let mut r = Reader::from_str(r#""#); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -729,7 +729,7 @@ fn test_closing_bracket_in_double_quote_mixed() { let mut r = Reader::from_str(r#""#); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -758,7 +758,7 @@ fn test_closing_bracket_in_single_quote_mixed() { let mut r = Reader::from_str(r#""#); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + match r.read_event_into(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); assert_eq!( @@ -798,7 +798,7 @@ mod decode_with_bom_removal { let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::StartText(e)) => { txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()) } @@ -824,7 +824,7 @@ mod decode_with_bom_removal { let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::StartText(e)) => { txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()) } @@ -845,7 +845,7 @@ mod decode_with_bom_removal { let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::StartText(e)) => { txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()) } @@ -868,7 
+868,7 @@ mod decode_with_bom_removal { let mut buf = Vec::new(); loop { - match reader.read_event(&mut buf) { + match reader.read_event_into(&mut buf) { Ok(Event::StartText(e)) => { txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()) } From db8ee6e0a8283d2dfc9cbe9522052b8ae7195315 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 2 Jul 2022 18:26:25 +0500 Subject: [PATCH 6/8] Change `read_to_end*` and `read_text` to accept `QName` instead of `AsRef<[u8]>` AsRef is too unsafe because you accidentally could pass wrong parameters Also fixes using incorrect encoding if `read_to_end` family of methods or `read_text` method would not find a corresponding end tag and the reader has a non-UTF-8 encoding --- Changelog.md | 3 +++ examples/read_texts.rs | 3 ++- src/reader.rs | 37 ++++++++++++++++++++----------------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Changelog.md b/Changelog.md index bea6287b..91dee4f6 100644 --- a/Changelog.md +++ b/Changelog.md @@ -38,6 +38,8 @@ returns `ResolveResult::Unknown` if prefix was not registered in namespace buffer - [#393]: Fix breaking processing after encounter an attribute with a reserved name (started with "xmlns") - [#363]: Do not generate empty `Event::Text` events +- [#412]: Fix using incorrect encoding if `read_to_end` family of methods or `read_text` + method not found a corresponding end tag and reader has non-UTF-8 encoding ### Misc Changes @@ -104,6 +106,7 @@ |`read_text` |`read_text_into` |`read_event_unbuffered` |`read_event` |`read_to_end_unbuffered` |`read_to_end` +- [#412]: Change `read_to_end*` and `read_text_into` to accept `QName` instead of `AsRef<[u8]>` ### New Tests diff --git a/examples/read_texts.rs b/examples/read_texts.rs index 7b158be7..40d71e63 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -1,5 +1,6 @@ fn main() { use quick_xml::events::Event; + use quick_xml::name::QName; use quick_xml::Reader; let xml = "text1text2\ @@ -16,7 +17,7 @@ fn main() { 
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => { txt.push( reader - .read_text_into(b"tag2", &mut Vec::new()) + .read_text_into(QName(b"tag2"), &mut Vec::new()) .expect("Cannot decode text value"), ); println!("{:?}", txt); diff --git a/src/reader.rs b/src/reader.rs index a812ef16..21967a70 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -610,21 +610,22 @@ impl Reader { /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name - pub fn read_to_end_into>(&mut self, end: K, buf: &mut Vec) -> Result<()> { + pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { let mut depth = 0; - let end = end.as_ref(); loop { match self.read_event_into(buf) { - Ok(Event::End(ref e)) if e.name().as_ref() == end => { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == end => depth += 1, + Ok(Event::End(e)) if e.name() == end => { if depth == 0 { return Ok(()); } depth -= 1; } - Ok(Event::Start(ref e)) if e.name().as_ref() == end => depth += 1, - Err(e) => return Err(e), Ok(Event::Eof) => { - return Err(Error::UnexpectedEof(format!("", from_utf8(end)))); + let name = self.decoder().decode(end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); } _ => (), } @@ -665,13 +666,14 @@ impl Reader { /// } /// ``` /// - /// [`Text`]: events/enum.Event.html#variant.Text - /// [`End`]: events/enum.Event.html#variant.End - pub fn read_text_into>(&mut self, end: K, buf: &mut Vec) -> Result { + /// [`Text`]: Event::Text + /// [`End`]: Event::End + pub fn read_text_into(&mut self, end: QName, buf: &mut Vec) -> Result { let s = match self.read_event_into(buf) { - Ok(Event::Text(e)) => e.unescape_and_decode(self), - Ok(Event::End(ref e)) if e.name().as_ref() == end.as_ref() => return Ok("".to_string()), Err(e) => return Err(e), + + Ok(Event::Text(e)) => e.unescape_and_decode(self), + Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()), Ok(Event::Eof) => return 
Err(Error::UnexpectedEof("Text".to_string())), _ => return Err(Error::TextNotFound), }; @@ -975,21 +977,22 @@ impl<'a> Reader<&'a [u8]> { /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name - pub fn read_to_end>(&mut self, end: K) -> Result<()> { + pub fn read_to_end(&mut self, end: QName) -> Result<()> { let mut depth = 0; - let end = end.as_ref(); loop { match self.read_event() { - Ok(Event::End(ref e)) if e.name().as_ref() == end => { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == end => depth += 1, + Ok(Event::End(e)) if e.name() == end => { if depth == 0 { return Ok(()); } depth -= 1; } - Ok(Event::Start(ref e)) if e.name().as_ref() == end => depth += 1, - Err(e) => return Err(e), Ok(Event::Eof) => { - return Err(Error::UnexpectedEof(format!("", from_utf8(end)))); + let name = self.decoder().decode(end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); } _ => (), } From 5e6d04525173c5917cde133736c4f7d39fcc97b2 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 2 Jul 2022 17:59:39 +0500 Subject: [PATCH 7/8] Add doctests for `read_to_end*` family of methods --- src/reader.rs | 154 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 5 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 21967a70..adcd75b1 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -607,12 +607,91 @@ impl Reader { } } - /// Reads until end element is found + /// Reads until end element is found using provided buffer as intermediate + /// storage for events content. This function is supposed to be called after + /// you already read a [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name + /// Manages nested cases where parent and child elements have the same name. + /// + /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] + /// will be returned. 
In particular, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. + /// + /// If your reader was created from a string slice or byte array slice, it is + /// better to use the [`read_to_end()`] method, because it will not copy bytes + /// into an intermediate buffer. + /// + /// The provided `buf` buffer will hold the content of only one event at a time. + /// Before reading each event the buffer will be cleared. If you know an + /// appropriate size of each event, you can preallocate the buffer to reduce + /// the number of reallocations. + /// + /// The `end` parameter should contain the name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using the + /// [`BytesStart::to_end()`] method. + /// + /// The correctness of the skipped events is not checked if you disabled + /// the [`check_end_names`] option. + /// + /// # Namespaces + /// + /// While the [`Reader`] does not support namespace resolution, namespaces + /// do not change the algorithm for comparing names. Although the names + /// `a:name` and `b:name`, where both prefixes `a` and `b` resolve to the + /// same namespace, are semantically equivalent, `</b:name>` cannot close + /// `<a:name>`, because according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event.
+ /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_str(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// "#); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let start = BytesStart::borrowed_name(b"outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`read_to_end()`]: Self::read_to_end + /// [`check_end_names`]: Self::check_end_names + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { let mut depth = 0; loop { + buf.clear(); match self.read_event_into(buf) { Err(e) => return Err(e), @@ -629,7 +708,6 @@ impl Reader { } _ => (), } - buf.clear(); } } @@ -974,9 +1052,75 @@ impl<'a> Reader<&'a [u8]> { self.read_event_impl(()) } - /// Reads until end element is found + /// Reads until end element is found. This function is supposed to be called + /// after you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name. + /// + /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] + /// will be returned. 
In particular, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. + /// + /// The `end` parameter should contain the name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using the + /// [`BytesStart::to_end()`] method. + /// + /// The correctness of the skipped events is not checked if you disabled + /// the [`check_end_names`] option. + /// + /// # Namespaces + /// + /// While the [`Reader`] does not support namespace resolution, namespaces + /// do not change the algorithm for comparing names. Although the names + /// `a:name` and `b:name`, where both prefixes `a` and `b` resolve to the + /// same namespace, are semantically equivalent, `</b:name>` cannot close + /// `<a:name>`, because according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event. /// - /// Manages nested cases where parent and child elements have the same name + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_str(r#" + /// <outer> + /// <inner> + /// <inner></inner> + /// <inner/> + /// <outer></outer> + /// <outer/> + /// </inner> + /// </outer> + /// "#); + /// reader.trim_text(true); + /// + /// let start = BytesStart::borrowed_name(b"outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces.
+ /// reader.read_to_end(end.name()).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event().unwrap(), Event::Eof); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`check_end_names`]: Self::check_end_names + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result<()> { let mut depth = 0; loop { From ae458cb831e96d19a9501f44307ec570d820914b Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 15 Jun 2022 23:16:20 +0500 Subject: [PATCH 8/8] Remove excessive `BufRead` constraint Co-authored-by: Daniel Alley --- src/events/attributes.rs | 8 ++++---- src/events/mod.rs | 7 +++---- src/reader.rs | 12 ++++++------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/events/attributes.rs b/src/events/attributes.rs index d6331bc7..51f1455c 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -9,7 +9,7 @@ use crate::reader::{is_whitespace, Reader}; use crate::utils::{write_byte_string, write_cow_string, Bytes}; use std::fmt::{self, Debug, Display, Formatter}; use std::iter::FusedIterator; -use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range}; +use std::{borrow::Cow, collections::HashMap, ops::Range}; /// A struct representing a key/value XML attribute. /// @@ -81,7 +81,7 @@ impl<'a> Attribute<'a> { /// /// [`unescaped_value()`]: #method.unescaped_value /// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode - pub fn unescape_and_decode_value(&self, reader: &Reader) -> XmlResult { + pub fn unescape_and_decode_value(&self, reader: &Reader) -> XmlResult { self.do_unescape_and_decode_value(reader, None) } @@ -99,7 +99,7 @@ impl<'a> Attribute<'a> { /// # Pre-condition /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 
- pub fn unescape_and_decode_value_with_custom_entities( + pub fn unescape_and_decode_value_with_custom_entities( &self, reader: &Reader, custom_entities: &HashMap, Vec>, @@ -108,7 +108,7 @@ impl<'a> Attribute<'a> { } /// The keys and values of `custom_entities`, if any, must be valid UTF-8. - fn do_unescape_and_decode_value( + fn do_unescape_and_decode_value( &self, reader: &Reader, custom_entities: Option<&HashMap, Vec>>, diff --git a/src/events/mod.rs b/src/events/mod.rs index f80c4093..51c66ec7 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -39,7 +39,6 @@ use encoding_rs::Encoding; use std::borrow::Cow; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; -use std::io::BufRead; use std::ops::Deref; use std::str::from_utf8; @@ -755,7 +754,7 @@ impl<'a> BytesText<'a> { /// it might be wiser to manually use /// 1. BytesText::unescaped() /// 2. Reader::decode(...) - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &Reader) -> Result { self.do_unescape_and_decode_with_custom_entities(reader, None) } @@ -769,7 +768,7 @@ impl<'a> BytesText<'a> { /// # Pre-condition /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. - pub fn unescape_and_decode_with_custom_entities( + pub fn unescape_and_decode_with_custom_entities( &self, reader: &Reader, custom_entities: &HashMap, Vec>, @@ -777,7 +776,7 @@ impl<'a> BytesText<'a> { self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities)) } - fn do_unescape_and_decode_with_custom_entities( + fn do_unescape_and_decode_with_custom_entities( &self, reader: &Reader, custom_entities: Option<&HashMap, Vec>>, diff --git a/src/reader.rs b/src/reader.rs index adcd75b1..12221ebc 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -107,7 +107,7 @@ impl EncodingRef { /// A low level encoding-agnostic XML event reader. /// -/// Consumes a `BufRead` and streams XML `Event`s. 
+/// Consumes bytes and streams XML [`Event`]s. /// /// # Examples /// @@ -144,7 +144,7 @@ impl EncodingRef { /// } /// ``` #[derive(Clone)] -pub struct Reader { +pub struct Reader { /// reader pub(crate) reader: R, /// current buffer position, useful for debugging errors @@ -198,8 +198,8 @@ pub struct Reader { } /// Builder methods -impl Reader { - /// Creates a `Reader` that reads from a reader implementing `BufRead`. +impl Reader { + /// Creates a `Reader` that reads from a given reader. pub fn from_reader(reader: R) -> Self { Self { reader, @@ -323,7 +323,7 @@ impl Reader { } /// Getters -impl Reader { +impl Reader { /// Consumes `Reader` returning the underlying reader /// /// Can be used to compute line and column of a parsing error position @@ -761,7 +761,7 @@ impl Reader { } /// Private methods -impl Reader { +impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader.