Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename reading methods, fix some encoding errors in error-processing paths and tests #412

Merged
merged 8 commits into from
Jul 9, 2022
16 changes: 15 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
returns `ResolveResult::Unknown` if prefix was not registered in namespace buffer
- [#393]: Fix breaking processing after encounter an attribute with a reserved name (started with "xmlns")
- [#363]: Do not generate empty `Event::Text` events
- [#412]: Fix using incorrect encoding if `read_to_end` family of methods or `read_text`
method not found a corresponding end tag and reader has non-UTF-8 encoding

### Misc Changes

Expand Down Expand Up @@ -96,17 +98,28 @@

- [#403]: Remove deprecated `quick_xml::de::from_bytes` and `Deserializer::from_borrowing_reader`

- [#412]: Rename methods of `Reader`:
|Old Name |New Name
|-------------------------|---------------------------------------------------
|`read_event` |`read_event_into`
|`read_to_end` |`read_to_end_into`
|`read_text` |`read_text_into`
|`read_event_unbuffered` |`read_event`
|`read_to_end_unbuffered` |`read_to_end`
- [#412]: Change `read_to_end*` and `read_text_into` to accept `QName` instead of `AsRef<[u8]>`

### New Tests

- [#9]: Added tests for incorrect nested tags in input
- [#387]: Added a bunch of tests for sequences deserialization
- [#393]: Added more tests for namespace resolver
- [#393]: Added tests for reserved names (started with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events generated for corresponding inputs
- [#363]: Add tests for `Reader::read_event_impl` to ensure that proper events generated for corresponding inputs
- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
[#118]: https://github.com/tafia/quick-xml/issues/118
[#180]: https://github.com/tafia/quick-xml/issues/180
[#191]: https://github.com/tafia/quick-xml/issues/191
[#324]: https://github.com/tafia/quick-xml/issues/324
Expand All @@ -117,6 +130,7 @@
[#395]: https://github.com/tafia/quick-xml/pull/395
[#403]: https://github.com/tafia/quick-xml/pull/403
[#407]: https://github.com/tafia/quick-xml/pull/407
[#412]: https://github.com/tafia/quick-xml/pull/412

## 0.23.0 -- 2022-05-08

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ let mut buf = Vec::new();
loop {
// NOTE: this is the generic case when we don't know about the input BufRead.
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event_unbuffered()`
match reader.read_event(&mut buf) {
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
b"tag1" => println!("attributes values: {:?}",
Expand Down Expand Up @@ -77,7 +77,7 @@ reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {

// crates a new element ... alternatively we could reuse `e` by calling
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
fn parse_document(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
loop {
match r.read_event_unbuffered()? {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.unescaped_value()?);
Expand Down
22 changes: 11 additions & 11 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fn read_event(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand All @@ -57,7 +57,7 @@ fn read_event(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand Down Expand Up @@ -137,7 +137,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
Expand Down Expand Up @@ -175,7 +175,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
Expand Down Expand Up @@ -215,7 +215,7 @@ fn one_event(c: &mut Criterion) {
let mut r = Reader::from_reader(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -235,7 +235,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -255,7 +255,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -275,7 +275,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -298,7 +298,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
Expand All @@ -321,7 +321,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
Expand All @@ -344,7 +344,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
Expand Down
2 changes: 1 addition & 1 deletion compare/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn low_level_comparison(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
Expand Down
4 changes: 2 additions & 2 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut reader = Reader::from_file("tests/documents/document.xml")?;
let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf)? {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => match element.name().as_ref() {
b"w:tbl" => {
count += 1;
Expand All @@ -33,7 +33,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut row_index = 0;
loop {
skip_buf.clear();
match reader.read_event(&mut skip_buf)? {
match reader.read_event_into(&mut skip_buf)? {
Event::Start(element) => match element.name().as_ref() {
b"w:tr" => {
stats.rows.push(vec![]);
Expand Down
5 changes: 3 additions & 2 deletions examples/read_texts.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
fn main() {
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::Reader;

let xml = "<tag1>text1</tag1><tag1>text2</tag1>\
Expand All @@ -12,11 +13,11 @@ fn main() {
let mut buf = Vec::new();

loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
.read_text(b"tag2", &mut Vec::new())
.read_text_into(QName(b"tag2"), &mut Vec::new())
.expect("Cannot decode text value"),
);
println!("{:?}", txt);
Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fuzz_target!(|data: &[u8]| {
let mut reader = Reader::from_reader(cursor);
let mut buf = vec![];
loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))=> {
if e.unescaped().is_err() {
break;
Expand Down
8 changes: 4 additions & 4 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ pub struct IoReader<R: BufRead> {
impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
fn next(&mut self) -> Result<DeEvent<'static>, DeError> {
let event = loop {
let e = self.reader.read_event(&mut self.buf)?;
let e = self.reader.read_event_into(&mut self.buf)?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
Expand All @@ -971,7 +971,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
}

fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end(name, &mut self.buf) {
match self.reader.read_to_end_into(name, &mut self.buf) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
}
Expand All @@ -993,7 +993,7 @@ pub struct SliceReader<'de> {
impl<'de> XmlRead<'de> for SliceReader<'de> {
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
loop {
let e = self.reader.read_event_unbuffered()?;
let e = self.reader.read_event()?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
Expand All @@ -1011,7 +1011,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
}

fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end_unbuffered(name) {
match self.reader.read_to_end(name) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
}
Expand Down
8 changes: 4 additions & 4 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::reader::{is_whitespace, Reader};
use crate::utils::{write_byte_string, write_cow_string, Bytes};
use std::fmt::{self, Debug, Display, Formatter};
use std::iter::FusedIterator;
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range};
use std::{borrow::Cow, collections::HashMap, ops::Range};

/// A struct representing a key/value XML attribute.
///
Expand Down Expand Up @@ -81,7 +81,7 @@ impl<'a> Attribute<'a> {
///
/// [`unescaped_value()`]: #method.unescaped_value
/// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode
pub fn unescape_and_decode_value<B: BufRead>(&self, reader: &Reader<B>) -> XmlResult<String> {
pub fn unescape_and_decode_value<B>(&self, reader: &Reader<B>) -> XmlResult<String> {
self.do_unescape_and_decode_value(reader, None)
}

Expand All @@ -99,7 +99,7 @@ impl<'a> Attribute<'a> {
/// # Pre-condition
///
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
pub fn unescape_and_decode_value_with_custom_entities<B: BufRead>(
pub fn unescape_and_decode_value_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
Expand All @@ -108,7 +108,7 @@ impl<'a> Attribute<'a> {
}

/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
fn do_unescape_and_decode_value<B: BufRead>(
fn do_unescape_and_decode_value<B>(
&self,
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
Expand Down
24 changes: 12 additions & 12 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
//!
//! # Reading
//! When reading a XML stream, the events are emitted by
//! [`Reader::read_event`]. You must listen
//! [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
Expand All @@ -29,10 +29,8 @@
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
//! [`Reader`]: ../reader/struct.Reader.html
//! [`Writer`]: ../writer/struct.Writer.html
//! [`Event`]: enum.Event.html
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event

pub mod attributes;

Expand All @@ -41,7 +39,6 @@ use encoding_rs::Encoding;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::io::BufRead;
use std::ops::Deref;
use std::str::from_utf8;

Expand Down Expand Up @@ -757,7 +754,7 @@ impl<'a> BytesText<'a> {
/// it might be wiser to manually use
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode<B>(&self, reader: &Reader<B>) -> Result<String> {
self.do_unescape_and_decode_with_custom_entities(reader, None)
}

Expand All @@ -771,15 +768,15 @@ impl<'a> BytesText<'a> {
/// # Pre-condition
///
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
pub fn unescape_and_decode_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
) -> Result<String> {
self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
}

fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
fn do_unescape_and_decode_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
Expand Down Expand Up @@ -928,7 +925,7 @@ impl<'a> Deref for BytesCData<'a> {

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Event emitted by [`Reader::read_event`].
/// Event emitted by [`Reader::read_event_into`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Event<'a> {
/// Text that appeared before the first opening tag or an [XML declaration].
Expand Down Expand Up @@ -956,7 +953,7 @@ pub enum Event<'a> {
/// let mut reader = Reader::from_bytes(xml);
/// let mut events_processed = 0;
/// loop {
/// match reader.read_event_unbuffered() {
/// match reader.read_event() {
/// Ok(Event::StartText(e)) => {
/// assert_eq!(events_processed, 0);
/// // Content contains BOM
Expand Down Expand Up @@ -1066,7 +1063,10 @@ mod test {
let mut buf = Vec::new();
let mut parsed_local_names = Vec::new();
loop {
match rdr.read_event(&mut buf).expect("unable to read xml event") {
match rdr
.read_event_into(&mut buf)
.expect("unable to read xml event")
{
Event::Start(ref e) => parsed_local_names.push(
from_utf8(e.local_name().as_ref())
.expect("unable to build str from local_name")
Expand Down
Loading