pub struct Reader<R: BufRead> { /* private fields */ }
Expand description
A low level encoding-agnostic XML event reader.
Consumes a BufRead and streams XML Events.
Examples
use quick_xml::Reader;
use quick_xml::events::Event;
let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut count = 0;
let mut txt = Vec::new();
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
b"tag1" => println!("attributes values: {:?}",
e.attributes().map(|a| a.unwrap().value)
.collect::<Vec<_>>()),
b"tag2" => count += 1,
_ => (),
}
},
Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
Implementations
sourceimpl<R: BufRead> Reader<R>
impl<R: BufRead> Reader<R>
sourcepub fn from_reader(reader: R) -> Reader<R>
pub fn from_reader(reader: R) -> Reader<R>
Creates a Reader
that reads from a reader implementing BufRead
.
sourcepub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader<R>
pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader<R>
Changes whether empty elements should be split into a Start and an End event.
When set to true
, all Empty
events produced by a self-closing tag like <tag/>
are
expanded into a Start
event followed by an End
event. When set to false
(the
default), those tags are represented by an Empty
event instead.
Note that setting this to true will lead to additional allocations that are
needed to store the tag name for an End event. There is no additional
allocation, however, if Self::check_end_names() is also set.
(false
by default)
sourcepub fn trim_text(&mut self, val: bool) -> &mut Reader<R>
pub fn trim_text(&mut self, val: bool) -> &mut Reader<R>
Changes whether whitespace before and after character data should be removed.
When set to true
, all Text
events are trimmed. If they are empty, no event will be
pushed.
(false
by default)
sourcepub fn trim_text_end(&mut self, val: bool) -> &mut Reader<R>
pub fn trim_text_end(&mut self, val: bool) -> &mut Reader<R>
Changes whether whitespace after character data should be removed.
When set to true
, trailing whitespace is trimmed in Text
events.
(false
by default)
pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader<R>
Changes whether trailing whitespace after the markup name is trimmed in closing tags such as </a >.
If true the emitted End
event is stripped of trailing whitespace after the markup name.
Note that if set to false and check_end_names is true, the comparison of markup names is
going to fail erroneously if a closing tag contains trailing whitespace.
(true
by default)
sourcepub fn check_end_names(&mut self, val: bool) -> &mut Reader<R>
pub fn check_end_names(&mut self, val: bool) -> &mut Reader<R>
Changes whether mismatched closing tag names should be detected.
When set to false
, it won’t check if a closing tag matches the corresponding opening tag.
For example, <mytag></different_tag>
will be permitted.
If the XML is known to be sane (already processed, etc.) this saves extra time.
Note that the emitted End
event will not be modified if this is disabled, ie. it will
contain the data of the mismatched end tag.
Note that setting this to true will lead to additional allocations that are
needed to store the tag name for an End event. There is no additional
allocation, however, if Self::expand_empty_elements() is also set.
(true
by default)
sourcepub fn check_comments(&mut self, val: bool) -> &mut Reader<R>
pub fn check_comments(&mut self, val: bool) -> &mut Reader<R>
Changes whether comments should be validated.
When set to true
, every Comment
event will be checked for not containing --
, which
is not allowed in XML comments. Most of the time we don’t want comments at all so we don’t
really care about comment correctness, thus the default value is false
to improve
performance.
(false
by default)
sourcepub fn buffer_position(&self) -> usize
pub fn buffer_position(&self) -> usize
Gets the current byte position in the input data.
Useful when debugging errors.
sourcepub fn read_event<'a, 'b>(
&'a mut self,
buf: &'b mut Vec<u8>
) -> Result<Event<'b>>
pub fn read_event<'a, 'b>(
&'a mut self,
buf: &'b mut Vec<u8>
) -> Result<Event<'b>>
Reads the next Event.
This is the main entry point for reading XML Events.
Events borrow buf and can be converted to own their data if needed (uses Cow internally).
Having the possibility to control the internal buffers gives you some additional benefits such as:
- Reduce the number of allocations by reusing the same buffer. For constrained systems,
  you can call buf.clear() once you are done with processing the event (typically at the end of your loop).
- Reserve the buffer length if you know the file size (using Vec::with_capacity).
Examples
use quick_xml::Reader;
use quick_xml::events::Event;
let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut count = 0;
let mut buf = Vec::new();
let mut txt = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => count += 1,
Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
println!("Found {} start events", count);
println!("Text events: {:?}", txt);
sourcepub fn event_namespace<'a, 'b, 'c>(
&'a self,
qname: &'b [u8],
namespace_buffer: &'c [u8]
) -> (Option<&'c [u8]>, &'b [u8])
pub fn event_namespace<'a, 'b, 'c>(
&'a self,
qname: &'b [u8],
namespace_buffer: &'c [u8]
) -> (Option<&'c [u8]>, &'b [u8])
Resolves a potentially qualified event name into (namespace name, local name).
Qualified element names have the form prefix:local-name where the prefix is defined
on any containing XML element via xmlns:prefix="the:namespace:uri". The namespace prefix
can be defined on the element in question itself.
An unqualified element name inherits the current default namespace.
sourcepub fn attribute_namespace<'a, 'b, 'c>(
&'a self,
qname: &'b [u8],
namespace_buffer: &'c [u8]
) -> (Option<&'c [u8]>, &'b [u8])
pub fn attribute_namespace<'a, 'b, 'c>(
&'a self,
qname: &'b [u8],
namespace_buffer: &'c [u8]
) -> (Option<&'c [u8]>, &'b [u8])
Resolves a potentially qualified attribute name into (namespace name, local name).
Qualified attribute names have the form prefix:local-name where the prefix is defined
on any containing XML element via xmlns:prefix="the:namespace:uri". The namespace prefix
can be defined on the same element as the attribute in question.
Unqualified attribute names do not inherit the current default namespace.
sourcepub fn read_namespaced_event<'a, 'b, 'c>(
&'a mut self,
buf: &'b mut Vec<u8>,
namespace_buffer: &'c mut Vec<u8>
) -> Result<(Option<&'c [u8]>, Event<'b>)>
pub fn read_namespaced_event<'a, 'b, 'c>(
&'a mut self,
buf: &'b mut Vec<u8>,
namespace_buffer: &'c mut Vec<u8>
) -> Result<(Option<&'c [u8]>, Event<'b>)>
Reads the next event and resolves its namespace (if applicable).
Examples
use std::str::from_utf8;
use quick_xml::Reader;
use quick_xml::events::Event;
let xml = r#"<x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
<y:tag2><!--Test comment-->Test</y:tag2>
<y:tag2>Test 2</y:tag2>
</x:tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut count = 0;
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
let mut txt = Vec::new();
loop {
match reader.read_namespaced_event(&mut buf, &mut ns_buf) {
Ok((ref ns, Event::Start(ref e))) => {
count += 1;
match (*ns, e.local_name()) {
(Some(b"www.xxxx"), b"tag1") => (),
(Some(b"www.yyyy"), b"tag2") => (),
(ns, n) => panic!("Namespace and local name mismatch"),
}
println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok()));
}
Ok((_, Event::Text(e))) => {
txt.push(e.unescape_and_decode(&reader).expect("Error!"))
},
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
println!("Found {} start events", count);
println!("Text events: {:?}", txt);
sourcepub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>
pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>
Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration.
Decode bytes
without BOM and with malformed sequences replaced with the
U+FFFD REPLACEMENT CHARACTER
.
Note
If you instead want to use the encoding declared in the XML, use the encoding feature.
sourcepub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>
Decodes a UTF8 slice regardless of XML declaration.
Decode bytes
with BOM sniffing and with malformed sequences replaced with the
U+FFFD REPLACEMENT CHARACTER
.
Note
If you instead want to use the encoding declared in the XML, use the encoding feature.
sourcepub fn read_to_end<K: AsRef<[u8]>>(
&mut self,
end: K,
buf: &mut Vec<u8>
) -> Result<()>
pub fn read_to_end<K: AsRef<[u8]>>(
&mut self,
end: K,
buf: &mut Vec<u8>
) -> Result<()>
Reads until the end element is found.
Manages nested cases where parent and child elements have the same name.
sourcepub fn read_text<K: AsRef<[u8]>>(
&mut self,
end: K,
buf: &mut Vec<u8>
) -> Result<String>
pub fn read_text<K: AsRef<[u8]>>(
&mut self,
end: K,
buf: &mut Vec<u8>
) -> Result<String>
Reads optional text between start and end tags.
If the next event is a Text
event, returns the decoded and unescaped content as a
String
. If the next event is an End
event, returns the empty string. In all other
cases, returns an error.
Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 if none is specified).
Examples
use quick_xml::Reader;
use quick_xml::events::Event;
let mut xml = Reader::from_reader(b"
<a><b></a>
<a></a>
" as &[u8]);
xml.trim_text(true);
let expected = ["<b>", ""];
for &content in expected.iter() {
match xml.read_event(&mut Vec::new()) {
Ok(Event::Start(ref e)) => {
assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).unwrap(), content);
},
e => panic!("Expecting Start event, found {:?}", e),
}
}
sourcepub fn into_inner(self) -> R
pub fn into_inner(self) -> R
Consumes the Reader, returning the underlying reader.
Can be used to compute the line and column of a parsing error position.
Examples
use std::{str, io::Cursor};
use quick_xml::Reader;
use quick_xml::events::Event;
let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag3>Test 2</tag3>
</tag1>"#;
let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
let mut buf = Vec::new();
fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
let end_pos = reader.buffer_position();
let mut cursor = reader.into_inner();
let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
.expect("can't make a string");
let mut line = 1;
let mut column = 0;
for c in s.chars() {
if c == '\n' {
line += 1;
column = 0;
} else {
column += 1;
}
}
(line, column)
}
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => match e.name() {
b"tag1" | b"tag2" => (),
tag => {
assert_eq!(b"tag3", tag);
assert_eq!((3, 22), into_line_and_column(reader));
break;
}
},
Ok(Event::Eof) => unreachable!(),
_ => (),
}
buf.clear();
}
Trait Implementations
Auto Trait Implementations
impl<R> RefUnwindSafe for Reader<R> where
R: RefUnwindSafe,
impl<R> Send for Reader<R> where
R: Send,
impl<R> Sync for Reader<R> where
R: Sync,
impl<R> Unpin for Reader<R> where
R: Unpin,
impl<R> UnwindSafe for Reader<R> where
R: UnwindSafe,
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more