// xml/reader.rs
//! Contains the high-level interface for a pull-based XML parser.
//!
//! The most important type in this module is `EventReader`, which provides an iterator
//! view of the events in an XML document.
use std::io::Read;
use std::iter::FusedIterator;
use std::result;
use crate::common::{Position, TextPosition};
pub use self::config::{ParserConfig, ParserConfig2};
pub use self::error::{Error, ErrorKind};
pub use self::events::XmlEvent;
use self::parser::PullParser;
mod config;
mod error;
mod events;
mod indexset;
mod lexer;
mod parser;
/// A result type yielded by [`EventReader`].
///
/// The error type defaults to this module's [`Error`], so `Result<T>` is shorthand for
/// `std::result::Result<T, Error>`; a different error type can still be named explicitly.
pub type Result<T, E = Error> = result::Result<T, E>;
/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
///
/// Events are pulled one at a time with [`EventReader::next`]; the reader can also be
/// converted into an [`Events`] iterator through its `IntoIterator` implementation.
pub struct EventReader<R: Read> {
/// The underlying stream; it is handed to the parser on every call to `next`.
source: R,
/// The pull parser that produces events from `source` and tracks the position.
parser: PullParser,
}
impl<R: Read> EventReader<R> {
/// Creates a new reader, consuming the given stream.
#[inline]
pub fn new(source: R) -> Self {
Self::new_with_config(source, ParserConfig2::new())
}
/// Creates a new reader with the provded configuration, consuming the given stream.
#[inline]
pub fn new_with_config(source: R, config: impl Into<ParserConfig2>) -> Self {
Self { source, parser: PullParser::new(config) }
}
/// Pulls and returns next XML event from the stream.
///
/// If this returns [Err] or [`XmlEvent::EndDocument`] then further calls to
/// this method will return this event again.
#[inline]
pub fn next(&mut self) -> Result<XmlEvent> {
self.parser.next(&mut self.source)
}
/// Skips all XML events until the next end tag at the current level.
///
/// Convenience function that is useful for the case where you have
/// encountered a start tag that is of no interest and want to
/// skip the entire XML subtree until the corresponding end tag.
#[inline]
pub fn skip(&mut self) -> Result<()> {
let mut depth = 1;
while depth > 0 {
match self.next()? {
XmlEvent::StartElement { .. } => depth += 1,
XmlEvent::EndElement { .. } => depth -= 1,
XmlEvent::EndDocument => return Err(Error {
kind: ErrorKind::UnexpectedEof,
pos: self.parser.position(),
}),
_ => {},
}
}
Ok(())
}
/// Access underlying reader
///
/// Using it directly while the event reader is parsing is not recommended
pub fn source(&self) -> &R { &self.source }
/// Access underlying reader
///
/// Using it directly while the event reader is parsing is not recommended
pub fn source_mut(&mut self) -> &mut R { &mut self.source }
/// Unwraps this `EventReader`, returning the underlying reader.
///
/// Note that this operation is destructive; unwrapping the reader and wrapping it
/// again with `EventReader::new()` will create a fresh reader which will attempt
/// to parse an XML document from the beginning.
pub fn into_inner(self) -> R {
self.source
}
/// Returns the DOCTYPE of the document if it has already been seen
///
/// Available only after the root `StartElement` event
#[inline]
pub fn doctype(&self) -> Option<&str> {
self.parser.doctype()
}
}
impl<R: Read> Position for EventReader<R> {
    /// Reports the position of the last event produced by the reader, as tracked
    /// by the underlying parser.
    #[inline]
    fn position(&self) -> TextPosition {
        let parser = &self.parser;
        parser.position()
    }
}
impl<R: Read> IntoIterator for EventReader<R> {
    type Item = Result<XmlEvent>;
    type IntoIter = Events<R>;

    /// Converts the reader into an [`Events`] iterator that has not finished yet.
    fn into_iter(self) -> Self::IntoIter {
        Events {
            finished: false,
            reader: self,
        }
    }
}
/// An iterator over XML events created from some type implementing `Read`.
///
/// When the next event is an `Err` or [`XmlEvent::EndDocument`], it is returned by the
/// iterator once, and then the iterator stops producing events. The exception is a parser
/// that reports it is ignoring the end of stream: in that case the iterator keeps
/// pulling events from the reader.
pub struct Events<R: Read> {
/// The reader that events are pulled from.
reader: EventReader<R>,
/// Set once an error or `EndDocument` has been returned.
finished: bool,
}
impl<R: Read> Events<R> {
    /// Consumes the iterator and hands back the `EventReader` it was driving.
    #[inline]
    pub fn into_inner(self) -> EventReader<R> {
        let Self { reader, .. } = self;
        reader
    }

    /// Borrows the underlying reader.
    ///
    /// Using it while events are still being parsed is not recommended.
    pub fn source(&self) -> &R {
        self.reader.source()
    }

    /// Mutably borrows the underlying reader.
    ///
    /// Using it while events are still being parsed is not recommended.
    pub fn source_mut(&mut self) -> &mut R {
        self.reader.source_mut()
    }
}
// `Events::next` either keeps returning `None` once it has finished, or (when the parser
// is ignoring the end of stream) never returns `None` at all, so it is fused either way.
impl<R: Read> FusedIterator for Events<R> {}
impl<R: Read> Iterator for Events<R> {
    type Item = Result<XmlEvent>;

    /// Pulls the next event from the wrapped reader.
    ///
    /// Returns `None` once an error or `EndDocument` has been handed out, unless the
    /// parser reports that it is ignoring the end of stream, in which case the reader
    /// is polled again.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // The parser is only consulted after the iterator has finished.
        let exhausted = self.finished && !self.reader.parser.is_ignoring_end_of_stream();
        if exhausted {
            return None;
        }

        let event = self.reader.next();
        // Errors and the end of the document are terminal: they are returned once,
        // and the iterator is marked as finished.
        if matches!(event, Ok(XmlEvent::EndDocument) | Err(_)) {
            self.finished = true;
        }
        Some(event)
    }
}
impl<'r> EventReader<&'r [u8]> {
/// A convenience method to create an `XmlReader` from a string slice.
#[inline]
#[must_use]
pub fn from_str(source: &'r str) -> Self {
EventReader::new(source.as_bytes())
}
}