xml/
reader.rs

1//! Contains high-level interface for a pull-based XML parser.
2//!
3//! The most important type in this module is `EventReader`, which provides an iterator
4//! view for events in XML document.
5
6use std::io::Read;
7use std::iter::FusedIterator;
8use std::result;
9
10use crate::common::{Position, TextPosition};
11
12pub use self::config::{ParserConfig, ParserConfig2};
13pub use self::error::{Error, ErrorKind};
14pub use self::events::XmlEvent;
15
16use self::parser::PullParser;
17
18mod config;
19mod error;
20mod events;
21mod indexset;
22mod lexer;
23mod parser;
24
/// A result type yielded by [`EventReader`] operations.
///
/// The error parameter defaults to this module's [`Error`], so `Result<T>` is
/// shorthand for `std::result::Result<T, Error>`.
pub type Result<T, E = Error> = result::Result<T, E>;
27
28/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
29pub struct EventReader<R: Read> {
30    source: R,
31    parser: PullParser,
32}
33
34impl<R: Read> EventReader<R> {
35    /// Creates a new reader, consuming the given stream.
36    #[inline]
37    pub fn new(source: R) -> Self {
38        Self::new_with_config(source, ParserConfig2::new())
39    }
40
41    /// Creates a new reader with the provded configuration, consuming the given stream.
42    #[inline]
43    pub fn new_with_config(source: R, config: impl Into<ParserConfig2>) -> Self {
44        Self { source, parser: PullParser::new(config) }
45    }
46
47    /// Pulls and returns next XML event from the stream.
48    ///
49    /// If this returns [Err] or [`XmlEvent::EndDocument`] then further calls to
50    /// this method will return this event again.
51    #[inline]
52    pub fn next(&mut self) -> Result<XmlEvent> {
53        self.parser.next(&mut self.source)
54    }
55
56    /// Skips all XML events until the next end tag at the current level.
57    ///
58    /// Convenience function that is useful for the case where you have
59    /// encountered a start tag that is of no interest and want to
60    /// skip the entire XML subtree until the corresponding end tag.
61    #[inline]
62    pub fn skip(&mut self) -> Result<()> {
63        let mut depth = 1;
64
65        while depth > 0 {
66            match self.next()? {
67                XmlEvent::StartElement { .. } => depth += 1,
68                XmlEvent::EndElement { .. } => depth -= 1,
69                XmlEvent::EndDocument => return Err(Error {
70                    kind: ErrorKind::UnexpectedEof,
71                    pos: self.parser.position(),
72                }),
73                _ => {},
74            }
75        }
76
77        Ok(())
78    }
79
80    /// Access underlying reader
81    ///
82    /// Using it directly while the event reader is parsing is not recommended
83    pub fn source(&self) -> &R { &self.source }
84
85    /// Access underlying reader
86    ///
87    /// Using it directly while the event reader is parsing is not recommended
88    pub fn source_mut(&mut self) -> &mut R { &mut self.source }
89
90    /// Unwraps this `EventReader`, returning the underlying reader.
91    ///
92    /// Note that this operation is destructive; unwrapping the reader and wrapping it
93    /// again with `EventReader::new()` will create a fresh reader which will attempt
94    /// to parse an XML document from the beginning.
95    pub fn into_inner(self) -> R {
96        self.source
97    }
98
99    /// Returns the DOCTYPE of the document if it has already been seen
100    ///
101    /// Available only after the root `StartElement` event
102    #[inline]
103    pub fn doctype(&self) -> Option<&str> {
104        self.parser.doctype()
105    }
106}
107
108impl<B: Read> Position for EventReader<B> {
109    /// Returns the position of the last event produced by the reader.
110    #[inline]
111    fn position(&self) -> TextPosition {
112        self.parser.position()
113    }
114}
115
116impl<R: Read> IntoIterator for EventReader<R> {
117    type IntoIter = Events<R>;
118    type Item = Result<XmlEvent>;
119
120    fn into_iter(self) -> Events<R> {
121        Events { reader: self, finished: false }
122    }
123}
124
125/// An iterator over XML events created from some type implementing `Read`.
126///
127/// When the next event is `xml::event::Error` or `xml::event::EndDocument`, then
128/// it will be returned by the iterator once, and then it will stop producing events.
129pub struct Events<R: Read> {
130    reader: EventReader<R>,
131    finished: bool,
132}
133
134impl<R: Read> Events<R> {
135    /// Unwraps the iterator, returning the internal `EventReader`.
136    #[inline]
137    pub fn into_inner(self) -> EventReader<R> {
138        self.reader
139    }
140
141    /// Access the underlying reader
142    ///
143    /// It's not recommended to use it while the events are still being parsed
144    pub fn source(&self) -> &R { &self.reader.source }
145
146    /// Access the underlying reader
147    ///
148    /// It's not recommended to use it while the events are still being parsed
149    pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
150}
151
152impl<R: Read> FusedIterator for Events<R> {
153}
154
155impl<R: Read> Iterator for Events<R> {
156    type Item = Result<XmlEvent>;
157
158    #[inline]
159    fn next(&mut self) -> Option<Result<XmlEvent>> {
160        if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
161            None
162        } else {
163            let ev = self.reader.next();
164            if let Ok(XmlEvent::EndDocument) | Err(_) = ev {
165                self.finished = true;
166            }
167            Some(ev)
168        }
169    }
170}
171
172impl<'r> EventReader<&'r [u8]> {
173    /// A convenience method to create an `XmlReader` from a string slice.
174    #[inline]
175    #[must_use]
176    pub fn from_str(source: &'r str) -> Self {
177        EventReader::new(source.as_bytes())
178    }
179}