xml/
reader.rs

//! Contains a high-level interface for a pull-based XML parser.
//!
//! The most important type in this module is `EventReader`, which provides an iterator
//! view of the events in an XML document.
5
6use std::io::Read;
7use std::iter::FusedIterator;
8use std::result;
9
10use crate::common::{Position, TextPosition};
11
12pub use self::config::{ParserConfig, ParserConfig2};
13pub use self::error::{Error, ErrorKind};
14pub use self::events::XmlEvent;
15
16use self::parser::PullParser;
17
18mod config;
19mod error;
20mod events;
21mod indexset;
22mod lexer;
23mod parser;
24
/// A result type yielded by `EventReader`.
///
/// The error type parameter defaults to this module's `Error`.
pub type Result<T, E = Error> = result::Result<T, E>;
27
/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
///
/// The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
///
/// Events are pulled one at a time with `next()`. The type also implements
/// `IntoIterator`, which converts it into an `Events` iterator.
pub struct EventReader<R: Read> {
    /// The underlying byte stream the XML document is read from.
    source: R,
    /// Parser state; every `next()` call hands it `source` to pull the next event.
    parser: PullParser,
}
35
36impl<R: Read> EventReader<R> {
37    /// Creates a new reader, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
38    #[inline]
39    pub fn new(source: R) -> Self {
40        Self::new_with_config(source, ParserConfig2::new())
41    }
42
43    /// Creates a new reader with the provded configuration, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
44    #[inline]
45    pub fn new_with_config(source: R, config: impl Into<ParserConfig2>) -> Self {
46        Self { source, parser: PullParser::new(config) }
47    }
48
49    /// Pulls and returns next XML event from the stream.
50    ///
51    /// If this returns [Err] or [`XmlEvent::EndDocument`] then further calls to
52    /// this method will return this event again.
53    #[inline]
54    pub fn next(&mut self) -> Result<XmlEvent> {
55        self.parser.next(&mut self.source)
56    }
57
58    /// Skips all XML events until the next end tag at the current level.
59    ///
60    /// Convenience function that is useful for the case where you have
61    /// encountered a start tag that is of no interest and want to
62    /// skip the entire XML subtree until the corresponding end tag.
63    #[inline]
64    pub fn skip(&mut self) -> Result<()> {
65        let mut depth = 1;
66
67        while depth > 0 {
68            match self.next()? {
69                XmlEvent::StartElement { .. } => depth += 1,
70                XmlEvent::EndElement { .. } => depth -= 1,
71                XmlEvent::EndDocument => return Err(Error {
72                    kind: ErrorKind::UnexpectedEof,
73                    pos: self.parser.position(),
74                }),
75                _ => {},
76            }
77        }
78
79        Ok(())
80    }
81
82    /// Access underlying reader
83    ///
84    /// Using it directly while the event reader is parsing is not recommended
85    pub fn source(&self) -> &R { &self.source }
86
87    /// Access underlying reader
88    ///
89    /// Using it directly while the event reader is parsing is not recommended
90    pub fn source_mut(&mut self) -> &mut R { &mut self.source }
91
92    /// Unwraps this `EventReader`, returning the underlying reader.
93    ///
94    /// Note that this operation is destructive; unwrapping the reader and wrapping it
95    /// again with `EventReader::new()` will create a fresh reader which will attempt
96    /// to parse an XML document from the beginning.
97    pub fn into_inner(self) -> R {
98        self.source
99    }
100
101    /// Returns the DOCTYPE of the document if it has already been seen
102    ///
103    /// Available only after the root `StartElement` event
104    #[inline]
105    pub fn doctype(&self) -> Option<&str> {
106        self.parser.doctype()
107    }
108}
109
110impl<B: Read> Position for EventReader<B> {
111    /// Returns the position of the last event produced by the reader.
112    #[inline]
113    fn position(&self) -> TextPosition {
114        self.parser.position()
115    }
116}
117
118impl<R: Read> IntoIterator for EventReader<R> {
119    type IntoIter = Events<R>;
120    type Item = Result<XmlEvent>;
121
122    fn into_iter(self) -> Events<R> {
123        Events { reader: self, finished: false }
124    }
125}
126
/// An iterator over XML events created from some type implementing `Read`.
///
/// When the reader yields an `Err` or `XmlEvent::EndDocument`, the iterator returns that
/// item once and then stops producing events, unless the parser reports that it is
/// ignoring the end of the stream, in which case the reader keeps being polled.
pub struct Events<R: Read> {
    /// The reader the events are pulled from.
    reader: EventReader<R>,
    /// Set once `reader` has yielded an `Err` or `XmlEvent::EndDocument`.
    finished: bool,
}
135
136impl<R: Read> Events<R> {
137    /// Unwraps the iterator, returning the internal `EventReader`.
138    #[inline]
139    pub fn into_inner(self) -> EventReader<R> {
140        self.reader
141    }
142
143    /// Access the underlying reader
144    ///
145    /// It's not recommended to use it while the events are still being parsed
146    pub fn source(&self) -> &R { &self.reader.source }
147
148    /// Access the underlying reader
149    ///
150    /// It's not recommended to use it while the events are still being parsed
151    pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
152}
153
154impl<R: Read> FusedIterator for Events<R> {
155}
156
157impl<R: Read> Iterator for Events<R> {
158    type Item = Result<XmlEvent>;
159
160    #[inline]
161    fn next(&mut self) -> Option<Result<XmlEvent>> {
162        if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
163            None
164        } else {
165            let ev = self.reader.next();
166            if let Ok(XmlEvent::EndDocument) | Err(_) = ev {
167                self.finished = true;
168            }
169            Some(ev)
170        }
171    }
172}
173
174impl<'r> EventReader<&'r [u8]> {
175    /// A convenience method to create an `XmlReader` from a string slice.
176    #[inline]
177    #[must_use]
178    pub fn from_str(source: &'r str) -> Self {
179        EventReader::new(source.as_bytes())
180    }
181}