quick_xml/parser/
element.rs

1//! Contains a parser for an XML element.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches a slice for a `>` symbol located outside of quoted
/// regions.
///
/// Two kinds of quoted regions are recognized: double-quoted (`"..."`) and
/// single-quoted (`'...'`). A `>` found inside either of them is not reported
/// as a result. Each region starts and ends with its own quote symbol, which
/// cannot be escaped (it can only be written as an XML character reference or
/// a named entity, and neither of those contains a literal quote).
///
/// To use the parser, create an instance and [`feed`] data into it. When the
/// search succeeds, the parser returns [`Some`] with the position of the found
/// symbol inside the chunk that was just fed. When the chunk does not contain
/// the symbol, [`None`] is returned. Normally a match is expected to exist, so
/// keep feeding new data until one is reported.
///
/// NOTE: a parser that has reported a match should not be used anymore.
/// Create a new parser if you want to perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element  with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementParser {
    /// The initial state: inside the element, but not inside any quoted
    /// attribute value. This is the only state in which `>` is a match.
    Outside,
    /// Inside a single-quoted region (`'...'`); only `'` leaves this state.
    SingleQ,
    /// Inside a double-quoted region (`"..."`); only `"` leaves this state.
    DoubleQ,
}
53
54impl Parser for ElementParser {
55    /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`.
56    #[inline]
57    fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
58        for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
59            *self = match (*self, bytes[i]) {
60                // only allowed to match `>` while we are in state `Outside`
61                (Self::Outside, b'>') => return Some(i),
62                (Self::Outside, b'\'') => Self::SingleQ,
63                (Self::Outside, b'\"') => Self::DoubleQ,
64
65                // the only end_byte that gets us out if the same character
66                (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside,
67
68                // all other bytes: no state change
69                _ => continue,
70            };
71        }
72        None
73    }
74
75    #[inline]
76    fn eof_error() -> SyntaxError {
77        SyntaxError::UnclosedTag
78    }
79}
80
81impl Default for ElementParser {
82    #[inline]
83    fn default() -> Self {
84        Self::Outside
85    }
86}
87
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Feeds `bytes` to `parser` once.
    ///
    /// Returns `Ok(pos)` with the position in the buffer where the element
    /// ends, or `Err(state)` with the parser state after the call if the end
    /// was not found yet.
    fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
        let found = parser.feed(bytes);
        // `parser` is read only after `feed` has updated it
        found.ok_or(parser)
    }

    // The start states tried for every input, in this order.
    let start_states = [Outside, SingleQ, DoubleQ];

    // Each row: an input and the expected outcome for each start state above.
    let cases: &[(&[u8], [Result<usize, ElementParser>; 3])] = &[
        (b"", [Err(Outside), Err(SingleQ), Err(DoubleQ)]),
        (b"'", [Err(SingleQ), Err(Outside), Err(DoubleQ)]),
        (b"\"", [Err(DoubleQ), Err(SingleQ), Err(Outside)]),
        (b">", [Ok(0), Err(SingleQ), Err(DoubleQ)]),
        (b"''>", [Ok(2), Err(SingleQ), Err(DoubleQ)]),
    ];

    for &(input, expected) in cases.iter() {
        for (&start, &want) in start_states.iter().zip(expected.iter()) {
            assert_eq!(
                parse_element(input, start),
                want,
                "input: {:?}, start state: {:?}",
                input,
                start
            );
        }
    }
}