quick_xml/parser/element.rs
//! Contains a parser for an XML element.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches for a `>` symbol in the slice outside of quoted regions.
///
/// The parser considers two quoted regions: a double-quoted (`"..."`) and
/// a single-quoted (`'...'`) region. Matches found inside those regions are not
/// considered as results. Each region starts and ends with its quote symbol,
/// which cannot be escaped (but can be encoded as an XML character entity or
/// named entity. Anyway, that encoding does not contain literal quotes).
///
/// To use the parser, create an instance of it and [`feed`] data into it.
/// After a successful search the parser will return [`Some`] with the position
/// of the found symbol. If the search is unsuccessful, [`None`] will be
/// returned. You typically would expect a positive result of the search, so you
/// should feed new data until you get it.
///
/// NOTE: a parser should not be reused after a successful match. Create a new
/// parser if you want to perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element  with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElementParser {
    /// The initial state (inside element, but outside of attribute value).
    Outside,
    /// Inside a single-quoted region (`'...'`).
    SingleQ,
    /// Inside a double-quoted region (`"..."`).
    DoubleQ,
}
53
54impl Parser for ElementParser {
55 /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`.
56 #[inline]
57 fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
58 for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
59 *self = match (*self, bytes[i]) {
60 // only allowed to match `>` while we are in state `Outside`
61 (Self::Outside, b'>') => return Some(i),
62 (Self::Outside, b'\'') => Self::SingleQ,
63 (Self::Outside, b'\"') => Self::DoubleQ,
64
65 // the only end_byte that gets us out if the same character
66 (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside,
67
68 // all other bytes: no state change
69 _ => continue,
70 };
71 }
72 None
73 }
74
75 #[inline]
76 fn eof_error() -> SyntaxError {
77 SyntaxError::UnclosedTag
78 }
79}
80
81impl Default for ElementParser {
82 #[inline]
83 fn default() -> Self {
84 Self::Outside
85 }
86}
87
/// Checks `ElementParser::feed` from every starting state against empty
/// input, single quotes, a lone `>`, and a `>` following a pair of quotes.
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Returns `Ok(pos)` with the position in the buffer where the element ends.
    ///
    /// Returns `Err(internal_state)` if parsing is not done yet.
    fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
        match parser.feed(bytes) {
            Some(i) => Ok(i),
            None => Err(parser),
        }
    }

    // Empty input never changes the state
    assert_eq!(parse_element(b"", Outside), Err(Outside));
    assert_eq!(parse_element(b"", SingleQ), Err(SingleQ));
    assert_eq!(parse_element(b"", DoubleQ), Err(DoubleQ));

    // A single quote toggles only the single-quoted region
    assert_eq!(parse_element(b"'", Outside), Err(SingleQ));
    assert_eq!(parse_element(b"'", SingleQ), Err(Outside));
    assert_eq!(parse_element(b"'", DoubleQ), Err(DoubleQ));

    // A double quote toggles only the double-quoted region
    assert_eq!(parse_element(b"\"", Outside), Err(DoubleQ));
    assert_eq!(parse_element(b"\"", SingleQ), Err(SingleQ));
    assert_eq!(parse_element(b"\"", DoubleQ), Err(Outside));

    // `>` is matched only outside of quoted regions
    assert_eq!(parse_element(b">", Outside), Ok(0));
    assert_eq!(parse_element(b">", SingleQ), Err(SingleQ));
    assert_eq!(parse_element(b">", DoubleQ), Err(DoubleQ));

    // `>` after a pair of single quotes
    assert_eq!(parse_element(b"''>", Outside), Ok(2));
    assert_eq!(parse_element(b"''>", SingleQ), Err(SingleQ));
    assert_eq!(parse_element(b"''>", DoubleQ), Err(DoubleQ));

    // `>` after a pair of double quotes (mirror of the previous group)
    assert_eq!(parse_element(b"\"\">", Outside), Ok(2));
    assert_eq!(parse_element(b"\"\">", SingleQ), Err(SingleQ));
    assert_eq!(parse_element(b"\"\">", DoubleQ), Err(DoubleQ));
}
122}