quick_xml/parser/element.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
//! Contains a parser for an XML element.
use crate::errors::SyntaxError;
use crate::parser::Parser;
/// A parser that searches a slice for the `>` symbol that terminates an XML
/// element, ignoring any `>` located inside a quoted region.
///
/// Two kinds of quoted regions are recognized: double-quoted (`"..."`) and
/// single-quoted (`'...'`). A `>` found inside such a region is never reported
/// as a result. A region is opened and closed by the same quote symbol, and
/// that symbol cannot be escaped inside the region (it can be written as an XML
/// character entity or a named entity, but neither encoding contains a literal
/// quote).
///
/// To run a search, create a parser and [`feed`] data into it. When the symbol
/// is found, [`Some`] with its position in the fed chunk is returned; otherwise
/// the result is [`None`]. A match is normally expected to exist, so keep
/// feeding new chunks until one is reported.
///
/// NOTE: once a match has been reported the parser should not be used anymore.
/// Create a new parser to perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElementParser {
    /// The initial state: inside the element, but not inside any attribute value.
    Outside,
    /// Currently inside a single-quoted region (`'...'`).
    SingleQ,
    /// Currently inside a double-quoted region (`"..."`).
    DoubleQ,
}
impl Parser for ElementParser {
    /// Scans `bytes` for the `>` that ends the element.
    ///
    /// Returns `Some(index)` with the index in `bytes` of the first `>` that
    /// lies outside of quoted regions, or `None` if this chunk contains no such
    /// byte. The quoting state is stored in `self`, so a search that returned
    /// `None` can be continued by feeding the next chunk.
    #[inline]
    fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
        // Only `>`, `'` and `"` can influence the result, so visit just those bytes.
        for pos in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
            let byte = bytes[pos];
            match *self {
                Self::Outside => match byte {
                    // `>` terminates the element only when no quote is open
                    b'>' => return Some(pos),
                    b'\'' => *self = Self::SingleQ,
                    b'"' => *self = Self::DoubleQ,
                    _ => {}
                },
                // A quoted region is closed only by the quote symbol that
                // opened it; every other byte leaves the state untouched.
                Self::SingleQ => {
                    if byte == b'\'' {
                        *self = Self::Outside;
                    }
                }
                Self::DoubleQ => {
                    if byte == b'"' {
                        *self = Self::Outside;
                    }
                }
            }
        }
        None
    }

    /// Returns the syntax error associated with this parser,
    /// [`SyntaxError::UnclosedTag`].
    ///
    /// NOTE(review): judging by the name, callers report it when the input ends
    /// before the closing `>` was found -- confirm against the `Parser` trait.
    #[inline]
    fn eof_error() -> SyntaxError {
        SyntaxError::UnclosedTag
    }
}
impl Default for ElementParser {
#[inline]
fn default() -> Self {
Self::Outside
}
}
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Feeds `bytes` to `parser` once.
    ///
    /// Returns `Ok(pos)` with the position in the buffer where the element
    /// ends, or `Err(state)` with the parser state after the chunk if the end
    /// of the element was not found.
    fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
        let found = parser.feed(bytes);
        // `parser` is read only after `feed` has updated it
        found.ok_or(parser)
    }

    // (input chunk, state before feeding, expected outcome)
    let cases: [(&[u8], ElementParser, Result<usize, ElementParser>); 15] = [
        // Empty input never changes the state
        (b"", Outside, Err(Outside)),
        (b"", SingleQ, Err(SingleQ)),
        (b"", DoubleQ, Err(DoubleQ)),
        // A single quote toggles only the single-quoted region
        (b"'", Outside, Err(SingleQ)),
        (b"'", SingleQ, Err(Outside)),
        (b"'", DoubleQ, Err(DoubleQ)),
        // A double quote toggles only the double-quoted region
        (b"\"", Outside, Err(DoubleQ)),
        (b"\"", SingleQ, Err(SingleQ)),
        (b"\"", DoubleQ, Err(Outside)),
        // `>` is a match only outside of quoted regions
        (b">", Outside, Ok(0)),
        (b">", SingleQ, Err(SingleQ)),
        (b">", DoubleQ, Err(DoubleQ)),
        // Quotes followed by `>`
        (b"''>", Outside, Ok(2)),
        (b"''>", SingleQ, Err(SingleQ)),
        (b"''>", DoubleQ, Err(DoubleQ)),
    ];

    for &(input, initial, expected) in &cases {
        assert_eq!(
            parse_element(input, initial),
            expected,
            "input {:?}, initial state {:?}",
            String::from_utf8_lossy(input),
            initial
        );
    }
}