quick_xml/parser/pi.rs
1//! Contains a parser for an XML processing instruction.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches for a `?>` sequence in a stream of byte slices.
///
/// To use the parser, create an instance and [`feed`] data into it.
/// After a successful search the parser returns [`Some`] with the position at
/// which the processing instruction ends, that is the index of the closing `>`
/// of the `?>` sequence within the slice passed to that call. If the sequence
/// was not found, [`None`] is returned. You would typically expect the search
/// to succeed eventually, so keep feeding new data until it does.
///
/// NOTE: after a successful match the parser is not returned to its initial
/// state and should not be used anymore. Create a new parser if you want to
/// perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
/// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<?instruction"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9));
/// //                       ^        ^
/// //                       0        9
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PiParser(
    /// Carries state between calls to `feed`: `true` if the slice passed to
    /// the previous unsuccessful call ended with `?`, so that a `>` at the
    /// very start of the next slice completes a `?>` split across two chunks.
    pub bool,
);
44
45impl Parser for PiParser {
46 /// Determines the end position of a processing instruction in the provided slice.
47 /// Processing instruction ends on the first occurrence of `?>` which cannot be
48 /// escaped.
49 ///
50 /// Returns position after the `?>` or `None` if such sequence was not found.
51 ///
52 /// [Section 2.6]: Parameter entity references MUST NOT be recognized within
53 /// processing instructions, so parser do not search for them.
54 ///
55 /// # Parameters
56 /// - `bytes`: a slice to find the end of a processing instruction.
57 /// Should contain text in ASCII-compatible encoding
58 ///
59 /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi
60 #[inline]
61 fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
62 for i in memchr::memchr_iter(b'>', bytes) {
63 match i {
64 0 if self.0 => return Some(0),
65 // If the previous byte is `?`, then we found `?>`
66 i if i > 0 && bytes[i - 1] == b'?' => return Some(i),
67 _ => {}
68 }
69 }
70 self.0 = bytes.last().copied() == Some(b'?');
71 None
72 }
73
74 #[inline]
75 fn eof_error() -> SyntaxError {
76 SyntaxError::UnclosedPIOrXmlDecl
77 }
78}
79
#[test]
fn pi() {
    use pretty_assertions::assert_eq;

    // Every case is `(chunk, flag before the call, expected outcome)`, where the
    // outcome is `Ok(pos)` with the position returned by `feed`, or
    // `Err(internal_state)` with the parser flag if parsing is not done yet.
    //
    // Trailing comments show which character was seen last before calling `feed`:
    // `x` means any character, the pipe denotes the start of the buffer passed to `feed`.
    let cases: [(&[u8], bool, Result<usize, bool>); 10] = [
        (b"", false, Err(false)), // x|
        (b"", true, Err(false)),  // ?|
        //
        (b"?", false, Err(true)), // x|?
        (b"?", true, Err(true)),  // ?|?
        //
        (b">", false, Err(false)), // x|>
        (b">", true, Ok(0)),       // ?|>
        //
        (b"?>", false, Ok(1)), // x|?>
        (b"?>", true, Ok(1)),  // ?|?>
        //
        (b">?>", false, Ok(2)), // x|>?>
        (b">?>", true, Ok(0)),  // ?|>?>
    ];

    for &(chunk, had_question_mark, expected) in &cases {
        let mut parser = PiParser(had_question_mark);
        let found = parser.feed(chunk);
        // The flag is read only after `feed` has had a chance to update it
        let outcome = found.ok_or(parser.0);
        assert_eq!(
            outcome, expected,
            "chunk = {:?}, had_question_mark = {}",
            chunk, had_question_mark
        );
    }
}
113}