quick_xml/parser/
pi.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! Contains a parser for an XML processing instruction.

use crate::errors::SyntaxError;
use crate::parser::Parser;

/// A parser that searches for the `?>` sequence that terminates a processing
/// instruction. The input may be supplied in several chunks.
///
/// To use the parser, create an instance and [`feed`] data into it.
/// After a successful search the parser returns [`Some`] with the index of the
/// closing `>` of the `?>` sequence inside the chunk passed to that [`feed`]
/// call (that is, the index of the last byte of the processing instruction,
/// not the index after it). If the search was unsuccessful, [`None`] is
/// returned. You would typically expect the search to succeed eventually, so
/// keep feeding new data until it does.
///
/// NOTE: after a successful match the parser is not reset to its initial
/// state and should not be used anymore. Create a new parser if you want to
/// perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
/// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<?instruction"), None);
/// // ...get a new chunk of data
/// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9));
/// //                       ^        ^
/// //                       0        9
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PiParser(
    /// A flag that indicates whether the `bytes` passed to the previous
    /// unsuccessful `feed` call ended with `?`. It lets a `?>` sequence that
    /// is split across two chunks be recognized.
    pub bool,
);

impl Parser for PiParser {
    /// Searches `bytes` for the end of a processing instruction, i.e. for the
    /// first `>` that is immediately preceded by `?`. The `?>` sequence cannot
    /// be escaped, so the first occurrence always terminates the instruction.
    ///
    /// Returns the index of that `>` within `bytes`, or `None` if no `?>` was
    /// found in this chunk. A `>` at index 0 counts as a match only when the
    /// previously fed chunk ended with `?`.
    ///
    /// When `None` is returned, the parser records whether `bytes` ended with
    /// `?` so that a `?>` split across two chunks is still detected by the next
    /// call. When a match is returned, the internal state is left untouched.
    ///
    /// [Section 2.6]: Parameter entity references MUST NOT be recognized within
    /// processing instructions, so the parser does not search for them.
    ///
    /// # Parameters
    /// - `bytes`: a slice in which to look for the end of a processing
    ///   instruction. Should contain text in an ASCII-compatible encoding
    ///
    /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi
    #[inline]
    fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
        let prev_chunk_ended_with_question_mark = self.0;

        let end = memchr::memchr_iter(b'>', bytes).find(|&pos| match pos.checked_sub(1) {
            // `>` preceded by another byte of this chunk: it must be `?`
            Some(before) => bytes[before] == b'?',
            // `>` is the very first byte: the `?` could only come from the previous chunk
            None => prev_chunk_ended_with_question_mark,
        });

        if end.is_none() {
            // Nothing found; remember a trailing `?` for the next chunk
            self.0 = bytes.last() == Some(&b'?');
        }
        end
    }

    /// Returns the [`SyntaxError`] variant describing an unclosed processing
    /// instruction or XML declaration (presumably reported by the caller when
    /// the input ends before `?>` is found).
    #[inline]
    fn eof_error() -> SyntaxError {
        SyntaxError::UnclosedPIOrXmlDecl
    }
}

#[test]
fn pi() {
    use pretty_assertions::assert_eq;

    /// Feeds `bytes` into a parser whose state says whether the previous chunk
    /// ended with `?`.
    ///
    /// Returns `Ok(pos)` with the position reported by the parser when the end
    /// of the processing instruction is found, or `Err(internal_state)` with
    /// the parser's flag after the call if parsing is not done yet.
    fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result<usize, bool> {
        let mut parser = PiParser(had_question_mark);
        let found = parser.feed(bytes);
        found.ok_or(parser.0)
    }

    // The trailing comment on each case shows which character was seen last
    // before calling `feed`. `x` means any character, the pipe denotes the
    // start of the buffer that is passed to `feed`.
    let cases: [(&[u8], bool, Result<usize, bool>); 10] = [
        (b"", false, Err(false)), // x|
        (b"", true, Err(false)),  // ?|
        //
        (b"?", false, Err(true)), // x|?
        (b"?", true, Err(true)),  // ?|?
        //
        (b">", false, Err(false)), // x|>
        (b">", true, Ok(0)),       // ?|>
        //
        (b"?>", false, Ok(1)), // x|?>
        (b"?>", true, Ok(1)),  // ?|?>
        //
        (b">?>", false, Ok(2)), // x|>?>
        (b">?>", true, Ok(0)),  // ?|>?>
    ];

    for (input, had_question_mark, expected) in cases.iter().copied() {
        assert_eq!(
            parse_pi(input, had_question_mark),
            expected,
            "input = {:?}, had_question_mark = {}",
            String::from_utf8_lossy(input),
            had_question_mark
        );
    }
}