// xml/reader/parser/inside_processing_instruction.rs

use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;

use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;

use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State};

impl PullParser {
    pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
        match s {
            ProcessingInstructionSubstate::PIInsideName => match t {
                Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) ||
                                 self.buf_has_data() && is_name_char(c) => {
                    if self.buf.len() > self.config.max_name_length {
                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
                    }
                    self.buf.push(c);
                    None
                },

                Token::ProcessingInstructionEnd => {
                    // self.buf contains PI name
                    let name = self.take_buf();

                    // Don't need to check for declaration because it has mandatory attributes
                    // but there is none
                    match &*name {
                        // Name is empty, it is an error
                        "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)),

                        // Found <?xml-like PI not at the beginning of a document,
                        // it is an error - see section 2.6 of XML 1.1 spec
                        n if "xml".eq_ignore_ascii_case(n) =>
                            Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),

                        // All is ok, emitting event
                        _ => {
                            debug_assert!(self.next_event.is_none(), "{:?}", self.next_event);
                            // can't have a PI before `<?xml`
                            let event1 = self.set_encountered(Encountered::Declaration);
                            let event2 = Some(Ok(XmlEvent::ProcessingInstruction {
                                name,
                                data: None
                            }));
                            // emitting two events at once is cumbersome
                            let event1 = if event1.is_some() {
                                self.next_event = event2;
                                event1
                            } else {
                                event2
                            };
                            self.into_state(State::OutsideTag, event1)
                        },
                    }
                },

                Token::Character(c) if is_whitespace_char(c) => {
                    // self.buf contains PI name
                    let name = self.take_buf();

                    match &*name {
                        // We have not ever encountered an element and have not parsed XML declaration
                        "xml" if self.encountered == Encountered::None =>
                            self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),

                        // Found <?xml-like PI after the beginning of a document,
                        // it is an error - see section 2.6 of XML 1.1 spec
                        n if "xml".eq_ignore_ascii_case(n) =>
                            Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),

                        // All is ok, starting parsing PI data
                        _ => {
                            self.data.name = name;
                            // can't have a PI before `<?xml`
                            let next_event = self.set_encountered(Encountered::Declaration);
                            self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event)
                        },
                    }
                },

                _ => {
                    let buf = self.take_buf();
                    Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t)))
                },
            },

            ProcessingInstructionSubstate::PIInsideData => match t {
                Token::ProcessingInstructionEnd => {
                    let name = self.data.take_name();
                    let data = self.take_buf();
                    self.into_state_emit(
                        State::OutsideTag,
                        Ok(XmlEvent::ProcessingInstruction { name, data: Some(data) }),
                    )
                },

                Token::Character(c) if !self.is_valid_xml_char(c) => {
                    Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
                },

                // Any other token should be treated as plain characters
                _ => {
                    if self.buf.len() > self.config.max_data_length {
                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
                    }
                    t.push_to_string(&mut self.buf);
                    None
                },
            },
        }
    }
}