// xml/reader/parser.rs

1//! Contains an implementation of pull-based XML parser.
2
3use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
4use crate::common::{Position, TextPosition, XmlVersion};
5use crate::name::OwnedName;
6use crate::namespace::NamespaceStack;
7use crate::reader::config::ParserConfig2;
8use crate::reader::error::SyntaxError;
9use crate::reader::events::XmlEvent;
10use crate::reader::indexset::AttributesSet;
11use crate::reader::lexer::{Lexer, Token};
12use super::{Error, ErrorKind};
13
14use std::collections::HashMap;
15use std::io::Read;
16
/// Generates "take" accessors on `MarkupData`.
///
/// Every `field -> method, Type, default` entry (entries are separated by `;`)
/// becomes a method `method(&mut self) -> Type` that moves the current value of
/// `field` out of the struct and leaves `default` in its place.
macro_rules! gen_takes {
    ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => {
        impl MarkupData {
            $(
                #[inline]
                #[allow(clippy::mem_replace_option_with_none)]
                #[allow(clippy::mem_replace_with_default)]
                fn $method(&mut self) -> $t {
                    std::mem::replace(&mut self.$field, $def)
                }
            )+
        }
    };
}
31
32gen_takes!(
33    name         -> take_name, String, String::new();
34    ref_data     -> take_ref_data, String, String::new();
35
36    encoding     -> take_encoding, Option<String>, None;
37
38    element_name -> take_element_name, Option<OwnedName>, None;
39
40    attr_name    -> take_attr_name, Option<OwnedName>, None;
41    attributes   -> take_attributes, AttributesSet, AttributesSet::new()
42);
43
44mod inside_cdata;
45mod inside_closing_tag_name;
46mod inside_comment;
47mod inside_declaration;
48mod inside_doctype;
49mod inside_opening_tag;
50mod inside_processing_instruction;
51mod inside_reference;
52mod outside_tag;
53
54static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
55static DEFAULT_STANDALONE: Option<bool> = None;
56
57type ElementStack = Vec<OwnedName>;
58pub type Result = super::Result<XmlEvent>;
59
60/// Pull-based XML parser.
61pub(crate) struct PullParser {
62    config: ParserConfig2,
63    lexer: Lexer,
64    st: State,
65    state_after_reference: State,
66    buf: String,
67
68    /// From DTD internal subset
69    entities: HashMap<String, String>,
70
71    nst: NamespaceStack,
72
73    data: MarkupData,
74    final_result: Option<Result>,
75    next_event: Option<Result>,
76    est: ElementStack,
77    pos: Vec<TextPosition>,
78
79    encountered: Encountered,
80    inside_whitespace: bool,
81    read_prefix_separator: bool,
82    pop_namespace: bool,
83}
84
/// Keeps track of when the XML declaration can happen.
///
/// The variants are ordered by document position; the parser relies on the derived
/// ordering (`<`, `<=`) to tell how far into the document it already is.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
    /// Nothing has been seen yet.
    None = 0,
    /// Some characters were seen; whitespace before `<?xml` is not allowed.
    AnyChars,
    /// The XML declaration.
    Declaration,
    /// A comment.
    Comment,
    /// The doctype.
    Doctype,
    /// An element.
    Element,
}
95
96impl PullParser {
97    /// Returns a new parser using the given config.
98    #[inline]
99    pub fn new(config: impl Into<ParserConfig2>) -> Self {
100        let config = config.into();
101        Self::new_with_config2(config)
102    }
103
104    #[inline]
105    fn new_with_config2(config: ParserConfig2) -> Self {
106        let mut lexer = Lexer::new(&config);
107        if let Some(enc) = config.override_encoding {
108            lexer.set_encoding(enc);
109        }
110
111        let mut pos = Vec::with_capacity(16);
112        pos.push(TextPosition::new());
113
114        Self {
115            config,
116            lexer,
117            st: State::DocumentStart,
118            state_after_reference: State::OutsideTag,
119            buf: String::new(),
120            entities: HashMap::new(),
121            nst: NamespaceStack::default(),
122
123            data: MarkupData {
124                name: String::new(),
125                doctype: None,
126                version: None,
127                encoding: None,
128                standalone: None,
129                ref_data: String::new(),
130                element_name: None,
131                quote: None,
132                attr_name: None,
133                attributes: AttributesSet::new(),
134            },
135            final_result: None,
136            next_event: None,
137            est: Vec::new(),
138            pos,
139
140            encountered: Encountered::None,
141            inside_whitespace: true,
142            read_prefix_separator: false,
143            pop_namespace: false,
144        }
145    }
146
147    /// Checks if this parser ignores the end of stream errors.
148    pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream }
149
150    /// Retrieves the Doctype from the document if any
151    #[inline]
152    pub fn doctype(&self) -> Option<&str> {
153        self.data.doctype.as_deref()
154    }
155
156    #[inline(never)]
157    fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
158        if new_encounter <= self.encountered {
159            return None;
160        }
161        let prev_enc = self.encountered;
162        self.encountered = new_encounter;
163
164        // If declaration was not parsed and we have encountered an element,
165        // emit this declaration as the next event.
166        if prev_enc == Encountered::None {
167            self.push_pos();
168            Some(Ok(XmlEvent::StartDocument {
169                version: DEFAULT_VERSION,
170                encoding: self.lexer.encoding().to_string(),
171                standalone: DEFAULT_STANDALONE,
172            }))
173        } else {
174            None
175        }
176    }
177}
178
179impl Position for PullParser {
180    /// Returns the position of the last event produced by the parser
181    #[inline]
182    fn position(&self) -> TextPosition {
183        self.pos.first().copied().unwrap_or_else(TextPosition::new)
184    }
185}
186
187#[derive(Copy, Clone, PartialEq)]
188pub enum State {
189    OutsideTag,
190    InsideOpeningTag(OpeningTagSubstate),
191    InsideClosingTag(ClosingTagSubstate),
192    InsideProcessingInstruction(ProcessingInstructionSubstate),
193    InsideComment,
194    InsideCData,
195    InsideDeclaration(DeclarationSubstate),
196    InsideDoctype(DoctypeSubstate),
197    InsideReference,
198    DocumentStart,
199}
200
/// Substates of [`State::InsideDoctype`].
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, matching the other
/// field-less enums of this module and making the state printable in diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DoctypeSubstate {
    Outside,
    String,
    InsideName,
    BeforeEntityName,
    EntityName,
    BeforeEntityValue,
    EntityValue,
    NumericReferenceStart,
    NumericReference,
    /// expansion
    PEReferenceInValue,
    PEReferenceInDtd,
    /// name definition
    PEReferenceDefinitionStart,
    PEReferenceDefinition,
    SkipDeclaration,
    Comment,
}
221
/// Substates of [`State::InsideOpeningTag`].
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, matching the other
/// field-less enums of this module and making the state printable in diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum OpeningTagSubstate {
    InsideName,

    InsideTag,

    InsideAttributeName,
    AfterAttributeName,

    InsideAttributeValue,
    AfterAttributeValue,
}
234
/// Substates of [`State::InsideClosingTag`].
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, matching the other
/// field-less enums of this module and making the state printable in diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
240
/// Substates of [`State::InsideProcessingInstruction`].
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, matching the other
/// field-less enums of this module and making the state printable in diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
246
/// Substates of [`State::InsideDeclaration`], grouped by the pseudo-attribute
/// (`version`, `encoding`, `standalone`) they refer to.
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, matching the other
/// field-less enums of this module and making the state printable in diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DeclarationSubstate {
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    BeforeEncoding,
    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,
    AfterEncodingValue,

    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
270
/// Kind of qualified name being read by `read_qualified_name`; it decides which
/// tokens are accepted as the end of the name.
///
/// Derives `Eq` and `Debug` in addition to `PartialEq`, consistent with `QuoteToken`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum QualifiedNameTarget {
    /// Attribute name; may be ended by `=`.
    AttributeNameTarget,
    /// Name in an opening tag; may be ended by `/>` or `>`.
    OpeningTagNameTarget,
    /// Name in a closing tag; may be ended by `>`.
    ClosingTagNameTarget,
}
277
/// The kind of quote that opened the attribute value currently being read.
#[derive(Clone, Copy, PartialEq, Eq)]
enum QuoteToken {
    /// `'`
    SingleQuoteToken,
    /// `"`
    DoubleQuoteToken,
}
283
284impl QuoteToken {
285    #[inline]
286    fn from_token(t: Token) -> Option<Self> {
287        match t {
288            Token::SingleQuote => Some(Self::SingleQuoteToken),
289            Token::DoubleQuote => Some(Self::DoubleQuoteToken),
290            _ => {
291                debug_assert!(false);
292                None
293            },
294        }
295    }
296
297    const fn as_token(self) -> Token {
298        match self {
299            Self::SingleQuoteToken => Token::SingleQuote,
300            Self::DoubleQuoteToken => Token::DoubleQuote,
301        }
302    }
303}
304
305struct MarkupData {
306    name: String,     // used for processing instruction name
307    ref_data: String,  // used for reference content
308
309    doctype: Option<String>, // keeps a copy of the original doctype
310    version: Option<XmlVersion>,  // used for XML declaration version
311    encoding: Option<String>,  // used for XML declaration encoding
312    standalone: Option<bool>,  // used for XML declaration standalone parameter
313
314    element_name: Option<OwnedName>,  // used for element name
315
316    quote: Option<QuoteToken>,  // used to hold opening quote for attribute value
317    attr_name: Option<OwnedName>,  // used to hold attribute name
318    attributes: AttributesSet,   // used to hold all accumulated attributes
319}
320
321impl PullParser {
322    /// Returns next event read from the given buffer.
323    ///
324    /// This method should be always called with the same buffer. If you call it
325    /// providing different buffers each time, the result will be undefined.
326    pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
327        if let Some(ref ev) = self.final_result {
328            return ev.clone();
329        }
330
331        if let Some(ev) = self.next_event.take() {
332            return ev;
333        }
334
335        if self.pop_namespace {
336            self.pop_namespace = false;
337            self.nst.pop();
338        }
339
340        loop {
341            debug_assert!(self.next_event.is_none());
342            debug_assert!(!self.pop_namespace);
343
344            // While lexer gives us Ok(maybe_token) -- we loop.
345            // Upon having a complete XML-event -- we return from the whole function.
346            match self.lexer.next_token(r) {
347                Ok(Token::Eof) => {
348                    // Forward pos to the lexer head
349                    self.next_pos();
350                    return self.handle_eof();
351                },
352                Ok(token) => match self.dispatch_token(token) {
353                    None => continue,
354                    Some(Ok(xml_event)) => {
355                        self.next_pos();
356                        return Ok(xml_event);
357                    },
358                    Some(Err(xml_error)) => {
359                        self.next_pos();
360                        return self.set_final_result(Err(xml_error));
361                    },
362                },
363                Err(lexer_error) => {
364                    self.next_pos();
365                    return self.set_final_result(Err(lexer_error));
366                },
367            }
368        }
369    }
370
371    /// Handle end of stream
372    #[cold]
373    fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
374        let ev = if self.depth() == 0 {
375            if self.encountered == Encountered::Element && self.st == State::OutsideTag {  // all is ok
376                Ok(XmlEvent::EndDocument)
377            } else if self.encountered < Encountered::Element {
378                self.error(SyntaxError::NoRootElement)
379            } else {  // self.st != State::OutsideTag
380                self.error(SyntaxError::UnexpectedEof)  // TODO: add expected hint?
381            }
382        } else if self.config.c.ignore_end_of_stream {
383            self.final_result = None;
384            self.lexer.reset_eof_handled();
385            return self.error(SyntaxError::UnbalancedRootElement);
386        } else {
387            self.error(SyntaxError::UnbalancedRootElement)
388        };
389        self.set_final_result(ev)
390    }
391
392    // This function is to be called when a terminal event is reached.
393    // The function sets up the `self.final_result` into `Some(result)` and return `result`.
394    #[inline]
395    fn set_final_result(&mut self, result: Result) -> Result {
396        self.final_result = Some(result.clone());
397        result
398    }
399
400    #[cold]
401    fn error(&self, e: SyntaxError) -> Result {
402        Err(Error {
403            pos: self.lexer.position(),
404            kind: ErrorKind::Syntax(e.to_cow()),
405        })
406    }
407
408    #[inline]
409    fn next_pos(&mut self) {
410        // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
411        // at very least because parse errors and EOF can happen unexpectedly without a prior push.
412        if !self.pos.is_empty() {
413            if self.pos.len() > 1 {
414                self.pos.remove(0);
415            } else {
416                self.pos[0] = self.lexer.position();
417            }
418        }
419    }
420
421    #[inline]
422    #[track_caller]
423    fn push_pos(&mut self) {
424        debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
425            This case is ignored in release mode, and merely causes document positions to be out of sync.
426            Please file a bug and include the XML document that triggers this assert.");
427
428        // it has capacity preallocated for more than it ever needs, so this reduces code size
429        if self.pos.len() != self.pos.capacity() {
430            self.pos.push(self.lexer.position());
431        } else if self.pos.len() > 1 {
432            self.pos.remove(0); // this mitigates the excessive push_pos() call
433        }
434    }
435
436    #[inline(never)]
437    fn dispatch_token(&mut self, t: Token) -> Option<Result> {
438        match self.st {
439            State::OutsideTag                     => self.outside_tag(t),
440            State::InsideOpeningTag(s)            => self.inside_opening_tag(t, s),
441            State::InsideClosingTag(s)            => self.inside_closing_tag_name(t, s),
442            State::InsideReference                => self.inside_reference(t),
443            State::InsideComment                  => self.inside_comment(t),
444            State::InsideCData                    => self.inside_cdata(t),
445            State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
446            State::InsideDoctype(s)               => self.inside_doctype(t, s),
447            State::InsideDeclaration(s)           => self.inside_declaration(t, s),
448            State::DocumentStart                  => self.document_start(t),
449        }
450    }
451
452    #[inline]
453    fn depth(&self) -> usize {
454        self.est.len()
455    }
456
457    #[inline]
458    fn buf_has_data(&self) -> bool {
459        !self.buf.is_empty()
460    }
461
462    #[inline]
463    fn take_buf(&mut self) -> String {
464        std::mem::take(&mut self.buf)
465    }
466
467    #[inline]
468    fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
469        self.st = st;
470        ev
471    }
472
473    #[inline]
474    fn into_state_continue(&mut self, st: State) -> Option<Result> {
475        self.into_state(st, None)
476    }
477
478    #[inline]
479    fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
480        self.into_state(st, Some(ev))
481    }
482
483    /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
484    /// an error is returned.
485    ///
486    /// # Parameters
487    /// * `t`       --- next token;
488    /// * `on_name` --- a callback which is executed when whitespace is encountered.
489    fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
490      where F: Fn(&mut Self, Token, OwnedName) -> Option<Result> {
491        // We can get here for the first time only when self.data.name contains zero or one character,
492        // but first character cannot be a colon anyway
493        if self.buf.len() <= 1 {
494            self.read_prefix_separator = false;
495        }
496
497        let invoke_callback = move |this: &mut Self, t| {
498            let name = this.take_buf();
499            match name.parse() {
500                Ok(name) => on_name(this, t, name),
501                Err(()) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
502            }
503        };
504
505        match t {
506            // There can be only one colon, and not as the first character
507            Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
508                self.buf.push(':');
509                self.read_prefix_separator = true;
510                None
511            },
512
513            Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
514                                          self.buf_has_data() && is_name_char(c)) => {
515                if self.buf.len() > self.config.max_name_length {
516                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
517                }
518                self.buf.push(c);
519                None
520            },
521
522            Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
523
524            Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
525
526            Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
527                      target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
528
529            Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
530
531            _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
532        }
533    }
534
535    /// Dispatches tokens in order to process attribute value.
536    ///
537    /// # Parameters
538    /// * `t`        --- next token;
539    /// * `on_value` --- a callback which is called when terminating quote is encountered.
540    fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
541      where F: Fn(&mut Self, String) -> Option<Result> {
542        match t {
543            Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
544
545            Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
546                None => {  // Entered attribute value
547                    self.data.quote = QuoteToken::from_token(t);
548                    None
549                },
550                Some(q) if q.as_token() == t => {
551                    self.data.quote = None;
552                    let value = self.take_buf();
553                    on_value(self, value)
554                },
555                _ => {
556                    if let Token::Character(c) = t {
557                        if !self.is_valid_xml_char_not_restricted(c) {
558                            return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
559                        }
560                    }
561                    if self.buf.len() > self.config.max_attribute_length {
562                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
563                    }
564                    t.push_to_string(&mut self.buf);
565                    None
566                },
567            },
568
569            Token::ReferenceStart if self.data.quote.is_some() => {
570                self.state_after_reference = self.st;
571                self.into_state_continue(State::InsideReference)
572            },
573
574            Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
575
576            Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
577                Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
578            },
579
580            // Every character except " and ' and < is okay
581            _ if self.data.quote.is_some() => {
582                if self.buf.len() > self.config.max_attribute_length {
583                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
584                }
585                t.push_to_string(&mut self.buf);
586                None
587            },
588
589            _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
590        }
591    }
592
593    fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
594        let mut name = self.data.take_element_name()?;
595        let mut attributes = self.data.take_attributes().into_vec();
596
597        // check whether the name prefix is bound and fix its namespace
598        match self.nst.get(name.borrow().prefix_repr()) {
599            Some("") => name.namespace = None, // default namespace
600            Some(ns) => name.namespace = Some(ns.into()),
601            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
602        }
603
604        // check and fix accumulated attributes prefixes
605        for attr in &mut attributes {
606            if let Some(ref pfx) = attr.name.prefix {
607                let new_ns = match self.nst.get(pfx) {
608                    Some("") => None, // default namespace
609                    Some(ns) => Some(ns.into()),
610                    None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))),
611                };
612                attr.name.namespace = new_ns;
613            }
614        }
615
616        if emit_end_element {
617            self.pop_namespace = true;
618            self.next_event = Some(Ok(XmlEvent::EndElement {
619                name: name.clone()
620            }));
621        } else {
622            self.est.push(name.clone());
623        }
624        let namespace = self.nst.squash();
625        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
626            name,
627            attributes,
628            namespace
629        }))
630    }
631
632    fn emit_end_element(&mut self) -> Option<Result> {
633        let mut name = self.data.take_element_name()?;
634
635        // check whether the name prefix is bound and fix its namespace
636        match self.nst.get(name.borrow().prefix_repr()) {
637            Some("") => name.namespace = None, // default namespace
638            Some(ns) => name.namespace = Some(ns.into()),
639            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
640        }
641
642        let op_name = self.est.pop()?;
643
644        if name == op_name {
645            self.pop_namespace = true;
646            self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
647        } else {
648            Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
649        }
650    }
651
652    #[inline]
653    fn is_valid_xml_char(&self, c: char) -> bool {
654        if Some(XmlVersion::Version11) == self.data.version {
655            is_xml11_char(c)
656        } else {
657            is_xml10_char(c)
658        }
659    }
660
661    #[inline]
662    fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
663        if Some(XmlVersion::Version11) == self.data.version {
664            is_xml11_char_not_restricted(c)
665        } else {
666            is_xml10_char(c)
667        }
668    }
669}
670
#[cfg(test)]
mod tests {
    use crate::attribute::OwnedAttribute;
    use crate::common::TextPosition;
    use crate::name::OwnedName;
    use crate::reader::events::XmlEvent;
    use crate::reader::parser::PullParser;
    use crate::reader::ParserConfig;
    use std::io::BufReader;

    /// Creates a parser with the default configuration.
    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    /// Pulls the next event from parser `$p` reading from `$r` and panics unless
    /// it matches pattern `$t` (and, in the second form, condition `$c` holds).
    macro_rules! expect_event(
        ($r:expr, $p:expr, $t:pat) => (
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
            }
        );
        ($r:expr, $p:expr, $t:pat => $c:expr ) => (
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
            }
        )
    );

    /// Builds a `(reader, parser)` pair over the given string literal.
    macro_rules! test_data(
        ($d:expr) => ({
            // `'static` is implied for references in statics.
            static DATA: &str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        })
    );

    /// A semicolon inside an attribute value is plain content.
    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz;zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// A numeric character reference in element content is expanded.
    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(r"
            <bla>&#9835;</bla>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// Comment-like sequences inside comments.
    #[test]
    fn issue_220_comment() {
        let (mut r, mut p) = test_data!(r"<x><!-- <!--></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));

        let (mut r, mut p) = test_data!(r"<x><!-- <!---></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_)); // ---> is forbidden in comments

        let (mut r, mut p) = test_data!(r"<x><!--<text&x;> <!--></x>");
        p.config.c.ignore_comments = false;
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// Malformed pseudo-attributes in the XML declaration are rejected.
    #[test]
    fn malformed_declaration_attrs() {
        let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));
    }

    /// `<` inside an attribute value is an error reported at the expected position.
    #[test]
    fn opening_tag_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error, ErrorKind};

        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz<zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error {
                kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
                pos: TextPosition { row: 1, column: 24 }
            }
        );
    }

    /// A bare `&` followed by another reference is an error.
    #[test]
    fn reference_err() {
        let (mut r, mut p) = test_data!(r"
            <a>&&amp;</a>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_));
    }

    /// Guards the in-memory size of the parser state enums.
    #[test]
    fn state_size() {
        assert_eq!(2, std::mem::size_of::<super::State>());
        assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
    }
}