yaml_rust2/
parser.rs

1//! Home to the YAML Parser.
2//!
//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
//! compliance, and emits a stream of events that can be used by the [`crate::YamlLoader`] to
//! construct the [`crate::Yaml`] object.
6
7use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType};
8use std::collections::HashMap;
9
/// The states of the parser's state machine.
///
/// The parser keeps one current state and a stack of states to return to once a nested construct
/// has been fully parsed.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// We await the start of the stream.
    StreamStart,
    /// We await a document that may begin without an explicit `---` marker.
    ImplicitDocumentStart,
    /// We await a document that must begin explicitly (directives followed by `---`).
    DocumentStart,
    /// We await the root node of an explicitly started document; it may be empty.
    DocumentContent,
    /// We await the end of the current document.
    DocumentEnd,
    /// We await a node in block context.
    BlockNode,
    // BlockNodeOrIndentlessSequence,
    // FlowNode,
    /// The start of a block sequence was just emitted; we await its first entry.
    BlockSequenceFirstEntry,
    /// We await a subsequent entry of a block sequence.
    BlockSequenceEntry,
    /// We await an entry of a sequence that was opened directly by a block entry token.
    IndentlessSequenceEntry,
    /// The start of a block mapping was just emitted; we await its first key.
    BlockMappingFirstKey,
    /// We await a subsequent key of a block mapping, or the end of the mapping.
    BlockMappingKey,
    /// We await the value associated with the block mapping key that was just parsed.
    BlockMappingValue,
    /// The start of a flow sequence was just emitted; we await its first entry.
    FlowSequenceFirstEntry,
    /// We await a subsequent entry of a flow sequence.
    FlowSequenceEntry,
    /// Dispatched to `flow_sequence_entry_mapping_key`.
    // NOTE(review): presumably the key of a single-pair mapping inside a flow sequence — confirm.
    FlowSequenceEntryMappingKey,
    /// Dispatched to `flow_sequence_entry_mapping_value`.
    FlowSequenceEntryMappingValue,
    /// Dispatched to `flow_sequence_entry_mapping_end`.
    FlowSequenceEntryMappingEnd,
    /// The start of a flow mapping was just emitted; we await its first key.
    FlowMappingFirstKey,
    /// We await a subsequent key of a flow mapping, or the end of the mapping.
    FlowMappingKey,
    /// We await the value associated with the flow mapping key that was just parsed.
    FlowMappingValue,
    /// A flow mapping entry was given without a key indicator; its value is an empty scalar.
    FlowMappingEmptyValue,
    /// The stream has ended; only `Event::StreamEnd` is produced from now on.
    End,
}
38
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
///
/// Anchor IDs carried by events are `0` when the node has no anchor; IDs of actual anchors start
/// from 1.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// Emitted for the explicit document start marker (`---`) and also when a document starts
    /// implicitly.
    DocumentStart,
    /// The end of a YAML document.
    ///
    /// Emitted whether or not the explicit document end marker (`...`) is present.
    DocumentEnd,
    /// A YAML Alias.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML scalar.
    ///
    /// The fields are, in order: the value of the scalar, its style, its anchor ID and its
    /// optional tag.
    Scalar(String, TScalarStyle, usize, Option<Tag>),
    /// The start of a YAML sequence (array).
    SequenceStart(
        /// The anchor ID of the start of the sequence.
        usize,
        /// An optional tag
        Option<Tag>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    MappingStart(
        /// The anchor ID of the start of the mapping.
        usize,
        /// An optional tag
        Option<Tag>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
81
/// A YAML tag.
///
/// Tags are optionally attached to [`Event::Scalar`], [`Event::SequenceStart`] and
/// [`Event::MappingStart`] events.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Tag {
    /// Handle of the tag (`!` included).
    pub handle: String,
    /// The suffix of the tag.
    pub suffix: String,
}
90
91impl Event {
92    /// Create an empty scalar.
93    fn empty_scalar() -> Event {
94        // a null scalar
95        Event::Scalar(String::new(), TScalarStyle::Plain, 0, None)
96    }
97
98    /// Create an empty scalar with the given anchor.
99    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Tag>) -> Event {
100        Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag)
101    }
102}
103
/// A YAML parser.
///
/// It pulls tokens from a [`Scanner`] and turns them into [`Event`]s by means of a state machine.
#[derive(Debug)]
pub struct Parser<T> {
    /// The scanner the tokens are read from.
    scanner: Scanner<T>,
    /// The stack of states to return to once the current construct has been parsed.
    states: Vec<State>,
    /// The current state of the state machine.
    state: State,
    /// A token that was peeked from the scanner but not consumed yet.
    token: Option<Token>,
    /// An event that was peeked but not consumed yet.
    current: Option<(Event, Marker)>,
    /// The anchors registered so far, mapping an anchor name to its ID.
    anchors: HashMap<String, usize>,
    /// The ID that will be given to the next anchor that is registered.
    anchor_id: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: HashMap<String, String>,
    /// Make tags global across all documents.
    keep_tags: bool,
}
121
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`MarkedEventReceiver`] automatically.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive events. The first event after an
/// [`Event::MappingStart`] will be the key, followed by its value. If the mapping contains no
/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd
/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following yaml:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented for readability):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..)
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use yaml_rust2::parser::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink {
///     events: Vec<Event>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl EventReceiver for EventSink {
///     fn on_event(&mut self, ev: Event) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a yaml string.
/// fn str_to_events(yaml: &str) -> Vec<Event> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver {
    /// Handler called for each YAML event that is emitted by the parser.
    fn on_event(&mut self, ev: Event);
}
195
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event.
/// Any type implementing [`EventReceiver`] implements this trait automatically, in which case the
/// marker is discarded.
pub trait MarkedEventReceiver {
    /// Handler called for each event that occurs.
    ///
    /// `ev` is the event that was emitted and `_mark` the marker the parser attached to it.
    fn on_event(&mut self, ev: Event, _mark: Marker);
}
203
204impl<R: EventReceiver> MarkedEventReceiver for R {
205    fn on_event(&mut self, ev: Event, _mark: Marker) {
206        self.on_event(ev);
207    }
208}
209
/// A convenience alias for a `Result` of a parser event.
///
/// On success, it holds the [`Event`] along with the [`Marker`] attached to it. On failure, it
/// holds the [`ScanError`] that was encountered.
pub type ParseResult = Result<(Event, Marker), ScanError>;
212
213impl<'a> Parser<core::str::Chars<'a>> {
214    /// Create a new instance of a parser from a &str.
215    #[must_use]
216    pub fn new_from_str(value: &'a str) -> Self {
217        Parser::new(value.chars())
218    }
219}
220
221impl<T: Iterator<Item = char>> Parser<T> {
222    /// Create a new instance of a parser from the given input of characters.
223    pub fn new(src: T) -> Parser<T> {
224        Parser {
225            scanner: Scanner::new(src),
226            states: Vec::new(),
227            state: State::StreamStart,
228            token: None,
229            current: None,
230
231            anchors: HashMap::new(),
232            // valid anchor_id starts from 1
233            anchor_id: 1,
234            tags: HashMap::new(),
235            keep_tags: false,
236        }
237    }
238
239    /// Whether to keep tags across multiple documents when parsing.
240    ///
241    /// This behavior is non-standard as per the YAML specification but can be encountered in the
242    /// wild. This boolean allows enabling this non-standard extension. This would result in the
243    /// parser accepting input from [test
244    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
245    /// of the yaml-test-suite:
246    ///
247    /// ```yaml
248    /// %TAG !prefix! tag:example.com,2011:
249    /// --- !prefix!A
250    /// a: b
251    /// --- !prefix!B
252    /// c: d
253    /// --- !prefix!C
254    /// e: f
255    /// ```
256    ///
257    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
258    /// only apply to the document immediately following them. This would error on `!prefix!B`.
259    ///
260    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
261    #[must_use]
262    pub fn keep_tags(mut self, value: bool) -> Self {
263        self.keep_tags = value;
264        self
265    }
266
267    /// Try to load the next event and return it, but do not consuming it from `self`.
268    ///
269    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
270    /// [`Iterator::next`] or [`Parser::load`].
271    /// # Errors
272    /// Returns `ScanError` when loading the next event fails.
273    pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> {
274        if let Some(ref x) = self.current {
275            Ok(x)
276        } else {
277            self.current = Some(self.next_token()?);
278            self.peek()
279        }
280    }
281
282    /// Try to load the next event and return it, consuming it from `self`.
283    /// # Errors
284    /// Returns `ScanError` when loading the next event fails.
285    pub fn next_token(&mut self) -> ParseResult {
286        match self.current.take() {
287            None => self.parse(),
288            Some(v) => Ok(v),
289        }
290    }
291
292    /// Peek at the next token from the scanner.
293    fn peek_token(&mut self) -> Result<&Token, ScanError> {
294        match self.token {
295            None => {
296                self.token = Some(self.scan_next_token()?);
297                Ok(self.token.as_ref().unwrap())
298            }
299            Some(ref tok) => Ok(tok),
300        }
301    }
302
303    /// Extract and return the next token from the scanner.
304    ///
305    /// This function does _not_ make use of `self.token`.
306    fn scan_next_token(&mut self) -> Result<Token, ScanError> {
307        let token = self.scanner.next();
308        match token {
309            None => match self.scanner.get_error() {
310                None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")),
311                Some(e) => Err(e),
312            },
313            Some(tok) => Ok(tok),
314        }
315    }
316
317    fn fetch_token(&mut self) -> Token {
318        self.token
319            .take()
320            .expect("fetch_token needs to be preceded by peek_token")
321    }
322
323    /// Skip the next token from the scanner.
324    fn skip(&mut self) {
325        self.token = None;
326        //self.peek_token();
327    }
328    /// Pops the top-most state and make it the current state.
329    fn pop_state(&mut self) {
330        self.state = self.states.pop().unwrap();
331    }
    /// Push a new state atop the state stack.
    ///
    /// The current state is left untouched; the pushed state becomes current again once
    /// `pop_state` brings it back.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
336
337    fn parse(&mut self) -> ParseResult {
338        if self.state == State::End {
339            return Ok((Event::StreamEnd, self.scanner.mark()));
340        }
341        let (ev, mark) = self.state_machine()?;
342        // println!("EV {:?}", ev);
343        Ok((ev, mark))
344    }
345
346    /// Load the YAML from the stream in `self`, pushing events into `recv`.
347    ///
348    /// The contents of the stream are parsed and the corresponding events are sent into the
349    /// recveiver. For detailed explanations about how events work, see [`EventReceiver`].
350    ///
351    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
352    /// inside the stream.
353    ///
354    /// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the
355    /// former is enough to call this function.
356    /// # Errors
357    /// Returns `ScanError` when loading fails.
358    pub fn load<R: MarkedEventReceiver>(
359        &mut self,
360        recv: &mut R,
361        multi: bool,
362    ) -> Result<(), ScanError> {
363        if !self.scanner.stream_started() {
364            let (ev, mark) = self.next_token()?;
365            if ev != Event::StreamStart {
366                return Err(ScanError::new(mark, "did not find expected <stream-start>"));
367            }
368            recv.on_event(ev, mark);
369        }
370
371        if self.scanner.stream_ended() {
372            // XXX has parsed?
373            recv.on_event(Event::StreamEnd, self.scanner.mark());
374            return Ok(());
375        }
376        loop {
377            let (ev, mark) = self.next_token()?;
378            if ev == Event::StreamEnd {
379                recv.on_event(ev, mark);
380                return Ok(());
381            }
382            // clear anchors before a new document
383            self.anchors.clear();
384            self.load_document(ev, mark, recv)?;
385            if !multi {
386                break;
387            }
388        }
389        Ok(())
390    }
391
392    fn load_document<R: MarkedEventReceiver>(
393        &mut self,
394        first_ev: Event,
395        mark: Marker,
396        recv: &mut R,
397    ) -> Result<(), ScanError> {
398        if first_ev != Event::DocumentStart {
399            return Err(ScanError::new(
400                mark,
401                "did not find expected <document-start>",
402            ));
403        }
404        recv.on_event(first_ev, mark);
405
406        let (ev, mark) = self.next_token()?;
407        self.load_node(ev, mark, recv)?;
408
409        // DOCUMENT-END is expected.
410        let (ev, mark) = self.next_token()?;
411        assert_eq!(ev, Event::DocumentEnd);
412        recv.on_event(ev, mark);
413
414        Ok(())
415    }
416
417    fn load_node<R: MarkedEventReceiver>(
418        &mut self,
419        first_ev: Event,
420        mark: Marker,
421        recv: &mut R,
422    ) -> Result<(), ScanError> {
423        match first_ev {
424            Event::Alias(..) | Event::Scalar(..) => {
425                recv.on_event(first_ev, mark);
426                Ok(())
427            }
428            Event::SequenceStart(..) => {
429                recv.on_event(first_ev, mark);
430                self.load_sequence(recv)
431            }
432            Event::MappingStart(..) => {
433                recv.on_event(first_ev, mark);
434                self.load_mapping(recv)
435            }
436            _ => {
437                println!("UNREACHABLE EVENT: {first_ev:?}");
438                unreachable!();
439            }
440        }
441    }
442
443    fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
444        let (mut key_ev, mut key_mark) = self.next_token()?;
445        while key_ev != Event::MappingEnd {
446            // key
447            self.load_node(key_ev, key_mark, recv)?;
448
449            // value
450            let (ev, mark) = self.next_token()?;
451            self.load_node(ev, mark, recv)?;
452
453            // next event
454            let (ev, mark) = self.next_token()?;
455            key_ev = ev;
456            key_mark = mark;
457        }
458        recv.on_event(key_ev, key_mark);
459        Ok(())
460    }
461
462    fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
463        let (mut ev, mut mark) = self.next_token()?;
464        while ev != Event::SequenceEnd {
465            self.load_node(ev, mark, recv)?;
466
467            // next event
468            let (next_ev, next_mark) = self.next_token()?;
469            ev = next_ev;
470            mark = next_mark;
471        }
472        recv.on_event(ev, mark);
473        Ok(())
474    }
475
476    fn state_machine(&mut self) -> ParseResult {
477        // let next_tok = self.peek_token().cloned()?;
478        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
479        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
480
481        match self.state {
482            State::StreamStart => self.stream_start(),
483
484            State::ImplicitDocumentStart => self.document_start(true),
485            State::DocumentStart => self.document_start(false),
486            State::DocumentContent => self.document_content(),
487            State::DocumentEnd => self.document_end(),
488
489            State::BlockNode => self.parse_node(true, false),
490            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
491            // State::FlowNode => self.parse_node(false, false),
492            State::BlockMappingFirstKey => self.block_mapping_key(true),
493            State::BlockMappingKey => self.block_mapping_key(false),
494            State::BlockMappingValue => self.block_mapping_value(),
495
496            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
497            State::BlockSequenceEntry => self.block_sequence_entry(false),
498
499            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
500            State::FlowSequenceEntry => self.flow_sequence_entry(false),
501
502            State::FlowMappingFirstKey => self.flow_mapping_key(true),
503            State::FlowMappingKey => self.flow_mapping_key(false),
504            State::FlowMappingValue => self.flow_mapping_value(false),
505
506            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
507
508            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
509            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
510            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
511            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
512
513            /* impossible */
514            State::End => unreachable!(),
515        }
516    }
517
518    fn stream_start(&mut self) -> ParseResult {
519        match *self.peek_token()? {
520            Token(mark, TokenType::StreamStart(_)) => {
521                self.state = State::ImplicitDocumentStart;
522                self.skip();
523                Ok((Event::StreamStart, mark))
524            }
525            Token(mark, _) => Err(ScanError::new(mark, "did not find expected <stream-start>")),
526        }
527    }
528
529    fn document_start(&mut self, implicit: bool) -> ParseResult {
530        while let TokenType::DocumentEnd = self.peek_token()?.1 {
531            self.skip();
532        }
533
534        match *self.peek_token()? {
535            Token(mark, TokenType::StreamEnd) => {
536                self.state = State::End;
537                self.skip();
538                Ok((Event::StreamEnd, mark))
539            }
540            Token(
541                _,
542                TokenType::VersionDirective(..)
543                | TokenType::TagDirective(..)
544                | TokenType::DocumentStart,
545            ) => {
546                // explicit document
547                self.explicit_document_start()
548            }
549            Token(mark, _) if implicit => {
550                self.parser_process_directives()?;
551                self.push_state(State::DocumentEnd);
552                self.state = State::BlockNode;
553                Ok((Event::DocumentStart, mark))
554            }
555            _ => {
556                // explicit document
557                self.explicit_document_start()
558            }
559        }
560    }
561
562    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
563        let mut version_directive_received = false;
564        loop {
565            let mut tags = HashMap::new();
566            match self.peek_token()? {
567                Token(mark, TokenType::VersionDirective(_, _)) => {
568                    // XXX parsing with warning according to spec
569                    //if major != 1 || minor > 2 {
570                    //    return Err(ScanError::new(tok.0,
571                    //        "found incompatible YAML document"));
572                    //}
573                    if version_directive_received {
574                        return Err(ScanError::new(*mark, "duplicate version directive"));
575                    }
576                    version_directive_received = true;
577                }
578                Token(mark, TokenType::TagDirective(handle, prefix)) => {
579                    if tags.contains_key(handle) {
580                        return Err(ScanError::new(*mark, "the TAG directive must only be given at most once per handle in the same document"));
581                    }
582                    tags.insert(handle.to_string(), prefix.to_string());
583                }
584                _ => break,
585            }
586            self.tags = tags;
587            self.skip();
588        }
589        Ok(())
590    }
591
592    fn explicit_document_start(&mut self) -> ParseResult {
593        self.parser_process_directives()?;
594        match *self.peek_token()? {
595            Token(mark, TokenType::DocumentStart) => {
596                self.push_state(State::DocumentEnd);
597                self.state = State::DocumentContent;
598                self.skip();
599                Ok((Event::DocumentStart, mark))
600            }
601            Token(mark, _) => Err(ScanError::new(
602                mark,
603                "did not find expected <document start>",
604            )),
605        }
606    }
607
608    fn document_content(&mut self) -> ParseResult {
609        match *self.peek_token()? {
610            Token(
611                mark,
612                TokenType::VersionDirective(..)
613                | TokenType::TagDirective(..)
614                | TokenType::DocumentStart
615                | TokenType::DocumentEnd
616                | TokenType::StreamEnd,
617            ) => {
618                self.pop_state();
619                // empty scalar
620                Ok((Event::empty_scalar(), mark))
621            }
622            _ => self.parse_node(true, false),
623        }
624    }
625
626    fn document_end(&mut self) -> ParseResult {
627        let mut explicit_end = false;
628        let marker: Marker = match *self.peek_token()? {
629            Token(mark, TokenType::DocumentEnd) => {
630                explicit_end = true;
631                self.skip();
632                mark
633            }
634            Token(mark, _) => mark,
635        };
636
637        if !self.keep_tags {
638            self.tags.clear();
639        }
640        if explicit_end {
641            self.state = State::ImplicitDocumentStart;
642        } else {
643            if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
644                *self.peek_token()?
645            {
646                return Err(ScanError::new(
647                    mark,
648                    "missing explicit document end marker before directive",
649                ));
650            }
651            self.state = State::DocumentStart;
652        }
653
654        Ok((Event::DocumentEnd, marker))
655    }
656
657    fn register_anchor(&mut self, name: String, _: &Marker) -> usize {
658        // anchors can be overridden/reused
659        // if self.anchors.contains_key(name) {
660        //     return Err(ScanError::new(*mark,
661        //         "while parsing anchor, found duplicated anchor"));
662        // }
663        let new_id = self.anchor_id;
664        self.anchor_id += 1;
665        self.anchors.insert(name, new_id);
666        new_id
667    }
668
    /// Parse a node: an alias, or optional properties (anchor and/or tag) followed by content.
    ///
    /// `block` tells whether block sequences and block mappings are accepted as content.
    /// `indentless_sequence` tells whether a block entry token may open a sequence directly.
    ///
    /// For a scalar, an alias or an empty node, the state atop the state stack is restored. For
    /// a collection, the matching start event is emitted and the parser moves to the state that
    /// awaits its first element; the token opening the collection is not consumed here.
    ///
    /// # Errors
    /// Returns `ScanError` if an alias refers to an unknown anchor, if resolving the tag fails or
    /// if no node content is found where one is required.
    fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
        // 0 means that the node has no anchor.
        let mut anchor_id = 0;
        let mut tag = None;
        // First, the alias or the properties of the node.
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                self.pop_state();
                if let Token(mark, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&name) {
                        None => {
                            return Err(ScanError::new(
                                mark,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), mark)),
                    }
                }
                // The fetched token is the one that was just peeked.
                unreachable!()
            }
            // An anchor, optionally followed by a tag.
            Token(_, TokenType::Anchor(_)) => {
                if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &mark);
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            // NOTE(review): `mark` is the marker of the anchor, not of the tag.
                            tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            // A tag, optionally followed by an anchor.
            Token(mark, TokenType::Tag(..)) => {
                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Then, the content of the node.
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            // ex 7.2, an empty scalar can follow a secondary tag
            // More generally, a node that has properties but no content is an empty scalar.
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "while parsing a node, did not find expected node content",
            )),
        }
    }
758
759    fn block_mapping_key(&mut self, first: bool) -> ParseResult {
760        // skip BlockMappingStart
761        if first {
762            let _ = self.peek_token()?;
763            //self.marks.push(tok.0);
764            self.skip();
765        }
766        match *self.peek_token()? {
767            Token(_, TokenType::Key) => {
768                self.skip();
769                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
770                    *self.peek_token()?
771                {
772                    self.state = State::BlockMappingValue;
773                    // empty scalar
774                    Ok((Event::empty_scalar(), mark))
775                } else {
776                    self.push_state(State::BlockMappingValue);
777                    self.parse_node(true, true)
778                }
779            }
780            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
781            Token(mark, TokenType::Value) => {
782                self.state = State::BlockMappingValue;
783                Ok((Event::empty_scalar(), mark))
784            }
785            Token(mark, TokenType::BlockEnd) => {
786                self.pop_state();
787                self.skip();
788                Ok((Event::MappingEnd, mark))
789            }
790            Token(mark, _) => Err(ScanError::new(
791                mark,
792                "while parsing a block mapping, did not find expected key",
793            )),
794        }
795    }
796
797    fn block_mapping_value(&mut self) -> ParseResult {
798        match *self.peek_token()? {
799            Token(_, TokenType::Value) => {
800                self.skip();
801                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
802                    *self.peek_token()?
803                {
804                    self.state = State::BlockMappingKey;
805                    // empty scalar
806                    Ok((Event::empty_scalar(), mark))
807                } else {
808                    self.push_state(State::BlockMappingKey);
809                    self.parse_node(true, true)
810                }
811            }
812            Token(mark, _) => {
813                self.state = State::BlockMappingKey;
814                // empty scalar
815                Ok((Event::empty_scalar(), mark))
816            }
817        }
818    }
819
    /// Parse a key of a flow mapping, or the end of the mapping.
    ///
    /// `first` tells whether this is the first key of the mapping, in which case the token that
    /// opened the mapping is still pending and gets consumed. Otherwise, unless the mapping ends
    /// right away, a flow entry token is expected before the key.
    ///
    /// # Errors
    /// Returns `ScanError` if the flow entry token expected between two entries is missing.
    fn flow_mapping_key(&mut self, first: bool) -> ParseResult {
        if first {
            // Skip the token that opened the mapping.
            let _ = self.peek_token()?;
            self.skip();
        }
        // Every path that produces a key returns from within this block. Reaching past it means
        // that the mapping ends, `marker` being the marker of the `MappingEnd` event.
        let marker: Marker = {
            match *self.peek_token()? {
                Token(mark, TokenType::FlowMappingEnd) => mark,
                Token(mark, _) => {
                    if !first {
                        // Entries past the first one must be introduced by a flow entry token.
                        match *self.peek_token()? {
                            Token(_, TokenType::FlowEntry) => self.skip(),
                            Token(mark, _) => return Err(ScanError::new(
                                mark,
                                "while parsing a flow mapping, did not find expected ',' or '}'",
                            )),
                        }
                    }

                    match *self.peek_token()? {
                        Token(_, TokenType::Key) => {
                            self.skip();
                            // A key indicator that is not followed by a node: the key is empty.
                            if let Token(
                                mark,
                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
                            ) = *self.peek_token()?
                            {
                                self.state = State::FlowMappingValue;
                                return Ok((Event::empty_scalar(), mark));
                            }
                            self.push_state(State::FlowMappingValue);
                            return self.parse_node(false, false);
                        }
                        // A value indicator that is not preceded by a key: the key is empty.
                        Token(marker, TokenType::Value) => {
                            self.state = State::FlowMappingValue;
                            return Ok((Event::empty_scalar(), marker));
                        }
                        // The mapping ends after the flow entry token: fall through.
                        Token(_, TokenType::FlowMappingEnd) => (),
                        // A node without a key indicator: it is a key whose value is empty.
                        _ => {
                            self.push_state(State::FlowMappingEmptyValue);
                            return self.parse_node(false, false);
                        }
                    }

                    // The marker of the token that was peeked before the flow entry token, if
                    // any, was skipped.
                    mark
                }
            }
        };

        // Restore the state pushed for the mapping and consume the token that ends it.
        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, marker))
    }
873
874    fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
875        let mark: Marker = {
876            if empty {
877                let Token(mark, _) = *self.peek_token()?;
878                self.state = State::FlowMappingKey;
879                return Ok((Event::empty_scalar(), mark));
880            }
881            match *self.peek_token()? {
882                Token(marker, TokenType::Value) => {
883                    self.skip();
884                    match self.peek_token()?.1 {
885                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
886                        _ => {
887                            self.push_state(State::FlowMappingKey);
888                            return self.parse_node(false, false);
889                        }
890                    }
891                    marker
892                }
893                Token(marker, _) => marker,
894            }
895        };
896
897        self.state = State::FlowMappingKey;
898        Ok((Event::empty_scalar(), mark))
899    }
900
901    fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
902        // skip FlowMappingStart
903        if first {
904            let _ = self.peek_token()?;
905            //self.marks.push(tok.0);
906            self.skip();
907        }
908        match *self.peek_token()? {
909            Token(mark, TokenType::FlowSequenceEnd) => {
910                self.pop_state();
911                self.skip();
912                return Ok((Event::SequenceEnd, mark));
913            }
914            Token(_, TokenType::FlowEntry) if !first => {
915                self.skip();
916            }
917            Token(mark, _) if !first => {
918                return Err(ScanError::new(
919                    mark,
920                    "while parsing a flow sequence, expected ',' or ']'",
921                ));
922            }
923            _ => { /* next */ }
924        }
925        match *self.peek_token()? {
926            Token(mark, TokenType::FlowSequenceEnd) => {
927                self.pop_state();
928                self.skip();
929                Ok((Event::SequenceEnd, mark))
930            }
931            Token(mark, TokenType::Key) => {
932                self.state = State::FlowSequenceEntryMappingKey;
933                self.skip();
934                Ok((Event::MappingStart(0, None), mark))
935            }
936            _ => {
937                self.push_state(State::FlowSequenceEntry);
938                self.parse_node(false, false)
939            }
940        }
941    }
942
943    fn indentless_sequence_entry(&mut self) -> ParseResult {
944        match *self.peek_token()? {
945            Token(_, TokenType::BlockEntry) => (),
946            Token(mark, _) => {
947                self.pop_state();
948                return Ok((Event::SequenceEnd, mark));
949            }
950        }
951        self.skip();
952        if let Token(
953            mark,
954            TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
955        ) = *self.peek_token()?
956        {
957            self.state = State::IndentlessSequenceEntry;
958            Ok((Event::empty_scalar(), mark))
959        } else {
960            self.push_state(State::IndentlessSequenceEntry);
961            self.parse_node(true, false)
962        }
963    }
964
965    fn block_sequence_entry(&mut self, first: bool) -> ParseResult {
966        // BLOCK-SEQUENCE-START
967        if first {
968            let _ = self.peek_token()?;
969            //self.marks.push(tok.0);
970            self.skip();
971        }
972        match *self.peek_token()? {
973            Token(mark, TokenType::BlockEnd) => {
974                self.pop_state();
975                self.skip();
976                Ok((Event::SequenceEnd, mark))
977            }
978            Token(_, TokenType::BlockEntry) => {
979                self.skip();
980                if let Token(mark, TokenType::BlockEntry | TokenType::BlockEnd) =
981                    *self.peek_token()?
982                {
983                    self.state = State::BlockSequenceEntry;
984                    Ok((Event::empty_scalar(), mark))
985                } else {
986                    self.push_state(State::BlockSequenceEntry);
987                    self.parse_node(true, false)
988                }
989            }
990            Token(mark, _) => Err(ScanError::new(
991                mark,
992                "while parsing a block collection, did not find expected '-' indicator",
993            )),
994        }
995    }
996
997    fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult {
998        if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
999            *self.peek_token()?
1000        {
1001            self.skip();
1002            self.state = State::FlowSequenceEntryMappingValue;
1003            Ok((Event::empty_scalar(), mark))
1004        } else {
1005            self.push_state(State::FlowSequenceEntryMappingValue);
1006            self.parse_node(false, false)
1007        }
1008    }
1009
1010    fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult {
1011        match *self.peek_token()? {
1012            Token(_, TokenType::Value) => {
1013                self.skip();
1014                self.state = State::FlowSequenceEntryMappingValue;
1015                if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1016                    *self.peek_token()?
1017                {
1018                    self.state = State::FlowSequenceEntryMappingEnd;
1019                    Ok((Event::empty_scalar(), mark))
1020                } else {
1021                    self.push_state(State::FlowSequenceEntryMappingEnd);
1022                    self.parse_node(false, false)
1023                }
1024            }
1025            Token(mark, _) => {
1026                self.state = State::FlowSequenceEntryMappingEnd;
1027                Ok((Event::empty_scalar(), mark))
1028            }
1029        }
1030    }
1031
1032    #[allow(clippy::unnecessary_wraps)]
1033    fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
1034        self.state = State::FlowSequenceEntry;
1035        Ok((Event::MappingEnd, self.scanner.mark()))
1036    }
1037
1038    /// Resolve a tag from the handle and the suffix.
1039    fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result<Tag, ScanError> {
1040        if handle == "!!" {
1041            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1042            // overridden.
1043            match self.tags.get("!!") {
1044                Some(prefix) => Ok(Tag {
1045                    handle: prefix.to_string(),
1046                    suffix,
1047                }),
1048                None => Ok(Tag {
1049                    handle: "tag:yaml.org,2002:".to_string(),
1050                    suffix,
1051                }),
1052            }
1053        } else if handle.is_empty() && suffix == "!" {
1054            // "!" introduces a local tag. Local tags may have their prefix overridden.
1055            match self.tags.get("") {
1056                Some(prefix) => Ok(Tag {
1057                    handle: prefix.to_string(),
1058                    suffix,
1059                }),
1060                None => Ok(Tag {
1061                    handle: String::new(),
1062                    suffix,
1063                }),
1064            }
1065        } else {
1066            // Lookup handle in our tag directives.
1067            let prefix = self.tags.get(handle);
1068            if let Some(prefix) = prefix {
1069                Ok(Tag {
1070                    handle: prefix.to_string(),
1071                    suffix,
1072                })
1073            } else {
1074                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1075                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1076                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1077                // to error.
1078                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1079                    Err(ScanError::new(mark, "the handle wasn't declared"))
1080                } else {
1081                    Ok(Tag {
1082                        handle: handle.to_string(),
1083                        suffix,
1084                    })
1085                }
1086            }
1087        }
1088    }
1089}
1090
#[cfg(test)]
mod test {
    use super::{Event, Parser};
    use crate::YamlLoader;

    /// `peek()` must return exactly the event that the following `next_token()` yields, for
    /// every event up to and including `StreamEnd`.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let peeked = p.peek().unwrap().clone();
            let consumed = p.next_token().unwrap();
            assert_eq!(consumed, peeked);
            if consumed.0 == Event::StreamEnd {
                break;
            }
        }
    }

    /// A `%TAG` directive given before the first document stays usable in the second document
    /// when `keep_tags(true)` is set, and loading fails when `keep_tags(false)` is set.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        // Tags kept: both documents load.
        let mut keeping = Parser::new_from_str(text).keep_tags(true);
        let loaded = YamlLoader::load_from_parser(&mut keeping);
        assert!(loaded.is_ok());
        let docs = loaded.unwrap();
        assert_eq!(docs.len(), 2);
        assert_eq!(docs[0]["foo"].as_str(), Some("bar"));
        assert_eq!(docs[1]["baz"].as_str(), Some("qux"));

        // Tags dropped between documents: loading must fail.
        let mut dropping = Parser::new_from_str(text).keep_tags(false);
        let loaded = YamlLoader::load_from_parser(&mut dropping);
        assert!(loaded.is_err());
    }
}