yaml_rust/
scanner.rs

1use std::collections::VecDeque;
2use std::error::Error;
3use std::{char, fmt};
4
/// Character encoding reported in the stream-start token.
///
/// UTF-8 is the only encoding this scanner represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// The input is UTF-8 text.
    Utf8,
}
9
/// Presentation style of a scalar token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    /// No particular style.
    Any,
    /// Unquoted scalar.
    Plain,
    /// Scalar enclosed in single quotes (`'...'`).
    SingleQuoted,
    /// Scalar enclosed in double quotes (`"..."`).
    DoubleQuoted,

    /// Literal block scalar.
    Literal,
    /// NOTE(review): presumably the *folded* block style; the spelling is part
    /// of the public API, so it must not be renamed without a deprecation path.
    Foled,
}
20
/// A position inside the scanned character stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    /// Number of characters consumed before this position.
    index: usize,
    /// Line number; the scanner starts counting at 1.
    line: usize,
    /// Column within the line; the scanner starts counting at 0.
    col: usize,
}
27
28impl Marker {
29    fn new(index: usize, line: usize, col: usize) -> Marker {
30        Marker { index, line, col }
31    }
32
33    pub fn index(&self) -> usize {
34        self.index
35    }
36
37    pub fn line(&self) -> usize {
38        self.line
39    }
40
41    pub fn col(&self) -> usize {
42        self.col
43    }
44}
45
46#[derive(Clone, PartialEq, Debug, Eq)]
47pub struct ScanError {
48    mark: Marker,
49    info: String,
50}
51
52impl ScanError {
53    pub fn new(loc: Marker, info: &str) -> ScanError {
54        ScanError {
55            mark: loc,
56            info: info.to_owned(),
57        }
58    }
59
60    pub fn marker(&self) -> &Marker {
61        &self.mark
62    }
63}
64
65impl Error for ScanError {
66    fn description(&self) -> &str {
67        self.info.as_ref()
68    }
69
70    fn cause(&self) -> Option<&dyn Error> {
71        None
72    }
73}
74
75impl fmt::Display for ScanError {
76    // col starts from 0
77    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
78        write!(
79            formatter,
80            "{} at line {} column {}",
81            self.info,
82            self.mark.line,
83            self.mark.col + 1
84        )
85    }
86}
87
88#[derive(Clone, PartialEq, Debug, Eq)]
89pub enum TokenType {
90    NoToken,
91    StreamStart(TEncoding),
92    StreamEnd,
93    /// major, minor
94    VersionDirective(u32, u32),
95    /// handle, prefix
96    TagDirective(String, String),
97    DocumentStart,
98    DocumentEnd,
99    BlockSequenceStart,
100    BlockMappingStart,
101    BlockEnd,
102    FlowSequenceStart,
103    FlowSequenceEnd,
104    FlowMappingStart,
105    FlowMappingEnd,
106    BlockEntry,
107    FlowEntry,
108    Key,
109    Value,
110    Alias(String),
111    Anchor(String),
112    /// handle, suffix
113    Tag(String, String),
114    Scalar(TScalarStyle, String),
115}
116
117#[derive(Clone, PartialEq, Debug, Eq)]
118pub struct Token(pub Marker, pub TokenType);
119
120#[derive(Clone, PartialEq, Debug, Eq)]
121struct SimpleKey {
122    possible: bool,
123    required: bool,
124    token_number: usize,
125    mark: Marker,
126}
127
128impl SimpleKey {
129    fn new(mark: Marker) -> SimpleKey {
130        SimpleKey {
131            possible: false,
132            required: false,
133            token_number: 0,
134            mark,
135        }
136    }
137}
138
139#[derive(Debug)]
140pub struct Scanner<T> {
141    rdr: T,
142    mark: Marker,
143    tokens: VecDeque<Token>,
144    buffer: VecDeque<char>,
145    error: Option<ScanError>,
146
147    stream_start_produced: bool,
148    stream_end_produced: bool,
149    adjacent_value_allowed_at: usize,
150    simple_key_allowed: bool,
151    simple_keys: Vec<SimpleKey>,
152    indent: isize,
153    indents: Vec<isize>,
154    flow_level: u8,
155    tokens_parsed: usize,
156    token_available: bool,
157}
158
159impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
160    type Item = Token;
161    fn next(&mut self) -> Option<Token> {
162        if self.error.is_some() {
163            return None;
164        }
165        match self.next_token() {
166            Ok(tok) => tok,
167            Err(e) => {
168                self.error = Some(e);
169                None
170            }
171        }
172    }
173}
174
/// True for the NUL character, which the scanner uses as its end-of-input
/// sentinel.
#[inline]
fn is_z(c: char) -> bool {
    '\0' == c
}
/// True for a line-break character (LF or CR).
#[inline]
fn is_break(c: char) -> bool {
    match c {
        '\n' | '\r' => true,
        _ => false,
    }
}
/// True for a line break (LF, CR) or the NUL end-of-input sentinel.
#[inline]
fn is_breakz(c: char) -> bool {
    match c {
        '\n' | '\r' | '\0' => true,
        _ => false,
    }
}
/// True for a space or a tab.
#[inline]
fn is_blank(c: char) -> bool {
    match c {
        ' ' | '\t' => true,
        _ => false,
    }
}
/// True for a blank (space, tab), a line break (LF, CR) or the NUL
/// end-of-input sentinel.
#[inline]
fn is_blankz(c: char) -> bool {
    match c {
        ' ' | '\t' | '\n' | '\r' | '\0' => true,
        _ => false,
    }
}
/// True for an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// True for an ASCII letter or digit, an underscore or a hyphen.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
/// True for an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Numeric value (0-15) of a hexadecimal digit.
///
/// Callers must pass a character accepted by `is_hex`; anything else is
/// treated as a broken invariant and panics.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// True for a flow-collection indicator: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
227
/// Result of a scanner step that yields no value on success and a `ScanError`
/// on failure.
pub type ScanResult = Result<(), ScanError>;
229
230impl<T: Iterator<Item = char>> Scanner<T> {
231    /// Creates the YAML tokenizer.
232    pub fn new(rdr: T) -> Scanner<T> {
233        Scanner {
234            rdr,
235            buffer: VecDeque::new(),
236            mark: Marker::new(0, 1, 0),
237            tokens: VecDeque::new(),
238            error: None,
239
240            stream_start_produced: false,
241            stream_end_produced: false,
242            adjacent_value_allowed_at: 0,
243            simple_key_allowed: true,
244            simple_keys: Vec::new(),
245            indent: -1,
246            indents: Vec::new(),
247            flow_level: 0,
248            tokens_parsed: 0,
249            token_available: false,
250        }
251    }
252    #[inline]
253    pub fn get_error(&self) -> Option<ScanError> {
254        match self.error {
255            None => None,
256            Some(ref e) => Some(e.clone()),
257        }
258    }
259
260    #[inline]
261    fn lookahead(&mut self, count: usize) {
262        if self.buffer.len() >= count {
263            return;
264        }
265        for _ in 0..(count - self.buffer.len()) {
266            self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
267        }
268    }
269    #[inline]
270    fn skip(&mut self) {
271        let c = self.buffer.pop_front().unwrap();
272
273        self.mark.index += 1;
274        if c == '\n' {
275            self.mark.line += 1;
276            self.mark.col = 0;
277        } else {
278            self.mark.col += 1;
279        }
280    }
281    #[inline]
282    fn skip_line(&mut self) {
283        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
284            self.skip();
285            self.skip();
286        } else if is_break(self.buffer[0]) {
287            self.skip();
288        }
289    }
290    #[inline]
291    fn ch(&self) -> char {
292        self.buffer[0]
293    }
294    #[inline]
295    fn ch_is(&self, c: char) -> bool {
296        self.buffer[0] == c
297    }
298    #[allow(dead_code)]
299    #[inline]
300    fn eof(&self) -> bool {
301        self.ch_is('\0')
302    }
303    #[inline]
304    pub fn stream_started(&self) -> bool {
305        self.stream_start_produced
306    }
307    #[inline]
308    pub fn stream_ended(&self) -> bool {
309        self.stream_end_produced
310    }
311    #[inline]
312    pub fn mark(&self) -> Marker {
313        self.mark
314    }
315    #[inline]
316    fn read_break(&mut self, s: &mut String) {
317        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
318            s.push('\n');
319            self.skip();
320            self.skip();
321        } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
322            s.push('\n');
323            self.skip();
324        } else {
325            unreachable!();
326        }
327    }
328    fn insert_token(&mut self, pos: usize, tok: Token) {
329        let old_len = self.tokens.len();
330        assert!(pos <= old_len);
331        self.tokens.push_back(tok);
332        for i in 0..old_len - pos {
333            self.tokens.swap(old_len - i, old_len - i - 1);
334        }
335    }
336    fn allow_simple_key(&mut self) {
337        self.simple_key_allowed = true;
338    }
339    fn disallow_simple_key(&mut self) {
340        self.simple_key_allowed = false;
341    }
342
343    pub fn fetch_next_token(&mut self) -> ScanResult {
344        self.lookahead(1);
345        // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
346
347        if !self.stream_start_produced {
348            self.fetch_stream_start();
349            return Ok(());
350        }
351        self.skip_to_next_token();
352
353        self.stale_simple_keys()?;
354
355        let mark = self.mark;
356        self.unroll_indent(mark.col as isize);
357
358        self.lookahead(4);
359
360        if is_z(self.ch()) {
361            self.fetch_stream_end()?;
362            return Ok(());
363        }
364
365        // Is it a directive?
366        if self.mark.col == 0 && self.ch_is('%') {
367            return self.fetch_directive();
368        }
369
370        if self.mark.col == 0
371            && self.buffer[0] == '-'
372            && self.buffer[1] == '-'
373            && self.buffer[2] == '-'
374            && is_blankz(self.buffer[3])
375        {
376            self.fetch_document_indicator(TokenType::DocumentStart)?;
377            return Ok(());
378        }
379
380        if self.mark.col == 0
381            && self.buffer[0] == '.'
382            && self.buffer[1] == '.'
383            && self.buffer[2] == '.'
384            && is_blankz(self.buffer[3])
385        {
386            self.fetch_document_indicator(TokenType::DocumentEnd)?;
387            return Ok(());
388        }
389
390        let c = self.buffer[0];
391        let nc = self.buffer[1];
392        match c {
393            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
394            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
395            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
396            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
397            ',' => self.fetch_flow_entry(),
398            '-' if is_blankz(nc) => self.fetch_block_entry(),
399            '?' if is_blankz(nc) => self.fetch_key(),
400            ':' if is_blankz(nc)
401                || (self.flow_level > 0
402                    && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
403            {
404                self.fetch_value()
405            }
406            // Is it an alias?
407            '*' => self.fetch_anchor(true),
408            // Is it an anchor?
409            '&' => self.fetch_anchor(false),
410            '!' => self.fetch_tag(),
411            // Is it a literal scalar?
412            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
413            // Is it a folded scalar?
414            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
415            '\'' => self.fetch_flow_scalar(true),
416            '"' => self.fetch_flow_scalar(false),
417            // plain scalar
418            '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
419            ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
420            '%' | '@' | '`' => Err(ScanError::new(
421                self.mark,
422                &format!("unexpected character: `{}'", c),
423            )),
424            _ => self.fetch_plain_scalar(),
425        }
426    }
427
428    pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
429        if self.stream_end_produced {
430            return Ok(None);
431        }
432
433        if !self.token_available {
434            self.fetch_more_tokens()?;
435        }
436        let t = self.tokens.pop_front().unwrap();
437        self.token_available = false;
438        self.tokens_parsed += 1;
439
440        if let TokenType::StreamEnd = t.1 {
441            self.stream_end_produced = true;
442        }
443        Ok(Some(t))
444    }
445
446    pub fn fetch_more_tokens(&mut self) -> ScanResult {
447        let mut need_more;
448        loop {
449            need_more = false;
450            if self.tokens.is_empty() {
451                need_more = true;
452            } else {
453                self.stale_simple_keys()?;
454                for sk in &self.simple_keys {
455                    if sk.possible && sk.token_number == self.tokens_parsed {
456                        need_more = true;
457                        break;
458                    }
459                }
460            }
461
462            if !need_more {
463                break;
464            }
465            self.fetch_next_token()?;
466        }
467        self.token_available = true;
468
469        Ok(())
470    }
471
472    fn stale_simple_keys(&mut self) -> ScanResult {
473        for sk in &mut self.simple_keys {
474            if sk.possible
475                && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
476            {
477                if sk.required {
478                    return Err(ScanError::new(self.mark, "simple key expect ':'"));
479                }
480                sk.possible = false;
481            }
482        }
483        Ok(())
484    }
485
486    fn skip_to_next_token(&mut self) {
487        loop {
488            self.lookahead(1);
489            // TODO(chenyh) BOM
490            match self.ch() {
491                ' ' => self.skip(),
492                '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
493                '\n' | '\r' => {
494                    self.lookahead(2);
495                    self.skip_line();
496                    if self.flow_level == 0 {
497                        self.allow_simple_key();
498                    }
499                }
500                '#' => {
501                    while !is_breakz(self.ch()) {
502                        self.skip();
503                        self.lookahead(1);
504                    }
505                }
506                _ => break,
507            }
508        }
509    }
510
511    fn fetch_stream_start(&mut self) {
512        let mark = self.mark;
513        self.indent = -1;
514        self.stream_start_produced = true;
515        self.allow_simple_key();
516        self.tokens
517            .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
518        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
519    }
520
521    fn fetch_stream_end(&mut self) -> ScanResult {
522        // force new line
523        if self.mark.col != 0 {
524            self.mark.col = 0;
525            self.mark.line += 1;
526        }
527
528        self.unroll_indent(-1);
529        self.remove_simple_key()?;
530        self.disallow_simple_key();
531
532        self.tokens
533            .push_back(Token(self.mark, TokenType::StreamEnd));
534        Ok(())
535    }
536
537    fn fetch_directive(&mut self) -> ScanResult {
538        self.unroll_indent(-1);
539        self.remove_simple_key()?;
540
541        self.disallow_simple_key();
542
543        let tok = self.scan_directive()?;
544
545        self.tokens.push_back(tok);
546
547        Ok(())
548    }
549
550    fn scan_directive(&mut self) -> Result<Token, ScanError> {
551        let start_mark = self.mark;
552        self.skip();
553
554        let name = self.scan_directive_name()?;
555        let tok = match name.as_ref() {
556            "YAML" => self.scan_version_directive_value(&start_mark)?,
557            "TAG" => self.scan_tag_directive_value(&start_mark)?,
558            // XXX This should be a warning instead of an error
559            _ => {
560                // skip current line
561                self.lookahead(1);
562                while !is_breakz(self.ch()) {
563                    self.skip();
564                    self.lookahead(1);
565                }
566                // XXX return an empty TagDirective token
567                Token(
568                    start_mark,
569                    TokenType::TagDirective(String::new(), String::new()),
570                )
571                // return Err(ScanError::new(start_mark,
572                //     "while scanning a directive, found unknown directive name"))
573            }
574        };
575        self.lookahead(1);
576
577        while is_blank(self.ch()) {
578            self.skip();
579            self.lookahead(1);
580        }
581
582        if self.ch() == '#' {
583            while !is_breakz(self.ch()) {
584                self.skip();
585                self.lookahead(1);
586            }
587        }
588
589        if !is_breakz(self.ch()) {
590            return Err(ScanError::new(
591                start_mark,
592                "while scanning a directive, did not find expected comment or line break",
593            ));
594        }
595
596        // Eat a line break
597        if is_break(self.ch()) {
598            self.lookahead(2);
599            self.skip_line();
600        }
601
602        Ok(tok)
603    }
604
605    fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
606        self.lookahead(1);
607
608        while is_blank(self.ch()) {
609            self.skip();
610            self.lookahead(1);
611        }
612
613        let major = self.scan_version_directive_number(mark)?;
614
615        if self.ch() != '.' {
616            return Err(ScanError::new(
617                *mark,
618                "while scanning a YAML directive, did not find expected digit or '.' character",
619            ));
620        }
621
622        self.skip();
623
624        let minor = self.scan_version_directive_number(mark)?;
625
626        Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
627    }
628
629    fn scan_directive_name(&mut self) -> Result<String, ScanError> {
630        let start_mark = self.mark;
631        let mut string = String::new();
632        self.lookahead(1);
633        while is_alpha(self.ch()) {
634            string.push(self.ch());
635            self.skip();
636            self.lookahead(1);
637        }
638
639        if string.is_empty() {
640            return Err(ScanError::new(
641                start_mark,
642                "while scanning a directive, could not find expected directive name",
643            ));
644        }
645
646        if !is_blankz(self.ch()) {
647            return Err(ScanError::new(
648                start_mark,
649                "while scanning a directive, found unexpected non-alphabetical character",
650            ));
651        }
652
653        Ok(string)
654    }
655
656    fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
657        let mut val = 0u32;
658        let mut length = 0usize;
659        self.lookahead(1);
660        while is_digit(self.ch()) {
661            if length + 1 > 9 {
662                return Err(ScanError::new(
663                    *mark,
664                    "while scanning a YAML directive, found extremely long version number",
665                ));
666            }
667            length += 1;
668            val = val * 10 + ((self.ch() as u32) - ('0' as u32));
669            self.skip();
670            self.lookahead(1);
671        }
672
673        if length == 0 {
674            return Err(ScanError::new(
675                *mark,
676                "while scanning a YAML directive, did not find expected version number",
677            ));
678        }
679
680        Ok(val)
681    }
682
683    fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
684        self.lookahead(1);
685        /* Eat whitespaces. */
686        while is_blank(self.ch()) {
687            self.skip();
688            self.lookahead(1);
689        }
690        let handle = self.scan_tag_handle(true, mark)?;
691
692        self.lookahead(1);
693        /* Eat whitespaces. */
694        while is_blank(self.ch()) {
695            self.skip();
696            self.lookahead(1);
697        }
698
699        let is_secondary = handle == "!!";
700        let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;
701
702        self.lookahead(1);
703
704        if is_blankz(self.ch()) {
705            Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
706        } else {
707            Err(ScanError::new(
708                *mark,
709                "while scanning TAG, did not find expected whitespace or line break",
710            ))
711        }
712    }
713
714    fn fetch_tag(&mut self) -> ScanResult {
715        self.save_simple_key()?;
716        self.disallow_simple_key();
717
718        let tok = self.scan_tag()?;
719        self.tokens.push_back(tok);
720        Ok(())
721    }
722
723    fn scan_tag(&mut self) -> Result<Token, ScanError> {
724        let start_mark = self.mark;
725        let mut handle = String::new();
726        let mut suffix;
727        let mut secondary = false;
728
729        // Check if the tag is in the canonical form (verbatim).
730        self.lookahead(2);
731
732        if self.buffer[1] == '<' {
733            // Eat '!<'
734            self.skip();
735            self.skip();
736            suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;
737
738            if self.ch() != '>' {
739                return Err(ScanError::new(
740                    start_mark,
741                    "while scanning a tag, did not find the expected '>'",
742                ));
743            }
744
745            self.skip();
746        } else {
747            // The tag has either the '!suffix' or the '!handle!suffix'
748            handle = self.scan_tag_handle(false, &start_mark)?;
749            // Check if it is, indeed, handle.
750            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
751                if handle == "!!" {
752                    secondary = true;
753                }
754                suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
755            } else {
756                suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
757                handle = "!".to_owned();
758                // A special case: the '!' tag.  Set the handle to '' and the
759                // suffix to '!'.
760                if suffix.is_empty() {
761                    handle.clear();
762                    suffix = "!".to_owned();
763                }
764            }
765        }
766
767        self.lookahead(1);
768        if is_blankz(self.ch()) {
769            // XXX: ex 7.2, an empty scalar can follow a secondary tag
770            Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
771        } else {
772            Err(ScanError::new(
773                start_mark,
774                "while scanning a tag, did not find expected whitespace or line break",
775            ))
776        }
777    }
778
779    fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
780        let mut string = String::new();
781        self.lookahead(1);
782        if self.ch() != '!' {
783            return Err(ScanError::new(
784                *mark,
785                "while scanning a tag, did not find expected '!'",
786            ));
787        }
788
789        string.push(self.ch());
790        self.skip();
791
792        self.lookahead(1);
793        while is_alpha(self.ch()) {
794            string.push(self.ch());
795            self.skip();
796            self.lookahead(1);
797        }
798
799        // Check if the trailing character is '!' and copy it.
800        if self.ch() == '!' {
801            string.push(self.ch());
802            self.skip();
803        } else if directive && string != "!" {
804            // It's either the '!' tag or not really a tag handle.  If it's a %TAG
805            // directive, it's an error.  If it's a tag token, it must be a part of
806            // URI.
807            return Err(ScanError::new(
808                *mark,
809                "while parsing a tag directive, did not find expected '!'",
810            ));
811        }
812        Ok(string)
813    }
814
815    fn scan_tag_uri(
816        &mut self,
817        directive: bool,
818        _is_secondary: bool,
819        head: &str,
820        mark: &Marker,
821    ) -> Result<String, ScanError> {
822        let mut length = head.len();
823        let mut string = String::new();
824
825        // Copy the head if needed.
826        // Note that we don't copy the leading '!' character.
827        if length > 1 {
828            string.extend(head.chars().skip(1));
829        }
830
831        self.lookahead(1);
832        /*
833         * The set of characters that may appear in URI is as follows:
834         *
835         *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
836         *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
837         *      '%'.
838         */
839        while match self.ch() {
840            ';' | '/' | '?' | ':' | '@' | '&' => true,
841            '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
842            '%' => true,
843            c if is_alpha(c) => true,
844            _ => false,
845        } {
846            // Check if it is a URI-escape sequence.
847            if self.ch() == '%' {
848                string.push(self.scan_uri_escapes(directive, mark)?);
849            } else {
850                string.push(self.ch());
851                self.skip();
852            }
853
854            length += 1;
855            self.lookahead(1);
856        }
857
858        if length == 0 {
859            return Err(ScanError::new(
860                *mark,
861                "while parsing a tag, did not find expected tag URI",
862            ));
863        }
864
865        Ok(string)
866    }
867
868    fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
869        let mut width = 0usize;
870        let mut code = 0u32;
871        loop {
872            self.lookahead(3);
873
874            if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
875                return Err(ScanError::new(
876                    *mark,
877                    "while parsing a tag, did not find URI escaped octet",
878                ));
879            }
880
881            let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
882            if width == 0 {
883                width = match octet {
884                    _ if octet & 0x80 == 0x00 => 1,
885                    _ if octet & 0xE0 == 0xC0 => 2,
886                    _ if octet & 0xF0 == 0xE0 => 3,
887                    _ if octet & 0xF8 == 0xF0 => 4,
888                    _ => {
889                        return Err(ScanError::new(
890                            *mark,
891                            "while parsing a tag, found an incorrect leading UTF-8 octet",
892                        ));
893                    }
894                };
895                code = octet;
896            } else {
897                if octet & 0xc0 != 0x80 {
898                    return Err(ScanError::new(
899                        *mark,
900                        "while parsing a tag, found an incorrect trailing UTF-8 octet",
901                    ));
902                }
903                code = (code << 8) + octet;
904            }
905
906            self.skip();
907            self.skip();
908            self.skip();
909
910            width -= 1;
911            if width == 0 {
912                break;
913            }
914        }
915
916        match char::from_u32(code) {
917            Some(ch) => Ok(ch),
918            None => Err(ScanError::new(
919                *mark,
920                "while parsing a tag, found an invalid UTF-8 codepoint",
921            )),
922        }
923    }
924
925    fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
926        self.save_simple_key()?;
927        self.disallow_simple_key();
928
929        let tok = self.scan_anchor(alias)?;
930
931        self.tokens.push_back(tok);
932
933        Ok(())
934    }
935
936    fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
937        let mut string = String::new();
938        let start_mark = self.mark;
939
940        self.skip();
941        self.lookahead(1);
942        while is_alpha(self.ch()) {
943            string.push(self.ch());
944            self.skip();
945            self.lookahead(1);
946        }
947
948        if string.is_empty()
949            || match self.ch() {
950                c if is_blankz(c) => false,
951                '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
952                _ => true,
953            }
954        {
955            return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
956        }
957
958        if alias {
959            Ok(Token(start_mark, TokenType::Alias(string)))
960        } else {
961            Ok(Token(start_mark, TokenType::Anchor(string)))
962        }
963    }
964
965    fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
966        // The indicators '[' and '{' may start a simple key.
967        self.save_simple_key()?;
968
969        self.increase_flow_level()?;
970
971        self.allow_simple_key();
972
973        let start_mark = self.mark;
974        self.skip();
975
976        self.tokens.push_back(Token(start_mark, tok));
977        Ok(())
978    }
979
980    fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
981        self.remove_simple_key()?;
982        self.decrease_flow_level();
983
984        self.disallow_simple_key();
985
986        let start_mark = self.mark;
987        self.skip();
988
989        self.tokens.push_back(Token(start_mark, tok));
990        Ok(())
991    }
992
993    fn fetch_flow_entry(&mut self) -> ScanResult {
994        self.remove_simple_key()?;
995        self.allow_simple_key();
996
997        let start_mark = self.mark;
998        self.skip();
999
1000        self.tokens
1001            .push_back(Token(start_mark, TokenType::FlowEntry));
1002        Ok(())
1003    }
1004
1005    fn increase_flow_level(&mut self) -> ScanResult {
1006        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1007        self.flow_level = self
1008            .flow_level
1009            .checked_add(1)
1010            .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
1011        Ok(())
1012    }
1013    fn decrease_flow_level(&mut self) {
1014        if self.flow_level > 0 {
1015            self.flow_level -= 1;
1016            self.simple_keys.pop().unwrap();
1017        }
1018    }
1019
1020    fn fetch_block_entry(&mut self) -> ScanResult {
1021        if self.flow_level == 0 {
1022            // Check if we are allowed to start a new entry.
1023            if !self.simple_key_allowed {
1024                return Err(ScanError::new(
1025                    self.mark,
1026                    "block sequence entries are not allowed in this context",
1027                ));
1028            }
1029
1030            let mark = self.mark;
1031            // generate BLOCK-SEQUENCE-START if indented
1032            self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1033        } else {
1034            // - * only allowed in block
1035            return Err(ScanError::new(
1036                self.mark,
1037                r#""-" is only valid inside a block"#,
1038            ));
1039        }
1040        self.remove_simple_key()?;
1041        self.allow_simple_key();
1042
1043        let start_mark = self.mark;
1044        self.skip();
1045
1046        self.tokens
1047            .push_back(Token(start_mark, TokenType::BlockEntry));
1048        Ok(())
1049    }
1050
1051    fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1052        self.unroll_indent(-1);
1053        self.remove_simple_key()?;
1054        self.disallow_simple_key();
1055
1056        let mark = self.mark;
1057
1058        self.skip();
1059        self.skip();
1060        self.skip();
1061
1062        self.tokens.push_back(Token(mark, t));
1063        Ok(())
1064    }
1065
1066    fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1067        self.save_simple_key()?;
1068        self.allow_simple_key();
1069        let tok = self.scan_block_scalar(literal)?;
1070
1071        self.tokens.push_back(tok);
1072        Ok(())
1073    }
1074
1075    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
1076        let start_mark = self.mark;
1077        let mut chomping: i32 = 0;
1078        let mut increment: usize = 0;
1079        let mut indent: usize = 0;
1080        let mut trailing_blank: bool;
1081        let mut leading_blank: bool = false;
1082
1083        let mut string = String::new();
1084        let mut leading_break = String::new();
1085        let mut trailing_breaks = String::new();
1086
1087        // skip '|' or '>'
1088        self.skip();
1089        self.lookahead(1);
1090
1091        if self.ch() == '+' || self.ch() == '-' {
1092            if self.ch() == '+' {
1093                chomping = 1;
1094            } else {
1095                chomping = -1;
1096            }
1097            self.skip();
1098            self.lookahead(1);
1099            if is_digit(self.ch()) {
1100                if self.ch() == '0' {
1101                    return Err(ScanError::new(
1102                        start_mark,
1103                        "while scanning a block scalar, found an indentation indicator equal to 0",
1104                    ));
1105                }
1106                increment = (self.ch() as usize) - ('0' as usize);
1107                self.skip();
1108            }
1109        } else if is_digit(self.ch()) {
1110            if self.ch() == '0' {
1111                return Err(ScanError::new(
1112                    start_mark,
1113                    "while scanning a block scalar, found an indentation indicator equal to 0",
1114                ));
1115            }
1116
1117            increment = (self.ch() as usize) - ('0' as usize);
1118            self.skip();
1119            self.lookahead(1);
1120            if self.ch() == '+' || self.ch() == '-' {
1121                if self.ch() == '+' {
1122                    chomping = 1;
1123                } else {
1124                    chomping = -1;
1125                }
1126                self.skip();
1127            }
1128        }
1129
1130        // Eat whitespaces and comments to the end of the line.
1131        self.lookahead(1);
1132
1133        while is_blank(self.ch()) {
1134            self.skip();
1135            self.lookahead(1);
1136        }
1137
1138        if self.ch() == '#' {
1139            while !is_breakz(self.ch()) {
1140                self.skip();
1141                self.lookahead(1);
1142            }
1143        }
1144
1145        // Check if we are at the end of the line.
1146        if !is_breakz(self.ch()) {
1147            return Err(ScanError::new(
1148                start_mark,
1149                "while scanning a block scalar, did not find expected comment or line break",
1150            ));
1151        }
1152
1153        if is_break(self.ch()) {
1154            self.lookahead(2);
1155            self.skip_line();
1156        }
1157
1158        if increment > 0 {
1159            indent = if self.indent >= 0 {
1160                (self.indent + increment as isize) as usize
1161            } else {
1162                increment
1163            }
1164        }
1165        // Scan the leading line breaks and determine the indentation level if needed.
1166        self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1167
1168        self.lookahead(1);
1169
1170        let start_mark = self.mark;
1171
1172        while self.mark.col == indent && !is_z(self.ch()) {
1173            // We are at the beginning of a non-empty line.
1174            trailing_blank = is_blank(self.ch());
1175            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1176                if trailing_breaks.is_empty() {
1177                    string.push(' ');
1178                }
1179                leading_break.clear();
1180            } else {
1181                string.push_str(&leading_break);
1182                leading_break.clear();
1183            }
1184
1185            string.push_str(&trailing_breaks);
1186            trailing_breaks.clear();
1187
1188            leading_blank = is_blank(self.ch());
1189
1190            while !is_breakz(self.ch()) {
1191                string.push(self.ch());
1192                self.skip();
1193                self.lookahead(1);
1194            }
1195            // break on EOF
1196            if is_z(self.ch()) {
1197                break;
1198            }
1199
1200            self.lookahead(2);
1201            self.read_break(&mut leading_break);
1202
1203            // Eat the following indentation spaces and line breaks.
1204            self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1205        }
1206
1207        // Chomp the tail.
1208        if chomping != -1 {
1209            string.push_str(&leading_break);
1210        }
1211
1212        if chomping == 1 {
1213            string.push_str(&trailing_breaks);
1214        }
1215
1216        if literal {
1217            Ok(Token(
1218                start_mark,
1219                TokenType::Scalar(TScalarStyle::Literal, string),
1220            ))
1221        } else {
1222            Ok(Token(
1223                start_mark,
1224                TokenType::Scalar(TScalarStyle::Foled, string),
1225            ))
1226        }
1227    }
1228
1229    fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1230        let mut max_indent = 0;
1231        loop {
1232            self.lookahead(1);
1233            while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
1234                self.skip();
1235                self.lookahead(1);
1236            }
1237
1238            if self.mark.col > max_indent {
1239                max_indent = self.mark.col;
1240            }
1241
1242            // Check for a tab character messing the indentation.
1243            if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
1244                return Err(ScanError::new(self.mark,
1245                        "while scanning a block scalar, found a tab character where an indentation space is expected"));
1246            }
1247
1248            if !is_break(self.ch()) {
1249                break;
1250            }
1251
1252            self.lookahead(2);
1253            // Consume the line break.
1254            self.read_break(breaks);
1255        }
1256
1257        if *indent == 0 {
1258            *indent = max_indent;
1259            if *indent < (self.indent + 1) as usize {
1260                *indent = (self.indent + 1) as usize;
1261            }
1262            if *indent < 1 {
1263                *indent = 1;
1264            }
1265        }
1266        Ok(())
1267    }
1268
1269    fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1270        self.save_simple_key()?;
1271        self.disallow_simple_key();
1272
1273        let tok = self.scan_flow_scalar(single)?;
1274
1275        // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
1276        // YAML allows the following value to be specified adjacent to the “:”.
1277        self.adjacent_value_allowed_at = self.mark.index;
1278
1279        self.tokens.push_back(tok);
1280        Ok(())
1281    }
1282
1283    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1284        let start_mark = self.mark;
1285
1286        let mut string = String::new();
1287        let mut leading_break = String::new();
1288        let mut trailing_breaks = String::new();
1289        let mut whitespaces = String::new();
1290        let mut leading_blanks;
1291
1292        /* Eat the left quote. */
1293        self.skip();
1294
1295        loop {
1296            /* Check for a document indicator. */
1297            self.lookahead(4);
1298
1299            if self.mark.col == 0
1300                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1301                    || ((self.buffer[0] == '.')
1302                        && (self.buffer[1] == '.')
1303                        && (self.buffer[2] == '.')))
1304                && is_blankz(self.buffer[3])
1305            {
1306                return Err(ScanError::new(
1307                    start_mark,
1308                    "while scanning a quoted scalar, found unexpected document indicator",
1309                ));
1310            }
1311
1312            if is_z(self.ch()) {
1313                return Err(ScanError::new(
1314                    start_mark,
1315                    "while scanning a quoted scalar, found unexpected end of stream",
1316                ));
1317            }
1318
1319            self.lookahead(2);
1320
1321            leading_blanks = false;
1322            // Consume non-blank characters.
1323
1324            while !is_blankz(self.ch()) {
1325                match self.ch() {
1326                    // Check for an escaped single quote.
1327                    '\'' if self.buffer[1] == '\'' && single => {
1328                        string.push('\'');
1329                        self.skip();
1330                        self.skip();
1331                    }
1332                    // Check for the right quote.
1333                    '\'' if single => break,
1334                    '"' if !single => break,
1335                    // Check for an escaped line break.
1336                    '\\' if !single && is_break(self.buffer[1]) => {
1337                        self.lookahead(3);
1338                        self.skip();
1339                        self.skip_line();
1340                        leading_blanks = true;
1341                        break;
1342                    }
1343                    // Check for an escape sequence.
1344                    '\\' if !single => {
1345                        let mut code_length = 0usize;
1346                        match self.buffer[1] {
1347                            '0' => string.push('\0'),
1348                            'a' => string.push('\x07'),
1349                            'b' => string.push('\x08'),
1350                            't' | '\t' => string.push('\t'),
1351                            'n' => string.push('\n'),
1352                            'v' => string.push('\x0b'),
1353                            'f' => string.push('\x0c'),
1354                            'r' => string.push('\x0d'),
1355                            'e' => string.push('\x1b'),
1356                            ' ' => string.push('\x20'),
1357                            '"' => string.push('"'),
1358                            '\'' => string.push('\''),
1359                            '\\' => string.push('\\'),
1360                            // NEL (#x85)
1361                            'N' => string.push(char::from_u32(0x85).unwrap()),
1362                            // #xA0
1363                            '_' => string.push(char::from_u32(0xA0).unwrap()),
1364                            // LS (#x2028)
1365                            'L' => string.push(char::from_u32(0x2028).unwrap()),
1366                            // PS (#x2029)
1367                            'P' => string.push(char::from_u32(0x2029).unwrap()),
1368                            'x' => code_length = 2,
1369                            'u' => code_length = 4,
1370                            'U' => code_length = 8,
1371                            _ => {
1372                                return Err(ScanError::new(
1373                                    start_mark,
1374                                    "while parsing a quoted scalar, found unknown escape character",
1375                                ))
1376                            }
1377                        }
1378                        self.skip();
1379                        self.skip();
1380                        // Consume an arbitrary escape code.
1381                        if code_length > 0 {
1382                            self.lookahead(code_length);
1383                            let mut value = 0u32;
1384                            for i in 0..code_length {
1385                                if !is_hex(self.buffer[i]) {
1386                                    return Err(ScanError::new(start_mark,
1387                                        "while parsing a quoted scalar, did not find expected hexadecimal number"));
1388                                }
1389                                value = (value << 4) + as_hex(self.buffer[i]);
1390                            }
1391
1392                            let ch = match char::from_u32(value) {
1393                                Some(v) => v,
1394                                None => {
1395                                    return Err(ScanError::new(start_mark,
1396                                        "while parsing a quoted scalar, found invalid Unicode character escape code"));
1397                                }
1398                            };
1399                            string.push(ch);
1400
1401                            for _ in 0..code_length {
1402                                self.skip();
1403                            }
1404                        }
1405                    }
1406                    c => {
1407                        string.push(c);
1408                        self.skip();
1409                    }
1410                }
1411                self.lookahead(2);
1412            }
1413            self.lookahead(1);
1414            match self.ch() {
1415                '\'' if single => break,
1416                '"' if !single => break,
1417                _ => {}
1418            }
1419
1420            // Consume blank characters.
1421            while is_blank(self.ch()) || is_break(self.ch()) {
1422                if is_blank(self.ch()) {
1423                    // Consume a space or a tab character.
1424                    if leading_blanks {
1425                        self.skip();
1426                    } else {
1427                        whitespaces.push(self.ch());
1428                        self.skip();
1429                    }
1430                } else {
1431                    self.lookahead(2);
1432                    // Check if it is a first line break.
1433                    if leading_blanks {
1434                        self.read_break(&mut trailing_breaks);
1435                    } else {
1436                        whitespaces.clear();
1437                        self.read_break(&mut leading_break);
1438                        leading_blanks = true;
1439                    }
1440                }
1441                self.lookahead(1);
1442            }
1443            // Join the whitespaces or fold line breaks.
1444            if leading_blanks {
1445                if leading_break.is_empty() {
1446                    string.push_str(&leading_break);
1447                    string.push_str(&trailing_breaks);
1448                    trailing_breaks.clear();
1449                    leading_break.clear();
1450                } else {
1451                    if trailing_breaks.is_empty() {
1452                        string.push(' ');
1453                    } else {
1454                        string.push_str(&trailing_breaks);
1455                        trailing_breaks.clear();
1456                    }
1457                    leading_break.clear();
1458                }
1459            } else {
1460                string.push_str(&whitespaces);
1461                whitespaces.clear();
1462            }
1463        } // loop
1464
1465        // Eat the right quote.
1466        self.skip();
1467
1468        if single {
1469            Ok(Token(
1470                start_mark,
1471                TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1472            ))
1473        } else {
1474            Ok(Token(
1475                start_mark,
1476                TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1477            ))
1478        }
1479    }
1480
1481    fn fetch_plain_scalar(&mut self) -> ScanResult {
1482        self.save_simple_key()?;
1483        self.disallow_simple_key();
1484
1485        let tok = self.scan_plain_scalar()?;
1486
1487        self.tokens.push_back(tok);
1488        Ok(())
1489    }
1490
1491    fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
1492        let indent = self.indent + 1;
1493        let start_mark = self.mark;
1494
1495        let mut string = String::new();
1496        let mut leading_break = String::new();
1497        let mut trailing_breaks = String::new();
1498        let mut whitespaces = String::new();
1499        let mut leading_blanks = false;
1500
1501        loop {
1502            /* Check for a document indicator. */
1503            self.lookahead(4);
1504
1505            if self.mark.col == 0
1506                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1507                    || ((self.buffer[0] == '.')
1508                        && (self.buffer[1] == '.')
1509                        && (self.buffer[2] == '.')))
1510                && is_blankz(self.buffer[3])
1511            {
1512                break;
1513            }
1514
1515            if self.ch() == '#' {
1516                break;
1517            }
1518            while !is_blankz(self.ch()) {
1519                // indicators can end a plain scalar, see 7.3.3. Plain Style
1520                match self.ch() {
1521                    ':' if is_blankz(self.buffer[1])
1522                        || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
1523                    {
1524                        break;
1525                    }
1526                    ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
1527                    _ => {}
1528                }
1529
1530                if leading_blanks || !whitespaces.is_empty() {
1531                    if leading_blanks {
1532                        if leading_break.is_empty() {
1533                            string.push_str(&leading_break);
1534                            string.push_str(&trailing_breaks);
1535                            trailing_breaks.clear();
1536                            leading_break.clear();
1537                        } else {
1538                            if trailing_breaks.is_empty() {
1539                                string.push(' ');
1540                            } else {
1541                                string.push_str(&trailing_breaks);
1542                                trailing_breaks.clear();
1543                            }
1544                            leading_break.clear();
1545                        }
1546                        leading_blanks = false;
1547                    } else {
1548                        string.push_str(&whitespaces);
1549                        whitespaces.clear();
1550                    }
1551                }
1552
1553                string.push(self.ch());
1554                self.skip();
1555                self.lookahead(2);
1556            }
1557            // is the end?
1558            if !(is_blank(self.ch()) || is_break(self.ch())) {
1559                break;
1560            }
1561            self.lookahead(1);
1562
1563            while is_blank(self.ch()) || is_break(self.ch()) {
1564                if is_blank(self.ch()) {
1565                    if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
1566                        return Err(ScanError::new(
1567                            start_mark,
1568                            "while scanning a plain scalar, found a tab",
1569                        ));
1570                    }
1571
1572                    if leading_blanks {
1573                        self.skip();
1574                    } else {
1575                        whitespaces.push(self.ch());
1576                        self.skip();
1577                    }
1578                } else {
1579                    self.lookahead(2);
1580                    // Check if it is a first line break
1581                    if leading_blanks {
1582                        self.read_break(&mut trailing_breaks);
1583                    } else {
1584                        whitespaces.clear();
1585                        self.read_break(&mut leading_break);
1586                        leading_blanks = true;
1587                    }
1588                }
1589                self.lookahead(1);
1590            }
1591
1592            // check indentation level
1593            if self.flow_level == 0 && (self.mark.col as isize) < indent {
1594                break;
1595            }
1596        }
1597
1598        if leading_blanks {
1599            self.allow_simple_key();
1600        }
1601
1602        Ok(Token(
1603            start_mark,
1604            TokenType::Scalar(TScalarStyle::Plain, string),
1605        ))
1606    }
1607
1608    fn fetch_key(&mut self) -> ScanResult {
1609        let start_mark = self.mark;
1610        if self.flow_level == 0 {
1611            // Check if we are allowed to start a new key (not necessarily simple).
1612            if !self.simple_key_allowed {
1613                return Err(ScanError::new(
1614                    self.mark,
1615                    "mapping keys are not allowed in this context",
1616                ));
1617            }
1618            self.roll_indent(
1619                start_mark.col,
1620                None,
1621                TokenType::BlockMappingStart,
1622                start_mark,
1623            );
1624        }
1625
1626        self.remove_simple_key()?;
1627
1628        if self.flow_level == 0 {
1629            self.allow_simple_key();
1630        } else {
1631            self.disallow_simple_key();
1632        }
1633
1634        self.skip();
1635        self.tokens.push_back(Token(start_mark, TokenType::Key));
1636        Ok(())
1637    }
1638
1639    fn fetch_value(&mut self) -> ScanResult {
1640        let sk = self.simple_keys.last().unwrap().clone();
1641        let start_mark = self.mark;
1642        if sk.possible {
1643            // insert simple key
1644            let tok = Token(sk.mark, TokenType::Key);
1645            let tokens_parsed = self.tokens_parsed;
1646            self.insert_token(sk.token_number - tokens_parsed, tok);
1647
1648            // Add the BLOCK-MAPPING-START token if needed.
1649            self.roll_indent(
1650                sk.mark.col,
1651                Some(sk.token_number),
1652                TokenType::BlockMappingStart,
1653                start_mark,
1654            );
1655
1656            self.simple_keys.last_mut().unwrap().possible = false;
1657            self.disallow_simple_key();
1658        } else {
1659            // The ':' indicator follows a complex key.
1660            if self.flow_level == 0 {
1661                if !self.simple_key_allowed {
1662                    return Err(ScanError::new(
1663                        start_mark,
1664                        "mapping values are not allowed in this context",
1665                    ));
1666                }
1667
1668                self.roll_indent(
1669                    start_mark.col,
1670                    None,
1671                    TokenType::BlockMappingStart,
1672                    start_mark,
1673                );
1674            }
1675
1676            if self.flow_level == 0 {
1677                self.allow_simple_key();
1678            } else {
1679                self.disallow_simple_key();
1680            }
1681        }
1682        self.skip();
1683        self.tokens.push_back(Token(start_mark, TokenType::Value));
1684
1685        Ok(())
1686    }
1687
1688    fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1689        if self.flow_level > 0 {
1690            return;
1691        }
1692
1693        if self.indent < col as isize {
1694            self.indents.push(self.indent);
1695            self.indent = col as isize;
1696            let tokens_parsed = self.tokens_parsed;
1697            match number {
1698                Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1699                None => self.tokens.push_back(Token(mark, tok)),
1700            }
1701        }
1702    }
1703
1704    fn unroll_indent(&mut self, col: isize) {
1705        if self.flow_level > 0 {
1706            return;
1707        }
1708        while self.indent > col {
1709            self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1710            self.indent = self.indents.pop().unwrap();
1711        }
1712    }
1713
1714    fn save_simple_key(&mut self) -> Result<(), ScanError> {
1715        let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
1716        if self.simple_key_allowed {
1717            let mut sk = SimpleKey::new(self.mark);
1718            sk.possible = true;
1719            sk.required = required;
1720            sk.token_number = self.tokens_parsed + self.tokens.len();
1721
1722            self.remove_simple_key()?;
1723
1724            self.simple_keys.pop();
1725            self.simple_keys.push(sk);
1726        }
1727        Ok(())
1728    }
1729
1730    fn remove_simple_key(&mut self) -> ScanResult {
1731        let last = self.simple_keys.last_mut().unwrap();
1732        if last.possible && last.required {
1733            return Err(ScanError::new(self.mark, "simple key expected"));
1734        }
1735
1736        last.possible = false;
1737        Ok(())
1738    }
1739}
1740
1741#[cfg(test)]
1742mod test {
1743    use super::TokenType::*;
1744    use super::*;
1745
1746    macro_rules! next {
1747        ($p:ident, $tk:pat) => {{
1748            let tok = $p.next().unwrap();
1749            match tok.1 {
1750                $tk => {}
1751                _ => panic!("unexpected token: {:?}", tok),
1752            }
1753        }};
1754    }
1755
1756    macro_rules! next_scalar {
1757        ($p:ident, $tk:expr, $v:expr) => {{
1758            let tok = $p.next().unwrap();
1759            match tok.1 {
1760                Scalar(style, ref v) => {
1761                    assert_eq!(style, $tk);
1762                    assert_eq!(v, $v);
1763                }
1764                _ => panic!("unexpected token: {:?}", tok),
1765            }
1766        }};
1767    }
1768
1769    macro_rules! end {
1770        ($p:ident) => {{
1771            assert_eq!($p.next(), None);
1772        }};
1773    }
1774    /// test cases in libyaml scanner.c
1775    #[test]
1776    fn test_empty() {
1777        let s = "";
1778        let mut p = Scanner::new(s.chars());
1779        next!(p, StreamStart(..));
1780        next!(p, StreamEnd);
1781        end!(p);
1782    }
1783
1784    #[test]
1785    fn test_scalar() {
1786        let s = "a scalar";
1787        let mut p = Scanner::new(s.chars());
1788        next!(p, StreamStart(..));
1789        next!(p, Scalar(TScalarStyle::Plain, _));
1790        next!(p, StreamEnd);
1791        end!(p);
1792    }
1793
1794    #[test]
1795    fn test_explicit_scalar() {
1796        let s = "---
1797'a scalar'
1798...
1799";
1800        let mut p = Scanner::new(s.chars());
1801        next!(p, StreamStart(..));
1802        next!(p, DocumentStart);
1803        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1804        next!(p, DocumentEnd);
1805        next!(p, StreamEnd);
1806        end!(p);
1807    }
1808
1809    #[test]
1810    fn test_multiple_documents() {
1811        let s = "
1812'a scalar'
1813---
1814'a scalar'
1815---
1816'a scalar'
1817";
1818        let mut p = Scanner::new(s.chars());
1819        next!(p, StreamStart(..));
1820        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1821        next!(p, DocumentStart);
1822        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1823        next!(p, DocumentStart);
1824        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1825        next!(p, StreamEnd);
1826        end!(p);
1827    }
1828
1829    #[test]
1830    fn test_a_flow_sequence() {
1831        let s = "[item 1, item 2, item 3]";
1832        let mut p = Scanner::new(s.chars());
1833        next!(p, StreamStart(..));
1834        next!(p, FlowSequenceStart);
1835        next_scalar!(p, TScalarStyle::Plain, "item 1");
1836        next!(p, FlowEntry);
1837        next!(p, Scalar(TScalarStyle::Plain, _));
1838        next!(p, FlowEntry);
1839        next!(p, Scalar(TScalarStyle::Plain, _));
1840        next!(p, FlowSequenceEnd);
1841        next!(p, StreamEnd);
1842        end!(p);
1843    }
1844
1845    #[test]
1846    fn test_a_flow_mapping() {
1847        let s = "
1848{
1849    a simple key: a value, # Note that the KEY token is produced.
1850    ? a complex key: another value,
1851}
1852";
1853        let mut p = Scanner::new(s.chars());
1854        next!(p, StreamStart(..));
1855        next!(p, FlowMappingStart);
1856        next!(p, Key);
1857        next!(p, Scalar(TScalarStyle::Plain, _));
1858        next!(p, Value);
1859        next!(p, Scalar(TScalarStyle::Plain, _));
1860        next!(p, FlowEntry);
1861        next!(p, Key);
1862        next_scalar!(p, TScalarStyle::Plain, "a complex key");
1863        next!(p, Value);
1864        next!(p, Scalar(TScalarStyle::Plain, _));
1865        next!(p, FlowEntry);
1866        next!(p, FlowMappingEnd);
1867        next!(p, StreamEnd);
1868        end!(p);
1869    }
1870
1871    #[test]
1872    fn test_block_sequences() {
1873        let s = "
1874- item 1
1875- item 2
1876-
1877  - item 3.1
1878  - item 3.2
1879-
1880  key 1: value 1
1881  key 2: value 2
1882";
1883        let mut p = Scanner::new(s.chars());
1884        next!(p, StreamStart(..));
1885        next!(p, BlockSequenceStart);
1886        next!(p, BlockEntry);
1887        next_scalar!(p, TScalarStyle::Plain, "item 1");
1888        next!(p, BlockEntry);
1889        next_scalar!(p, TScalarStyle::Plain, "item 2");
1890        next!(p, BlockEntry);
1891        next!(p, BlockSequenceStart);
1892        next!(p, BlockEntry);
1893        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
1894        next!(p, BlockEntry);
1895        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
1896        next!(p, BlockEnd);
1897        next!(p, BlockEntry);
1898        next!(p, BlockMappingStart);
1899        next!(p, Key);
1900        next_scalar!(p, TScalarStyle::Plain, "key 1");
1901        next!(p, Value);
1902        next_scalar!(p, TScalarStyle::Plain, "value 1");
1903        next!(p, Key);
1904        next_scalar!(p, TScalarStyle::Plain, "key 2");
1905        next!(p, Value);
1906        next_scalar!(p, TScalarStyle::Plain, "value 2");
1907        next!(p, BlockEnd);
1908        next!(p, BlockEnd);
1909        next!(p, StreamEnd);
1910        end!(p);
1911    }
1912
1913    #[test]
1914    fn test_block_mappings() {
1915        let s = "
1916a simple key: a value   # The KEY token is produced here.
1917? a complex key
1918: another value
1919a mapping:
1920  key 1: value 1
1921  key 2: value 2
1922a sequence:
1923  - item 1
1924  - item 2
1925";
1926        let mut p = Scanner::new(s.chars());
1927        next!(p, StreamStart(..));
1928        next!(p, BlockMappingStart);
1929        next!(p, Key);
1930        next!(p, Scalar(_, _));
1931        next!(p, Value);
1932        next!(p, Scalar(_, _));
1933        next!(p, Key);
1934        next!(p, Scalar(_, _));
1935        next!(p, Value);
1936        next!(p, Scalar(_, _));
1937        next!(p, Key);
1938        next!(p, Scalar(_, _));
1939        next!(p, Value); // libyaml comment seems to be wrong
1940        next!(p, BlockMappingStart);
1941        next!(p, Key);
1942        next!(p, Scalar(_, _));
1943        next!(p, Value);
1944        next!(p, Scalar(_, _));
1945        next!(p, Key);
1946        next!(p, Scalar(_, _));
1947        next!(p, Value);
1948        next!(p, Scalar(_, _));
1949        next!(p, BlockEnd);
1950        next!(p, Key);
1951        next!(p, Scalar(_, _));
1952        next!(p, Value);
1953        next!(p, BlockSequenceStart);
1954        next!(p, BlockEntry);
1955        next!(p, Scalar(_, _));
1956        next!(p, BlockEntry);
1957        next!(p, Scalar(_, _));
1958        next!(p, BlockEnd);
1959        next!(p, BlockEnd);
1960        next!(p, StreamEnd);
1961        end!(p);
1962    }
1963
1964    #[test]
1965    fn test_no_block_sequence_start() {
1966        let s = "
1967key:
1968- item 1
1969- item 2
1970";
1971        let mut p = Scanner::new(s.chars());
1972        next!(p, StreamStart(..));
1973        next!(p, BlockMappingStart);
1974        next!(p, Key);
1975        next_scalar!(p, TScalarStyle::Plain, "key");
1976        next!(p, Value);
1977        next!(p, BlockEntry);
1978        next_scalar!(p, TScalarStyle::Plain, "item 1");
1979        next!(p, BlockEntry);
1980        next_scalar!(p, TScalarStyle::Plain, "item 2");
1981        next!(p, BlockEnd);
1982        next!(p, StreamEnd);
1983        end!(p);
1984    }
1985
1986    #[test]
1987    fn test_collections_in_sequence() {
1988        let s = "
1989- - item 1
1990  - item 2
1991- key 1: value 1
1992  key 2: value 2
1993- ? complex key
1994  : complex value
1995";
1996        let mut p = Scanner::new(s.chars());
1997        next!(p, StreamStart(..));
1998        next!(p, BlockSequenceStart);
1999        next!(p, BlockEntry);
2000        next!(p, BlockSequenceStart);
2001        next!(p, BlockEntry);
2002        next_scalar!(p, TScalarStyle::Plain, "item 1");
2003        next!(p, BlockEntry);
2004        next_scalar!(p, TScalarStyle::Plain, "item 2");
2005        next!(p, BlockEnd);
2006        next!(p, BlockEntry);
2007        next!(p, BlockMappingStart);
2008        next!(p, Key);
2009        next_scalar!(p, TScalarStyle::Plain, "key 1");
2010        next!(p, Value);
2011        next_scalar!(p, TScalarStyle::Plain, "value 1");
2012        next!(p, Key);
2013        next_scalar!(p, TScalarStyle::Plain, "key 2");
2014        next!(p, Value);
2015        next_scalar!(p, TScalarStyle::Plain, "value 2");
2016        next!(p, BlockEnd);
2017        next!(p, BlockEntry);
2018        next!(p, BlockMappingStart);
2019        next!(p, Key);
2020        next_scalar!(p, TScalarStyle::Plain, "complex key");
2021        next!(p, Value);
2022        next_scalar!(p, TScalarStyle::Plain, "complex value");
2023        next!(p, BlockEnd);
2024        next!(p, BlockEnd);
2025        next!(p, StreamEnd);
2026        end!(p);
2027    }
2028
2029    #[test]
2030    fn test_collections_in_mapping() {
2031        let s = "
2032? a sequence
2033: - item 1
2034  - item 2
2035? a mapping
2036: key 1: value 1
2037  key 2: value 2
2038";
2039        let mut p = Scanner::new(s.chars());
2040        next!(p, StreamStart(..));
2041        next!(p, BlockMappingStart);
2042        next!(p, Key);
2043        next_scalar!(p, TScalarStyle::Plain, "a sequence");
2044        next!(p, Value);
2045        next!(p, BlockSequenceStart);
2046        next!(p, BlockEntry);
2047        next_scalar!(p, TScalarStyle::Plain, "item 1");
2048        next!(p, BlockEntry);
2049        next_scalar!(p, TScalarStyle::Plain, "item 2");
2050        next!(p, BlockEnd);
2051        next!(p, Key);
2052        next_scalar!(p, TScalarStyle::Plain, "a mapping");
2053        next!(p, Value);
2054        next!(p, BlockMappingStart);
2055        next!(p, Key);
2056        next_scalar!(p, TScalarStyle::Plain, "key 1");
2057        next!(p, Value);
2058        next_scalar!(p, TScalarStyle::Plain, "value 1");
2059        next!(p, Key);
2060        next_scalar!(p, TScalarStyle::Plain, "key 2");
2061        next!(p, Value);
2062        next_scalar!(p, TScalarStyle::Plain, "value 2");
2063        next!(p, BlockEnd);
2064        next!(p, BlockEnd);
2065        next!(p, StreamEnd);
2066        end!(p);
2067    }
2068
2069    #[test]
2070    fn test_spec_ex7_3() {
2071        let s = "
2072{
2073    ? foo :,
2074    : bar,
2075}
2076";
2077        let mut p = Scanner::new(s.chars());
2078        next!(p, StreamStart(..));
2079        next!(p, FlowMappingStart);
2080        next!(p, Key);
2081        next_scalar!(p, TScalarStyle::Plain, "foo");
2082        next!(p, Value);
2083        next!(p, FlowEntry);
2084        next!(p, Value);
2085        next_scalar!(p, TScalarStyle::Plain, "bar");
2086        next!(p, FlowEntry);
2087        next!(p, FlowMappingEnd);
2088        next!(p, StreamEnd);
2089        end!(p);
2090    }
2091
2092    #[test]
2093    fn test_plain_scalar_starting_with_indicators_in_flow() {
2094        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
2095        // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
2096        // character if followed by a non-space “safe” character, as this causes no ambiguity."
2097
2098        let s = "{a: :b}";
2099        let mut p = Scanner::new(s.chars());
2100        next!(p, StreamStart(..));
2101        next!(p, FlowMappingStart);
2102        next!(p, Key);
2103        next_scalar!(p, TScalarStyle::Plain, "a");
2104        next!(p, Value);
2105        next_scalar!(p, TScalarStyle::Plain, ":b");
2106        next!(p, FlowMappingEnd);
2107        next!(p, StreamEnd);
2108        end!(p);
2109
2110        let s = "{a: ?b}";
2111        let mut p = Scanner::new(s.chars());
2112        next!(p, StreamStart(..));
2113        next!(p, FlowMappingStart);
2114        next!(p, Key);
2115        next_scalar!(p, TScalarStyle::Plain, "a");
2116        next!(p, Value);
2117        next_scalar!(p, TScalarStyle::Plain, "?b");
2118        next!(p, FlowMappingEnd);
2119        next!(p, StreamEnd);
2120        end!(p);
2121    }
2122
2123    #[test]
2124    fn test_plain_scalar_starting_with_indicators_in_block() {
2125        let s = ":a";
2126        let mut p = Scanner::new(s.chars());
2127        next!(p, StreamStart(..));
2128        next_scalar!(p, TScalarStyle::Plain, ":a");
2129        next!(p, StreamEnd);
2130        end!(p);
2131
2132        let s = "?a";
2133        let mut p = Scanner::new(s.chars());
2134        next!(p, StreamStart(..));
2135        next_scalar!(p, TScalarStyle::Plain, "?a");
2136        next!(p, StreamEnd);
2137        end!(p);
2138    }
2139
2140    #[test]
2141    fn test_plain_scalar_containing_indicators_in_block() {
2142        let s = "a:,b";
2143        let mut p = Scanner::new(s.chars());
2144        next!(p, StreamStart(..));
2145        next_scalar!(p, TScalarStyle::Plain, "a:,b");
2146        next!(p, StreamEnd);
2147        end!(p);
2148
2149        let s = ":,b";
2150        let mut p = Scanner::new(s.chars());
2151        next!(p, StreamStart(..));
2152        next_scalar!(p, TScalarStyle::Plain, ":,b");
2153        next!(p, StreamEnd);
2154        end!(p);
2155    }
2156
2157    #[test]
2158    fn test_scanner_cr() {
2159        let s = "---\r\n- tok1\r\n- tok2";
2160        let mut p = Scanner::new(s.chars());
2161        next!(p, StreamStart(..));
2162        next!(p, DocumentStart);
2163        next!(p, BlockSequenceStart);
2164        next!(p, BlockEntry);
2165        next_scalar!(p, TScalarStyle::Plain, "tok1");
2166        next!(p, BlockEntry);
2167        next_scalar!(p, TScalarStyle::Plain, "tok2");
2168        next!(p, BlockEnd);
2169        next!(p, StreamEnd);
2170        end!(p);
2171    }
2172
2173    #[test]
2174    fn test_uri() {
2175        // TODO
2176    }
2177
2178    #[test]
2179    fn test_uri_escapes() {
2180        // TODO
2181    }
2182}