1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use std::{char, collections::VecDeque, error::Error, fmt};
13
14use arraydeque::ArrayDeque;
15
16use crate::char_traits::{
17 as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit,
18 is_flow, is_hex, is_tag_char, is_uri_char, is_z,
19};
20
/// The character encoding reported in the `StreamStart` token.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8. This is the only encoding the enum can express.
    Utf8,
}
27
/// The presentation style of a scalar, carried by `TokenType::Scalar`.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    /// A scalar written without quotes or a block indicator.
    Plain,
    /// A scalar enclosed in single quotes (`'`).
    SingleQuoted,
    /// A scalar enclosed in double quotes (`"`).
    DoubleQuoted,

    /// A block scalar introduced by `|`.
    Literal,
    /// A block scalar introduced by `>`.
    Folded,
}
43
/// A position in the scanned input.
///
/// The scanner starts at index 0, line 1, column 0, so lines are 1-based while the
/// index and the column are 0-based.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    /// Offset from the start of the input; the scanner advances it by one per character.
    index: usize,
    /// Line number (the first line is line 1).
    line: usize,
    /// Column within the line (the first column is column 0).
    col: usize,
}

impl Marker {
    /// Builds a marker from its three coordinates.
    fn new(index: usize, line: usize, col: usize) -> Self {
        Self { index, line, col }
    }

    /// Returns the offset from the start of the input.
    #[must_use]
    pub fn index(&self) -> usize {
        let Self { index, .. } = *self;
        index
    }

    /// Returns the line number.
    #[must_use]
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Returns the column number.
    #[must_use]
    pub fn col(&self) -> usize {
        let Self { col, .. } = *self;
        col
    }
}
78
79#[derive(Clone, PartialEq, Debug, Eq)]
81pub struct ScanError {
82 mark: Marker,
84 info: String,
86}
87
88impl ScanError {
89 #[must_use]
91 pub fn new(loc: Marker, info: &str) -> ScanError {
92 ScanError {
93 mark: loc,
94 info: info.to_owned(),
95 }
96 }
97
98 #[must_use]
100 pub fn new_string(loc: Marker, info: String) -> ScanError {
101 ScanError { mark: loc, info }
102 }
103
104 #[must_use]
106 pub fn marker(&self) -> &Marker {
107 &self.mark
108 }
109
110 #[must_use]
112 pub fn info(&self) -> &str {
113 self.info.as_ref()
114 }
115}
116
117impl Error for ScanError {
118 fn description(&self) -> &str {
119 self.info.as_ref()
120 }
121
122 fn cause(&self) -> Option<&dyn Error> {
123 None
124 }
125}
126
127impl fmt::Display for ScanError {
128 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
130 write!(
131 formatter,
132 "{} at byte {} line {} column {}",
133 self.info,
134 self.mark.index,
135 self.mark.line,
136 self.mark.col + 1,
137 )
138 }
139}
140
/// The kind of a token produced by the scanner, with its payload if any.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType {
    /// The first token of every stream, with the stream's encoding.
    StreamStart(TEncoding),
    /// The last token of every stream; nothing is produced after it.
    StreamEnd,
    /// A `%YAML` directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A `%TAG` directive.
    ///
    /// Directives with any other name are also reported as this variant, with two
    /// empty strings.
    TagDirective(
        /// The tag handle.
        String,
        /// The tag prefix.
        String,
    ),
    /// A `---` document start marker.
    DocumentStart,
    /// A `...` document end marker.
    DocumentEnd,
    /// The start of a block sequence.
    BlockSequenceStart,
    /// The start of a block mapping.
    BlockMappingStart,
    /// The end of a block collection.
    BlockEnd,
    /// A `[` opening a flow sequence.
    FlowSequenceStart,
    /// A `]` closing a flow sequence.
    FlowSequenceEnd,
    /// A `{` opening a flow mapping.
    FlowMappingStart,
    /// A `}` closing a flow mapping.
    FlowMappingEnd,
    /// A `-` introducing a block sequence entry.
    BlockEntry,
    /// A `,` separating flow collection entries.
    FlowEntry,
    /// A mapping key.
    Key,
    /// A mapping value.
    Value,
    /// An alias (`*name`), with its name.
    Alias(String),
    /// An anchor (`&name`), with its name.
    Anchor(String),
    /// A tag.
    Tag(
        /// The tag handle (empty for verbatim tags).
        String,
        /// The tag suffix.
        String,
    ),
    /// A scalar, with its style and its contents.
    Scalar(TScalarStyle, String),
}
206
/// A scanned token: the position it is reported at, and its kind.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
210
211#[derive(Clone, PartialEq, Debug, Eq)]
246struct SimpleKey {
247 possible: bool,
260 required: bool,
268 token_number: usize,
274 mark: Marker,
276}
277
278impl SimpleKey {
279 fn new(mark: Marker) -> SimpleKey {
281 SimpleKey {
282 possible: false,
283 required: false,
284 token_number: 0,
285 mark,
286 }
287 }
288}
289
/// An entry of the scanner's indentation stack.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The indentation level of this entry.
    indent: isize,
    /// NOTE(review): presumably whether a `BlockEnd` token must be emitted when this level
    /// is popped — confirm against the code that unrolls indents.
    needs_block_end: bool,
}
314
/// Capacity, in characters, of the scanner's lookahead buffer.
///
/// `Scanner::lookahead` panics if asked to buffer more characters than this.
const BUFFER_LEN: usize = 16;
326
/// A YAML scanner turning a stream of characters into [`Token`]s.
///
/// It is driven either through its [`Iterator`] implementation or through
/// `next_token` / `fetch_more_tokens`.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<T> {
    /// The source of characters.
    rdr: T,
    /// The current position in the input.
    mark: Marker,
    /// Tokens that have been scanned but not yet handed out.
    tokens: VecDeque<Token>,
    /// Characters read from `rdr` but not yet consumed (at most `BUFFER_LEN`).
    buffer: ArrayDeque<char, BUFFER_LEN>,
    /// The error that stopped iteration, if any.
    error: Option<ScanError>,

    /// Whether the `StreamStart` token has been produced.
    stream_start_produced: bool,
    /// Whether the `StreamEnd` token has been handed out.
    stream_end_produced: bool,
    /// Input index at which, inside a flow collection, a `:` is accepted as a value
    /// indicator even when not followed by a blank or a flow indicator.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Stack of simple key candidates: one base entry plus one per open flow level.
    simple_keys: Vec<SimpleKey>,
    /// The current indentation level; `-1` when there is none.
    indent: isize,
    /// The stack of enclosing indentation levels.
    indents: Vec<Indent>,
    /// Nesting depth of flow collections; `0` outside of any flow collection.
    flow_level: u8,
    /// Number of tokens handed out so far.
    tokens_parsed: usize,
    /// Whether `fetch_more_tokens` has run since the last token was handed out.
    token_available: bool,
    /// Whether only whitespace has been consumed so far on the current line.
    leading_whitespace: bool,
    /// Set when a `{` is scanned.
    flow_mapping_started: bool,
    /// NOTE(review): presumably whether an implicit mapping is open inside a flow
    /// sequence — confirm against `end_implicit_mapping`.
    implicit_flow_mapping: bool,
}
395
396impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
397 type Item = Token;
398 fn next(&mut self) -> Option<Token> {
399 if self.error.is_some() {
400 return None;
401 }
402 match self.next_token() {
403 Ok(Some(tok)) => {
404 debug_print!(
405 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
406 tok.1,
407 tok.0
408 );
409 Some(tok)
410 }
411 Ok(tok) => tok,
412 Err(e) => {
413 self.error = Some(e);
414 None
415 }
416 }
417 }
418}
419
/// The result of a scanning step that produces no value of its own.
pub type ScanResult = Result<(), ScanError>;
422
423impl<T: Iterator<Item = char>> Scanner<T> {
424 pub fn new(rdr: T) -> Scanner<T> {
426 Scanner {
427 rdr,
428 buffer: ArrayDeque::new(),
429 mark: Marker::new(0, 1, 0),
430 tokens: VecDeque::new(),
431 error: None,
432
433 stream_start_produced: false,
434 stream_end_produced: false,
435 adjacent_value_allowed_at: 0,
436 simple_key_allowed: true,
437 simple_keys: Vec::new(),
438 indent: -1,
439 indents: Vec::new(),
440 flow_level: 0,
441 tokens_parsed: 0,
442 token_available: false,
443 leading_whitespace: true,
444 flow_mapping_started: false,
445 implicit_flow_mapping: false,
446 }
447 }
448
    /// Returns a copy of the error that stopped iteration, if any.
    ///
    /// The error is only recorded when the scanner is driven through its `Iterator`
    /// implementation.
    #[inline]
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
457
458 #[inline]
462 fn lookahead(&mut self, count: usize) {
463 if self.buffer.len() >= count {
464 return;
465 }
466 for _ in 0..(count - self.buffer.len()) {
467 self.buffer
468 .push_back(self.rdr.next().unwrap_or('\0'))
469 .unwrap();
470 }
471 }
472
473 #[inline]
475 fn skip_blank(&mut self) {
476 self.buffer.pop_front();
477
478 self.mark.index += 1;
479 self.mark.col += 1;
480 }
481
482 #[inline]
484 fn skip_non_blank(&mut self) {
485 self.buffer.pop_front();
486
487 self.mark.index += 1;
488 self.mark.col += 1;
489 self.leading_whitespace = false;
490 }
491
492 #[inline]
494 fn skip_n_non_blank(&mut self, n: usize) {
495 self.buffer.drain(0..n);
496
497 self.mark.index += n;
498 self.mark.col += n;
499 self.leading_whitespace = false;
500 }
501
502 #[inline]
504 fn skip_nl(&mut self) {
505 self.buffer.pop_front();
506
507 self.mark.index += 1;
508 self.mark.col = 0;
509 self.mark.line += 1;
510 self.leading_whitespace = true;
511 }
512
513 #[inline]
515 fn skip_linebreak(&mut self) {
516 if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
517 self.skip_blank();
520 self.skip_nl();
521 } else if is_break(self.buffer[0]) {
522 self.skip_nl();
523 }
524 }
525
    /// Returns the character at the front of the lookahead buffer without consuming it.
    ///
    /// The buffer is indexed directly, so at least one character must already be buffered.
    #[inline]
    fn ch(&self) -> char {
        self.buffer[0]
    }
533
534 #[inline]
539 fn look_ch(&mut self) -> char {
540 self.lookahead(1);
541 self.ch()
542 }
543
544 #[inline]
548 #[must_use]
549 fn raw_read_ch(&mut self) -> char {
550 self.rdr.next().unwrap_or('\0')
551 }
552
553 #[inline]
555 fn ch_is(&self, c: char) -> bool {
556 self.buffer[0] == c
557 }
558
    /// Tells whether the `StreamStart` token has already been produced.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
564
    /// Tells whether the `StreamEnd` token has already been handed out.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
570
    /// Returns the scanner's current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
576
577 #[inline]
584 fn read_break(&mut self, s: &mut String) {
585 let c = self.buffer[0];
586 let nc = self.buffer[1];
587 debug_assert!(is_break(c));
588 if c == '\r' && nc == '\n' {
589 self.skip_blank();
590 }
591 self.skip_nl();
592
593 s.push('\n');
594 }
595
596 fn next_is_document_end(&self) -> bool {
600 assert!(self.buffer.len() >= 4);
601 self.buffer[0] == '.'
602 && self.buffer[1] == '.'
603 && self.buffer[2] == '.'
604 && is_blank_or_breakz(self.buffer[3])
605 }
606
607 #[inline]
611 fn next_is_document_indicator(&self) -> bool {
612 assert!(self.buffer.len() >= 4);
613 self.mark.col == 0
614 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
615 || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && (self.buffer[2] == '.')))
616 && is_blank_or_breakz(self.buffer[3])
617 }
618
619 fn insert_token(&mut self, pos: usize, tok: Token) {
621 let old_len = self.tokens.len();
622 assert!(pos <= old_len);
623 self.tokens.insert(pos, tok);
624 }
625
    /// Records that a simple key may start at the current position.
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
629
    /// Records that a simple key may not start at the current position.
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
633
634 pub fn fetch_next_token(&mut self) -> ScanResult {
638 self.lookahead(1);
639 if !self.stream_start_produced {
642 self.fetch_stream_start();
643 return Ok(());
644 }
645 self.skip_to_next_token()?;
646
647 debug_print!(
648 " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
649 self.mark,
650 self.ch()
651 );
652
653 self.stale_simple_keys()?;
654
655 let mark = self.mark;
656 self.unroll_indent(mark.col as isize);
657
658 self.lookahead(4);
659
660 if is_z(self.ch()) {
661 self.fetch_stream_end()?;
662 return Ok(());
663 }
664
665 if self.mark.col == 0 && self.ch_is('%') {
667 return self.fetch_directive();
668 }
669
670 if self.mark.col == 0
671 && self.buffer[0] == '-'
672 && self.buffer[1] == '-'
673 && self.buffer[2] == '-'
674 && is_blank_or_breakz(self.buffer[3])
675 {
676 self.fetch_document_indicator(TokenType::DocumentStart)?;
677 return Ok(());
678 }
679
680 if self.mark.col == 0
681 && self.buffer[0] == '.'
682 && self.buffer[1] == '.'
683 && self.buffer[2] == '.'
684 && is_blank_or_breakz(self.buffer[3])
685 {
686 self.fetch_document_indicator(TokenType::DocumentEnd)?;
687 self.skip_ws_to_eol(SkipTabs::Yes)?;
688 if !is_breakz(self.ch()) {
689 return Err(ScanError::new(
690 self.mark,
691 "invalid content after document end marker",
692 ));
693 }
694 return Ok(());
695 }
696
697 if (self.mark.col as isize) < self.indent {
698 return Err(ScanError::new(self.mark, "invalid indentation"));
699 }
700
701 let c = self.buffer[0];
702 let nc = self.buffer[1];
703 match c {
704 '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
705 '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
706 ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
707 '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
708 ',' => self.fetch_flow_entry(),
709 '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
710 '?' if is_blank_or_breakz(nc) => self.fetch_key(),
711 ':' if is_blank_or_breakz(nc)
712 || (self.flow_level > 0
713 && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
714 {
715 self.fetch_value()
716 }
717 '*' => self.fetch_anchor(true),
719 '&' => self.fetch_anchor(false),
721 '!' => self.fetch_tag(),
722 '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
724 '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
726 '\'' => self.fetch_flow_scalar(true),
727 '"' => self.fetch_flow_scalar(false),
728 '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
730 ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
731 self.fetch_plain_scalar()
732 }
733 '%' | '@' | '`' => Err(ScanError::new(
734 self.mark,
735 &format!("unexpected character: `{c}'"),
736 )),
737 _ => self.fetch_plain_scalar(),
738 }
739 }
740
741 pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
745 if self.stream_end_produced {
746 return Ok(None);
747 }
748
749 if !self.token_available {
750 self.fetch_more_tokens()?;
751 }
752 let Some(t) = self.tokens.pop_front() else {
753 return Err(ScanError::new(
754 self.mark,
755 "did not find expected next token",
756 ));
757 };
758 self.token_available = false;
759 self.tokens_parsed += 1;
760
761 if let TokenType::StreamEnd = t.1 {
762 self.stream_end_produced = true;
763 }
764 Ok(Some(t))
765 }
766
767 pub fn fetch_more_tokens(&mut self) -> ScanResult {
771 let mut need_more;
772 loop {
773 if self.tokens.is_empty() {
774 need_more = true;
775 } else {
776 need_more = false;
777 self.stale_simple_keys()?;
779 for sk in &self.simple_keys {
781 if sk.possible && sk.token_number == self.tokens_parsed {
782 need_more = true;
783 break;
784 }
785 }
786 }
787
788 if !need_more {
789 break;
790 }
791 self.fetch_next_token()?;
792 }
793 self.token_available = true;
794
795 Ok(())
796 }
797
798 fn stale_simple_keys(&mut self) -> ScanResult {
806 for sk in &mut self.simple_keys {
807 if sk.possible
808 && self.flow_level == 0
810 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
811 {
812 if sk.required {
813 return Err(ScanError::new(self.mark, "simple key expect ':'"));
814 }
815 sk.possible = false;
816 }
817 }
818 Ok(())
819 }
820
821 fn skip_to_next_token(&mut self) -> ScanResult {
827 loop {
828 match self.look_ch() {
830 '\t' if self.is_within_block()
837 && self.leading_whitespace
838 && (self.mark.col as isize) < self.indent =>
839 {
840 self.skip_ws_to_eol(SkipTabs::Yes)?;
841 if !is_breakz(self.ch()) {
843 return Err(ScanError::new(
844 self.mark,
845 "tabs disallowed within this context (block indentation)",
846 ));
847 }
848 }
849 '\t' | ' ' => self.skip_blank(),
850 '\n' | '\r' => {
851 self.lookahead(2);
852 self.skip_linebreak();
853 if self.flow_level == 0 {
854 self.allow_simple_key();
855 }
856 }
857 '#' => {
858 while !is_breakz(self.look_ch()) {
859 self.skip_non_blank();
860 }
861 }
862 _ => break,
863 }
864 }
865 Ok(())
866 }
867
868 fn skip_yaml_whitespace(&mut self) -> ScanResult {
873 let mut need_whitespace = true;
874 loop {
875 match self.look_ch() {
876 ' ' => {
877 self.skip_blank();
878
879 need_whitespace = false;
880 }
881 '\n' | '\r' => {
882 self.lookahead(2);
883 self.skip_linebreak();
884 if self.flow_level == 0 {
885 self.allow_simple_key();
886 }
887 need_whitespace = false;
888 }
889 '#' => {
890 while !is_breakz(self.look_ch()) {
891 self.skip_non_blank();
892 }
893 }
894 _ => break,
895 }
896 }
897
898 if need_whitespace {
899 Err(ScanError::new(self.mark(), "expected whitespace"))
900 } else {
901 Ok(())
902 }
903 }
904
905 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
907 let mut encountered_tab = false;
908 let mut has_yaml_ws = false;
909 loop {
910 match self.look_ch() {
911 ' ' => {
912 has_yaml_ws = true;
913 self.skip_blank();
914 }
915 '\t' if skip_tabs != SkipTabs::No => {
916 encountered_tab = true;
917 self.skip_blank();
918 }
919 '#' if !encountered_tab && !has_yaml_ws => {
921 return Err(ScanError::new(
922 self.mark,
923 "comments must be separated from other tokens by whitespace",
924 ));
925 }
926 '#' => {
927 while !is_breakz(self.look_ch()) {
928 self.skip_non_blank();
929 }
930 }
931 _ => break,
932 }
933 }
934
935 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws))
936 }
937
938 fn fetch_stream_start(&mut self) {
939 let mark = self.mark;
940 self.indent = -1;
941 self.stream_start_produced = true;
942 self.allow_simple_key();
943 self.tokens
944 .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
945 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
946 }
947
948 fn fetch_stream_end(&mut self) -> ScanResult {
949 if self.mark.col != 0 {
951 self.mark.col = 0;
952 self.mark.line += 1;
953 }
954
955 for sk in &mut self.simple_keys {
958 if sk.required && sk.possible {
959 return Err(ScanError::new(self.mark, "simple key expected"));
960 }
961 sk.possible = false;
962 }
963
964 self.unroll_indent(-1);
965 self.remove_simple_key()?;
966 self.disallow_simple_key();
967
968 self.tokens
969 .push_back(Token(self.mark, TokenType::StreamEnd));
970 Ok(())
971 }
972
973 fn fetch_directive(&mut self) -> ScanResult {
974 self.unroll_indent(-1);
975 self.remove_simple_key()?;
976
977 self.disallow_simple_key();
978
979 let tok = self.scan_directive()?;
980 self.tokens.push_back(tok);
981
982 Ok(())
983 }
984
985 fn scan_directive(&mut self) -> Result<Token, ScanError> {
986 let start_mark = self.mark;
987 self.skip_non_blank();
988
989 let name = self.scan_directive_name()?;
990 let tok = match name.as_ref() {
991 "YAML" => self.scan_version_directive_value(&start_mark)?,
992 "TAG" => self.scan_tag_directive_value(&start_mark)?,
993 _ => {
995 while !is_breakz(self.look_ch()) {
997 self.skip_non_blank();
998 }
999 Token(
1001 start_mark,
1002 TokenType::TagDirective(String::new(), String::new()),
1003 )
1004 }
1007 };
1008
1009 self.skip_ws_to_eol(SkipTabs::Yes)?;
1010
1011 if is_breakz(self.ch()) {
1012 self.lookahead(2);
1013 self.skip_linebreak();
1014 Ok(tok)
1015 } else {
1016 Err(ScanError::new(
1017 start_mark,
1018 "while scanning a directive, did not find expected comment or line break",
1019 ))
1020 }
1021 }
1022
1023 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
1024 while is_blank(self.look_ch()) {
1025 self.skip_blank();
1026 }
1027
1028 let major = self.scan_version_directive_number(mark)?;
1029
1030 if self.ch() != '.' {
1031 return Err(ScanError::new(
1032 *mark,
1033 "while scanning a YAML directive, did not find expected digit or '.' character",
1034 ));
1035 }
1036 self.skip_non_blank();
1037
1038 let minor = self.scan_version_directive_number(mark)?;
1039
1040 Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
1041 }
1042
1043 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1044 let start_mark = self.mark;
1045 let mut string = String::new();
1046 while is_alpha(self.look_ch()) {
1047 string.push(self.ch());
1048 self.skip_non_blank();
1049 }
1050
1051 if string.is_empty() {
1052 return Err(ScanError::new(
1053 start_mark,
1054 "while scanning a directive, could not find expected directive name",
1055 ));
1056 }
1057
1058 if !is_blank_or_breakz(self.ch()) {
1059 return Err(ScanError::new(
1060 start_mark,
1061 "while scanning a directive, found unexpected non-alphabetical character",
1062 ));
1063 }
1064
1065 Ok(string)
1066 }
1067
1068 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1069 let mut val = 0u32;
1070 let mut length = 0usize;
1071 while let Some(digit) = self.look_ch().to_digit(10) {
1072 if length + 1 > 9 {
1073 return Err(ScanError::new(
1074 *mark,
1075 "while scanning a YAML directive, found extremely long version number",
1076 ));
1077 }
1078 length += 1;
1079 val = val * 10 + digit;
1080 self.skip_non_blank();
1081 }
1082
1083 if length == 0 {
1084 return Err(ScanError::new(
1085 *mark,
1086 "while scanning a YAML directive, did not find expected version number",
1087 ));
1088 }
1089
1090 Ok(val)
1091 }
1092
1093 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
1094 while is_blank(self.look_ch()) {
1096 self.skip_blank();
1097 }
1098 let handle = self.scan_tag_handle(true, mark)?;
1099
1100 while is_blank(self.look_ch()) {
1102 self.skip_blank();
1103 }
1104
1105 let prefix = self.scan_tag_prefix(mark)?;
1106
1107 self.lookahead(1);
1108
1109 if is_blank_or_breakz(self.ch()) {
1110 Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
1111 } else {
1112 Err(ScanError::new(
1113 *mark,
1114 "while scanning TAG, did not find expected whitespace or line break",
1115 ))
1116 }
1117 }
1118
1119 fn fetch_tag(&mut self) -> ScanResult {
1120 self.save_simple_key();
1121 self.disallow_simple_key();
1122
1123 let tok = self.scan_tag()?;
1124 self.tokens.push_back(tok);
1125 Ok(())
1126 }
1127
1128 fn scan_tag(&mut self) -> Result<Token, ScanError> {
1129 let start_mark = self.mark;
1130 let mut handle = String::new();
1131 let mut suffix;
1132
1133 self.lookahead(2);
1135
1136 if self.buffer[1] == '<' {
1137 suffix = self.scan_verbatim_tag(&start_mark)?;
1138 } else {
1139 handle = self.scan_tag_handle(false, &start_mark)?;
1141 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1143 let is_secondary_handle = handle == "!!";
1145 suffix =
1146 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?;
1147 } else {
1148 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?;
1149 "!".clone_into(&mut handle);
1150 if suffix.is_empty() {
1153 handle.clear();
1154 suffix = "!".to_owned();
1155 }
1156 }
1157 }
1158
1159 if is_blank_or_breakz(self.look_ch()) || (self.flow_level > 0 && is_flow(self.ch())) {
1160 Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
1162 } else {
1163 Err(ScanError::new(
1164 start_mark,
1165 "while scanning a tag, did not find expected whitespace or line break",
1166 ))
1167 }
1168 }
1169
1170 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1171 let mut string = String::new();
1172 if self.look_ch() != '!' {
1173 return Err(ScanError::new(
1174 *mark,
1175 "while scanning a tag, did not find expected '!'",
1176 ));
1177 }
1178
1179 string.push(self.ch());
1180 self.skip_non_blank();
1181
1182 while is_alpha(self.look_ch()) {
1183 string.push(self.ch());
1184 self.skip_non_blank();
1185 }
1186
1187 if self.ch() == '!' {
1189 string.push(self.ch());
1190 self.skip_non_blank();
1191 } else if directive && string != "!" {
1192 return Err(ScanError::new(
1196 *mark,
1197 "while parsing a tag directive, did not find expected '!'",
1198 ));
1199 }
1200 Ok(string)
1201 }
1202
1203 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1209 let mut string = String::new();
1210
1211 if self.look_ch() == '!' {
1212 string.push(self.ch());
1214 self.skip_non_blank();
1215 } else if !is_tag_char(self.ch()) {
1216 return Err(ScanError::new(*start_mark, "invalid global tag character"));
1218 } else if self.ch() == '%' {
1219 string.push(self.scan_uri_escapes(start_mark)?);
1221 } else {
1222 string.push(self.ch());
1224 self.skip_non_blank();
1225 }
1226
1227 while is_uri_char(self.look_ch()) {
1228 if self.ch() == '%' {
1229 string.push(self.scan_uri_escapes(start_mark)?);
1230 } else {
1231 string.push(self.ch());
1232 self.skip_non_blank();
1233 }
1234 }
1235
1236 Ok(string)
1237 }
1238
1239 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1243 self.skip_non_blank();
1245 self.skip_non_blank();
1246
1247 let mut string = String::new();
1248 while is_uri_char(self.look_ch()) {
1249 if self.ch() == '%' {
1250 string.push(self.scan_uri_escapes(start_mark)?);
1251 } else {
1252 string.push(self.ch());
1253 self.skip_non_blank();
1254 }
1255 }
1256
1257 if self.ch() != '>' {
1258 return Err(ScanError::new(
1259 *start_mark,
1260 "while scanning a verbatim tag, did not find the expected '>'",
1261 ));
1262 }
1263 self.skip_non_blank();
1264
1265 Ok(string)
1266 }
1267
1268 fn scan_tag_shorthand_suffix(
1269 &mut self,
1270 _directive: bool,
1271 _is_secondary: bool,
1272 head: &str,
1273 mark: &Marker,
1274 ) -> Result<String, ScanError> {
1275 let mut length = head.len();
1276 let mut string = String::new();
1277
1278 if length > 1 {
1281 string.extend(head.chars().skip(1));
1282 }
1283
1284 while is_tag_char(self.look_ch()) {
1285 if self.ch() == '%' {
1287 string.push(self.scan_uri_escapes(mark)?);
1288 } else {
1289 string.push(self.ch());
1290 self.skip_non_blank();
1291 }
1292
1293 length += 1;
1294 }
1295
1296 if length == 0 {
1297 return Err(ScanError::new(
1298 *mark,
1299 "while parsing a tag, did not find expected tag URI",
1300 ));
1301 }
1302
1303 Ok(string)
1304 }
1305
1306 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1307 let mut width = 0usize;
1308 let mut code = 0u32;
1309 loop {
1310 self.lookahead(3);
1311
1312 if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
1313 return Err(ScanError::new(
1314 *mark,
1315 "while parsing a tag, did not find URI escaped octet",
1316 ));
1317 }
1318
1319 let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
1320 if width == 0 {
1321 width = match octet {
1322 _ if octet & 0x80 == 0x00 => 1,
1323 _ if octet & 0xE0 == 0xC0 => 2,
1324 _ if octet & 0xF0 == 0xE0 => 3,
1325 _ if octet & 0xF8 == 0xF0 => 4,
1326 _ => {
1327 return Err(ScanError::new(
1328 *mark,
1329 "while parsing a tag, found an incorrect leading UTF-8 octet",
1330 ));
1331 }
1332 };
1333 code = octet;
1334 } else {
1335 if octet & 0xc0 != 0x80 {
1336 return Err(ScanError::new(
1337 *mark,
1338 "while parsing a tag, found an incorrect trailing UTF-8 octet",
1339 ));
1340 }
1341 code = (code << 8) + octet;
1342 }
1343
1344 self.skip_n_non_blank(3);
1345
1346 width -= 1;
1347 if width == 0 {
1348 break;
1349 }
1350 }
1351
1352 match char::from_u32(code) {
1353 Some(ch) => Ok(ch),
1354 None => Err(ScanError::new(
1355 *mark,
1356 "while parsing a tag, found an invalid UTF-8 codepoint",
1357 )),
1358 }
1359 }
1360
1361 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1362 self.save_simple_key();
1363 self.disallow_simple_key();
1364
1365 let tok = self.scan_anchor(alias)?;
1366
1367 self.tokens.push_back(tok);
1368
1369 Ok(())
1370 }
1371
1372 fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
1373 let mut string = String::new();
1374 let start_mark = self.mark;
1375
1376 self.skip_non_blank();
1377 while is_anchor_char(self.look_ch()) {
1378 string.push(self.ch());
1379 self.skip_non_blank();
1380 }
1381
1382 if string.is_empty() {
1383 return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
1384 }
1385
1386 if alias {
1387 Ok(Token(start_mark, TokenType::Alias(string)))
1388 } else {
1389 Ok(Token(start_mark, TokenType::Anchor(string)))
1390 }
1391 }
1392
1393 fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
1394 self.save_simple_key();
1396
1397 self.roll_one_col_indent();
1398 self.increase_flow_level()?;
1399
1400 self.allow_simple_key();
1401
1402 let start_mark = self.mark;
1403 self.skip_non_blank();
1404
1405 if tok == TokenType::FlowMappingStart {
1406 self.flow_mapping_started = true;
1407 }
1408
1409 self.skip_ws_to_eol(SkipTabs::Yes)?;
1410
1411 self.tokens.push_back(Token(start_mark, tok));
1412 Ok(())
1413 }
1414
1415 fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
1416 self.remove_simple_key()?;
1417 self.decrease_flow_level();
1418
1419 self.disallow_simple_key();
1420
1421 self.end_implicit_mapping(self.mark);
1422
1423 let start_mark = self.mark;
1424 self.skip_non_blank();
1425 self.skip_ws_to_eol(SkipTabs::Yes)?;
1426
1427 if self.flow_level > 0 {
1433 self.adjacent_value_allowed_at = self.mark.index;
1434 }
1435
1436 self.tokens.push_back(Token(start_mark, tok));
1437 Ok(())
1438 }
1439
1440 fn fetch_flow_entry(&mut self) -> ScanResult {
1442 self.remove_simple_key()?;
1443 self.allow_simple_key();
1444
1445 self.end_implicit_mapping(self.mark);
1446
1447 let start_mark = self.mark;
1448 self.skip_non_blank();
1449 self.skip_ws_to_eol(SkipTabs::Yes)?;
1450
1451 self.tokens
1452 .push_back(Token(start_mark, TokenType::FlowEntry));
1453 Ok(())
1454 }
1455
1456 fn increase_flow_level(&mut self) -> ScanResult {
1457 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1458 self.flow_level = self
1459 .flow_level
1460 .checked_add(1)
1461 .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
1462 Ok(())
1463 }
1464
1465 fn decrease_flow_level(&mut self) {
1466 if self.flow_level > 0 {
1467 self.flow_level -= 1;
1468 self.simple_keys.pop().unwrap();
1469 }
1470 }
1471
1472 fn fetch_block_entry(&mut self) -> ScanResult {
1478 if self.flow_level > 0 {
1479 return Err(ScanError::new(
1481 self.mark,
1482 r#""-" is only valid inside a block"#,
1483 ));
1484 }
1485 if !self.simple_key_allowed {
1487 return Err(ScanError::new(
1488 self.mark,
1489 "block sequence entries are not allowed in this context",
1490 ));
1491 }
1492
1493 if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
1495 if self.mark.col == 0 && mark.col == 0 && self.indent > -1 {
1496 return Err(ScanError::new(*mark, "invalid indentation for anchor"));
1497 }
1498 }
1499
1500 let mark = self.mark;
1502 self.skip_non_blank();
1503
1504 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1506 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
1507 self.lookahead(2);
1508 if found_tabs && self.buffer[0] == '-' && is_blank_or_breakz(self.buffer[1]) {
1509 return Err(ScanError::new(
1510 self.mark,
1511 "'-' must be followed by a valid YAML whitespace",
1512 ));
1513 }
1514
1515 self.skip_ws_to_eol(SkipTabs::No)?;
1516 if is_break(self.look_ch()) || is_flow(self.ch()) {
1517 self.roll_one_col_indent();
1518 }
1519
1520 self.remove_simple_key()?;
1521 self.allow_simple_key();
1522
1523 self.tokens
1524 .push_back(Token(self.mark, TokenType::BlockEntry));
1525
1526 Ok(())
1527 }
1528
1529 fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1530 self.unroll_indent(-1);
1531 self.remove_simple_key()?;
1532 self.disallow_simple_key();
1533
1534 let mark = self.mark;
1535
1536 self.skip_n_non_blank(3);
1537
1538 self.tokens.push_back(Token(mark, t));
1539 Ok(())
1540 }
1541
1542 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1543 self.save_simple_key();
1544 self.allow_simple_key();
1545 let tok = self.scan_block_scalar(literal)?;
1546
1547 self.tokens.push_back(tok);
1548 Ok(())
1549 }
1550
    /// Scans a block scalar, starting at its `|` or `>` indicator.
    ///
    /// `literal` selects the style of the resulting `Scalar` token: `Literal` when set,
    /// `Folded` otherwise. The header may hold a chomping indicator (`+` keeps trailing line
    /// breaks, `-` strips them) and an indentation indicator (a digit from 1 to 9), in
    /// either order.
    ///
    /// # Errors
    /// Returns an error if the indentation indicator is `0`, if the header is followed by
    /// anything other than blanks and a comment, if the content starts with a tab, or if a
    /// content line is indented less than the scalar but more than the enclosing block.
    #[allow(clippy::too_many_lines)]
    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        let mut chomping = Chomping::Clip;
        // Value of the explicit indentation indicator; 0 when there is none.
        let mut increment: usize = 0;
        // Column at which content lines start; 0 until it is known.
        let mut indent: usize = 0;
        let mut trailing_blank: bool;
        let mut leading_blank: bool = false;
        let style = if literal {
            TScalarStyle::Literal
        } else {
            TScalarStyle::Folded
        };

        let mut string = String::new();
        // The line break that ended the previous content line.
        let mut leading_break = String::new();
        // Line breaks of the blank lines met since the previous content line.
        let mut trailing_breaks = String::new();
        // The line break ending the header line.
        let mut chomping_break = String::new();

        // Step over the `|` or `>`.
        self.skip_non_blank();
        self.unroll_non_block_indents();

        // Header: chomping indicator then optional digit, or digit then optional chomping
        // indicator.
        if self.look_ch() == '+' || self.ch() == '-' {
            if self.ch() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
            if is_digit(self.look_ch()) {
                if self.ch() == '0' {
                    return Err(ScanError::new(
                        start_mark,
                        "while scanning a block scalar, found an indentation indicator equal to 0",
                    ));
                }
                increment = (self.ch() as usize) - ('0' as usize);
                self.skip_non_blank();
            }
        } else if is_digit(self.ch()) {
            if self.ch() == '0' {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }

            increment = (self.ch() as usize) - ('0' as usize);
            self.skip_non_blank();
            self.lookahead(1);
            if self.ch() == '+' || self.ch() == '-' {
                if self.ch() == '+' {
                    chomping = Chomping::Keep;
                } else {
                    chomping = Chomping::Strip;
                }
                self.skip_non_blank();
            }
        }

        // Only blanks and a comment may follow the header on its line.
        self.skip_ws_to_eol(SkipTabs::Yes)?;

        if !is_breakz(self.look_ch()) {
            return Err(ScanError::new(
                start_mark,
                "while scanning a block scalar, did not find expected comment or line break",
            ));
        }

        if is_break(self.ch()) {
            self.lookahead(2);
            self.read_break(&mut chomping_break);
        }

        if self.look_ch() == '\t' {
            return Err(ScanError::new(
                start_mark,
                "a block scalar content cannot start with a tab",
            ));
        }

        // An explicit indentation indicator is relative to the enclosing indentation.
        if increment > 0 {
            indent = if self.indent >= 0 {
                (self.indent + increment as isize) as usize
            } else {
                increment
            }
        }

        // Skip the indentation of the first line. With no explicit indicator `indent` is
        // still 0 here and is passed by mutable reference so the callee can set it.
        if indent == 0 {
            self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
        } else {
            self.skip_block_scalar_indent(indent, &mut trailing_breaks);
        }

        // The input ends before any content line: the scalar is made of line breaks only,
        // chosen according to the chomping mode.
        if is_z(self.ch()) {
            let contents = match chomping {
                Chomping::Strip => String::new(),
                // Still on the header line: there is not even a line break to keep.
                _ if self.mark.line == start_mark.line() => String::new(),
                Chomping::Clip => chomping_break,
                Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
                Chomping::Keep => trailing_breaks,
            };
            return Ok(Token(start_mark, TokenType::Scalar(style, contents)));
        }

        if self.mark.col < indent && (self.mark.col as isize) > self.indent {
            return Err(ScanError::new(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }

        let mut line_buffer = String::with_capacity(100);
        // From here on the token is reported at the start of the content, not at the
        // indicator.
        let start_mark = self.mark;
        // One iteration per content line.
        while self.mark.col == indent && !is_z(self.ch()) {
            // At column 0 a `...` marker ends the scalar.
            if indent == 0 {
                self.lookahead(4);
                if self.next_is_document_end() {
                    break;
                }
            }

            trailing_blank = is_blank(self.ch());
            // Folding: in a folded scalar, a single line break between two lines that do
            // not start with a blank becomes a space; if blank lines were met in between,
            // their line breaks are kept instead.
            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
                string.push_str(&trailing_breaks);
                if trailing_breaks.is_empty() {
                    string.push(' ');
                }
            } else {
                string.push_str(&leading_break);
                string.push_str(&trailing_breaks);
            }

            leading_break.clear();
            trailing_breaks.clear();

            leading_blank = is_blank(self.ch());

            self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

            if is_z(self.ch()) {
                break;
            }

            self.lookahead(2);
            self.read_break(&mut leading_break);

            // Skip the indentation of the next line, collecting blank lines on the way.
            self.skip_block_scalar_indent(indent, &mut trailing_breaks);
        }

        // Chomping: unless stripping, keep the final line break.
        if chomping != Chomping::Strip {
            string.push_str(&leading_break);
            // The input ended on a content line without a line break: add one.
            if is_z(self.ch()) && self.mark.col >= indent.max(1) {
                string.push('\n');
            }
        }

        if chomping == Chomping::Keep {
            string.push_str(&trailing_breaks);
        }

        Ok(Token(start_mark, TokenType::Scalar(style, string)))
    }
1736
1737 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
1747 while !self.buffer.is_empty() && !is_breakz(self.ch()) {
1749 string.push(self.ch());
1750 self.skip_blank();
1756 }
1757
1758 if self.buffer.is_empty() {
1761 let mut c = self.raw_read_ch();
1767 while !is_breakz(c) {
1768 line_buffer.push(c);
1769 c = self.raw_read_ch();
1770 }
1771
1772 self.buffer.push_back(c).unwrap();
1776
1777 self.mark.col += line_buffer.len();
1779 self.mark.index += line_buffer.len();
1780
1781 string.reserve(line_buffer.len());
1783 string.push_str(line_buffer);
1784 line_buffer.clear();
1786 }
1787 }
1788
1789 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
1791 loop {
1792 if indent < BUFFER_LEN - 2 {
1794 self.lookahead(BUFFER_LEN);
1795 while self.mark.col < indent && self.ch() == ' ' {
1796 self.skip_blank();
1797 }
1798 } else {
1799 loop {
1800 self.lookahead(BUFFER_LEN);
1801 while !self.buffer.is_empty() && self.mark.col < indent && self.ch() == ' ' {
1802 self.skip_blank();
1803 }
1804 if self.mark.col == indent || (!self.buffer.is_empty() && self.ch() != ' ') {
1808 break;
1809 }
1810 }
1811 self.lookahead(2);
1812 }
1813
1814 if is_break(self.ch()) {
1816 self.read_break(breaks);
1817 } else {
1818 break;
1820 }
1821 }
1822 }
1823
1824 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
1829 let mut max_indent = 0;
1830 loop {
1831 while self.look_ch() == ' ' {
1833 self.skip_blank();
1834 }
1835
1836 if self.mark.col > max_indent {
1837 max_indent = self.mark.col;
1838 }
1839
1840 if is_break(self.ch()) {
1841 self.lookahead(2);
1843 self.read_break(breaks);
1844 } else {
1845 break;
1847 }
1848 }
1849
1850 *indent = max_indent.max((self.indent + 1) as usize);
1859 if self.indent > 0 {
1860 *indent = (*indent).max(1);
1861 }
1862 }
1863
1864 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1865 self.save_simple_key();
1866 self.disallow_simple_key();
1867
1868 let tok = self.scan_flow_scalar(single)?;
1869
1870 self.skip_to_next_token()?;
1873 self.adjacent_value_allowed_at = self.mark.index;
1874
1875 self.tokens.push_back(tok);
1876 Ok(())
1877 }
1878
1879 #[allow(clippy::too_many_lines)]
1880 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1881 let start_mark = self.mark;
1882
1883 let mut string = String::new();
1884 let mut leading_break = String::new();
1885 let mut trailing_breaks = String::new();
1886 let mut whitespaces = String::new();
1887 let mut leading_blanks;
1888
1889 self.skip_non_blank();
1891
1892 loop {
1893 self.lookahead(4);
1895
1896 if self.mark.col == 0
1897 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1898 || ((self.buffer[0] == '.')
1899 && (self.buffer[1] == '.')
1900 && (self.buffer[2] == '.')))
1901 && is_blank_or_breakz(self.buffer[3])
1902 {
1903 return Err(ScanError::new(
1904 start_mark,
1905 "while scanning a quoted scalar, found unexpected document indicator",
1906 ));
1907 }
1908
1909 if is_z(self.ch()) {
1910 return Err(ScanError::new(
1911 start_mark,
1912 "while scanning a quoted scalar, found unexpected end of stream",
1913 ));
1914 }
1915
1916 if (self.mark.col as isize) < self.indent {
1917 return Err(ScanError::new(
1918 start_mark,
1919 "invalid indentation in quoted scalar",
1920 ));
1921 }
1922
1923 leading_blanks = false;
1924 self.consume_flow_scalar_non_whitespace_chars(
1925 single,
1926 &mut string,
1927 &mut leading_blanks,
1928 &start_mark,
1929 )?;
1930
1931 match self.look_ch() {
1932 '\'' if single => break,
1933 '"' if !single => break,
1934 _ => {}
1935 }
1936
1937 while is_blank(self.ch()) || is_break(self.ch()) {
1939 if is_blank(self.ch()) {
1940 if leading_blanks {
1942 if self.ch() == '\t' && (self.mark.col as isize) < self.indent {
1943 return Err(ScanError::new(
1944 self.mark,
1945 "tab cannot be used as indentation",
1946 ));
1947 }
1948 self.skip_blank();
1949 } else {
1950 whitespaces.push(self.ch());
1951 self.skip_blank();
1952 }
1953 } else {
1954 self.lookahead(2);
1955 if leading_blanks {
1957 self.read_break(&mut trailing_breaks);
1958 } else {
1959 whitespaces.clear();
1960 self.read_break(&mut leading_break);
1961 leading_blanks = true;
1962 }
1963 }
1964 self.lookahead(1);
1965 }
1966
1967 if leading_blanks {
1969 if leading_break.is_empty() {
1970 string.push_str(&leading_break);
1971 string.push_str(&trailing_breaks);
1972 trailing_breaks.clear();
1973 leading_break.clear();
1974 } else {
1975 if trailing_breaks.is_empty() {
1976 string.push(' ');
1977 } else {
1978 string.push_str(&trailing_breaks);
1979 trailing_breaks.clear();
1980 }
1981 leading_break.clear();
1982 }
1983 } else {
1984 string.push_str(&whitespaces);
1985 whitespaces.clear();
1986 }
1987 } self.skip_non_blank();
1991 self.skip_ws_to_eol(SkipTabs::Yes)?;
1993 match self.ch() {
1994 ',' | '}' | ']' if self.flow_level > 0 => {}
1996 c if is_breakz(c) => {}
1998 ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
2001 ':' if self.flow_level > 0 => {}
2003 _ => {
2004 return Err(ScanError::new(
2005 self.mark,
2006 "invalid trailing content after double-quoted scalar",
2007 ));
2008 }
2009 }
2010
2011 let style = if single {
2012 TScalarStyle::SingleQuoted
2013 } else {
2014 TScalarStyle::DoubleQuoted
2015 };
2016 Ok(Token(start_mark, TokenType::Scalar(style, string)))
2017 }
2018
2019 fn consume_flow_scalar_non_whitespace_chars(
2028 &mut self,
2029 single: bool,
2030 string: &mut String,
2031 leading_blanks: &mut bool,
2032 start_mark: &Marker,
2033 ) -> Result<(), ScanError> {
2034 self.lookahead(2);
2035 while !is_blank_or_breakz(self.ch()) {
2036 match self.ch() {
2037 '\'' if self.buffer[1] == '\'' && single => {
2039 string.push('\'');
2040 self.skip_n_non_blank(2);
2041 }
2042 '\'' if single => break,
2044 '"' if !single => break,
2045 '\\' if !single && is_break(self.buffer[1]) => {
2047 self.lookahead(3);
2048 self.skip_non_blank();
2049 self.skip_linebreak();
2050 *leading_blanks = true;
2051 break;
2052 }
2053 '\\' if !single => {
2055 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
2056 }
2057 c => {
2058 string.push(c);
2059 self.skip_non_blank();
2060 }
2061 }
2062 self.lookahead(2);
2063 }
2064 Ok(())
2065 }
2066
2067 fn resolve_flow_scalar_escape_sequence(
2074 &mut self,
2075 start_mark: &Marker,
2076 ) -> Result<char, ScanError> {
2077 let mut code_length = 0usize;
2078 let mut ret = '\0';
2079
2080 match self.buffer[1] {
2081 '0' => ret = '\0',
2082 'a' => ret = '\x07',
2083 'b' => ret = '\x08',
2084 't' | '\t' => ret = '\t',
2085 'n' => ret = '\n',
2086 'v' => ret = '\x0b',
2087 'f' => ret = '\x0c',
2088 'r' => ret = '\x0d',
2089 'e' => ret = '\x1b',
2090 ' ' => ret = '\x20',
2091 '"' => ret = '"',
2092 '/' => ret = '/',
2093 '\\' => ret = '\\',
2094 'N' => ret = char::from_u32(0x85).unwrap(),
2096 '_' => ret = char::from_u32(0xA0).unwrap(),
2098 'L' => ret = char::from_u32(0x2028).unwrap(),
2100 'P' => ret = char::from_u32(0x2029).unwrap(),
2102 'x' => code_length = 2,
2103 'u' => code_length = 4,
2104 'U' => code_length = 8,
2105 _ => {
2106 return Err(ScanError::new(
2107 *start_mark,
2108 "while parsing a quoted scalar, found unknown escape character",
2109 ))
2110 }
2111 }
2112 self.skip_n_non_blank(2);
2113
2114 if code_length > 0 {
2116 self.lookahead(code_length);
2117 let mut value = 0u32;
2118 for i in 0..code_length {
2119 if !is_hex(self.buffer[i]) {
2120 return Err(ScanError::new(
2121 *start_mark,
2122 "while parsing a quoted scalar, did not find expected hexadecimal number",
2123 ));
2124 }
2125 value = (value << 4) + as_hex(self.buffer[i]);
2126 }
2127
2128 let Some(ch) = char::from_u32(value) else {
2129 return Err(ScanError::new(
2130 *start_mark,
2131 "while parsing a quoted scalar, found invalid Unicode character escape code",
2132 ));
2133 };
2134 ret = ch;
2135
2136 self.skip_n_non_blank(code_length);
2137 }
2138 Ok(ret)
2139 }
2140
2141 fn fetch_plain_scalar(&mut self) -> ScanResult {
2142 self.save_simple_key();
2143 self.disallow_simple_key();
2144
2145 let tok = self.scan_plain_scalar()?;
2146
2147 self.tokens.push_back(tok);
2148 Ok(())
2149 }
2150
2151 #[allow(clippy::too_many_lines)]
2156 fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
2157 self.unroll_non_block_indents();
2158 let indent = self.indent + 1;
2159 let start_mark = self.mark;
2160
2161 if self.flow_level > 0 && (start_mark.col as isize) < indent {
2162 return Err(ScanError::new(
2163 start_mark,
2164 "invalid indentation in flow construct",
2165 ));
2166 }
2167
2168 let mut string = String::with_capacity(32);
2169 let mut leading_break = String::with_capacity(32);
2170 let mut trailing_breaks = String::with_capacity(32);
2171 let mut whitespaces = String::with_capacity(32);
2172
2173 loop {
2174 self.lookahead(4);
2175 if self.next_is_document_indicator() || self.ch() == '#' {
2176 break;
2177 }
2178
2179 if self.flow_level > 0 && self.ch() == '-' && is_flow(self.buffer[1]) {
2180 return Err(ScanError::new(
2181 self.mark,
2182 "plain scalar cannot start with '-' followed by ,[]{}",
2183 ));
2184 }
2185
2186 if !is_blank_or_breakz(self.ch()) && self.next_can_be_plain_scalar() {
2187 if self.leading_whitespace {
2188 if leading_break.is_empty() {
2189 string.push_str(&leading_break);
2190 string.push_str(&trailing_breaks);
2191 trailing_breaks.clear();
2192 leading_break.clear();
2193 } else {
2194 if trailing_breaks.is_empty() {
2195 string.push(' ');
2196 } else {
2197 string.push_str(&trailing_breaks);
2198 trailing_breaks.clear();
2199 }
2200 leading_break.clear();
2201 }
2202 self.leading_whitespace = false;
2203 } else if !whitespaces.is_empty() {
2204 string.push_str(&whitespaces);
2205 whitespaces.clear();
2206 }
2207
2208 string.push(self.ch());
2210 self.skip_non_blank();
2211 self.lookahead(2);
2212
2213 while !is_blank_or_breakz(self.ch()) {
2215 if !self.next_can_be_plain_scalar() {
2216 break;
2217 }
2218
2219 string.push(self.ch());
2220 self.skip_non_blank();
2221 self.lookahead(2);
2222 }
2223 }
2224
2225 if !(is_blank(self.ch()) || is_break(self.ch())) {
2230 break;
2231 }
2232
2233 while is_blank(self.look_ch()) || is_break(self.ch()) {
2235 if is_blank(self.ch()) {
2236 if !self.leading_whitespace {
2237 whitespaces.push(self.ch());
2238 self.skip_blank();
2239 } else if (self.mark.col as isize) < indent && self.ch() == '\t' {
2240 self.skip_ws_to_eol(SkipTabs::Yes)?;
2243 if !is_breakz(self.ch()) {
2244 return Err(ScanError::new(
2245 start_mark,
2246 "while scanning a plain scalar, found a tab",
2247 ));
2248 }
2249 } else {
2250 self.skip_blank();
2251 }
2252 } else {
2253 self.lookahead(2);
2254 if self.leading_whitespace {
2256 self.read_break(&mut trailing_breaks);
2257 } else {
2258 whitespaces.clear();
2259 self.read_break(&mut leading_break);
2260 self.leading_whitespace = true;
2261 }
2262 }
2263 }
2264
2265 if self.flow_level == 0 && (self.mark.col as isize) < indent {
2267 break;
2268 }
2269 }
2270
2271 if self.leading_whitespace {
2272 self.allow_simple_key();
2273 }
2274
2275 Ok(Token(
2276 start_mark,
2277 TokenType::Scalar(TScalarStyle::Plain, string),
2278 ))
2279 }
2280
2281 fn fetch_key(&mut self) -> ScanResult {
2282 let start_mark = self.mark;
2283 if self.flow_level == 0 {
2284 if !self.simple_key_allowed {
2286 return Err(ScanError::new(
2287 self.mark,
2288 "mapping keys are not allowed in this context",
2289 ));
2290 }
2291 self.roll_indent(
2292 start_mark.col,
2293 None,
2294 TokenType::BlockMappingStart,
2295 start_mark,
2296 );
2297 } else {
2298 self.flow_mapping_started = true;
2300 }
2301
2302 self.remove_simple_key()?;
2303
2304 if self.flow_level == 0 {
2305 self.allow_simple_key();
2306 } else {
2307 self.disallow_simple_key();
2308 }
2309
2310 self.skip_non_blank();
2311 self.skip_yaml_whitespace()?;
2312 if self.ch() == '\t' {
2313 return Err(ScanError::new(
2314 self.mark(),
2315 "tabs disallowed in this context",
2316 ));
2317 }
2318 self.tokens.push_back(Token(start_mark, TokenType::Key));
2319 Ok(())
2320 }
2321
2322 fn fetch_value(&mut self) -> ScanResult {
2324 let sk = self.simple_keys.last().unwrap().clone();
2325 let start_mark = self.mark;
2326 self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started;
2327
2328 self.skip_non_blank();
2330 if self.look_ch() == '\t'
2331 && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
2332 && (self.ch() == '-' || is_alpha(self.ch()))
2333 {
2334 return Err(ScanError::new(
2335 self.mark,
2336 "':' must be followed by a valid YAML whitespace",
2337 ));
2338 }
2339
2340 if sk.possible {
2341 let tok = Token(sk.mark, TokenType::Key);
2343 self.insert_token(sk.token_number - self.tokens_parsed, tok);
2344 if self.implicit_flow_mapping {
2345 if sk.mark.line < start_mark.line {
2346 return Err(ScanError::new(
2347 start_mark,
2348 "illegal placement of ':' indicator",
2349 ));
2350 }
2351 self.insert_token(
2352 sk.token_number - self.tokens_parsed,
2353 Token(self.mark, TokenType::FlowMappingStart),
2354 );
2355 }
2356
2357 self.roll_indent(
2359 sk.mark.col,
2360 Some(sk.token_number),
2361 TokenType::BlockMappingStart,
2362 start_mark,
2363 );
2364 self.roll_one_col_indent();
2365
2366 self.simple_keys.last_mut().unwrap().possible = false;
2367 self.disallow_simple_key();
2368 } else {
2369 if self.implicit_flow_mapping {
2370 self.tokens
2371 .push_back(Token(self.mark, TokenType::FlowMappingStart));
2372 }
2373 if self.flow_level == 0 {
2375 if !self.simple_key_allowed {
2376 return Err(ScanError::new(
2377 start_mark,
2378 "mapping values are not allowed in this context",
2379 ));
2380 }
2381
2382 self.roll_indent(
2383 start_mark.col,
2384 None,
2385 TokenType::BlockMappingStart,
2386 start_mark,
2387 );
2388 }
2389 self.roll_one_col_indent();
2390
2391 if self.flow_level == 0 {
2392 self.allow_simple_key();
2393 } else {
2394 self.disallow_simple_key();
2395 }
2396 }
2397 self.tokens.push_back(Token(start_mark, TokenType::Value));
2398
2399 Ok(())
2400 }
2401
2402 fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
2408 if self.flow_level > 0 {
2409 return;
2410 }
2411
2412 if self.indent <= col as isize {
2416 if let Some(indent) = self.indents.last() {
2417 if !indent.needs_block_end {
2418 self.indent = indent.indent;
2419 self.indents.pop();
2420 }
2421 }
2422 }
2423
2424 if self.indent < col as isize {
2425 self.indents.push(Indent {
2426 indent: self.indent,
2427 needs_block_end: true,
2428 });
2429 self.indent = col as isize;
2430 let tokens_parsed = self.tokens_parsed;
2431 match number {
2432 Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
2433 None => self.tokens.push_back(Token(mark, tok)),
2434 }
2435 }
2436 }
2437
2438 fn unroll_indent(&mut self, col: isize) {
2444 if self.flow_level > 0 {
2445 return;
2446 }
2447 while self.indent > col {
2448 let indent = self.indents.pop().unwrap();
2449 self.indent = indent.indent;
2450 if indent.needs_block_end {
2451 self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
2452 }
2453 }
2454 }
2455
2456 fn roll_one_col_indent(&mut self) {
2462 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
2463 self.indents.push(Indent {
2464 indent: self.indent,
2465 needs_block_end: false,
2466 });
2467 self.indent += 1;
2468 }
2469 }
2470
2471 fn unroll_non_block_indents(&mut self) {
2473 while let Some(indent) = self.indents.last() {
2474 if indent.needs_block_end {
2475 break;
2476 }
2477 self.indent = indent.indent;
2478 self.indents.pop();
2479 }
2480 }
2481
2482 fn save_simple_key(&mut self) {
2484 if self.simple_key_allowed {
2485 let required = self.flow_level == 0
2486 && self.indent == (self.mark.col as isize)
2487 && self.indents.last().unwrap().needs_block_end;
2488 let mut sk = SimpleKey::new(self.mark);
2489 sk.possible = true;
2490 sk.required = required;
2491 sk.token_number = self.tokens_parsed + self.tokens.len();
2492
2493 self.simple_keys.pop();
2494 self.simple_keys.push(sk);
2495 }
2496 }
2497
2498 fn remove_simple_key(&mut self) -> ScanResult {
2499 let last = self.simple_keys.last_mut().unwrap();
2500 if last.possible && last.required {
2501 return Err(ScanError::new(self.mark, "simple key expected"));
2502 }
2503
2504 last.possible = false;
2505 Ok(())
2506 }
2507
2508 #[allow(clippy::inline_always)]
2513 #[inline(always)]
2514 fn next_can_be_plain_scalar(&self) -> bool {
2515 match self.ch() {
2516 ':' if is_blank_or_breakz(self.buffer[1])
2518 || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
2519 {
2520 false
2521 }
2522 c if self.flow_level > 0 && is_flow(c) => false,
2523 _ => true,
2524 }
2525 }
2526
/// Return whether the indent stack holds at least one entry.
fn is_within_block(&self) -> bool {
    !self.indents.is_empty()
}
2531
2532 fn end_implicit_mapping(&mut self, mark: Marker) {
2534 if self.implicit_flow_mapping {
2535 self.implicit_flow_mapping = false;
2536 self.flow_mapping_started = false;
2537 self.tokens
2538 .push_back(Token(mark, TokenType::FlowMappingEnd));
2539 }
2540 }
2541}
2542
/// Tab-handling mode and outcome for whitespace skipping.
///
/// `Yes` / `No` are passed to the skipping routine as a mode; `Result` carries what was observed.
#[derive(Copy, Clone, Eq, PartialEq)]
enum SkipTabs {
    /// Mode: tabs are to be skipped.
    Yes,
    /// Mode: tabs are not to be skipped.
    No,
    /// Outcome: `(tabs were found, valid YAML whitespace was found)`.
    Result(bool, bool),
}

impl SkipTabs {
    /// Whether this is a `Result` whose first flag (tabs found) is set.
    ///
    /// Always `false` for the `Yes` and `No` modes.
    fn found_tabs(self) -> bool {
        match self {
            Self::Result(tabs, _) => tabs,
            Self::Yes | Self::No => false,
        }
    }

    /// Whether this is a `Result` whose second flag (valid YAML whitespace found) is set.
    ///
    /// Always `false` for the `Yes` and `No` modes.
    fn has_valid_yaml_ws(self) -> bool {
        match self {
            Self::Result(_, valid_ws) => valid_ws,
            Self::Yes | Self::No => false,
        }
    }
}
2576
/// Chomping behavior of a block scalar: what happens to the line breaks at its end.
///
/// Derives the same common traits as the other public enums of this module so that values can
/// be copied freely and printed in diagnostics and assertions.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// Selected by the `-` indicator: the final line break and trailing empty lines are dropped.
    Strip,
    /// No indicator: the final line break is kept, trailing empty lines are dropped.
    Clip,
    /// Selected by the `+` indicator: the final line break and trailing empty lines are kept.
    Keep,
}
2589
#[cfg(test)]
mod test {
    use super::is_anchor_char;

    /// Smoke test: an ASCII letter is accepted as an anchor character.
    #[test]
    fn test_is_anchor_char() {
        assert!(is_anchor_char('x'));
    }
}