1use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A lexed token together with its byte span in the input: `(start, token, end)`.
pub type Spanned<'a> = (usize, Token<'a>, usize);

/// Result type used throughout the lexer; defaults to a [`Spanned`] token on
/// success and a [`LexicalError`] on failure.
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
/// All tokens produced by the lexer. String-carrying variants borrow slices
/// of the original input, so the enum is `Copy`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    Identifier(&'input str),
    // bool flag: true for a `unicode"..."` literal, false for a plain string
    StringLiteral(bool, &'input str),
    AddressLiteral(&'input str),
    HexLiteral(&'input str),
    // (integer, exponent) — exponent is "" when absent
    Number(&'input str, &'input str),
    // (integer, fraction, exponent) — exponent is "" when absent
    RationalNumber(&'input str, &'input str, &'input str),
    HexNumber(&'input str),
    Divide,
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    Memory,
    Storage,
    Calldata,

    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    // width in bits (8..=256)
    Uint(u16),
    Int(u16),
    // width in bytes (1..=32)
    Bytes(u8),
    Byte,
    DynamicBytes,
    Bool,
    Address,
    String,

    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    Equal,
    Assign,
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    YulArrow,

    Annotation(&'input str),
}

impl<'input> fmt::Display for Token<'input> {
    /// Renders the token back to its Solidity source form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            // BUGFIX: these two previously printed the *left*-shift spellings
            // ("<<=" and "<<"), so `>>`/`>>=` round-tripped incorrectly.
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
        }
    }
}
322
/// Hand-written lexer over a source string. Token slices borrow from `input`.
#[derive(Debug)]
pub struct Lexer<'input> {
    // The complete source text being lexed.
    input: &'input str,
    // Char iterator with arbitrary lookahead (`peek_nth`).
    chars: PeekNth<CharIndices<'input>>,
    // Comments are not tokens; they are collected here as they are skipped.
    comments: &'input mut Vec<Comment>,
    // File number recorded in every `Loc` this lexer produces.
    file_no: usize,
    // True while lexing the version range after `pragma <identifier>`;
    // numbers are then lexed plainly (no rationals/exponents) and `.` is a
    // member token. Cleared when a `;` is seen.
    parse_semver: bool,
    // The last two tokens produced, used to detect `pragma <identifier>`.
    last_tokens: [Option<Token<'input>>; 2],
    // Lexical errors are accumulated here; lexing continues after most errors.
    pub errors: &'input mut Vec<LexicalError>,
}
357
/// Errors the lexer can report; each variant carries its source location.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    // NOTE(review): lower-case "of" is inconsistent with the variants above;
    // the name is kept as-is since renaming would break code matching on it.
    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    // NOTE(review): not constructed anywhere in this chunk; presumably used
    // by the import-directive parser elsewhere in the crate.
    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
386
/// Returns `true` if `word` is a reserved word of the language — a keyword or
/// an elementary type name such as `uint256` — and therefore cannot be used
/// as an identifier.
pub fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains_key(word)
}
391
/// Compile-time perfect-hash map from reserved-word spelling to its token.
/// Includes all sized `intN`/`uintN`/`bytesN` type names; `int`/`uint`
/// alias their 256-bit forms.
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
};
557
impl<'input> Lexer<'input> {
    /// Creates a lexer over `input`. Comments and lexical errors found while
    /// scanning are appended to the caller-supplied vectors; `file_no` is
    /// recorded in every `Loc` produced.
    pub fn new(
        input: &'input str,
        file_no: usize,
        comments: &'input mut Vec<Comment>,
        errors: &'input mut Vec<LexicalError>,
    ) -> Self {
        Lexer {
            input,
            chars: peek_nth(input.char_indices()),
            comments,
            file_no,
            parse_semver: false,
            last_tokens: [None, None],
            errors,
        }
    }

    /// Lexes a numeric literal whose first character `ch` (at byte offset
    /// `start`) has already been consumed. Handles `0x…` hex numbers,
    /// integers, rationals (`1.5`, `.5`) and exponents (`1e-2`), with `_`
    /// allowed as a digit separator. In semver mode only a plain integer is
    /// consumed, so `0.5.0` lexes as number/member/number/member/number.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // hex number: consume the 'x', then require at least one digit
                self.chars.next();

                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        // Called with ch == '.' (and start == dot offset + 1) for a literal
        // like `.5`; back up so the token span includes the dot.
        if ch == '.' {
            is_rational = true;
            start -= 1;
        }

        // consume the integer part
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        // in semver mode, stop here: no fraction or exponent
        if self.parse_semver {
            let integer = &self.input[start..=end];
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            // `.5` form: empty integer part, fraction starts after the dot
            end_before_rational = start;
            rational_start = start + 1;
        }

        // a '.' followed by a digit begins the fractional part
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    // consume the '.'
                    self.chars.next();
                    while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        let old_end = end;
        let mut exp_start = end + 1;

        // optional exponent: e/E, optional '-' sign(s), then digits
        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            exp_start = *i + 1;
            self.chars.next();
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // no digits followed the 'e' — e.g. `9ea`
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            // empty slice when there was no exponent (exp_start == end + 1)
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }

    /// Scans a string literal body. `token_start` is the offset of the whole
    /// token (including any `unicode` prefix); `string_start` the first byte
    /// after the opening quote. A backslash escapes the next character, so an
    /// escaped quote does not terminate the literal. The returned slice
    /// excludes both quotes and keeps escape sequences unprocessed.
    fn string(
        &mut self,
        unicode: bool,
        token_start: usize,
        string_start: usize,
        quote_char: char,
    ) -> Result<'input> {
        let mut end;

        let mut last_was_escape = false;

        loop {
            if let Some((i, ch)) = self.chars.next() {
                end = i;
                if !last_was_escape {
                    if ch == quote_char {
                        break;
                    }
                    last_was_escape = ch == '\\';
                } else {
                    last_was_escape = false;
                }
            } else {
                return Err(LexicalError::EndOfFileInString(Loc::File(
                    self.file_no,
                    token_start,
                    self.input.len(),
                )));
            }
        }

        Ok((
            token_start,
            Token::StringLiteral(unicode, &self.input[string_start..end]),
            end + 1,
        ))
    }

    /// Scans and returns the next token, or `None` at end of input.
    /// Comments are pushed onto `self.comments`; recoverable errors onto
    /// `self.errors`, after which scanning continues with the next token.
    fn next(&mut self) -> Option<Spanned<'input>> {
        'toplevel: loop {
            match self.chars.next() {
                // identifier or keyword; may also be a literal prefix
                // (`unicode"…"`, `hex"…"`, `address"…"`)
                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
                    let (id, end) = self.match_identifier(start);

                    if id == "unicode" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();
                                // body starts after `unicode` (7 bytes) plus
                                // the opening quote
                                let str_res = self.string(true, start, start + 8, quote_char);
                                match str_res {
                                    Err(lex_err) => self.errors.push(lex_err),
                                    Ok(val) => return Some(val),
                                }
                            }
                            _ => (),
                        }
                    }

                    // hex"…": only hex digits and '_' allowed inside
                    if id == "hex" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::HexLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }

                                    if !ch.is_ascii_hexdigit() && ch != '_' {
                                        // skip ahead to the closing quote so
                                        // lexing can resume after the literal
                                        for (_, ch) in &mut self.chars {
                                            if ch == quote_char {
                                                break;
                                            }
                                        }

                                        self.errors.push(
                                            LexicalError::InvalidCharacterInHexLiteral(
                                                Loc::File(self.file_no, i, i + 1),
                                                ch,
                                            ),
                                        );
                                        continue 'toplevel;
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    // address"…" literal; content validated later, not here
                    if id == "address" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::AddressLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    return if let Some(w) = KEYWORDS.get(id) {
                        Some((start, *w, end))
                    } else {
                        Some((start, Token::Identifier(id), end))
                    };
                }
                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
                    let str_res = self.string(false, start, start + 1, quote_char);
                    match str_res {
                        Err(lex_err) => self.errors.push(lex_err),
                        Ok(val) => return Some(val),
                    }
                }
                // '/' may begin /=, a line comment, a block comment, or divide
                Some((start, '/')) => {
                    match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            return Some((start, Token::DivideAssign, start + 2));
                        }
                        Some((_, '/')) => {
                            // line comment; `///` is a doc comment but `////`
                            // is not
                            self.chars.next();

                            let mut newline = false;

                            let doc_comment = match self.chars.next() {
                                Some((_, '/')) => {
                                    !matches!(self.chars.peek(), Some((_, '/')))
                                }
                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
                                    newline = true;
                                    false
                                }
                                _ => false,
                            };

                            let mut last = start + 3;

                            if !newline {
                                // consume to end of line (or end of input)
                                loop {
                                    match self.chars.next() {
                                        None => {
                                            last = self.input.len();
                                            break;
                                        }
                                        Some((offset, '\n' | '\r')) => {
                                            last = offset;
                                            break;
                                        }
                                        Some(_) => (),
                                    }
                                }
                            }

                            if doc_comment {
                                self.comments.push(Comment::DocLine(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Line(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            }
                        }
                        Some((_, '*')) => {
                            // block comment; `/**` starts a doc block unless
                            // it is the empty `/**/`
                            self.chars.next();

                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));

                            let mut last = start + 3;
                            let mut seen_star = false;

                            loop {
                                if let Some((i, ch)) = self.chars.next() {
                                    // `*/` terminates, tracked via seen_star
                                    if seen_star && ch == '/' {
                                        break;
                                    }
                                    seen_star = ch == '*';
                                    last = i;
                                } else {
                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
                                        self.file_no,
                                        start,
                                        self.input.len(),
                                    )));
                                    return None;
                                }
                            }

                            // `last > start + 2` excludes the empty `/**/`
                            if doc_comment_start && last > start + 2 {
                                self.comments.push(Comment::DocBlock(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Block(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            }
                        }
                        _ => {
                            return Some((start, Token::Divide, start + 1));
                        }
                    }
                }
                Some((start, ch)) if ch.is_ascii_digit() => {
                    let parse_result = self.parse_number(start, ch);
                    match parse_result {
                        Err(lex_err) => {
                            self.errors.push(lex_err.clone());
                            // only EOF inside a hex literal is unrecoverable
                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
                                return None;
                            }
                        }
                        Ok(parse_result) => return Some(parse_result),
                    }
                }
                // `@name` annotation; a bare '@' is an error
                Some((start, '@')) => {
                    let (id, end) = self.match_identifier(start);
                    if id.len() == 1 {
                        self.errors.push(LexicalError::UnrecognisedToken(
                            Loc::File(self.file_no, start, start + 1),
                            id.to_owned(),
                        ));
                    } else {
                        return Some((start, Token::Annotation(&id[1..]), end));
                    };
                }
                Some((i, ';')) => {
                    // a semicolon ends any pending semver range
                    self.parse_semver = false;
                    return Some((i, Token::Semicolon, i + 1));
                }
                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
                Some((i, '=')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::Equal, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::Arrow, i + 2))
                        }
                        _ => Some((i, Token::Assign, i + 1)),
                    }
                }
                Some((i, '!')) => {
                    return if let Some((_, '=')) = self.chars.peek() {
                        self.chars.next();
                        Some((i, Token::NotEqual, i + 2))
                    } else {
                        Some((i, Token::Not, i + 1))
                    }
                }
                Some((i, '|')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseOrAssign, i + 2))
                        }
                        Some((_, '|')) => {
                            self.chars.next();
                            Some((i, Token::Or, i + 2))
                        }
                        _ => Some((i, Token::BitwiseOr, i + 1)),
                    };
                }
                Some((i, '&')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseAndAssign, i + 2))
                        }
                        Some((_, '&')) => {
                            self.chars.next();
                            Some((i, Token::And, i + 2))
                        }
                        _ => Some((i, Token::BitwiseAnd, i + 1)),
                    };
                }
                Some((i, '^')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseXorAssign, i + 2))
                        }
                        _ => Some((i, Token::BitwiseXor, i + 1)),
                    };
                }
                Some((i, '+')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::AddAssign, i + 2))
                        }
                        Some((_, '+')) => {
                            self.chars.next();
                            Some((i, Token::Increment, i + 2))
                        }
                        _ => Some((i, Token::Add, i + 1)),
                    };
                }
                Some((i, '-')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::SubtractAssign, i + 2))
                        }
                        Some((_, '-')) => {
                            self.chars.next();
                            Some((i, Token::Decrement, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::YulArrow, i + 2))
                        }
                        _ => Some((i, Token::Subtract, i + 1)),
                    };
                }
                Some((i, '*')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MulAssign, i + 2))
                        }
                        Some((_, '*')) => {
                            self.chars.next();
                            Some((i, Token::Power, i + 2))
                        }
                        _ => Some((i, Token::Mul, i + 1)),
                    };
                }
                Some((i, '%')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ModuloAssign, i + 2))
                        }
                        _ => Some((i, Token::Modulo, i + 1)),
                    };
                }
                Some((i, '<')) => {
                    return match self.chars.peek() {
                        Some((_, '<')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftLeftAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftLeft, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::LessEqual, i + 2))
                        }
                        _ => Some((i, Token::Less, i + 1)),
                    };
                }
                Some((i, '>')) => {
                    return match self.chars.peek() {
                        Some((_, '>')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftRightAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftRight, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MoreEqual, i + 2))
                        }
                        _ => Some((i, Token::More, i + 1)),
                    };
                }
                Some((i, '.')) => {
                    // `.5` is a rational literal — except inside a semver
                    // range, where '.' is always a member separator
                    if let Some((_, a)) = self.chars.peek() {
                        if a.is_ascii_digit() && !self.parse_semver {
                            return match self.parse_number(i + 1, '.') {
                                Err(lex_error) => {
                                    self.errors.push(lex_error);
                                    None
                                }
                                Ok(parse_result) => Some(parse_result),
                            };
                        }
                    }
                    return Some((i, Token::Member, i + 1));
                }
                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
                Some((i, ':')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ColonAssign, i + 2))
                        }
                        _ => Some((i, Token::Colon, i + 1)),
                    };
                }
                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
                Some((_, ch)) if ch.is_whitespace() => (),
                // anything else: consume up to the next whitespace and report
                // the run as one unrecognised token
                Some((start, _)) => {
                    let mut end;

                    loop {
                        if let Some((i, ch)) = self.chars.next() {
                            end = i;

                            if ch.is_whitespace() {
                                break;
                            }
                        } else {
                            end = self.input.len();
                            break;
                        }
                    }

                    self.errors.push(LexicalError::UnrecognisedToken(
                        Loc::File(self.file_no, start, end),
                        self.input[start..end].to_owned(),
                    ));
                }
                None => return None, // end of input
            }
        }
    }

    /// Peeks identifier-continue characters (XID_Continue or '$') starting at
    /// `start` and consumes them; returns the identifier slice and the byte
    /// offset just past it.
    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
        let end;
        loop {
            if let Some((i, ch)) = self.chars.peek() {
                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
                    end = *i;
                    break;
                }
                self.chars.next();
            } else {
                end = self.input.len();
                break;
            }
        }

        (&self.input[start..end], end)
    }
}
1210
1211impl<'input> Iterator for Lexer<'input> {
1212 type Item = Spanned<'input>;
1213
1214 fn next(&mut self) -> Option<Self::Item> {
1215 if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1219 self.parse_semver = true;
1220 }
1221
1222 let token = self.next();
1223
1224 self.last_tokens = [
1225 self.last_tokens[1],
1226 match token {
1227 Some((_, n, _)) => Some(n),
1228 _ => None,
1229 },
1230 ];
1231
1232 token
1233 }
1234}
1235
1236#[cfg(test)]
1237mod tests {
1238 use super::*;
1239
1240 #[test]
1241 fn test_lexer() {
1242 let mut comments = Vec::new();
1243 let mut errors = Vec::new();
1244
1245 let multiple_errors = r#" 9ea -9e € bool hex uint8 hex"g" /** "#;
1246 let tokens = Lexer::new(multiple_errors, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1247 assert_eq!(
1248 tokens,
1249 vec![
1250 (3, Token::Identifier("a"), 4),
1251 (5, Token::Subtract, 6),
1252 (13, Token::Bool, 17),
1253 (18, Token::Identifier("hex"), 21),
1254 (22, Token::Uint(8), 27),
1255 ]
1256 );
1257
1258 assert_eq!(
1259 errors,
1260 vec![
1261 LexicalError::MissingExponent(Loc::File(0, 1, 42)),
1262 LexicalError::MissingExponent(Loc::File(0, 6, 42)),
1263 LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
1264 LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
1265 LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
1266 ]
1267 );
1268
1269 let mut errors = Vec::new();
1270 let tokens = Lexer::new("bool", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1271
1272 assert_eq!(tokens, vec!((0, Token::Bool, 4)));
1273
1274 let tokens = Lexer::new("uint8", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1275
1276 assert_eq!(tokens, vec!((0, Token::Uint(8), 5)));
1277
1278 let tokens = Lexer::new("hex", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1279
1280 assert_eq!(tokens, vec!((0, Token::Identifier("hex"), 3)));
1281
1282 let tokens = Lexer::new(
1283 "hex\"cafe_dead\" /* adad*** */",
1284 0,
1285 &mut comments,
1286 &mut errors,
1287 )
1288 .collect::<Vec<_>>();
1289
1290 assert_eq!(tokens, vec!((0, Token::HexLiteral("hex\"cafe_dead\""), 14)));
1291
1292 let tokens = Lexer::new(
1293 "// foo bar\n0x00fead0_12 00090 0_0",
1294 0,
1295 &mut comments,
1296 &mut errors,
1297 )
1298 .collect::<Vec<_>>();
1299
1300 assert_eq!(
1301 tokens,
1302 vec!(
1303 (11, Token::HexNumber("0x00fead0_12"), 23),
1304 (24, Token::Number("00090", ""), 29),
1305 (30, Token::Number("0_0", ""), 33)
1306 )
1307 );
1308
1309 let tokens = Lexer::new(
1310 "// foo bar\n0x00fead0_12 9.0008 0_0",
1311 0,
1312 &mut comments,
1313 &mut errors,
1314 )
1315 .collect::<Vec<_>>();
1316
1317 assert_eq!(
1318 tokens,
1319 vec!(
1320 (11, Token::HexNumber("0x00fead0_12"), 23),
1321 (24, Token::RationalNumber("9", "0008", ""), 30),
1322 (31, Token::Number("0_0", ""), 34)
1323 )
1324 );
1325
1326 let tokens = Lexer::new(
1327 "// foo bar\n0x00fead0_12 .0008 0.9e2",
1328 0,
1329 &mut comments,
1330 &mut errors,
1331 )
1332 .collect::<Vec<_>>();
1333
1334 assert_eq!(
1335 tokens,
1336 vec!(
1337 (11, Token::HexNumber("0x00fead0_12"), 23),
1338 (24, Token::RationalNumber("", "0008", ""), 29),
1339 (30, Token::RationalNumber("0", "9", "2"), 35)
1340 )
1341 );
1342
1343 let tokens = Lexer::new(
1344 "// foo bar\n0x00fead0_12 .0008 0.9e-2-2",
1345 0,
1346 &mut comments,
1347 &mut errors,
1348 )
1349 .collect::<Vec<_>>();
1350
1351 assert_eq!(
1352 tokens,
1353 vec!(
1354 (11, Token::HexNumber("0x00fead0_12"), 23),
1355 (24, Token::RationalNumber("", "0008", ""), 29),
1356 (30, Token::RationalNumber("0", "9", "-2"), 36),
1357 (36, Token::Subtract, 37),
1358 (37, Token::Number("2", ""), 38)
1359 )
1360 );
1361
1362 let tokens = Lexer::new("1.2_3e2-", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1363
1364 assert_eq!(
1365 tokens,
1366 vec!(
1367 (0, Token::RationalNumber("1", "2_3", "2"), 7),
1368 (7, Token::Subtract, 8)
1369 )
1370 );
1371
1372 let tokens = Lexer::new("\"foo\"", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1373
1374 assert_eq!(tokens, vec!((0, Token::StringLiteral(false, "foo"), 5)));
1375
1376 let tokens = Lexer::new(
1377 "pragma solidity >=0.5.0 <0.7.0;",
1378 0,
1379 &mut comments,
1380 &mut errors,
1381 )
1382 .collect::<Vec<_>>();
1383
1384 assert_eq!(
1385 tokens,
1386 vec!(
1387 (0, Token::Pragma, 6),
1388 (7, Token::Identifier("solidity"), 15),
1389 (16, Token::MoreEqual, 18),
1390 (18, Token::Number("0", ""), 19),
1391 (19, Token::Member, 20),
1392 (20, Token::Number("5", ""), 21),
1393 (21, Token::Member, 22),
1394 (22, Token::Number("0", ""), 23),
1395 (24, Token::Less, 25),
1396 (25, Token::Number("0", ""), 26),
1397 (26, Token::Member, 27),
1398 (27, Token::Number("7", ""), 28),
1399 (28, Token::Member, 29),
1400 (29, Token::Number("0", ""), 30),
1401 (30, Token::Semicolon, 31),
1402 )
1403 );
1404
1405 let tokens = Lexer::new(
1406 "pragma solidity \t>=0.5.0 <0.7.0 \n ;",
1407 0,
1408 &mut comments,
1409 &mut errors,
1410 )
1411 .collect::<Vec<_>>();
1412
1413 assert_eq!(
1414 tokens,
1415 vec!(
1416 (0, Token::Pragma, 6),
1417 (7, Token::Identifier("solidity"), 15),
1418 (17, Token::MoreEqual, 19),
1419 (19, Token::Number("0", ""), 20),
1420 (20, Token::Member, 21),
1421 (21, Token::Number("5", ""), 22),
1422 (22, Token::Member, 23),
1423 (23, Token::Number("0", ""), 24),
1424 (25, Token::Less, 26),
1425 (26, Token::Number("0", ""), 27),
1426 (27, Token::Member, 28),
1427 (28, Token::Number("7", ""), 29),
1428 (29, Token::Member, 30),
1429 (30, Token::Number("0", ""), 31),
1430 (34, Token::Semicolon, 35),
1431 )
1432 );
1433
1434 let tokens =
1435 Lexer::new("pragma solidity 赤;", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1436
1437 assert_eq!(
1438 tokens,
1439 vec!(
1440 (0, Token::Pragma, 6),
1441 (7, Token::Identifier("solidity"), 15),
1442 (16, Token::Identifier("赤"), 19),
1443 (19, Token::Semicolon, 20)
1444 )
1445 );
1446
1447 let tokens = Lexer::new(">>= >> >= >", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1448
1449 assert_eq!(
1450 tokens,
1451 vec!(
1452 (0, Token::ShiftRightAssign, 3),
1453 (4, Token::ShiftRight, 6),
1454 (7, Token::MoreEqual, 9),
1455 (10, Token::More, 11),
1456 )
1457 );
1458
1459 let tokens = Lexer::new("<<= << <= <", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1460
1461 assert_eq!(
1462 tokens,
1463 vec!(
1464 (0, Token::ShiftLeftAssign, 3),
1465 (4, Token::ShiftLeft, 6),
1466 (7, Token::LessEqual, 9),
1467 (10, Token::Less, 11),
1468 )
1469 );
1470
1471 let tokens = Lexer::new("-16 -- - -=", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1472
1473 assert_eq!(
1474 tokens,
1475 vec!(
1476 (0, Token::Subtract, 1),
1477 (1, Token::Number("16", ""), 3),
1478 (4, Token::Decrement, 6),
1479 (7, Token::Subtract, 8),
1480 (9, Token::SubtractAssign, 11),
1481 )
1482 );
1483
1484 let tokens = Lexer::new("-4 ", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1485
1486 assert_eq!(
1487 tokens,
1488 vec!((0, Token::Subtract, 1), (1, Token::Number("4", ""), 2),)
1489 );
1490
1491 let mut errors = Vec::new();
1492 let _ = Lexer::new(r#"hex"abcdefg""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1493
1494 assert_eq!(
1495 errors,
1496 vec![LexicalError::InvalidCharacterInHexLiteral(
1497 Loc::File(0, 10, 11),
1498 'g'
1499 )]
1500 );
1501
1502 let mut errors = Vec::new();
1503 let _ = Lexer::new(r#" € "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1504
1505 assert_eq!(
1506 errors,
1507 vec!(LexicalError::UnrecognisedToken(
1508 Loc::File(0, 1, 4),
1509 "€".to_owned()
1510 ))
1511 );
1512
1513 let mut errors = Vec::new();
1514 let _ = Lexer::new(r#"€"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1515
1516 assert_eq!(
1517 errors,
1518 vec!(LexicalError::UnrecognisedToken(
1519 Loc::File(0, 0, 3),
1520 "€".to_owned()
1521 ))
1522 );
1523
1524 let tokens =
1525 Lexer::new(r#"pragma foo bar"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1526
1527 assert_eq!(
1528 tokens,
1529 vec!(
1530 (0, Token::Pragma, 6),
1531 (7, Token::Identifier("foo"), 10),
1532 (11, Token::Identifier("bar"), 14),
1533 )
1534 );
1535
1536 comments.truncate(0);
1537
1538 let tokens = Lexer::new(r#"/// foo"#, 0, &mut comments, &mut errors).count();
1539
1540 assert_eq!(tokens, 0);
1541 assert_eq!(
1542 comments,
1543 vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
1544 );
1545
1546 comments.truncate(0);
1547
1548 let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();
1549
1550 assert_eq!(tokens, 0);
1551 assert_eq!(
1552 comments,
1553 vec!(
1554 Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
1555 Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
1556 )
1557 );
1558
1559 comments.truncate(0);
1560
1561 let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();
1562
1563 assert_eq!(tokens, 0);
1564 assert_eq!(
1565 comments,
1566 vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
1567 );
1568
1569 comments.truncate(0);
1570
1571 let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();
1572
1573 assert_eq!(tokens, 0);
1574 assert_eq!(
1575 comments,
1576 vec!(Comment::DocBlock(
1577 Loc::File(0, 0, 10),
1578 "/** foo */".to_owned()
1579 ))
1580 );
1581
1582 comments.truncate(0);
1583
1584 let tokens = Lexer::new(
1585 "/** jadajadadjada */\n/* bar */",
1586 0,
1587 &mut comments,
1588 &mut errors,
1589 )
1590 .count();
1591
1592 assert_eq!(tokens, 0);
1593 assert_eq!(
1594 comments,
1595 vec!(
1596 Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
1597 Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
1598 )
1599 );
1600
1601 let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
1602 assert_eq!(tokens, None);
1603
1604 let mut errors = Vec::new();
1605 let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
1606 assert_eq!(
1607 errors,
1608 vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
1609 );
1610
1611 let mut errors = Vec::new();
1612 let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
1613 assert_eq!(tokens, None);
1614
1615 let tokens = Lexer::new(
1617 ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
1618 0,
1619 &mut comments,
1620 &mut errors,
1621 )
1622 .collect::<Vec<_>>();
1623
1624 assert_eq!(
1625 tokens,
1626 vec!(
1627 (0, Token::MoreEqual, 2),
1628 (5, Token::Member, 6),
1629 (7, Token::Identifier("très"), 12),
1630 (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
1631 (65, Token::Identifier("カラス"), 74)
1632 )
1633 );
1634
1635 let tokens = Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1636
1637 assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));
1638
1639 let tokens =
1640 Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1641
1642 assert_eq!(
1643 tokens,
1644 vec!(
1645 (0, Token::Identifier("unicode"), 7),
1646 (8, Token::StringLiteral(false, "€"), 13),
1647 )
1648 );
1649
1650 let tokens = Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1652
1653 assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));
1654
1655 let tokens = Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1656
1657 assert_eq!(
1658 tokens,
1659 vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
1660 );
1661
1662 let mut errors = Vec::new();
1663 let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1664
1665 assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
1666 assert_eq!(
1667 errors,
1668 vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
1669 );
1670
1671 let mut errors = Vec::new();
1672 let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1673
1674 assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
1675 assert_eq!(
1676 errors,
1677 vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
1678 );
1679
1680 let mut errors = Vec::new();
1681 let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1682
1683 assert_eq!(
1684 tokens,
1685 vec!(
1686 (0, Token::Number("42", ""), 2),
1687 (2, Token::Member, 3),
1688 (3, Token::Identifier("a"), 4)
1689 )
1690 );
1691
1692 let tokens = Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1693
1694 assert_eq!(
1695 tokens,
1696 vec!(
1697 (0, Token::Number("42", ""), 2),
1698 (2, Token::Member, 3),
1699 (3, Token::Member, 4),
1700 (4, Token::Identifier("a"), 5)
1701 )
1702 );
1703
1704 comments.truncate(0);
1705
1706 let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();
1707
1708 assert_eq!(tokens, 0);
1709 assert_eq!(
1710 comments,
1711 vec!(
1712 Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
1713 Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
1714 )
1715 );
1716
1717 comments.truncate(0);
1718
1719 let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();
1720
1721 assert_eq!(tokens, 0);
1722 assert_eq!(
1723 comments,
1724 vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
1725 );
1726
1727 comments.truncate(0);
1728
1729 let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();
1730
1731 assert_eq!(tokens, 0);
1732 assert_eq!(
1733 comments,
1734 vec!(Comment::DocBlock(
1735 Loc::File(0, 0, 10),
1736 "/** foo */".to_owned()
1737 ))
1738 );
1739
1740 comments.truncate(0);
1741
1742 let tokens = Lexer::new(
1743 "/** jadajadadjada */\n/* bar */",
1744 0,
1745 &mut comments,
1746 &mut errors,
1747 )
1748 .count();
1749
1750 assert_eq!(tokens, 0);
1751 assert_eq!(
1752 comments,
1753 vec!(
1754 Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
1755 Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
1756 )
1757 );
1758
1759 let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
1760 assert_eq!(tokens, None);
1761
1762 let mut errors = Vec::new();
1763 let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
1764 assert_eq!(
1765 errors,
1766 vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
1767 );
1768
1769 let mut errors = Vec::new();
1770 let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
1771 assert_eq!(tokens, None);
1772
1773 let tokens = Lexer::new(
1775 ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
1776 0,
1777 &mut comments,
1778 &mut errors,
1779 )
1780 .collect::<Vec<(usize, Token, usize)>>();
1781
1782 assert_eq!(
1783 tokens,
1784 vec!(
1785 (0, Token::MoreEqual, 2),
1786 (5, Token::Member, 6),
1787 (7, Token::Identifier("très"), 12),
1788 (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
1789 (65, Token::Identifier("カラス"), 74)
1790 )
1791 );
1792
1793 let tokens =
1794 Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors)
1795 .collect::<Vec<(usize, Token, usize)>>();
1796
1797 assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));
1798
1799 let tokens =
1800 Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors)
1801 .collect::<Vec<(usize, Token, usize)>>();
1802
1803 assert_eq!(
1804 tokens,
1805 vec!(
1806 (0, Token::Identifier("unicode"), 7),
1807 (8, Token::StringLiteral(false, "€"), 13),
1808 )
1809 );
1810
1811 let tokens =
1813 Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors)
1814 .collect::<Vec<(usize, Token, usize)>>();
1815
1816 assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));
1817
1818 let tokens =
1819 Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors)
1820 .collect::<Vec<(usize, Token, usize)>>();
1821
1822 assert_eq!(
1823 tokens,
1824 vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
1825 );
1826
1827 let mut errors = Vec::new();
1828 let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors)
1829 .collect::<Vec<(usize, Token, usize)>>();
1830
1831 assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
1832 assert_eq!(
1833 errors,
1834 vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
1835 );
1836
1837 let mut errors = Vec::new();
1838 let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors)
1839 .collect::<Vec<(usize, Token, usize)>>();
1840
1841 assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
1842 assert_eq!(
1843 errors,
1844 vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
1845 );
1846
1847 let mut errors = Vec::new();
1848 let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors)
1849 .collect::<Vec<(usize, Token, usize)>>();
1850
1851 assert_eq!(
1852 tokens,
1853 vec!(
1854 (0, Token::Number("42", ""), 2),
1855 (2, Token::Member, 3),
1856 (3, Token::Identifier("a"), 4)
1857 )
1858 );
1859
1860 let tokens =
1861 Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors)
1862 .collect::<Vec<(usize, Token, usize)>>();
1863
1864 assert_eq!(
1865 tokens,
1866 vec!(
1867 (0, Token::Number("42", ""), 2),
1868 (2, Token::Member, 3),
1869 (3, Token::Member, 4),
1870 (4, Token::Identifier("a"), 5)
1871 )
1872 );
1873
1874 let mut errors = Vec::new();
1875 let _ = Lexer::new(r#"hex"g""#, 0, &mut comments, &mut errors)
1876 .collect::<Vec<(usize, Token, usize)>>();
1877 assert_eq!(
1878 errors,
1879 vec!(LexicalError::InvalidCharacterInHexLiteral(
1880 Loc::File(0, 4, 5),
1881 'g'
1882 ),)
1883 );
1884
1885 let mut errors = Vec::new();
1886 let tokens =
1887 Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<(usize, Token, usize)>>();
1888
1889 assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));
1890
1891 let mut errors = Vec::new();
1892 let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors)
1893 .collect::<Vec<(usize, Token, usize)>>();
1894
1895 assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));
1896
1897 let mut errors = Vec::new();
1898 let tokens = Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1899
1900 assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));
1901
1902 let mut errors = Vec::new();
1903 let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1904
1905 assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));
1906
1907 errors.clear();
1908 comments.clear();
1909 let tokens =
1910 Lexer::new("@my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1911 assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
1912 assert!(errors.is_empty());
1913 assert!(comments.is_empty());
1914
1915 errors.clear();
1916 comments.clear();
1917 let tokens =
1918 Lexer::new("@ my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1919 assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
1920 assert_eq!(
1921 errors,
1922 vec![LexicalError::UnrecognisedToken(
1923 Loc::File(0, 0, 1),
1924 "@".to_string()
1925 )]
1926 );
1927 assert!(comments.is_empty());
1928 }
1929}