1use std::collections::HashMap;
2
3use crate::error::{ParseError, ParseErrorAndPos};
4use crate::lexer::position::{Position, Span};
5use crate::lexer::reader::Reader;
6use crate::lexer::token::{FloatSuffix, IntBase, IntSuffix, Token, TokenKind};
7
8pub mod position;
9pub mod reader;
10pub mod token;
11
/// A source file as handed back by [`Lexer::file`].
///
/// NOTE(review): the fields are filled in by the reader module, which is not
/// visible here; the per-field notes below are inferred from the names and
/// should be confirmed there.
#[derive(Debug)]
pub struct File {
    /// Presumably the file's name or path.
    pub name: String,
    /// Presumably the complete source text.
    pub content: String,
    /// Presumably the positions at which the individual lines end — TODO confirm.
    pub line_ends: Vec<u32>,
}
18
/// Tokenizer that pulls characters from a [`Reader`] and groups them into tokens.
pub struct Lexer {
    /// Character source; also supplies the position and offset stored in each token.
    reader: Reader,
    /// Reserved words and their token kinds, consulted whenever an identifier is read.
    keywords: HashMap<&'static str, TokenKind>,
}
23
24impl Lexer {
/// Test-only constructor: builds a lexer over the in-memory string `code`.
///
/// The reader is created with the placeholder label `<<code>>`.
#[cfg(test)]
pub fn from_str(code: &str) -> Lexer {
    Lexer::new(Reader::from_string("<<code>>", code))
}
30
31 pub fn new(reader: Reader) -> Lexer {
32 let keywords = keywords_in_map();
33
34 Lexer { reader, keywords }
35 }
36
/// Returns the path reported by the underlying reader.
pub fn path(&self) -> &str {
    self.reader.path()
}
40
41 pub fn read_token(&mut self) -> Result<Token, ParseErrorAndPos> {
42 loop {
43 self.skip_white();
44
45 let pos = self.reader.pos();
46 let idx = self.reader.idx();
47 let ch = self.curr();
48
49 if let None = ch {
50 return Ok(Token::new(TokenKind::End, pos, Span::at(idx)));
51 }
52
53 if is_digit(ch) {
54 return self.read_number();
55 } else if self.is_comment_start() {
56 self.read_comment()?;
57 } else if self.is_multi_comment_start() {
58 self.read_multi_comment()?;
59 } else if is_identifier_start(ch) {
60 return self.read_identifier();
61 } else if is_quote(ch) {
62 return self.read_string(true);
63 } else if is_char_quote(ch) {
64 return self.read_char_literal();
65 } else if is_operator(ch) {
66 return self.read_operator();
67 } else {
68 let ch = ch.unwrap();
69
70 return Err(ParseErrorAndPos::new(pos, ParseError::UnknownChar(ch)));
71 }
72 }
73 }
74
75 fn skip_white(&mut self) {
76 while is_whitespace(self.curr()) {
77 self.read_char();
78 }
79 }
80
81 fn read_comment(&mut self) -> Result<(), ParseErrorAndPos> {
82 while !self.curr().is_none() && !is_newline(self.curr()) {
83 self.read_char();
84 }
85
86 Ok(())
87 }
88
89 fn read_multi_comment(&mut self) -> Result<(), ParseErrorAndPos> {
90 let pos = self.reader.pos();
91
92 self.read_char();
93 self.read_char();
94
95 while !self.curr().is_none() && !self.is_multi_comment_end() {
96 self.read_char();
97 }
98
99 if self.curr().is_none() {
100 return Err(ParseErrorAndPos::new(pos, ParseError::UnclosedComment));
101 }
102
103 self.read_char();
104 self.read_char();
105
106 Ok(())
107 }
108
109 fn read_identifier(&mut self) -> Result<Token, ParseErrorAndPos> {
110 let pos = self.reader.pos();
111 let idx = self.reader.idx();
112 let mut value = String::new();
113
114 while is_identifier(self.curr()) {
115 let ch = self.curr().unwrap();
116 self.read_char();
117 value.push(ch);
118 }
119
120 let lookup = self.keywords.get(&value[..]).cloned();
121 let mut ttype;
122
123 if let Some(tok_type) = lookup {
124 ttype = tok_type;
125
126 if ttype == TokenKind::Try {
127 if let Some(ch) = self.curr() {
128 if ch == '!' || ch == '?' {
129 self.read_char();
130
131 ttype = if ch == '!' {
132 TokenKind::TryForce
133 } else {
134 TokenKind::TryOpt
135 };
136 }
137 }
138 }
139 } else if value == "_" {
140 ttype = TokenKind::Underscore;
141 } else {
142 ttype = TokenKind::Identifier(value);
143 }
144
145 let span = self.span_from(idx);
146 Ok(Token::new(ttype, pos, span))
147 }
148
149 fn read_char_literal(&mut self) -> Result<Token, ParseErrorAndPos> {
150 let pos = self.reader.pos();
151 let idx = self.reader.idx();
152
153 self.read_char();
154 let ch = self.read_escaped_char(pos, ParseError::UnclosedChar)?;
155
156 if is_char_quote(self.curr()) {
157 self.read_char();
158
159 let ttype = TokenKind::LitChar(ch);
160 let span = self.span_from(idx);
161 Ok(Token::new(ttype, pos, span))
162 } else {
163 Err(ParseErrorAndPos::new(pos, ParseError::UnclosedChar))
164 }
165 }
166
167 fn read_escaped_char(
168 &mut self,
169 pos: Position,
170 unclosed: ParseError,
171 ) -> Result<char, ParseErrorAndPos> {
172 if let Some(ch) = self.curr() {
173 self.read_char();
174
175 if ch == '\\' {
176 let ch = if let Some(ch) = self.curr() {
177 ch
178 } else {
179 return Err(ParseErrorAndPos::new(pos, unclosed));
180 };
181
182 self.read_char();
183
184 match ch {
185 '\\' => Ok('\\'),
186 'n' => Ok('\n'),
187 't' => Ok('\t'),
188 'r' => Ok('\r'),
189 '\"' => Ok('\"'),
190 '\'' => Ok('\''),
191 '0' => Ok('\0'),
192 '$' => Ok('$'),
193 _ => {
194 let msg = ParseError::InvalidEscapeSequence(ch);
195 Err(ParseErrorAndPos::new(pos, msg))
196 }
197 }
198 } else {
199 Ok(ch)
200 }
201 } else {
202 Err(ParseErrorAndPos::new(pos, unclosed))
203 }
204 }
205
206 fn read_string(&mut self, skip_quote: bool) -> Result<Token, ParseErrorAndPos> {
207 let pos = self.reader.pos();
208 let idx = self.reader.idx();
209 let mut value = String::new();
210
211 if skip_quote {
212 assert_eq!(self.curr(), Some('\"'));
213 self.read_char();
214 }
215
216 while self.curr().is_some() && !is_quote(self.curr()) {
217 if self.curr() == Some('$') && self.next() == Some('{') {
218 self.read_char();
219 self.read_char();
220
221 let ttype = TokenKind::StringExpr(value);
222 let span = self.span_from(idx);
223 return Ok(Token::new(ttype, pos, span));
224 }
225
226 let ch = self.read_escaped_char(pos, ParseError::UnclosedString)?;
227 value.push(ch);
228 }
229
230 if is_quote(self.curr()) {
231 self.read_char();
232
233 let ttype = TokenKind::StringTail(value);
234 let span = self.span_from(idx);
235 Ok(Token::new(ttype, pos, span))
236 } else {
237 Err(ParseErrorAndPos::new(pos, ParseError::UnclosedString))
238 }
239 }
240
/// Continues reading string content from the current position without
/// expecting an opening quote (calls `read_string(false)`).
pub fn read_string_continuation(&mut self) -> Result<Token, ParseErrorAndPos> {
    self.read_string(false)
}
244
245 fn read_operator(&mut self) -> Result<Token, ParseErrorAndPos> {
246 let pos = self.reader.pos();
247 let idx = self.reader.idx();
248 let ch = self.curr().unwrap();
249 self.read_char();
250
251 let nch = self.curr().unwrap_or('x');
252 let nnch = self.next().unwrap_or('x');
253
254 let kind = match ch {
255 '+' => {
256 if nch == '=' {
257 self.read_char();
258 TokenKind::AddEq
259 } else {
260 TokenKind::Add
261 }
262 }
263
264 '-' => {
265 if nch == '>' {
266 self.read_char();
267 TokenKind::Arrow
268 } else {
269 TokenKind::Sub
270 }
271 }
272
273 '*' => TokenKind::Mul,
274 '/' => TokenKind::Div,
275 '%' => TokenKind::Mod,
276
277 '(' => TokenKind::LParen,
278 ')' => TokenKind::RParen,
279 '[' => TokenKind::LBracket,
280 ']' => TokenKind::RBracket,
281 '{' => TokenKind::LBrace,
282 '}' => TokenKind::RBrace,
283
284 '|' => {
285 if nch == '|' {
286 self.read_char();
287 TokenKind::Or
288 } else {
289 TokenKind::BitOr
290 }
291 }
292
293 '&' => {
294 if nch == '&' {
295 self.read_char();
296 TokenKind::And
297 } else {
298 TokenKind::BitAnd
299 }
300 }
301
302 '^' => TokenKind::Caret,
303 '~' => TokenKind::Tilde,
304 ',' => TokenKind::Comma,
305 ';' => TokenKind::Semicolon,
306 ':' => {
307 if nch == ':' {
308 self.read_char();
309 TokenKind::Sep
310 } else {
311 TokenKind::Colon
312 }
313 }
314 '.' => TokenKind::Dot,
315 '=' => {
316 if nch == '=' {
317 self.read_char();
318
319 if nnch == '=' {
320 self.read_char();
321 TokenKind::EqEqEq
322 } else {
323 TokenKind::EqEq
324 }
325 } else {
326 TokenKind::Eq
327 }
328 }
329
330 '<' => match nch {
331 '=' => {
332 self.read_char();
333 TokenKind::Le
334 }
335
336 '<' => {
337 self.read_char();
338 TokenKind::LtLt
339 }
340
341 _ => TokenKind::Lt,
342 },
343
344 '>' => match nch {
345 '=' => {
346 self.read_char();
347 TokenKind::Ge
348 }
349
350 '>' => {
351 self.read_char();
352
353 if nnch == '>' {
354 self.read_char();
355 TokenKind::GtGtGt
356 } else {
357 TokenKind::GtGt
358 }
359 }
360
361 _ => TokenKind::Gt,
362 },
363 '!' => {
364 if nch == '=' {
365 self.read_char();
366
367 if nnch == '=' {
368 self.read_char();
369 TokenKind::NeEqEq
370 } else {
371 TokenKind::Ne
372 }
373 } else {
374 TokenKind::Not
375 }
376 }
377 '@' => TokenKind::At,
378
379 _ => {
380 return Err(ParseErrorAndPos::new(pos, ParseError::UnknownChar(ch)));
381 }
382 };
383
384 let span = self.span_from(idx);
385 Ok(Token::new(kind, pos, span))
386 }
387
388 fn read_number(&mut self) -> Result<Token, ParseErrorAndPos> {
389 let pos = self.reader.pos();
390 let idx = self.reader.idx();
391 let mut value = String::new();
392
393 let base = if self.curr() == Some('0') {
394 let next = self.next();
395
396 match next {
397 Some('x') => {
398 self.read_char();
399 self.read_char();
400
401 IntBase::Hex
402 }
403
404 Some('b') => {
405 self.read_char();
406 self.read_char();
407
408 IntBase::Bin
409 }
410
411 _ => IntBase::Dec,
412 }
413 } else {
414 IntBase::Dec
415 };
416
417 self.read_digits(&mut value, base);
418
419 if base == IntBase::Dec && self.curr() == Some('.') && is_digit(self.next()) {
420 self.read_char();
421 value.push('.');
422
423 self.read_digits(&mut value, IntBase::Dec);
424
425 if self.curr() == Some('e') || self.curr() == Some('E') {
426 value.push(self.curr().unwrap());
427 self.read_char();
428
429 if self.curr() == Some('+') || self.curr() == Some('-') {
430 value.push(self.curr().unwrap());
431 self.read_char();
432 }
433
434 self.read_digits(&mut value, IntBase::Dec);
435 }
436
437 let suffix = match self.curr() {
438 Some('D') => {
439 self.read_char();
440 FloatSuffix::Double
441 }
442
443 Some('F') => {
444 self.read_char();
445 FloatSuffix::Float
446 }
447
448 _ => FloatSuffix::Double,
449 };
450
451 let ttype = TokenKind::LitFloat(value, suffix);
452 let span = self.span_from(idx);
453 return Ok(Token::new(ttype, pos, span));
454 }
455
456 let kind = match self.curr() {
457 Some('L') => {
458 self.read_char();
459 TokenKind::LitInt(value, base, IntSuffix::Long)
460 }
461
462 Some('Y') => {
463 self.read_char();
464 TokenKind::LitInt(value, base, IntSuffix::Byte)
465 }
466
467 Some('D') if base == IntBase::Dec => {
468 self.read_char();
469 TokenKind::LitFloat(value, FloatSuffix::Double)
470 }
471
472 Some('F') if base == IntBase::Dec => {
473 self.read_char();
474 TokenKind::LitFloat(value, FloatSuffix::Float)
475 }
476
477 _ => TokenKind::LitInt(value, base, IntSuffix::Int),
478 };
479
480 let span = self.span_from(idx);
481 Ok(Token::new(kind, pos, span))
482 }
483
484 fn span_from(&self, start: u32) -> Span {
485 Span::new(start, self.reader.idx() - start)
486 }
487
488 fn read_digits(&mut self, buffer: &mut String, base: IntBase) {
489 while is_digit_or_underscore(self.curr(), base) {
490 let ch = self.curr().unwrap();
491 self.read_char();
492 buffer.push(ch);
493 }
494 }
495
/// Advances the reader by one step, consuming the current character.
fn read_char(&mut self) {
    self.reader.advance();
}
499
/// Returns the reader's current character without consuming it; the lexer
/// treats `None` as the end of input.
fn curr(&self) -> Option<char> {
    self.reader.curr()
}
503
/// Peeks at the character after the current one (`reader.nth(1)`) without
/// consuming anything.
fn next(&self) -> Option<char> {
    self.reader.nth(1)
}
507
508 fn is_comment_start(&self) -> bool {
509 self.curr() == Some('/') && self.next() == Some('/')
510 }
511
512 fn is_multi_comment_start(&self) -> bool {
513 self.curr() == Some('/') && self.next() == Some('*')
514 }
515
516 fn is_multi_comment_end(&self) -> bool {
517 self.curr() == Some('*') && self.next() == Some('/')
518 }
519
/// Consumes the lexer and returns the [`File`] produced by its reader.
pub fn file(self) -> File {
    self.reader.file()
}
523}
524
/// Returns true when `ch` is an ASCII decimal digit (`0`-`9`).
///
/// `None` (end of input) is never a digit.
fn is_digit(ch: Option<char>) -> bool {
    ch.map_or(false, |c| c.is_ascii_digit())
}
528
529fn is_digit_or_underscore(ch: Option<char>, base: IntBase) -> bool {
530 ch.map(|ch| ch.is_digit(base.num()) || ch == '_')
531 .unwrap_or(false)
532}
533
/// Returns true when `ch` is whitespace according to `char::is_whitespace`.
///
/// `None` (end of input) yields false.
fn is_whitespace(ch: Option<char>) -> bool {
    match ch {
        Some(c) => c.is_whitespace(),
        None => false,
    }
}
537
/// Returns true only for a line feed (`'\n'`).
fn is_newline(ch: Option<char>) -> bool {
    match ch {
        Some('\n') => true,
        _ => false,
    }
}
541
/// Returns true only for the double quote that delimits string literals.
fn is_quote(ch: Option<char>) -> bool {
    match ch {
        Some('\"') => true,
        _ => false,
    }
}
545
/// Returns true only for the single quote that delimits character literals.
fn is_char_quote(ch: Option<char>) -> bool {
    match ch {
        Some('\'') => true,
        _ => false,
    }
}
549
/// Returns true when `ch` is one of the characters that can start an
/// operator or punctuation token.
///
/// `None` (end of input) yields false.
fn is_operator(ch: Option<char>) -> bool {
    const OPERATOR_CHARS: &str = "^+-*/%&|,=!~;:.()[]{}<>@";

    match ch {
        Some(c) => OPERATOR_CHARS.contains(c),
        None => false,
    }
}
554
/// Returns true when `ch` may begin an identifier: an ASCII letter or `_`.
///
/// `None` (end of input) yields false.
fn is_identifier_start(ch: Option<char>) -> bool {
    ch.map_or(false, |c| c.is_ascii_alphabetic() || c == '_')
}
561
562fn is_identifier(ch: Option<char>) -> bool {
563 is_identifier_start(ch) || is_digit(ch)
564}
565
566fn keywords_in_map() -> HashMap<&'static str, TokenKind> {
567 let mut keywords = HashMap::new();
568
569 keywords.insert("class", TokenKind::Class);
570 keywords.insert("self", TokenKind::This);
571 keywords.insert("Self", TokenKind::CapitalThis);
572 keywords.insert("super", TokenKind::Super);
573 keywords.insert("fun", TokenKind::Fun);
574 keywords.insert("let", TokenKind::Let);
575 keywords.insert("var", TokenKind::Var);
576 keywords.insert("while", TokenKind::While);
577 keywords.insert("if", TokenKind::If);
578 keywords.insert("else", TokenKind::Else);
579 keywords.insert("for", TokenKind::For);
580 keywords.insert("in", TokenKind::In);
581 keywords.insert("impl", TokenKind::Impl);
582 keywords.insert("loop", TokenKind::Loop);
583 keywords.insert("break", TokenKind::Break);
584 keywords.insert("continue", TokenKind::Continue);
585 keywords.insert("return", TokenKind::Return);
586 keywords.insert("true", TokenKind::True);
587 keywords.insert("false", TokenKind::False);
588 keywords.insert("nil", TokenKind::Nil);
589 keywords.insert("enum", TokenKind::Enum);
590 keywords.insert("type", TokenKind::Type);
591 keywords.insert("alias", TokenKind::Alias);
592 keywords.insert("struct", TokenKind::Struct);
593 keywords.insert("trait", TokenKind::Trait);
594 keywords.insert("module", TokenKind::Module);
595 keywords.insert("throws", TokenKind::Throws);
596 keywords.insert("throw", TokenKind::Throw);
597 keywords.insert("try", TokenKind::Try);
598 keywords.insert("do", TokenKind::Do);
599 keywords.insert("catch", TokenKind::Catch);
600 keywords.insert("finally", TokenKind::Finally);
601 keywords.insert("defer", TokenKind::Defer);
602 keywords.insert("is", TokenKind::Is);
603 keywords.insert("as", TokenKind::As);
604 keywords.insert("const", TokenKind::Const);
605
606 keywords
607}
608
609#[cfg(test)]
610mod tests {
611 use super::*;
612 use crate::lexer::reader::Reader;
613 use crate::lexer::token::TokenKind;
614
/// Asserts that the next token is `TokenKind::End` at line `l`, column `c`.
fn assert_end(reader: &mut Lexer, l: u32, c: u32) {
    assert_tok(reader, TokenKind::End, l, c);
}
618
/// Reads the next token (panicking on a lexer error) and asserts its kind,
/// line `l` and column `c`.
fn assert_tok(reader: &mut Lexer, kind: TokenKind, l: u32, c: u32) {
    let tok = reader.read_token().unwrap();
    assert_eq!(kind, tok.kind);
    assert_eq!(l, tok.position.line);
    assert_eq!(c, tok.position.column);
}
625
/// Asserts that reading the next token fails with error `msg` reported at
/// line `l`, column `c`.
fn assert_err(reader: &mut Lexer, msg: ParseError, l: u32, c: u32) {
    let err = reader.read_token().unwrap_err();
    assert_eq!(msg, err.error);
    assert_eq!(l, err.pos.line);
    assert_eq!(c, err.pos.column);
}
632
// Empty input yields `End` immediately, and again on every further read.
#[test]
fn test_read_empty_file() {
    let mut reader = Lexer::from_str("");
    assert_end(&mut reader, 1, 1);
    assert_end(&mut reader, 1, 1);
}
639
// Decimal integers: plain, with a leading zero, with the `Y` suffix and with
// underscores (which stay in the token text).
#[test]
fn test_read_numbers() {
    let mut lexer = Lexer::from_str("1 2\n0123 10");
    let plain = vec![("1", 1, 1), ("2", 1, 3), ("0123", 2, 1), ("10", 2, 6)];

    for (text, line, column) in plain {
        let expected = TokenKind::LitInt(text.into(), IntBase::Dec, IntSuffix::Int);
        assert_tok(&mut lexer, expected, line, column);
    }
    assert_end(&mut lexer, 2, 8);

    let mut lexer = Lexer::from_str("12Y 300Y 1_000 1__1");
    let suffixed = vec![
        ("12", IntSuffix::Byte, 1),
        ("300", IntSuffix::Byte, 5),
        ("1_000", IntSuffix::Int, 10),
        ("1__1", IntSuffix::Int, 16),
    ];

    for (text, suffix, column) in suffixed {
        let expected = TokenKind::LitInt(text.into(), IntBase::Dec, suffix);
        assert_tok(&mut lexer, expected, 1, column);
    }
}
695
// A `//` comment is skipped up to the newline; lexing resumes on the next line.
#[test]
fn test_skip_single_line_comment() {
    let mut lexer = Lexer::from_str("//test\n1");
    let one = TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int);

    assert_tok(&mut lexer, one, 2, 1);
    assert_end(&mut lexer, 2, 2);
}
707
// A line comment that runs to the end of input is not an error.
#[test]
fn test_unfinished_line_comment() {
    let mut reader = Lexer::from_str("//abc");
    assert_end(&mut reader, 1, 6);
}
713
// A `/* ... */` comment is skipped and the following token starts right after it.
#[test]
fn test_skip_multi_comment() {
    let mut lexer = Lexer::from_str("/*test*/1");
    let one = TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int);

    assert_tok(&mut lexer, one, 1, 9);
    assert_end(&mut lexer, 1, 10);
}
725
// An unterminated block comment is reported at the position of its `/*`.
#[test]
fn test_unfinished_multi_comment() {
    let mut lexer = Lexer::from_str("/*test");
    assert_err(&mut lexer, ParseError::UnclosedComment, 1, 1);

    let mut lexer = Lexer::from_str("1/*test");
    let one = TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int);
    assert_tok(&mut lexer, one, 1, 1);
    assert_err(&mut lexer, ParseError::UnclosedComment, 1, 2);
}
740
// Space-separated words that are not keywords become identifiers.
#[test]
fn test_read_identifier() {
    let mut lexer = Lexer::from_str("abc ident test");

    for (name, column) in vec![("abc", 1), ("ident", 5), ("test", 11)] {
        assert_tok(&mut lexer, TokenKind::Identifier(name.into()), 1, column);
    }
    assert_end(&mut lexer, 1, 15);
}
749
// Spaces separate tokens and advance the column by one each.
#[test]
fn test_code_with_spaces() {
    let mut lexer = Lexer::from_str("1 2 3");

    for (text, column) in vec![("1", 1), ("2", 3), ("3", 5)] {
        let expected = TokenKind::LitInt(text.into(), IntBase::Dec, IntSuffix::Int);
        assert_tok(&mut lexer, expected, 1, column);
    }
    assert_end(&mut lexer, 1, 6);
}
773
// Float literals: `F`/`D` suffixes on integers and fractions, and the
// `Double` default when a fraction has no suffix.
#[test]
fn test_float_numbers() {
    let mut lexer = Lexer::from_str("1F 1.0 0.1F 1.3D 4D");
    let expected = vec![
        ("1", FloatSuffix::Float, 1),
        ("1.0", FloatSuffix::Double, 4),
        ("0.1", FloatSuffix::Float, 8),
        ("1.3", FloatSuffix::Double, 13),
        ("4", FloatSuffix::Double, 18),
    ];

    for (text, suffix, column) in expected {
        assert_tok(&mut lexer, TokenKind::LitFloat(text.into(), suffix), 1, column);
    }
}
808
// Exponents (`e`/`E`, with optional sign) are kept verbatim in the float's text.
#[test]
fn test_float_scientific_notation() {
    let mut lexer = Lexer::from_str("1.0e1 1.0E1 1.0e+1 1.0e-1");
    let expected = vec![("1.0e1", 1), ("1.0E1", 7), ("1.0e+1", 13), ("1.0e-1", 20)];

    for (text, column) in expected {
        let kind = TokenKind::LitFloat(text.into(), FloatSuffix::Double);
        assert_tok(&mut lexer, kind, 1, column);
    }
}
837
// Hex literals drop the `0x` prefix from their text and accept the `L` suffix.
#[test]
fn test_hex_numbers() {
    let mut lexer = Lexer::from_str("0x1 0x2L 0xABCDEF 0xB1L");
    let expected = vec![
        ("1", IntSuffix::Int, 1),
        ("2", IntSuffix::Long, 5),
        ("ABCDEF", IntSuffix::Int, 10),
        ("B1", IntSuffix::Long, 19),
    ];

    for (text, suffix, column) in expected {
        let kind = TokenKind::LitInt(text.into(), IntBase::Hex, suffix);
        assert_tok(&mut lexer, kind, 1, column);
    }
}
867
// Each newline starts a new line and resets the column to 1.
#[test]
fn test_code_with_newlines() {
    let mut lexer = Lexer::from_str("1\n2\n3");

    for (text, line) in vec![("1", 1), ("2", 2), ("3", 3)] {
        let expected = TokenKind::LitInt(text.into(), IntBase::Dec, IntSuffix::Int);
        assert_tok(&mut lexer, expected, line, 1);
    }
    assert_end(&mut lexer, 3, 2);
}
891
// With the reader's default settings each tab moves the following token to
// columns 5 and 9.
#[test]
fn test_code_with_tabs() {
    let mut lexer = Lexer::from_str("1\t2\t3");

    for (text, column) in vec![("1", 1), ("2", 5), ("3", 9)] {
        let expected = TokenKind::LitInt(text.into(), IntBase::Dec, IntSuffix::Int);
        assert_tok(&mut lexer, expected, 1, column);
    }
    assert_end(&mut lexer, 1, 10);
}
915
// With a tab width of 8, a tab moves to the next column of the form 8k + 1.
#[test]
fn test_code_with_tabwidth8() {
    let mut source = Reader::from_string("<<code>>", "1\t2\n1234567\t8\n12345678\t9");
    source.set_tabwidth(8);
    let mut lexer = Lexer::new(source);

    let expected = vec![
        ("1", 1, 1),
        ("2", 1, 9),
        ("1234567", 2, 1),
        ("8", 2, 9),
        ("12345678", 3, 1),
        ("9", 3, 17),
    ];

    for (text, line, column) in expected {
        let kind = TokenKind::LitInt(text.into(), IntBase::Dec, IntSuffix::Int);
        assert_tok(&mut lexer, kind, line, column);
    }
    assert_end(&mut lexer, 3, 18);
}
960
// A raw newline inside a string literal is kept as part of the string.
#[test]
fn test_string_with_newline() {
    let mut reader = Lexer::from_str("\"abc\ndef\"");
    assert_tok(&mut reader, TokenKind::StringTail("abc\ndef".into()), 1, 1);
}
966
// Each supported escape decodes to its character; a lone trailing backslash
// leaves the string unclosed.
#[test]
fn test_escape_sequences() {
    let cases = vec![
        ("\"\\\"\"", "\""),
        ("\"\\$\"", "$"),
        ("\"\\\'\"", "'"),
        ("\"\\t\"", "\t"),
        ("\"\\n\"", "\n"),
        ("\"\\r\"", "\r"),
        ("\"\\\\\"", "\\"),
    ];

    for (source, decoded) in cases {
        let mut lexer = Lexer::from_str(source);
        assert_tok(&mut lexer, TokenKind::StringTail(decoded.into()), 1, 1);
    }

    let mut lexer = Lexer::from_str("\"\\");
    assert_err(&mut lexer, ParseError::UnclosedString, 1, 1);
}
993
// A string without a closing quote is reported at the opening quote.
#[test]
fn test_unclosed_string() {
    let mut reader = Lexer::from_str("\"abc");
    assert_err(&mut reader, ParseError::UnclosedString, 1, 1);
}
999
// Every malformed character literal is reported as `UnclosedChar` at its
// opening quote.
#[test]
fn test_unclosed_char() {
    for source in vec!["'a", "'\\", "'\\n", "'ab'", "'"] {
        let mut lexer = Lexer::from_str(source);
        assert_err(&mut lexer, ParseError::UnclosedChar, 1, 1);
    }
}
1017
// A plain string literal becomes one `StringTail` token without its quotes.
#[test]
fn test_string() {
    let mut reader = Lexer::from_str("\"abc\"");
    assert_tok(&mut reader, TokenKind::StringTail("abc".into()), 1, 1);
    assert_end(&mut reader, 1, 6);
}
1024
// Reserved words lex to their dedicated token kinds. Only the listed leading
// tokens of each source line are checked.
#[test]
fn test_keywords() {
    let cases = vec![
        (
            "fun let while if else self class",
            vec![
                (TokenKind::Fun, 1),
                (TokenKind::Let, 5),
                (TokenKind::While, 9),
                (TokenKind::If, 15),
                (TokenKind::Else, 18),
            ],
        ),
        (
            "self class super",
            vec![
                (TokenKind::This, 1),
                (TokenKind::Class, 6),
                (TokenKind::Super, 12),
            ],
        ),
        (
            "loop break continue return nil",
            vec![
                (TokenKind::Loop, 1),
                (TokenKind::Break, 6),
                (TokenKind::Continue, 12),
                (TokenKind::Return, 21),
                (TokenKind::Nil, 28),
            ],
        ),
        (
            "type struct enum alias trait const",
            vec![
                (TokenKind::Type, 1),
                (TokenKind::Struct, 6),
                (TokenKind::Enum, 13),
                (TokenKind::Alias, 18),
                (TokenKind::Trait, 24),
                (TokenKind::Const, 30),
            ],
        ),
        (
            "for in impl Self",
            vec![
                (TokenKind::For, 1),
                (TokenKind::In, 5),
                (TokenKind::Impl, 8),
                (TokenKind::CapitalThis, 13),
            ],
        ),
        ("defer", vec![(TokenKind::Defer, 1)]),
    ];

    for (source, expected) in cases {
        let mut lexer = Lexer::from_str(source);

        for (kind, column) in expected {
            assert_tok(&mut lexer, kind, 1, column);
        }
    }
}
1063
// Operators are lexed with the longest match, including the `try!` / `try?`
// forms and the `_` token.
#[test]
fn test_operators() {
    let cases = vec![
        (
            "==+=-*/%~.@",
            vec![
                (TokenKind::EqEq, 1),
                (TokenKind::AddEq, 3),
                (TokenKind::Sub, 5),
                (TokenKind::Mul, 6),
                (TokenKind::Div, 7),
                (TokenKind::Mod, 8),
                (TokenKind::Tilde, 9),
                (TokenKind::Dot, 10),
                (TokenKind::At, 11),
            ],
        ),
        (
            "<=<>=><",
            vec![
                (TokenKind::Le, 1),
                (TokenKind::Lt, 3),
                (TokenKind::Ge, 4),
                (TokenKind::Gt, 6),
                (TokenKind::Lt, 7),
            ],
        ),
        (
            "!=====!",
            vec![
                (TokenKind::NeEqEq, 1),
                (TokenKind::EqEqEq, 4),
                (TokenKind::Not, 7),
            ],
        ),
        ("!=!", vec![(TokenKind::Ne, 1), (TokenKind::Not, 3)]),
        ("->", vec![(TokenKind::Arrow, 1)]),
        (
            "try!try?1",
            vec![
                (TokenKind::TryForce, 1),
                (TokenKind::TryOpt, 5),
                (
                    TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
                    9,
                ),
            ],
        ),
        (
            ">><<>>>_::",
            vec![
                (TokenKind::GtGt, 1),
                (TokenKind::LtLt, 3),
                (TokenKind::GtGtGt, 5),
                (TokenKind::Underscore, 8),
                (TokenKind::Sep, 9),
            ],
        ),
    ];

    for (source, expected) in cases {
        let mut lexer = Lexer::from_str(source);

        for (kind, column) in expected {
            assert_tok(&mut lexer, kind, 1, column);
        }
    }
}
1113}