// dora_parser/lexer.rs

1use std::collections::HashMap;
2
3use crate::error::{ParseError, ParseErrorAndPos};
4use crate::lexer::position::{Position, Span};
5use crate::lexer::reader::Reader;
6use crate::lexer::token::{FloatSuffix, IntBase, IntSuffix, Token, TokenKind};
7
8pub mod position;
9pub mod reader;
10pub mod token;
11
/// A source file handed back by the lexer once tokenization is finished
/// (see `Lexer::file`, which obtains it from the underlying reader).
#[derive(Debug)]
pub struct File {
    /// Name of the file.
    pub name: String,
    /// Text content of the file.
    pub content: String,
    /// NOTE(review): presumably the positions within `content` at which each
    /// line ends -- confirm against the reader that fills this in.
    pub line_ends: Vec<u32>,
}
18
/// Tokenizer state: a character source plus the keyword lookup table.
pub struct Lexer {
    /// Character source all tokens are read from.
    reader: Reader,
    /// Maps keyword spellings to their token kinds (built by `keywords_in_map`).
    keywords: HashMap<&'static str, TokenKind>,
}
23
24impl Lexer {
25    #[cfg(test)]
26    pub fn from_str(code: &str) -> Lexer {
27        let reader = Reader::from_string("<<code>>", code);
28        Lexer::new(reader)
29    }
30
31    pub fn new(reader: Reader) -> Lexer {
32        let keywords = keywords_in_map();
33
34        Lexer { reader, keywords }
35    }
36
37    pub fn path(&self) -> &str {
38        self.reader.path()
39    }
40
41    pub fn read_token(&mut self) -> Result<Token, ParseErrorAndPos> {
42        loop {
43            self.skip_white();
44
45            let pos = self.reader.pos();
46            let idx = self.reader.idx();
47            let ch = self.curr();
48
49            if let None = ch {
50                return Ok(Token::new(TokenKind::End, pos, Span::at(idx)));
51            }
52
53            if is_digit(ch) {
54                return self.read_number();
55            } else if self.is_comment_start() {
56                self.read_comment()?;
57            } else if self.is_multi_comment_start() {
58                self.read_multi_comment()?;
59            } else if is_identifier_start(ch) {
60                return self.read_identifier();
61            } else if is_quote(ch) {
62                return self.read_string(true);
63            } else if is_char_quote(ch) {
64                return self.read_char_literal();
65            } else if is_operator(ch) {
66                return self.read_operator();
67            } else {
68                let ch = ch.unwrap();
69
70                return Err(ParseErrorAndPos::new(pos, ParseError::UnknownChar(ch)));
71            }
72        }
73    }
74
75    fn skip_white(&mut self) {
76        while is_whitespace(self.curr()) {
77            self.read_char();
78        }
79    }
80
81    fn read_comment(&mut self) -> Result<(), ParseErrorAndPos> {
82        while !self.curr().is_none() && !is_newline(self.curr()) {
83            self.read_char();
84        }
85
86        Ok(())
87    }
88
89    fn read_multi_comment(&mut self) -> Result<(), ParseErrorAndPos> {
90        let pos = self.reader.pos();
91
92        self.read_char();
93        self.read_char();
94
95        while !self.curr().is_none() && !self.is_multi_comment_end() {
96            self.read_char();
97        }
98
99        if self.curr().is_none() {
100            return Err(ParseErrorAndPos::new(pos, ParseError::UnclosedComment));
101        }
102
103        self.read_char();
104        self.read_char();
105
106        Ok(())
107    }
108
109    fn read_identifier(&mut self) -> Result<Token, ParseErrorAndPos> {
110        let pos = self.reader.pos();
111        let idx = self.reader.idx();
112        let mut value = String::new();
113
114        while is_identifier(self.curr()) {
115            let ch = self.curr().unwrap();
116            self.read_char();
117            value.push(ch);
118        }
119
120        let lookup = self.keywords.get(&value[..]).cloned();
121        let mut ttype;
122
123        if let Some(tok_type) = lookup {
124            ttype = tok_type;
125
126            if ttype == TokenKind::Try {
127                if let Some(ch) = self.curr() {
128                    if ch == '!' || ch == '?' {
129                        self.read_char();
130
131                        ttype = if ch == '!' {
132                            TokenKind::TryForce
133                        } else {
134                            TokenKind::TryOpt
135                        };
136                    }
137                }
138            }
139        } else if value == "_" {
140            ttype = TokenKind::Underscore;
141        } else {
142            ttype = TokenKind::Identifier(value);
143        }
144
145        let span = self.span_from(idx);
146        Ok(Token::new(ttype, pos, span))
147    }
148
149    fn read_char_literal(&mut self) -> Result<Token, ParseErrorAndPos> {
150        let pos = self.reader.pos();
151        let idx = self.reader.idx();
152
153        self.read_char();
154        let ch = self.read_escaped_char(pos, ParseError::UnclosedChar)?;
155
156        if is_char_quote(self.curr()) {
157            self.read_char();
158
159            let ttype = TokenKind::LitChar(ch);
160            let span = self.span_from(idx);
161            Ok(Token::new(ttype, pos, span))
162        } else {
163            Err(ParseErrorAndPos::new(pos, ParseError::UnclosedChar))
164        }
165    }
166
167    fn read_escaped_char(
168        &mut self,
169        pos: Position,
170        unclosed: ParseError,
171    ) -> Result<char, ParseErrorAndPos> {
172        if let Some(ch) = self.curr() {
173            self.read_char();
174
175            if ch == '\\' {
176                let ch = if let Some(ch) = self.curr() {
177                    ch
178                } else {
179                    return Err(ParseErrorAndPos::new(pos, unclosed));
180                };
181
182                self.read_char();
183
184                match ch {
185                    '\\' => Ok('\\'),
186                    'n' => Ok('\n'),
187                    't' => Ok('\t'),
188                    'r' => Ok('\r'),
189                    '\"' => Ok('\"'),
190                    '\'' => Ok('\''),
191                    '0' => Ok('\0'),
192                    '$' => Ok('$'),
193                    _ => {
194                        let msg = ParseError::InvalidEscapeSequence(ch);
195                        Err(ParseErrorAndPos::new(pos, msg))
196                    }
197                }
198            } else {
199                Ok(ch)
200            }
201        } else {
202            Err(ParseErrorAndPos::new(pos, unclosed))
203        }
204    }
205
206    fn read_string(&mut self, skip_quote: bool) -> Result<Token, ParseErrorAndPos> {
207        let pos = self.reader.pos();
208        let idx = self.reader.idx();
209        let mut value = String::new();
210
211        if skip_quote {
212            assert_eq!(self.curr(), Some('\"'));
213            self.read_char();
214        }
215
216        while self.curr().is_some() && !is_quote(self.curr()) {
217            if self.curr() == Some('$') && self.next() == Some('{') {
218                self.read_char();
219                self.read_char();
220
221                let ttype = TokenKind::StringExpr(value);
222                let span = self.span_from(idx);
223                return Ok(Token::new(ttype, pos, span));
224            }
225
226            let ch = self.read_escaped_char(pos, ParseError::UnclosedString)?;
227            value.push(ch);
228        }
229
230        if is_quote(self.curr()) {
231            self.read_char();
232
233            let ttype = TokenKind::StringTail(value);
234            let span = self.span_from(idx);
235            Ok(Token::new(ttype, pos, span))
236        } else {
237            Err(ParseErrorAndPos::new(pos, ParseError::UnclosedString))
238        }
239    }
240
241    pub fn read_string_continuation(&mut self) -> Result<Token, ParseErrorAndPos> {
242        self.read_string(false)
243    }
244
245    fn read_operator(&mut self) -> Result<Token, ParseErrorAndPos> {
246        let pos = self.reader.pos();
247        let idx = self.reader.idx();
248        let ch = self.curr().unwrap();
249        self.read_char();
250
251        let nch = self.curr().unwrap_or('x');
252        let nnch = self.next().unwrap_or('x');
253
254        let kind = match ch {
255            '+' => {
256                if nch == '=' {
257                    self.read_char();
258                    TokenKind::AddEq
259                } else {
260                    TokenKind::Add
261                }
262            }
263
264            '-' => {
265                if nch == '>' {
266                    self.read_char();
267                    TokenKind::Arrow
268                } else {
269                    TokenKind::Sub
270                }
271            }
272
273            '*' => TokenKind::Mul,
274            '/' => TokenKind::Div,
275            '%' => TokenKind::Mod,
276
277            '(' => TokenKind::LParen,
278            ')' => TokenKind::RParen,
279            '[' => TokenKind::LBracket,
280            ']' => TokenKind::RBracket,
281            '{' => TokenKind::LBrace,
282            '}' => TokenKind::RBrace,
283
284            '|' => {
285                if nch == '|' {
286                    self.read_char();
287                    TokenKind::Or
288                } else {
289                    TokenKind::BitOr
290                }
291            }
292
293            '&' => {
294                if nch == '&' {
295                    self.read_char();
296                    TokenKind::And
297                } else {
298                    TokenKind::BitAnd
299                }
300            }
301
302            '^' => TokenKind::Caret,
303            '~' => TokenKind::Tilde,
304            ',' => TokenKind::Comma,
305            ';' => TokenKind::Semicolon,
306            ':' => {
307                if nch == ':' {
308                    self.read_char();
309                    TokenKind::Sep
310                } else {
311                    TokenKind::Colon
312                }
313            }
314            '.' => TokenKind::Dot,
315            '=' => {
316                if nch == '=' {
317                    self.read_char();
318
319                    if nnch == '=' {
320                        self.read_char();
321                        TokenKind::EqEqEq
322                    } else {
323                        TokenKind::EqEq
324                    }
325                } else {
326                    TokenKind::Eq
327                }
328            }
329
330            '<' => match nch {
331                '=' => {
332                    self.read_char();
333                    TokenKind::Le
334                }
335
336                '<' => {
337                    self.read_char();
338                    TokenKind::LtLt
339                }
340
341                _ => TokenKind::Lt,
342            },
343
344            '>' => match nch {
345                '=' => {
346                    self.read_char();
347                    TokenKind::Ge
348                }
349
350                '>' => {
351                    self.read_char();
352
353                    if nnch == '>' {
354                        self.read_char();
355                        TokenKind::GtGtGt
356                    } else {
357                        TokenKind::GtGt
358                    }
359                }
360
361                _ => TokenKind::Gt,
362            },
363            '!' => {
364                if nch == '=' {
365                    self.read_char();
366
367                    if nnch == '=' {
368                        self.read_char();
369                        TokenKind::NeEqEq
370                    } else {
371                        TokenKind::Ne
372                    }
373                } else {
374                    TokenKind::Not
375                }
376            }
377            '@' => TokenKind::At,
378
379            _ => {
380                return Err(ParseErrorAndPos::new(pos, ParseError::UnknownChar(ch)));
381            }
382        };
383
384        let span = self.span_from(idx);
385        Ok(Token::new(kind, pos, span))
386    }
387
388    fn read_number(&mut self) -> Result<Token, ParseErrorAndPos> {
389        let pos = self.reader.pos();
390        let idx = self.reader.idx();
391        let mut value = String::new();
392
393        let base = if self.curr() == Some('0') {
394            let next = self.next();
395
396            match next {
397                Some('x') => {
398                    self.read_char();
399                    self.read_char();
400
401                    IntBase::Hex
402                }
403
404                Some('b') => {
405                    self.read_char();
406                    self.read_char();
407
408                    IntBase::Bin
409                }
410
411                _ => IntBase::Dec,
412            }
413        } else {
414            IntBase::Dec
415        };
416
417        self.read_digits(&mut value, base);
418
419        if base == IntBase::Dec && self.curr() == Some('.') && is_digit(self.next()) {
420            self.read_char();
421            value.push('.');
422
423            self.read_digits(&mut value, IntBase::Dec);
424
425            if self.curr() == Some('e') || self.curr() == Some('E') {
426                value.push(self.curr().unwrap());
427                self.read_char();
428
429                if self.curr() == Some('+') || self.curr() == Some('-') {
430                    value.push(self.curr().unwrap());
431                    self.read_char();
432                }
433
434                self.read_digits(&mut value, IntBase::Dec);
435            }
436
437            let suffix = match self.curr() {
438                Some('D') => {
439                    self.read_char();
440                    FloatSuffix::Double
441                }
442
443                Some('F') => {
444                    self.read_char();
445                    FloatSuffix::Float
446                }
447
448                _ => FloatSuffix::Double,
449            };
450
451            let ttype = TokenKind::LitFloat(value, suffix);
452            let span = self.span_from(idx);
453            return Ok(Token::new(ttype, pos, span));
454        }
455
456        let kind = match self.curr() {
457            Some('L') => {
458                self.read_char();
459                TokenKind::LitInt(value, base, IntSuffix::Long)
460            }
461
462            Some('Y') => {
463                self.read_char();
464                TokenKind::LitInt(value, base, IntSuffix::Byte)
465            }
466
467            Some('D') if base == IntBase::Dec => {
468                self.read_char();
469                TokenKind::LitFloat(value, FloatSuffix::Double)
470            }
471
472            Some('F') if base == IntBase::Dec => {
473                self.read_char();
474                TokenKind::LitFloat(value, FloatSuffix::Float)
475            }
476
477            _ => TokenKind::LitInt(value, base, IntSuffix::Int),
478        };
479
480        let span = self.span_from(idx);
481        Ok(Token::new(kind, pos, span))
482    }
483
484    fn span_from(&self, start: u32) -> Span {
485        Span::new(start, self.reader.idx() - start)
486    }
487
488    fn read_digits(&mut self, buffer: &mut String, base: IntBase) {
489        while is_digit_or_underscore(self.curr(), base) {
490            let ch = self.curr().unwrap();
491            self.read_char();
492            buffer.push(ch);
493        }
494    }
495
496    fn read_char(&mut self) {
497        self.reader.advance();
498    }
499
500    fn curr(&self) -> Option<char> {
501        self.reader.curr()
502    }
503
504    fn next(&self) -> Option<char> {
505        self.reader.nth(1)
506    }
507
508    fn is_comment_start(&self) -> bool {
509        self.curr() == Some('/') && self.next() == Some('/')
510    }
511
512    fn is_multi_comment_start(&self) -> bool {
513        self.curr() == Some('/') && self.next() == Some('*')
514    }
515
516    fn is_multi_comment_end(&self) -> bool {
517        self.curr() == Some('*') && self.next() == Some('/')
518    }
519
520    pub fn file(self) -> File {
521        self.reader.file()
522    }
523}
524
/// Returns `true` when `ch` holds one of the ASCII decimal digits `0`-`9`.
/// `None` (end of input) is never a digit.
fn is_digit(ch: Option<char>) -> bool {
    match ch {
        Some(c) => c.is_ascii_digit(),
        None => false,
    }
}
528
529fn is_digit_or_underscore(ch: Option<char>, base: IntBase) -> bool {
530    ch.map(|ch| ch.is_digit(base.num()) || ch == '_')
531        .unwrap_or(false)
532}
533
/// Returns `true` when `ch` holds a whitespace character as defined by
/// `char::is_whitespace`. `None` yields `false`.
fn is_whitespace(ch: Option<char>) -> bool {
    match ch {
        Some(c) => c.is_whitespace(),
        None => false,
    }
}
537
/// Returns `true` only for the line-feed character `\n`.
fn is_newline(ch: Option<char>) -> bool {
    match ch {
        Some('\n') => true,
        _ => false,
    }
}
541
/// Returns `true` only for the double quote that delimits string literals.
fn is_quote(ch: Option<char>) -> bool {
    match ch {
        Some('\"') => true,
        _ => false,
    }
}
545
/// Returns `true` only for the single quote that delimits character literals.
fn is_char_quote(ch: Option<char>) -> bool {
    match ch {
        Some('\'') => true,
        _ => false,
    }
}
549
/// Returns `true` when `ch` is one of the characters that can begin an
/// operator or punctuation token. `None` yields `false`.
fn is_operator(ch: Option<char>) -> bool {
    const OPERATOR_CHARS: &str = "^+-*/%&|,=!~;:.()[]{}<>@";

    match ch {
        Some(c) => OPERATOR_CHARS.chars().any(|op| op == c),
        None => false,
    }
}
554
/// Returns `true` when `ch` may begin an identifier: an ASCII letter
/// (`a`-`z`, `A`-`Z`) or an underscore. `None` yields `false`.
fn is_identifier_start(ch: Option<char>) -> bool {
    match ch {
        Some(c) => c.is_ascii_alphabetic() || c == '_',
        None => false,
    }
}
561
562fn is_identifier(ch: Option<char>) -> bool {
563    is_identifier_start(ch) || is_digit(ch)
564}
565
566fn keywords_in_map() -> HashMap<&'static str, TokenKind> {
567    let mut keywords = HashMap::new();
568
569    keywords.insert("class", TokenKind::Class);
570    keywords.insert("self", TokenKind::This);
571    keywords.insert("Self", TokenKind::CapitalThis);
572    keywords.insert("super", TokenKind::Super);
573    keywords.insert("fun", TokenKind::Fun);
574    keywords.insert("let", TokenKind::Let);
575    keywords.insert("var", TokenKind::Var);
576    keywords.insert("while", TokenKind::While);
577    keywords.insert("if", TokenKind::If);
578    keywords.insert("else", TokenKind::Else);
579    keywords.insert("for", TokenKind::For);
580    keywords.insert("in", TokenKind::In);
581    keywords.insert("impl", TokenKind::Impl);
582    keywords.insert("loop", TokenKind::Loop);
583    keywords.insert("break", TokenKind::Break);
584    keywords.insert("continue", TokenKind::Continue);
585    keywords.insert("return", TokenKind::Return);
586    keywords.insert("true", TokenKind::True);
587    keywords.insert("false", TokenKind::False);
588    keywords.insert("nil", TokenKind::Nil);
589    keywords.insert("enum", TokenKind::Enum);
590    keywords.insert("type", TokenKind::Type);
591    keywords.insert("alias", TokenKind::Alias);
592    keywords.insert("struct", TokenKind::Struct);
593    keywords.insert("trait", TokenKind::Trait);
594    keywords.insert("module", TokenKind::Module);
595    keywords.insert("throws", TokenKind::Throws);
596    keywords.insert("throw", TokenKind::Throw);
597    keywords.insert("try", TokenKind::Try);
598    keywords.insert("do", TokenKind::Do);
599    keywords.insert("catch", TokenKind::Catch);
600    keywords.insert("finally", TokenKind::Finally);
601    keywords.insert("defer", TokenKind::Defer);
602    keywords.insert("is", TokenKind::Is);
603    keywords.insert("as", TokenKind::As);
604    keywords.insert("const", TokenKind::Const);
605
606    keywords
607}
608
609#[cfg(test)]
610mod tests {
611    use super::*;
612    use crate::lexer::reader::Reader;
613    use crate::lexer::token::TokenKind;
614
615    fn assert_end(reader: &mut Lexer, l: u32, c: u32) {
616        assert_tok(reader, TokenKind::End, l, c);
617    }
618
619    fn assert_tok(reader: &mut Lexer, kind: TokenKind, l: u32, c: u32) {
620        let tok = reader.read_token().unwrap();
621        assert_eq!(kind, tok.kind);
622        assert_eq!(l, tok.position.line);
623        assert_eq!(c, tok.position.column);
624    }
625
626    fn assert_err(reader: &mut Lexer, msg: ParseError, l: u32, c: u32) {
627        let err = reader.read_token().unwrap_err();
628        assert_eq!(msg, err.error);
629        assert_eq!(l, err.pos.line);
630        assert_eq!(c, err.pos.column);
631    }
632
633    #[test]
634    fn test_read_empty_file() {
635        let mut reader = Lexer::from_str("");
636        assert_end(&mut reader, 1, 1);
637        assert_end(&mut reader, 1, 1);
638    }
639
640    #[test]
641    fn test_read_numbers() {
642        let mut reader = Lexer::from_str("1 2\n0123 10");
643        assert_tok(
644            &mut reader,
645            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
646            1,
647            1,
648        );
649        assert_tok(
650            &mut reader,
651            TokenKind::LitInt("2".into(), IntBase::Dec, IntSuffix::Int),
652            1,
653            3,
654        );
655        assert_tok(
656            &mut reader,
657            TokenKind::LitInt("0123".into(), IntBase::Dec, IntSuffix::Int),
658            2,
659            1,
660        );
661        assert_tok(
662            &mut reader,
663            TokenKind::LitInt("10".into(), IntBase::Dec, IntSuffix::Int),
664            2,
665            6,
666        );
667        assert_end(&mut reader, 2, 8);
668
669        let mut reader = Lexer::from_str("12Y 300Y 1_000 1__1");
670        assert_tok(
671            &mut reader,
672            TokenKind::LitInt("12".into(), IntBase::Dec, IntSuffix::Byte),
673            1,
674            1,
675        );
676        assert_tok(
677            &mut reader,
678            TokenKind::LitInt("300".into(), IntBase::Dec, IntSuffix::Byte),
679            1,
680            5,
681        );
682        assert_tok(
683            &mut reader,
684            TokenKind::LitInt("1_000".into(), IntBase::Dec, IntSuffix::Int),
685            1,
686            10,
687        );
688        assert_tok(
689            &mut reader,
690            TokenKind::LitInt("1__1".into(), IntBase::Dec, IntSuffix::Int),
691            1,
692            16,
693        );
694    }
695
696    #[test]
697    fn test_skip_single_line_comment() {
698        let mut reader = Lexer::from_str("//test\n1");
699        assert_tok(
700            &mut reader,
701            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
702            2,
703            1,
704        );
705        assert_end(&mut reader, 2, 2);
706    }
707
708    #[test]
709    fn test_unfinished_line_comment() {
710        let mut reader = Lexer::from_str("//abc");
711        assert_end(&mut reader, 1, 6);
712    }
713
714    #[test]
715    fn test_skip_multi_comment() {
716        let mut reader = Lexer::from_str("/*test*/1");
717        assert_tok(
718            &mut reader,
719            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
720            1,
721            9,
722        );
723        assert_end(&mut reader, 1, 10);
724    }
725
726    #[test]
727    fn test_unfinished_multi_comment() {
728        let mut reader = Lexer::from_str("/*test");
729        assert_err(&mut reader, ParseError::UnclosedComment, 1, 1);
730
731        let mut reader = Lexer::from_str("1/*test");
732        assert_tok(
733            &mut reader,
734            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
735            1,
736            1,
737        );
738        assert_err(&mut reader, ParseError::UnclosedComment, 1, 2);
739    }
740
741    #[test]
742    fn test_read_identifier() {
743        let mut reader = Lexer::from_str("abc ident test");
744        assert_tok(&mut reader, TokenKind::Identifier("abc".into()), 1, 1);
745        assert_tok(&mut reader, TokenKind::Identifier("ident".into()), 1, 5);
746        assert_tok(&mut reader, TokenKind::Identifier("test".into()), 1, 11);
747        assert_end(&mut reader, 1, 15);
748    }
749
750    #[test]
751    fn test_code_with_spaces() {
752        let mut reader = Lexer::from_str("1 2 3");
753        assert_tok(
754            &mut reader,
755            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
756            1,
757            1,
758        );
759        assert_tok(
760            &mut reader,
761            TokenKind::LitInt("2".into(), IntBase::Dec, IntSuffix::Int),
762            1,
763            3,
764        );
765        assert_tok(
766            &mut reader,
767            TokenKind::LitInt("3".into(), IntBase::Dec, IntSuffix::Int),
768            1,
769            5,
770        );
771        assert_end(&mut reader, 1, 6);
772    }
773
774    #[test]
775    fn test_float_numbers() {
776        let mut reader = Lexer::from_str("1F 1.0 0.1F 1.3D 4D");
777        assert_tok(
778            &mut reader,
779            TokenKind::LitFloat("1".into(), FloatSuffix::Float),
780            1,
781            1,
782        );
783        assert_tok(
784            &mut reader,
785            TokenKind::LitFloat("1.0".into(), FloatSuffix::Double),
786            1,
787            4,
788        );
789        assert_tok(
790            &mut reader,
791            TokenKind::LitFloat("0.1".into(), FloatSuffix::Float),
792            1,
793            8,
794        );
795        assert_tok(
796            &mut reader,
797            TokenKind::LitFloat("1.3".into(), FloatSuffix::Double),
798            1,
799            13,
800        );
801        assert_tok(
802            &mut reader,
803            TokenKind::LitFloat("4".into(), FloatSuffix::Double),
804            1,
805            18,
806        );
807    }
808
809    #[test]
810    fn test_float_scientific_notation() {
811        let mut reader = Lexer::from_str("1.0e1 1.0E1 1.0e+1 1.0e-1");
812        assert_tok(
813            &mut reader,
814            TokenKind::LitFloat("1.0e1".into(), FloatSuffix::Double),
815            1,
816            1,
817        );
818        assert_tok(
819            &mut reader,
820            TokenKind::LitFloat("1.0E1".into(), FloatSuffix::Double),
821            1,
822            7,
823        );
824        assert_tok(
825            &mut reader,
826            TokenKind::LitFloat("1.0e+1".into(), FloatSuffix::Double),
827            1,
828            13,
829        );
830        assert_tok(
831            &mut reader,
832            TokenKind::LitFloat("1.0e-1".into(), FloatSuffix::Double),
833            1,
834            20,
835        );
836    }
837
838    #[test]
839    fn test_hex_numbers() {
840        let mut reader = Lexer::from_str("0x1 0x2L 0xABCDEF 0xB1L");
841
842        assert_tok(
843            &mut reader,
844            TokenKind::LitInt("1".into(), IntBase::Hex, IntSuffix::Int),
845            1,
846            1,
847        );
848        assert_tok(
849            &mut reader,
850            TokenKind::LitInt("2".into(), IntBase::Hex, IntSuffix::Long),
851            1,
852            5,
853        );
854        assert_tok(
855            &mut reader,
856            TokenKind::LitInt("ABCDEF".into(), IntBase::Hex, IntSuffix::Int),
857            1,
858            10,
859        );
860        assert_tok(
861            &mut reader,
862            TokenKind::LitInt("B1".into(), IntBase::Hex, IntSuffix::Long),
863            1,
864            19,
865        );
866    }
867
868    #[test]
869    fn test_code_with_newlines() {
870        let mut reader = Lexer::from_str("1\n2\n3");
871        assert_tok(
872            &mut reader,
873            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
874            1,
875            1,
876        );
877        assert_tok(
878            &mut reader,
879            TokenKind::LitInt("2".into(), IntBase::Dec, IntSuffix::Int),
880            2,
881            1,
882        );
883        assert_tok(
884            &mut reader,
885            TokenKind::LitInt("3".into(), IntBase::Dec, IntSuffix::Int),
886            3,
887            1,
888        );
889        assert_end(&mut reader, 3, 2);
890    }
891
892    #[test]
893    fn test_code_with_tabs() {
894        let mut reader = Lexer::from_str("1\t2\t3");
895        assert_tok(
896            &mut reader,
897            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
898            1,
899            1,
900        );
901        assert_tok(
902            &mut reader,
903            TokenKind::LitInt("2".into(), IntBase::Dec, IntSuffix::Int),
904            1,
905            5,
906        );
907        assert_tok(
908            &mut reader,
909            TokenKind::LitInt("3".into(), IntBase::Dec, IntSuffix::Int),
910            1,
911            9,
912        );
913        assert_end(&mut reader, 1, 10);
914    }
915
916    #[test]
917    fn test_code_with_tabwidth8() {
918        let mut reader = Reader::from_string("<<code>>", "1\t2\n1234567\t8\n12345678\t9");
919        reader.set_tabwidth(8);
920        let mut reader = Lexer::new(reader);
921
922        assert_tok(
923            &mut reader,
924            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
925            1,
926            1,
927        );
928        assert_tok(
929            &mut reader,
930            TokenKind::LitInt("2".into(), IntBase::Dec, IntSuffix::Int),
931            1,
932            9,
933        );
934        assert_tok(
935            &mut reader,
936            TokenKind::LitInt("1234567".into(), IntBase::Dec, IntSuffix::Int),
937            2,
938            1,
939        );
940        assert_tok(
941            &mut reader,
942            TokenKind::LitInt("8".into(), IntBase::Dec, IntSuffix::Int),
943            2,
944            9,
945        );
946        assert_tok(
947            &mut reader,
948            TokenKind::LitInt("12345678".into(), IntBase::Dec, IntSuffix::Int),
949            3,
950            1,
951        );
952        assert_tok(
953            &mut reader,
954            TokenKind::LitInt("9".into(), IntBase::Dec, IntSuffix::Int),
955            3,
956            17,
957        );
958        assert_end(&mut reader, 3, 18);
959    }
960
961    #[test]
962    fn test_string_with_newline() {
963        let mut reader = Lexer::from_str("\"abc\ndef\"");
964        assert_tok(&mut reader, TokenKind::StringTail("abc\ndef".into()), 1, 1);
965    }
966
967    #[test]
968    fn test_escape_sequences() {
969        let mut reader = Lexer::from_str("\"\\\"\"");
970        assert_tok(&mut reader, TokenKind::StringTail("\"".into()), 1, 1);
971
972        let mut reader = Lexer::from_str("\"\\$\"");
973        assert_tok(&mut reader, TokenKind::StringTail("$".into()), 1, 1);
974
975        let mut reader = Lexer::from_str("\"\\\'\"");
976        assert_tok(&mut reader, TokenKind::StringTail("'".into()), 1, 1);
977
978        let mut reader = Lexer::from_str("\"\\t\"");
979        assert_tok(&mut reader, TokenKind::StringTail("\t".into()), 1, 1);
980
981        let mut reader = Lexer::from_str("\"\\n\"");
982        assert_tok(&mut reader, TokenKind::StringTail("\n".into()), 1, 1);
983
984        let mut reader = Lexer::from_str("\"\\r\"");
985        assert_tok(&mut reader, TokenKind::StringTail("\r".into()), 1, 1);
986
987        let mut reader = Lexer::from_str("\"\\\\\"");
988        assert_tok(&mut reader, TokenKind::StringTail("\\".into()), 1, 1);
989
990        let mut reader = Lexer::from_str("\"\\");
991        assert_err(&mut reader, ParseError::UnclosedString, 1, 1);
992    }
993
994    #[test]
995    fn test_unclosed_string() {
996        let mut reader = Lexer::from_str("\"abc");
997        assert_err(&mut reader, ParseError::UnclosedString, 1, 1);
998    }
999
1000    #[test]
1001    fn test_unclosed_char() {
1002        let mut reader = Lexer::from_str("'a");
1003        assert_err(&mut reader, ParseError::UnclosedChar, 1, 1);
1004
1005        let mut reader = Lexer::from_str("'\\");
1006        assert_err(&mut reader, ParseError::UnclosedChar, 1, 1);
1007
1008        let mut reader = Lexer::from_str("'\\n");
1009        assert_err(&mut reader, ParseError::UnclosedChar, 1, 1);
1010
1011        let mut reader = Lexer::from_str("'ab'");
1012        assert_err(&mut reader, ParseError::UnclosedChar, 1, 1);
1013
1014        let mut reader = Lexer::from_str("'");
1015        assert_err(&mut reader, ParseError::UnclosedChar, 1, 1);
1016    }
1017
1018    #[test]
1019    fn test_string() {
1020        let mut reader = Lexer::from_str("\"abc\"");
1021        assert_tok(&mut reader, TokenKind::StringTail("abc".into()), 1, 1);
1022        assert_end(&mut reader, 1, 6);
1023    }
1024
1025    #[test]
1026    fn test_keywords() {
1027        let mut reader = Lexer::from_str("fun let while if else self class");
1028        assert_tok(&mut reader, TokenKind::Fun, 1, 1);
1029        assert_tok(&mut reader, TokenKind::Let, 1, 5);
1030        assert_tok(&mut reader, TokenKind::While, 1, 9);
1031        assert_tok(&mut reader, TokenKind::If, 1, 15);
1032        assert_tok(&mut reader, TokenKind::Else, 1, 18);
1033
1034        let mut reader = Lexer::from_str("self class super");
1035        assert_tok(&mut reader, TokenKind::This, 1, 1);
1036        assert_tok(&mut reader, TokenKind::Class, 1, 6);
1037        assert_tok(&mut reader, TokenKind::Super, 1, 12);
1038
1039        let mut reader = Lexer::from_str("loop break continue return nil");
1040        assert_tok(&mut reader, TokenKind::Loop, 1, 1);
1041        assert_tok(&mut reader, TokenKind::Break, 1, 6);
1042        assert_tok(&mut reader, TokenKind::Continue, 1, 12);
1043        assert_tok(&mut reader, TokenKind::Return, 1, 21);
1044        assert_tok(&mut reader, TokenKind::Nil, 1, 28);
1045
1046        let mut reader = Lexer::from_str("type struct enum alias trait const");
1047        assert_tok(&mut reader, TokenKind::Type, 1, 1);
1048        assert_tok(&mut reader, TokenKind::Struct, 1, 6);
1049        assert_tok(&mut reader, TokenKind::Enum, 1, 13);
1050        assert_tok(&mut reader, TokenKind::Alias, 1, 18);
1051        assert_tok(&mut reader, TokenKind::Trait, 1, 24);
1052        assert_tok(&mut reader, TokenKind::Const, 1, 30);
1053
1054        let mut reader = Lexer::from_str("for in impl Self");
1055        assert_tok(&mut reader, TokenKind::For, 1, 1);
1056        assert_tok(&mut reader, TokenKind::In, 1, 5);
1057        assert_tok(&mut reader, TokenKind::Impl, 1, 8);
1058        assert_tok(&mut reader, TokenKind::CapitalThis, 1, 13);
1059
1060        let mut reader = Lexer::from_str("defer");
1061        assert_tok(&mut reader, TokenKind::Defer, 1, 1);
1062    }
1063
1064    #[test]
1065    fn test_operators() {
1066        let mut reader = Lexer::from_str("==+=-*/%~.@");
1067        assert_tok(&mut reader, TokenKind::EqEq, 1, 1);
1068        assert_tok(&mut reader, TokenKind::AddEq, 1, 3);
1069        assert_tok(&mut reader, TokenKind::Sub, 1, 5);
1070        assert_tok(&mut reader, TokenKind::Mul, 1, 6);
1071        assert_tok(&mut reader, TokenKind::Div, 1, 7);
1072        assert_tok(&mut reader, TokenKind::Mod, 1, 8);
1073        assert_tok(&mut reader, TokenKind::Tilde, 1, 9);
1074        assert_tok(&mut reader, TokenKind::Dot, 1, 10);
1075        assert_tok(&mut reader, TokenKind::At, 1, 11);
1076
1077        let mut reader = Lexer::from_str("<=<>=><");
1078        assert_tok(&mut reader, TokenKind::Le, 1, 1);
1079        assert_tok(&mut reader, TokenKind::Lt, 1, 3);
1080        assert_tok(&mut reader, TokenKind::Ge, 1, 4);
1081        assert_tok(&mut reader, TokenKind::Gt, 1, 6);
1082        assert_tok(&mut reader, TokenKind::Lt, 1, 7);
1083
1084        let mut reader = Lexer::from_str("!=====!");
1085        assert_tok(&mut reader, TokenKind::NeEqEq, 1, 1);
1086        assert_tok(&mut reader, TokenKind::EqEqEq, 1, 4);
1087        assert_tok(&mut reader, TokenKind::Not, 1, 7);
1088
1089        let mut reader = Lexer::from_str("!=!");
1090        assert_tok(&mut reader, TokenKind::Ne, 1, 1);
1091        assert_tok(&mut reader, TokenKind::Not, 1, 3);
1092
1093        let mut reader = Lexer::from_str("->");
1094        assert_tok(&mut reader, TokenKind::Arrow, 1, 1);
1095
1096        let mut reader = Lexer::from_str("try!try?1");
1097        assert_tok(&mut reader, TokenKind::TryForce, 1, 1);
1098        assert_tok(&mut reader, TokenKind::TryOpt, 1, 5);
1099        assert_tok(
1100            &mut reader,
1101            TokenKind::LitInt("1".into(), IntBase::Dec, IntSuffix::Int),
1102            1,
1103            9,
1104        );
1105
1106        let mut reader = Lexer::from_str(">><<>>>_::");
1107        assert_tok(&mut reader, TokenKind::GtGt, 1, 1);
1108        assert_tok(&mut reader, TokenKind::LtLt, 1, 3);
1109        assert_tok(&mut reader, TokenKind::GtGtGt, 1, 5);
1110        assert_tok(&mut reader, TokenKind::Underscore, 1, 8);
1111        assert_tok(&mut reader, TokenKind::Sep, 1, 9);
1112    }
1113}