sway_parse/
token.rs

1use core::mem;
2use extension_trait::extension_trait;
3use num_bigint::BigUint;
4use sway_ast::literal::{LitChar, LitInt, LitIntType, LitString, Literal};
5use sway_ast::token::{
6    Comment, CommentKind, CommentedGroup, CommentedTokenStream, CommentedTokenTree, DocComment,
7    DocStyle, Punct, Spacing, TokenStream,
8};
9use sway_error::error::CompileError;
10use sway_error::handler::{ErrorEmitted, Handler};
11use sway_error::lex_error::{LexError, LexErrorKind};
12use sway_types::span::Source;
13use sway_types::{
14    ast::{Delimiter, PunctKind},
15    Ident, SourceId, Span, Spanned,
16};
17use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
18use unicode_xid::UnicodeXID;
19
20#[extension_trait]
21impl CharExt for char {
22    /// Converts the character into an opening delimiter, if any.
23    fn as_open_delimiter(self) -> Option<Delimiter> {
24        match self {
25            '(' => Some(Delimiter::Parenthesis),
26            '{' => Some(Delimiter::Brace),
27            '[' => Some(Delimiter::Bracket),
28            _ => None,
29        }
30    }
31
32    /// Converts the character into a closing delimiter, if any.
33    fn as_close_delimiter(self) -> Option<Delimiter> {
34        match self {
35            ')' => Some(Delimiter::Parenthesis),
36            '}' => Some(Delimiter::Brace),
37            ']' => Some(Delimiter::Bracket),
38            _ => None,
39        }
40    }
41
42    /// Determines what sort of punctuation this character is, if any.
43    fn as_punct_kind(self) -> Option<PunctKind> {
44        match self {
45            ';' => Some(PunctKind::Semicolon),
46            ':' => Some(PunctKind::Colon),
47            '/' => Some(PunctKind::ForwardSlash),
48            ',' => Some(PunctKind::Comma),
49            '*' => Some(PunctKind::Star),
50            '+' => Some(PunctKind::Add),
51            '-' => Some(PunctKind::Sub),
52            '<' => Some(PunctKind::LessThan),
53            '>' => Some(PunctKind::GreaterThan),
54            '=' => Some(PunctKind::Equals),
55            '.' => Some(PunctKind::Dot),
56            '!' => Some(PunctKind::Bang),
57            '%' => Some(PunctKind::Percent),
58            '&' => Some(PunctKind::Ampersand),
59            '^' => Some(PunctKind::Caret),
60            '|' => Some(PunctKind::Pipe),
61            '_' => Some(PunctKind::Underscore),
62            '#' => Some(PunctKind::Sharp),
63            _ => None,
64        }
65    }
66}
67
/// A `char_indices`-style iterator over `src` that reports positions as
/// absolute byte offsets into `src` (starting from `position`), rather than
/// offsets relative to a subslice.
struct CharIndicesInner<'a> {
    src: &'a str,
    position: usize,
}

impl Iterator for CharIndicesInner<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<(usize, char)> {
        // Decode the next character at the current byte offset; stop at EOF.
        let c = self.src[self.position..].chars().next()?;
        let item = (self.position, c);
        // Advance by the character's UTF-8 width so positions stay absolute.
        self.position += c.len_utf8();
        Some(item)
    }
}
87
/// Peekable character stream yielding absolute `(byte_position, char)` pairs.
type CharIndices<'a> = std::iter::Peekable<CharIndicesInner<'a>>;
type Result<T> = core::result::Result<T, ErrorEmitted>;

/// Shared state threaded through all the lexing functions below.
struct Lexer<'l> {
    // Error sink; lex errors are emitted here and then recovered from.
    handler: &'l Handler,
    // The full source being lexed; spans index into `src.text`.
    src: &'l Source,
    // Source file id attached to produced spans, if any.
    source_id: &'l Option<SourceId>,
    // The character stream being consumed.
    stream: &'l mut CharIndices<'l>,
}
97
98pub fn lex(
99    handler: &Handler,
100    src: Source,
101    start: usize,
102    end: usize,
103    source_id: Option<SourceId>,
104) -> Result<TokenStream> {
105    lex_commented(handler, src, start, end, &source_id).map(|stream| stream.strip_comments())
106}
107
/// Lexes `src[start..end]` into a [CommentedTokenStream], preserving comments
/// and doc comments as token trees.
///
/// Most lex errors (invalid characters, unexpected/mismatched/unclosed
/// delimiters, ...) are emitted via `handler` and then recovered from, so a
/// stream is still produced; `Err` is only returned when one of the literal
/// lexers propagates an error.
pub fn lex_commented(
    handler: &Handler,
    src: Source,
    start: usize,
    end: usize,
    source_id: &Option<SourceId>,
) -> Result<CommentedTokenStream> {
    let stream = &mut CharIndicesInner {
        src: &src.text[..end],
        position: start,
    }
    .peekable();
    let mut l = Lexer {
        handler,
        src: &src,
        source_id,
        stream,
    };
    let mut file_start_offset: usize = 0;

    // `token_trees` collects tokens at the current nesting level;
    // `parent_token_trees` stacks the outer levels while inside delimiters.
    let mut parent_token_trees = Vec::new();
    let mut token_trees = Vec::new();
    while let Some((mut index, mut character)) = l.stream.next() {
        if character.is_whitespace() {
            // if the beginning of a file starts with whitespace
            // we must keep track to ensure that the module level docs
            // will get inserted into the tree correctly
            if index - file_start_offset == 0 {
                file_start_offset += character.len_utf8();
            }
            continue;
        }
        if character == '/' {
            match l.stream.peek() {
                Some((_, '/')) => {
                    // search_end is the index at which we stop looking backwards for
                    // a newline
                    let search_end = token_trees
                        .last()
                        .map(|tt| {
                            if let CommentedTokenTree::Tree(t) = tt {
                                t.span().end()
                            } else {
                                0
                            }
                        })
                        .unwrap_or_default();

                    // A `//` comment is "Newlined" if only whitespace containing at
                    // least one `\n` separates it from the previous token.
                    let has_newline = src.text[search_end..index]
                        .chars()
                        .rev()
                        .take_while(|c| c.is_whitespace())
                        .filter(|&c| c == '\n')
                        .count()
                        > 0;
                    // We found a comment at the start of file, which should be accounted for as a Newlined comment.
                    let start_of_file_found = search_end == 0 && index == 0;

                    let comment_kind = if has_newline || start_of_file_found {
                        CommentKind::Newlined
                    } else {
                        CommentKind::Trailing
                    };

                    let ctt = lex_line_comment(&mut l, end, index, comment_kind);
                    token_trees.push(ctt);
                    continue;
                }
                Some((_, '*')) => {
                    if let Some(token) = lex_block_comment(&mut l, index) {
                        token_trees.push(token);
                    }
                    continue;
                }
                Some(_) | None => {}
            }
        }

        if character.is_xid_start() || character == '_' {
            // Raw identifier, e.g., `r#foo`? Then mark as such, stripping the prefix `r#`.
            let is_raw_ident = character == 'r' && matches!(l.stream.peek(), Some((_, '#')));
            if is_raw_ident {
                l.stream.next();
                if let Some((next_index, next_character)) = l.stream.next() {
                    character = next_character;
                    index = next_index;
                }
                // `r#` must be followed by an identifier-start character.
                if !(character.is_xid_start() || character == '_') {
                    let kind = LexErrorKind::InvalidCharacter {
                        position: index,
                        character,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                    continue;
                }
            }

            // Don't accept just `_` as an identifier.
            let not_is_single_underscore = character != '_'
                || l.stream
                    .peek()
                    .is_some_and(|(_, next)| next.is_xid_continue());
            if not_is_single_underscore {
                // Consume until we hit other than `XID_CONTINUE`.
                while l.stream.next_if(|(_, c)| c.is_xid_continue()).is_some() {}
                let ident = Ident::new_with_raw(span_until(&mut l, index), is_raw_ident);
                token_trees.push(CommentedTokenTree::Tree(ident.into()));
                continue;
            }
        }
        if let Some(delimiter) = character.as_open_delimiter() {
            // Open a new nesting level; the tokens lexed so far are stashed
            // until the matching close delimiter is found.
            let token_trees = mem::take(&mut token_trees);
            parent_token_trees.push((token_trees, index, delimiter));
            continue;
        }
        if let Some(close_delimiter) = character.as_close_delimiter() {
            match parent_token_trees.pop() {
                None => {
                    // Recover by ignoring the unexpected closing delim,
                    // giving the parser opportunities to realize the need for an opening delim
                    // in e.g., this example:
                    //
                    // fn foo() // <-- Parser expects grouped tokens in `{ ... }` here.
                    //     let x = 0;
                    // } // <- This recovery.
                    let kind = LexErrorKind::UnexpectedCloseDelimiter {
                        position: index,
                        close_delimiter,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                }
                Some((parent, open_index, open_delimiter)) => {
                    if open_delimiter != close_delimiter {
                        // Recover on e.g., a `{ )` mismatch by having `)` interpreted as `}`.
                        let kind = LexErrorKind::MismatchedDelimiters {
                            open_position: open_index,
                            close_position: index,
                            open_delimiter,
                            close_delimiter,
                        };
                        let span = span_one(&l, index, character);
                        error(l.handler, LexError { kind, span });
                    }
                    token_trees = lex_close_delimiter(
                        &mut l,
                        index,
                        parent,
                        token_trees,
                        open_index,
                        open_delimiter,
                    );
                }
            }
            continue;
        }
        // Literals and punctuation; each lexer returns `Ok(None)` when
        // `character` does not start its kind of token.
        if let Some(token) = lex_string(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_char(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_int_lit(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_punctuation(&mut l, index, character) {
            token_trees.push(token);
            continue;
        }

        // Recover by simply ignoring the character.
        // NOTE(Centril): I'm not sure how good of an idea this is... time will tell.
        let kind = LexErrorKind::InvalidCharacter {
            position: index,
            character,
        };
        let span = span_one(&l, index, character);
        error(l.handler, LexError { kind, span });
        continue;
    }

    // Recover all unclosed delimiters.
    while let Some((parent, open_index, open_delimiter)) = parent_token_trees.pop() {
        let kind = LexErrorKind::UnclosedDelimiter {
            open_position: open_index,
            open_delimiter,
        };
        let span = span_one(&l, open_index, open_delimiter.as_open_char());
        error(l.handler, LexError { kind, span });

        // Close the group as if the delimiter were closed at EOF.
        token_trees = lex_close_delimiter(
            &mut l,
            src.text.len(),
            parent,
            token_trees,
            open_index,
            open_delimiter,
        );
    }
    Ok(CommentedTokenStream {
        token_trees,
        full_span: span(&l, start, end),
    })
}
316
317fn lex_close_delimiter(
318    l: &mut Lexer<'_>,
319    index: usize,
320    mut parent: Vec<CommentedTokenTree>,
321    token_trees: Vec<CommentedTokenTree>,
322    open_index: usize,
323    delimiter: Delimiter,
324) -> Vec<CommentedTokenTree> {
325    let start_index = open_index + delimiter.as_open_char().len_utf8();
326    let full_span = span(l, start_index, index);
327    let group = CommentedGroup {
328        token_stream: CommentedTokenStream {
329            token_trees,
330            full_span,
331        },
332        delimiter,
333        span: span_until(l, open_index),
334    };
335    parent.push(CommentedTokenTree::Tree(group.into()));
336    parent
337}
338
/// Lexes a `//` line comment starting at byte `index`, consuming characters
/// up to (but not including) the next `\n`, or up to `end` at EOF.
///
/// `///` (outer) and `//!` (inner) produce [DocComment] trees; `////` (four
/// or more slashes) and plain `//` produce ordinary [Comment]s with the
/// caller-determined `comment_kind`.
fn lex_line_comment(
    l: &mut Lexer<'_>,
    end: usize,
    index: usize,
    comment_kind: CommentKind,
) -> CommentedTokenTree {
    // Consume the second `/` that the caller only peeked at.
    let _ = l.stream.next();

    // Find end; either at EOF or at `\n`.
    let end = l
        .stream
        .find(|(_, character)| *character == '\n')
        .map_or(end, |(end, _)| end);
    let sp = span(l, index, end);

    // Classify by the 3rd and 4th characters of the comment text.
    let doc_style = match (sp.as_str().chars().nth(2), sp.as_str().chars().nth(3)) {
        // `//!` is an inner line doc comment.
        (Some('!'), _) => Some(DocStyle::Inner),
        // `////` (more than 3 slashes) is not considered a doc comment.
        (Some('/'), Some('/')) => None,
        // `///` is an outer line doc comment.
        (Some('/'), _) => Some(DocStyle::Outer),
        _ => None,
    };

    if let Some(doc_style) = doc_style {
        let doc_comment = DocComment {
            span: sp,
            doc_style,
            // Content starts after the 3-byte `///` or `//!` prefix.
            content_span: span(l, index + 3, end),
        };
        CommentedTokenTree::Tree(doc_comment.into())
    } else {
        Comment {
            span: sp,
            comment_kind,
        }
        .into()
    }
}
379
/// Lexes a `/* ... */` block comment starting at byte `index`, honoring
/// nesting. Returns `None` (after emitting an `UnclosedMultilineComment`
/// error) if EOF is reached before every opened `/*` is closed.
fn lex_block_comment(l: &mut Lexer<'_>, index: usize) -> Option<CommentedTokenTree> {
    // Lexing a multi-line comment.
    let _ = l.stream.next();
    // Start positions of every currently-open `/*`, innermost last.
    let mut unclosed_indices = vec![index];

    let unclosed_multiline_comment = |l: &Lexer<'_>, unclosed_indices: Vec<_>| {
        let span = span(l, *unclosed_indices.last().unwrap(), l.src.text.len() - 1);
        let kind = LexErrorKind::UnclosedMultilineComment { unclosed_indices };
        error(l.handler, LexError { kind, span });
        None
    };

    // We first start by assuming that block comments are inlined.
    let mut comment_kind = CommentKind::Inlined;

    loop {
        match l.stream.next() {
            None => return unclosed_multiline_comment(l, unclosed_indices),
            Some((_, '*')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                // Matched `*/`, so we're closing some multi-line comment. It could be nested.
                Some((slash_ix, '/')) => {
                    let start = unclosed_indices.pop().unwrap();
                    if unclosed_indices.is_empty() {
                        // For the purposes of lexing,
                        // nested multi-line comments constitute a single multi-line comment.
                        // We could represent them as several ones, but that's unnecessary.
                        let end = slash_ix + '/'.len_utf8();
                        let span = span(l, start, end);
                        return Some(Comment { span, comment_kind }.into());
                    }
                }
                Some(_) => {}
            },
            // Found nested multi-line comment.
            Some((next_index, '/')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                Some((_, '*')) => unclosed_indices.push(next_index),
                Some(_) => {}
            },
            Some((_, '\n')) => {
                // If we find a newline character while lexing, this means that the block comment is multiline.
                // Example:
                // /* this is a
                //    multilined block comment */
                comment_kind = CommentKind::Multilined;
            }
            Some(_) => {}
        }
    }
}
431
/// Lexes a string literal if `character` is `"`; otherwise returns `Ok(None)`.
///
/// Escape sequences are decoded via [parse_escape_code]. Unicode text
/// direction codepoints are rejected with an error and skipped, to defend
/// against bidi source spoofing.
fn lex_string(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    if character != '"' {
        return Ok(None);
    }
    let mut parsed = String::new();
    loop {
        let unclosed_string_lit = |l: &Lexer<'_>, end| {
            error(
                l.handler,
                LexError {
                    kind: LexErrorKind::UnclosedStringLiteral { position: index },
                    span: span(l, index, end),
                },
            )
        };
        let (next_index, next_character) = l.stream.next().ok_or_else(|| {
            // last character may not be a unicode boundary
            let mut end = l.src.text.len() - 1;
            while !l.src.text.is_char_boundary(end) {
                end -= 1;
            }
            unclosed_string_lit(l, end)
        })?;
        parsed.push(match next_character {
            // An EOF mid-escape (`Err(None)`) is reported as an unclosed string.
            '\\' => parse_escape_code(l)
                .map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.text.len())))?,
            '"' => break,
            // do not allow text direction codepoints
            ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
                let kind = LexErrorKind::UnicodeTextDirInLiteral {
                    position: next_index,
                    character: next_character,
                };
                let span = span_one(l, next_index, next_character);
                error(l.handler, LexError { span, kind });
                continue;
            }
            _ => next_character,
        });
    }
    let span = span_until(l, index);
    let literal = Literal::String(LitString { span, parsed });
    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}
480
/// Lexes a character literal if `character` is `'`; otherwise returns `Ok(None)`.
///
/// If more than one character appears before the closing quote (e.g. `'ab'`),
/// an `ExpectedCloseQuote` error is emitted and the token is recovered as a
/// string literal instead, on the assumption the user meant a string.
fn lex_char(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let is_quote = |c| c == '\'';
    if !is_quote(character) {
        return Ok(None);
    }

    let unclosed_char_lit = |l: &Lexer<'_>| {
        let err = LexError {
            kind: LexErrorKind::UnclosedCharLiteral { position: index },
            span: span(l, index, l.src.text.len()),
        };
        error(l.handler, err)
    };
    let next = |l: &mut Lexer<'_>| l.stream.next().ok_or_else(|| unclosed_char_lit(l));
    // Decodes `next_char`, resolving `\`-escapes via `parse_escape_code`.
    let escape = |l: &mut Lexer<'_>, next_char| {
        if next_char == '\\' {
            parse_escape_code(l).map_err(|e| e.unwrap_or_else(|| unclosed_char_lit(l)))
        } else {
            Ok(next_char)
        }
    };

    let (next_index, next_char) = next(l)?;
    // do not allow text direction codepoints
    if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
        let kind = LexErrorKind::UnicodeTextDirInLiteral {
            position: next_index,
            character: next_char,
        };
        let span = span_one(l, next_index, next_char);
        error(l.handler, LexError { span, kind });
    }

    let parsed = escape(l, next_char)?;

    // Consume the closing `'`.
    let (next_index, next_char) = next(l)?;
    let sp = span_until(l, index);

    // Not a closing quote? Then this is e.g., 'ab'.
    // Most likely the user meant a string literal, so recover as that instead.
    let literal = if !is_quote(next_char) {
        let mut string = String::new();
        string.push(parsed);
        string.push(escape(l, next_char)?);
        // Keep consuming characters until the closing quote is found.
        loop {
            let (_, next_char) = next(l)?;
            if is_quote(next_char) {
                break;
            }
            string.push(next_char);
        }

        // Emit the expected closing quote error.
        error(
            l.handler,
            LexError {
                kind: LexErrorKind::ExpectedCloseQuote {
                    position: next_index,
                },
                span: span(l, next_index, next_index + string.len()),
            },
        );

        Literal::String(LitString {
            span: sp,
            parsed: string,
        })
    } else {
        Literal::Char(LitChar { span: sp, parsed })
    };

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}
559
/// Parses one escape sequence, assuming the leading `\` has already been
/// consumed by the caller.
///
/// Supports `\"`, `\'`, `\n`, `\r`, `\t`, `\\`, `\0`, `\xHH`, and `\u{...}`.
/// Returns `Err(None)` on EOF mid-escape (the caller decides how to report
/// it), or `Err(Some(_))` when an error has already been emitted here.
fn parse_escape_code(l: &mut Lexer<'_>) -> core::result::Result<char, Option<ErrorEmitted>> {
    // Shadowed helper: emit the error and wrap it in the `Err(Some(_))` form.
    let error = |kind, span| Err(Some(error(l.handler, LexError { kind, span })));

    match l.stream.next() {
        None => Err(None),
        Some((_, '"')) => Ok('"'),
        Some((_, '\'')) => Ok('\''),
        Some((_, 'n')) => Ok('\n'),
        Some((_, 'r')) => Ok('\r'),
        Some((_, 't')) => Ok('\t'),
        Some((_, '\\')) => Ok('\\'),
        Some((_, '0')) => Ok('\0'),
        Some((index, 'x')) => {
            // `\xHH`: exactly two hex digits.
            let (high, low) = match (l.stream.next(), l.stream.next()) {
                (Some((_, high)), Some((_, low))) => (high, low),
                _ => return Err(None),
            };
            let (high, low) = match (high.to_digit(16), low.to_digit(16)) {
                (Some(high), Some(low)) => (high, low),
                _ => return error(LexErrorKind::InvalidHexEscape, span_until(l, index)),
            };
            // Value is at most 0xFF, which is always a valid `char`.
            let parsed_character = char::from_u32((high << 4) | low).unwrap();
            Ok(parsed_character)
        }
        Some((index, 'u')) => {
            // `\u{HEX...}`: brace-delimited hex digits of arbitrary length.
            match l.stream.next() {
                None => return Err(None),
                Some((_, '{')) => (),
                Some((_, unexpected_char)) => {
                    let span = span_one(l, index, unexpected_char);
                    let kind = LexErrorKind::UnicodeEscapeMissingBrace { position: index };
                    return error(kind, span);
                }
            }
            let mut digits_start_position_opt = None;
            // Accumulate in a BigUint so overly long digit runs can't overflow
            // before the range check below.
            let mut char_value = BigUint::from(0u32);
            let digits_end_position = loop {
                let (position, digit) = match l.stream.next() {
                    None => return Err(None),
                    Some((position, '}')) => break position,
                    Some((position, digit)) => (position, digit),
                };
                if digits_start_position_opt.is_none() {
                    digits_start_position_opt = Some(position);
                };
                let digit = match digit.to_digit(16) {
                    None => {
                        let span = span_one(l, position, digit);
                        let kind = LexErrorKind::InvalidUnicodeEscapeDigit { position };
                        return error(kind, span);
                    }
                    Some(digit) => digit,
                };
                char_value *= 16u32;
                char_value += digit;
            };
            // Empty `\u{}` degenerates to a zero-length digit span at `}`.
            let digits_start_position = digits_start_position_opt.unwrap_or(digits_end_position);
            let char_value = match u32::try_from(char_value) {
                Err(..) => {
                    let span = span(l, digits_start_position, digits_end_position);
                    let kind = LexErrorKind::UnicodeEscapeOutOfRange { position: index };
                    return error(kind, span);
                }
                Ok(char_value) => char_value,
            };
            let parsed_character = match char::from_u32(char_value) {
                None => {
                    let span_all = span_until(l, index);
                    let kind = LexErrorKind::UnicodeEscapeInvalidCharValue { span: span_all };
                    let span = span(l, digits_start_position, digits_end_position);
                    return error(kind, span);
                }
                Some(parsed_character) => parsed_character,
            };
            Ok(parsed_character)
        }
        Some((index, unexpected_char)) => error(
            LexErrorKind::InvalidEscapeCode { position: index },
            span_one(l, index, unexpected_char),
        ),
    }
}
642
/// Lexes an integer literal if `character` is a decimal digit; otherwise
/// returns `Ok(None)`.
///
/// Handles decimal literals, `0x`/`0o`/`0b` prefixed literals, `_` digit
/// separators, and an optional type suffix (e.g. `42u64`).
fn lex_int_lit(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let digit = match character.to_digit(10) {
        None => return Ok(None),
        Some(d) => d,
    };

    let decimal_int_lit = |l, digit: u32| {
        let mut big_uint = BigUint::from(digit);
        let end_opt = parse_digits(&mut big_uint, l, 10);
        (big_uint, end_opt)
    };
    let (big_uint, end_opt) = if digit == 0 {
        // A leading `0` may introduce a radix prefix (`0x`, `0o`, `0b`).
        let prefixed_int_lit = |l: &mut Lexer<'_>, radix| {
            // Consume the prefix character and the first radix digit.
            let _ = l.stream.next();
            let d = l.stream.next();
            let incomplete_int_lit = |end| {
                let kind = match radix {
                    16 => LexErrorKind::IncompleteHexIntLiteral { position: index },
                    8 => LexErrorKind::IncompleteOctalIntLiteral { position: index },
                    2 => LexErrorKind::IncompleteBinaryIntLiteral { position: index },
                    _ => unreachable!(),
                };
                let span = span(l, index, end);
                error(l.handler, LexError { kind, span })
            };
            let (digit_pos, digit) = d.ok_or_else(|| incomplete_int_lit(l.src.text.len()))?;
            let radix_digit = digit
                .to_digit(radix)
                .ok_or_else(|| incomplete_int_lit(digit_pos))?;
            let mut big_uint = BigUint::from(radix_digit);
            let end_opt = parse_digits(&mut big_uint, l, radix);
            Ok((big_uint, end_opt))
        };

        match l.stream.peek() {
            Some((_, 'x')) => prefixed_int_lit(l, 16)?,
            Some((_, 'o')) => prefixed_int_lit(l, 8)?,
            Some((_, 'b')) => prefixed_int_lit(l, 2)?,
            // Plain decimal literal with a leading zero.
            Some((_, '_' | '0'..='9')) => decimal_int_lit(l, 0),
            // A bare `0` followed by something else.
            Some(&(next_index, _)) => (BigUint::from(0u32), Some(next_index)),
            None => (BigUint::from(0u32), None),
        }
    } else {
        decimal_int_lit(l, digit)
    };

    let ty_opt = lex_int_ty_opt(l)?;

    let literal = Literal::Int(LitInt {
        span: span(l, index, end_opt.unwrap_or(l.src.text.len())),
        parsed: big_uint,
        ty_opt,
        is_generated_b256: false,
    });

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}
704
/// Lexes an optional integer type suffix (e.g. `u8`, `i64`) immediately
/// following the digits of an integer literal.
///
/// An unrecognized suffix emits an `InvalidIntSuffix` error and is discarded
/// (returns `Ok(None)`) so lexing can continue.
fn lex_int_ty_opt(l: &mut Lexer<'_>) -> Result<Option<(LitIntType, Span)>> {
    // A suffix only exists if the next character continues an identifier.
    let (suffix_start_position, c) = match l.stream.next_if(|(_, c)| c.is_xid_continue()) {
        None => return Ok(None),
        Some(x) => x,
    };
    let mut suffix = String::from(c);
    // Consume the rest of the suffix; record where it ends.
    let suffix_end_position = loop {
        match l.stream.peek() {
            Some((_, c)) if c.is_xid_continue() => {
                suffix.push(*c);
                let _ = l.stream.next();
            }
            Some((pos, _)) => break *pos,
            None => break l.src.text.len(),
        }
    };
    // Parse the suffix to a known one, or if unknown, recover by throwing it away.
    let ty = match parse_int_suffix(&suffix) {
        Some(s) => s,
        None => {
            let span = span(l, suffix_start_position, suffix_end_position);
            let kind = LexErrorKind::InvalidIntSuffix {
                suffix: Ident::new(span.clone()),
            };
            error(l.handler, LexError { kind, span });
            return Ok(None);
        }
    };
    let span = span_until(l, suffix_start_position);
    Ok(Some((ty, span)))
}
736
737/// Interpret the given `suffix` string as a `LitIntType`.
738pub fn parse_int_suffix(suffix: &str) -> Option<LitIntType> {
739    Some(match suffix {
740        "u8" => LitIntType::U8,
741        "u16" => LitIntType::U16,
742        "u32" => LitIntType::U32,
743        "u64" => LitIntType::U64,
744        "u256" => LitIntType::U256,
745        "i8" => LitIntType::I8,
746        "i16" => LitIntType::I16,
747        "i32" => LitIntType::I32,
748        "i64" => LitIntType::I64,
749        _ => return None,
750    })
751}
752
753fn parse_digits(big_uint: &mut BigUint, l: &mut Lexer<'_>, radix: u32) -> Option<usize> {
754    loop {
755        match l.stream.peek() {
756            None => break None,
757            Some((_, '_')) => {
758                let _ = l.stream.next();
759            }
760            Some(&(index, character)) => match character.to_digit(radix) {
761                None => break Some(index),
762                Some(digit) => {
763                    let _ = l.stream.next();
764                    *big_uint *= radix;
765                    *big_uint += digit;
766                }
767            },
768        };
769    }
770}
771
772fn lex_punctuation(l: &mut Lexer<'_>, index: usize, character: char) -> Option<CommentedTokenTree> {
773    let punct = Punct {
774        kind: character.as_punct_kind()?,
775        spacing: match l.stream.peek() {
776            Some((_, next_character)) if next_character.as_punct_kind().is_some() => Spacing::Joint,
777            _ => Spacing::Alone,
778        },
779        span: span_until(l, index),
780    };
781    Some(CommentedTokenTree::Tree(punct.into()))
782}
783
/// Returns a span from `start` up to the next unconsumed character
/// (or to the end of the source at EOF).
fn span_until(l: &mut Lexer<'_>, start: usize) -> Span {
    let end = l.stream.peek().map_or(l.src.text.len(), |(end, _)| *end);
    span(l, start, end)
}

/// Returns a span covering the single character `c` at byte position `start`.
fn span_one(l: &Lexer<'_>, start: usize, c: char) -> Span {
    span(l, start, start + c.len_utf8())
}

/// Returns a span over `src[start..end]` tagged with the lexer's source id.
fn span(l: &Lexer<'_>, start: usize, end: usize) -> Span {
    Span::new(l.src.clone(), start, end, *l.source_id).unwrap()
}

/// Emit a lexer error.
fn error(handler: &Handler, error: LexError) -> ErrorEmitted {
    handler.emit_err(CompileError::Lex { error })
}
801
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer: comment preservation in the commented token
    //! stream, comment-kind classification, doc-comment styles, char-literal
    //! escapes, and rejection of Unicode bidirectional control characters
    //! hidden inside literals.

    use super::*;
    use assert_matches::assert_matches;
    use sway_ast::{
        literal::{LitChar, Literal},
        token::{
            Comment, CommentKind, CommentedTokenTree, CommentedTree, DocComment, DocStyle,
            TokenTree,
        },
    };
    use sway_error::{
        error::CompileError,
        handler::Handler,
        lex_error::{LexError, LexErrorKind},
    };

    // Hidden text-direction controls can make rendered source differ from
    // what the compiler sees ("trojan source"), so the lexer must report
    // every occurrence inside a string or char literal.
    #[test]
    fn lex_bidi() {
        // The string literal bound to `b` embeds four bidi controls
        // (U+202E, U+2066, U+2069, U+2066) and the char literal near the end
        // embeds one more (U+202E): five errors expected in total. (Note the
        // variable in the embedded source is named `lrm` but holds U+202E,
        // which is RLO, not LRM — the name inside the fixture is misleading
        // but irrelevant to what is being tested.)
        let input = "
            script;
            use std::string::String;
            fn main() {
                let a = String::from_ascii_str(\"fuel\");
                let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
                if a.as_bytes() == b.as_bytes() {
                    log(\"same\");
                } else {
                    log(\"different\");
                }
                let lrm = '\u{202E}';
                log(lrm);
            }
        ";
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        // Lexing still succeeds overall: the bidi characters are reported as
        // recoverable errors through the handler, not as a hard failure.
        let _stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        let (errors, warnings) = handler.consume();
        assert_eq!(warnings.len(), 0);
        assert_eq!(errors.len(), 5);
        // Every reported error must be the dedicated bidi-in-literal kind.
        for err in errors {
            assert_matches!(
                err,
                CompileError::Lex {
                    error: LexError {
                        span: _,
                        kind: LexErrorKind::UnicodeTextDirInLiteral {
                            position: _,
                            character: _
                        }
                    }
                }
            );
        }
    }

    // Comments must be preserved in the commented token stream, and every
    // token's span must reproduce the original source text byte-for-byte.
    #[test]
    fn lex_commented_token_stream() {
        let input = r#"
        //
        // Single-line comment.
        struct Foo {
            /* multi-
             * line-
             * comment */
            bar: i32, // trailing comment
        }
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        // Even an empty `//` produces a comment token.
        assert_eq!(tts.next().unwrap().span().as_str(), "//");
        assert_eq!(
            tts.next().unwrap().span().as_str(),
            "// Single-line comment."
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "struct");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");
        {
            // The `{ ... }` struct body lexes as a delimited group; descend
            // into it to check its contents, including the multi-line
            // block comment's exact span.
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();
            assert_eq!(
                tts.next().unwrap().span().as_str(),
                "/* multi-\n             * line-\n             * comment */",
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            assert_eq!(tts.next().unwrap().span().as_str(), ":");
            assert_eq!(tts.next().unwrap().span().as_str(), "i32");
            assert_eq!(tts.next().unwrap().span().as_str(), ",");
            // A comment following code on the same line is classified
            // as `Trailing`.
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() ==  "// trailing comment"
            );
            assert!(tts.next().is_none());
        }
        assert!(tts.next().is_none());
    }

    // A comment alone on its own line is `Newlined`; a comment that follows
    // code on the same line is `Trailing`.
    #[test]
    fn lex_comments_check_comment_kind() {
        let input = r#"
        // CommentKind::Newlined
        abi Foo {
            // CommentKind::Newlined
            fn bar(); // CommentKind::Trailing
            // CommentKind::Newlined
        }
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();

        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() ==  "// CommentKind::Newlined"
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "abi");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");

        {
            // The abi body is a delimited group; its comments keep their
            // own kinds independently of the outer stream.
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();

            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() ==  "// CommentKind::Newlined"
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "fn");
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            // `()` lexes as a (empty) parenthesized group token.
            assert_eq!(tts.next().unwrap().span().as_str(), "()");
            assert_eq!(tts.next().unwrap().span().as_str(), ";");
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() ==  "// CommentKind::Trailing"
            );
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() ==  "// CommentKind::Newlined"
            );
            assert!(tts.next().is_none());
        }
    }

    // `//` and `////` lex as plain comments; `//!` is an inner doc comment
    // and `///` an outer doc comment. `content_span` excludes the sigil but
    // keeps any leading space after it.
    #[test]
    fn lex_doc_comments() {
        let input = r#"
        //none
        ////none
        //!inner
        //! inner
        ///outer
        /// outer
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() ==  "//none"
        );
        // Four or more slashes is NOT an outer doc comment — plain comment.
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() ==  "////none"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() ==  "//!inner" && content_span.as_str() == "inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() ==  "//! inner" && content_span.as_str() == " inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() ==  "///outer" && content_span.as_str() == "outer"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() ==  "/// outer" && content_span.as_str() == " outer"
        );
        assert_eq!(tts.next(), None);
    }

    // A backslash-escaped quote inside a char literal must parse to `'`.
    // This test uses the plain `lex` entry point (uncommented stream).
    #[test]
    fn lex_char_escaped_quote() {
        let input = r"
        '\''
        ";
        let handler = Handler::default();
        let stream = lex(&handler, input.into(), 0, input.len(), None).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(TokenTree::Literal(Literal::Char(LitChar {
                parsed: '\'',
                ..
            })))
        );
        assert_eq!(tts.next(), None);
    }
}