1use core::mem;
2use extension_trait::extension_trait;
3use num_bigint::BigUint;
4use std::sync::Arc;
5use sway_ast::literal::{LitChar, LitInt, LitIntType, LitString, Literal};
6use sway_ast::token::{
7 Comment, CommentKind, CommentedGroup, CommentedTokenStream, CommentedTokenTree, DocComment,
8 DocStyle, GenericTokenTree, Punct, Spacing, TokenStream,
9};
10use sway_error::error::CompileError;
11use sway_error::handler::{ErrorEmitted, Handler};
12use sway_error::lex_error::{LexError, LexErrorKind};
13use sway_types::{
14 ast::{Delimiter, PunctKind},
15 Ident, SourceId, Span, Spanned,
16};
17use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
18use unicode_xid::UnicodeXID;
19
20#[extension_trait]
21impl CharExt for char {
22 fn as_open_delimiter(self) -> Option<Delimiter> {
24 match self {
25 '(' => Some(Delimiter::Parenthesis),
26 '{' => Some(Delimiter::Brace),
27 '[' => Some(Delimiter::Bracket),
28 _ => None,
29 }
30 }
31
32 fn as_close_delimiter(self) -> Option<Delimiter> {
34 match self {
35 ')' => Some(Delimiter::Parenthesis),
36 '}' => Some(Delimiter::Brace),
37 ']' => Some(Delimiter::Bracket),
38 _ => None,
39 }
40 }
41
42 fn as_punct_kind(self) -> Option<PunctKind> {
44 match self {
45 ';' => Some(PunctKind::Semicolon),
46 ':' => Some(PunctKind::Colon),
47 '/' => Some(PunctKind::ForwardSlash),
48 ',' => Some(PunctKind::Comma),
49 '*' => Some(PunctKind::Star),
50 '+' => Some(PunctKind::Add),
51 '-' => Some(PunctKind::Sub),
52 '<' => Some(PunctKind::LessThan),
53 '>' => Some(PunctKind::GreaterThan),
54 '=' => Some(PunctKind::Equals),
55 '.' => Some(PunctKind::Dot),
56 '!' => Some(PunctKind::Bang),
57 '%' => Some(PunctKind::Percent),
58 '&' => Some(PunctKind::Ampersand),
59 '^' => Some(PunctKind::Caret),
60 '|' => Some(PunctKind::Pipe),
61 '_' => Some(PunctKind::Underscore),
62 '#' => Some(PunctKind::Sharp),
63 _ => None,
64 }
65 }
66}
67
/// Cursor over a string slice that yields every character together with its
/// byte offset inside `src`.
struct CharIndicesInner<'a> {
    /// Text being iterated.
    src: &'a str,
    /// Byte offset into `src` of the next character to yield.
    position: usize,
}

impl Iterator for CharIndicesInner<'_> {
    type Item = (usize, char);

    /// Yields the character at the current position paired with that
    /// position, then moves the cursor past it. Returns `None` once the
    /// cursor has reached the end of `src`.
    fn next(&mut self) -> Option<(usize, char)> {
        let offset = self.position;
        let c = self.src[offset..].chars().next()?;
        // The next character starts right after the UTF-8 encoding of `c`.
        self.position = offset + c.len_utf8();
        Some((offset, c))
    }
}
87
/// Peekable stream of `(byte offset, character)` pairs used by the lexer.
type CharIndices<'a> = std::iter::Peekable<CharIndicesInner<'a>>;
/// Result type of the lexing functions; the error is the marker returned when an error is
/// emitted through the `Handler`.
type Result<T> = core::result::Result<T, ErrorEmitted>;
90
/// State shared by all lexing functions in this file.
struct Lexer<'l> {
    /// Sink that lexing errors are emitted to.
    handler: &'l Handler,
    /// Complete source text; spans are created against it.
    src: &'l Arc<str>,
    /// Identifier of the source, attached to every span that is created.
    source_id: &'l Option<SourceId>,
    /// Characters that are still to be lexed, with their byte offsets in `src`.
    stream: &'l mut CharIndices<'l>,
}
97
98pub fn lex(
99 handler: &Handler,
100 src: &Arc<str>,
101 start: usize,
102 end: usize,
103 source_id: Option<SourceId>,
104) -> Result<TokenStream> {
105 lex_commented(handler, src, start, end, &source_id).map(|stream| stream.strip_comments())
106}
107
108pub fn lex_commented(
109 handler: &Handler,
110 src: &Arc<str>,
111 start: usize,
112 end: usize,
113 source_id: &Option<SourceId>,
114) -> Result<CommentedTokenStream> {
115 let stream = &mut CharIndicesInner {
116 src: &src[..end],
117 position: start,
118 }
119 .peekable();
120 let mut l = Lexer {
121 handler,
122 src,
123 source_id,
124 stream,
125 };
126 let mut gather_module_docs = false;
127 let mut file_start_offset: usize = 0;
128
129 let mut parent_token_trees = Vec::new();
130 let mut token_trees = Vec::new();
131 while let Some((mut index, mut character)) = l.stream.next() {
132 if character.is_whitespace() {
133 if index - file_start_offset == 0 {
137 file_start_offset += character.len_utf8();
138 }
139 continue;
140 }
141 if character == '/' {
142 match l.stream.peek() {
143 Some((_, '/')) => {
144 let search_end = token_trees
147 .last()
148 .map(|tt| {
149 if let CommentedTokenTree::Tree(t) = tt {
150 t.span().end()
151 } else {
152 0
153 }
154 })
155 .unwrap_or_default();
156
157 let has_newline = src[search_end..index]
158 .chars()
159 .rev()
160 .take_while(|c| c.is_whitespace())
161 .filter(|&c| c == '\n')
162 .count()
163 > 0;
164 let start_of_file_found = search_end == 0 && index == 0;
166
167 let comment_kind = if has_newline || start_of_file_found {
168 CommentKind::Newlined
169 } else {
170 CommentKind::Trailing
171 };
172
173 let ctt = lex_line_comment(
174 &mut l,
175 end,
176 index,
177 comment_kind,
178 file_start_offset,
179 gather_module_docs,
180 );
181 if let CommentedTokenTree::Tree(GenericTokenTree::DocComment(DocComment {
182 doc_style: DocStyle::Inner,
183 ..
184 })) = &ctt
185 {
186 gather_module_docs = true;
187 }
188 token_trees.push(ctt);
189 continue;
190 }
191 Some((_, '*')) => {
192 if let Some(token) = lex_block_comment(&mut l, index) {
193 token_trees.push(token);
194 }
195 continue;
196 }
197 Some(_) | None => {}
198 }
199 } else {
200 gather_module_docs = false;
201 }
202
203 if character.is_xid_start() || character == '_' {
204 let is_raw_ident = character == 'r' && matches!(l.stream.peek(), Some((_, '#')));
206 if is_raw_ident {
207 l.stream.next();
208 if let Some((next_index, next_character)) = l.stream.next() {
209 character = next_character;
210 index = next_index;
211 }
212 if !(character.is_xid_start() || character == '_') {
213 let kind = LexErrorKind::InvalidCharacter {
214 position: index,
215 character,
216 };
217 let span = span_one(&l, index, character);
218 error(l.handler, LexError { kind, span });
219 continue;
220 }
221 }
222
223 let not_is_single_underscore = character != '_'
225 || l.stream
226 .peek()
227 .is_some_and(|(_, next)| next.is_xid_continue());
228 if not_is_single_underscore {
229 while l.stream.next_if(|(_, c)| c.is_xid_continue()).is_some() {}
231 let ident = Ident::new_with_raw(span_until(&mut l, index), is_raw_ident);
232 token_trees.push(CommentedTokenTree::Tree(ident.into()));
233 continue;
234 }
235 }
236 if let Some(delimiter) = character.as_open_delimiter() {
237 let token_trees = mem::take(&mut token_trees);
238 parent_token_trees.push((token_trees, index, delimiter));
239 continue;
240 }
241 if let Some(close_delimiter) = character.as_close_delimiter() {
242 match parent_token_trees.pop() {
243 None => {
244 let kind = LexErrorKind::UnexpectedCloseDelimiter {
252 position: index,
253 close_delimiter,
254 };
255 let span = span_one(&l, index, character);
256 error(l.handler, LexError { kind, span });
257 }
258 Some((parent, open_index, open_delimiter)) => {
259 if open_delimiter != close_delimiter {
260 let kind = LexErrorKind::MismatchedDelimiters {
262 open_position: open_index,
263 close_position: index,
264 open_delimiter,
265 close_delimiter,
266 };
267 let span = span_one(&l, index, character);
268 error(l.handler, LexError { kind, span });
269 }
270 token_trees = lex_close_delimiter(
271 &mut l,
272 index,
273 parent,
274 token_trees,
275 open_index,
276 open_delimiter,
277 );
278 }
279 }
280 continue;
281 }
282 if let Some(token) = lex_string(&mut l, index, character)? {
283 token_trees.push(token);
284 continue;
285 }
286 if let Some(token) = lex_char(&mut l, index, character)? {
287 token_trees.push(token);
288 continue;
289 }
290 if let Some(token) = lex_int_lit(&mut l, index, character)? {
291 token_trees.push(token);
292 continue;
293 }
294 if let Some(token) = lex_punctuation(&mut l, index, character) {
295 token_trees.push(token);
296 continue;
297 }
298
299 let kind = LexErrorKind::InvalidCharacter {
302 position: index,
303 character,
304 };
305 let span = span_one(&l, index, character);
306 error(l.handler, LexError { kind, span });
307 continue;
308 }
309
310 while let Some((parent, open_index, open_delimiter)) = parent_token_trees.pop() {
312 let kind = LexErrorKind::UnclosedDelimiter {
313 open_position: open_index,
314 open_delimiter,
315 };
316 let span = span_one(&l, open_index, open_delimiter.as_open_char());
317 error(l.handler, LexError { kind, span });
318
319 token_trees = lex_close_delimiter(
320 &mut l,
321 src.len(),
322 parent,
323 token_trees,
324 open_index,
325 open_delimiter,
326 );
327 }
328 Ok(CommentedTokenStream {
329 token_trees,
330 full_span: span(&l, start, end),
331 })
332}
333
334fn lex_close_delimiter(
335 l: &mut Lexer<'_>,
336 index: usize,
337 mut parent: Vec<CommentedTokenTree>,
338 token_trees: Vec<CommentedTokenTree>,
339 open_index: usize,
340 delimiter: Delimiter,
341) -> Vec<CommentedTokenTree> {
342 let start_index = open_index + delimiter.as_open_char().len_utf8();
343 let full_span = span(l, start_index, index);
344 let group = CommentedGroup {
345 token_stream: CommentedTokenStream {
346 token_trees,
347 full_span,
348 },
349 delimiter,
350 span: span_until(l, open_index),
351 };
352 parent.push(CommentedTokenTree::Tree(group.into()));
353 parent
354}
355
356fn lex_line_comment(
357 l: &mut Lexer<'_>,
358 end: usize,
359 index: usize,
360 comment_kind: CommentKind,
361 offset: usize,
362 gather_module_docs: bool,
363) -> CommentedTokenTree {
364 let _ = l.stream.next();
365
366 let end = l
368 .stream
369 .find(|(_, character)| *character == '\n')
370 .map_or(end, |(end, _)| end);
371 let sp = span(l, index, end);
372
373 let doc_style = match (sp.as_str().chars().nth(2), sp.as_str().chars().nth(3)) {
374 (Some('!'), _) => {
376 if index - offset == 0 || gather_module_docs {
377 Some(DocStyle::Inner)
380 } else {
381 None
382 }
383 }
384 (Some('/'), Some('/')) => None,
386 (Some('/'), _) => Some(DocStyle::Outer),
388 _ => None,
389 };
390
391 if let Some(doc_style) = doc_style {
392 let doc_comment = DocComment {
393 span: sp,
394 doc_style,
395 content_span: span(l, index + 3, end),
396 };
397 CommentedTokenTree::Tree(doc_comment.into())
398 } else {
399 Comment {
400 span: sp,
401 comment_kind,
402 }
403 .into()
404 }
405}
406
407fn lex_block_comment(l: &mut Lexer<'_>, index: usize) -> Option<CommentedTokenTree> {
408 let _ = l.stream.next();
410 let mut unclosed_indices = vec![index];
411
412 let unclosed_multiline_comment = |l: &Lexer<'_>, unclosed_indices: Vec<_>| {
413 let span = span(l, *unclosed_indices.last().unwrap(), l.src.len() - 1);
414 let kind = LexErrorKind::UnclosedMultilineComment { unclosed_indices };
415 error(l.handler, LexError { kind, span });
416 None
417 };
418
419 let mut comment_kind = CommentKind::Inlined;
421
422 loop {
423 match l.stream.next() {
424 None => return unclosed_multiline_comment(l, unclosed_indices),
425 Some((_, '*')) => match l.stream.next() {
426 None => return unclosed_multiline_comment(l, unclosed_indices),
427 Some((slash_ix, '/')) => {
429 let start = unclosed_indices.pop().unwrap();
430 if unclosed_indices.is_empty() {
431 let end = slash_ix + '/'.len_utf8();
435 let span = span(l, start, end);
436 return Some(Comment { span, comment_kind }.into());
437 }
438 }
439 Some(_) => {}
440 },
441 Some((next_index, '/')) => match l.stream.next() {
443 None => return unclosed_multiline_comment(l, unclosed_indices),
444 Some((_, '*')) => unclosed_indices.push(next_index),
445 Some(_) => {}
446 },
447 Some((_, '\n')) => {
448 comment_kind = CommentKind::Multilined;
453 }
454 Some(_) => {}
455 }
456 }
457}
458
459fn lex_string(
460 l: &mut Lexer<'_>,
461 index: usize,
462 character: char,
463) -> Result<Option<CommentedTokenTree>> {
464 if character != '"' {
465 return Ok(None);
466 }
467 let mut parsed = String::new();
468 loop {
469 let unclosed_string_lit = |l: &Lexer<'_>, end| {
470 error(
471 l.handler,
472 LexError {
473 kind: LexErrorKind::UnclosedStringLiteral { position: index },
474 span: span(l, index, end),
475 },
476 )
477 };
478 let (next_index, next_character) = l.stream.next().ok_or_else(|| {
479 let mut end = l.src.len() - 1;
481 while !l.src.is_char_boundary(end) {
482 end -= 1;
483 }
484 unclosed_string_lit(l, end)
485 })?;
486 parsed.push(match next_character {
487 '\\' => parse_escape_code(l)
488 .map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.len())))?,
489 '"' => break,
490 ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
492 let kind = LexErrorKind::UnicodeTextDirInLiteral {
493 position: next_index,
494 character: next_character,
495 };
496 let span = span_one(l, next_index, next_character);
497 error(l.handler, LexError { span, kind });
498 continue;
499 }
500 _ => next_character,
501 });
502 }
503 let span = span_until(l, index);
504 let literal = Literal::String(LitString { span, parsed });
505 Ok(Some(CommentedTokenTree::Tree(literal.into())))
506}
507
508fn lex_char(
509 l: &mut Lexer<'_>,
510 index: usize,
511 character: char,
512) -> Result<Option<CommentedTokenTree>> {
513 let is_quote = |c| c == '\'';
514 if !is_quote(character) {
515 return Ok(None);
516 }
517
518 let unclosed_char_lit = |l: &Lexer<'_>| {
519 let err = LexError {
520 kind: LexErrorKind::UnclosedCharLiteral { position: index },
521 span: span(l, index, l.src.len()),
522 };
523 error(l.handler, err)
524 };
525 let next = |l: &mut Lexer<'_>| l.stream.next().ok_or_else(|| unclosed_char_lit(l));
526 let escape = |l: &mut Lexer<'_>, next_char| {
527 if next_char == '\\' {
528 parse_escape_code(l).map_err(|e| e.unwrap_or_else(|| unclosed_char_lit(l)))
529 } else {
530 Ok(next_char)
531 }
532 };
533
534 let (next_index, next_char) = next(l)?;
535 if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
537 let kind = LexErrorKind::UnicodeTextDirInLiteral {
538 position: next_index,
539 character: next_char,
540 };
541 let span = span_one(l, next_index, next_char);
542 error(l.handler, LexError { span, kind });
543 }
544
545 let parsed = escape(l, next_char)?;
546
547 let (next_index, next_char) = next(l)?;
549 let sp = span_until(l, index);
550
551 let literal = if !is_quote(next_char) {
554 let mut string = String::new();
555 string.push(parsed);
556 string.push(escape(l, next_char)?);
557 loop {
558 let (_, next_char) = next(l)?;
559 if is_quote(next_char) {
560 break;
561 }
562 string.push(next_char);
563 }
564
565 error(
567 l.handler,
568 LexError {
569 kind: LexErrorKind::ExpectedCloseQuote {
570 position: next_index,
571 },
572 span: span(l, next_index, next_index + string.len()),
573 },
574 );
575
576 Literal::String(LitString {
577 span: sp,
578 parsed: string,
579 })
580 } else {
581 Literal::Char(LitChar { span: sp, parsed })
582 };
583
584 Ok(Some(CommentedTokenTree::Tree(literal.into())))
585}
586
587fn parse_escape_code(l: &mut Lexer<'_>) -> core::result::Result<char, Option<ErrorEmitted>> {
588 let error = |kind, span| Err(Some(error(l.handler, LexError { kind, span })));
589
590 match l.stream.next() {
591 None => Err(None),
592 Some((_, '"')) => Ok('"'),
593 Some((_, '\'')) => Ok('\''),
594 Some((_, 'n')) => Ok('\n'),
595 Some((_, 'r')) => Ok('\r'),
596 Some((_, 't')) => Ok('\t'),
597 Some((_, '\\')) => Ok('\\'),
598 Some((_, '0')) => Ok('\0'),
599 Some((index, 'x')) => {
600 let (high, low) = match (l.stream.next(), l.stream.next()) {
601 (Some((_, high)), Some((_, low))) => (high, low),
602 _ => return Err(None),
603 };
604 let (high, low) = match (high.to_digit(16), low.to_digit(16)) {
605 (Some(high), Some(low)) => (high, low),
606 _ => return error(LexErrorKind::InvalidHexEscape, span_until(l, index)),
607 };
608 let parsed_character = char::from_u32((high << 4) | low).unwrap();
609 Ok(parsed_character)
610 }
611 Some((index, 'u')) => {
612 match l.stream.next() {
613 None => return Err(None),
614 Some((_, '{')) => (),
615 Some((_, unexpected_char)) => {
616 let span = span_one(l, index, unexpected_char);
617 let kind = LexErrorKind::UnicodeEscapeMissingBrace { position: index };
618 return error(kind, span);
619 }
620 }
621 let mut digits_start_position_opt = None;
622 let mut char_value = BigUint::from(0u32);
623 let digits_end_position = loop {
624 let (position, digit) = match l.stream.next() {
625 None => return Err(None),
626 Some((position, '}')) => break position,
627 Some((position, digit)) => (position, digit),
628 };
629 if digits_start_position_opt.is_none() {
630 digits_start_position_opt = Some(position);
631 };
632 let digit = match digit.to_digit(16) {
633 None => {
634 let span = span_one(l, position, digit);
635 let kind = LexErrorKind::InvalidUnicodeEscapeDigit { position };
636 return error(kind, span);
637 }
638 Some(digit) => digit,
639 };
640 char_value *= 16u32;
641 char_value += digit;
642 };
643 let digits_start_position = digits_start_position_opt.unwrap_or(digits_end_position);
644 let char_value = match u32::try_from(char_value) {
645 Err(..) => {
646 let span = span(l, digits_start_position, digits_end_position);
647 let kind = LexErrorKind::UnicodeEscapeOutOfRange { position: index };
648 return error(kind, span);
649 }
650 Ok(char_value) => char_value,
651 };
652 let parsed_character = match char::from_u32(char_value) {
653 None => {
654 let span_all = span_until(l, index);
655 let kind = LexErrorKind::UnicodeEscapeInvalidCharValue { span: span_all };
656 let span = span(l, digits_start_position, digits_end_position);
657 return error(kind, span);
658 }
659 Some(parsed_character) => parsed_character,
660 };
661 Ok(parsed_character)
662 }
663 Some((index, unexpected_char)) => error(
664 LexErrorKind::InvalidEscapeCode { position: index },
665 span_one(l, index, unexpected_char),
666 ),
667 }
668}
669
670fn lex_int_lit(
671 l: &mut Lexer<'_>,
672 index: usize,
673 character: char,
674) -> Result<Option<CommentedTokenTree>> {
675 let digit = match character.to_digit(10) {
676 None => return Ok(None),
677 Some(d) => d,
678 };
679
680 let decimal_int_lit = |l, digit: u32| {
681 let mut big_uint = BigUint::from(digit);
682 let end_opt = parse_digits(&mut big_uint, l, 10);
683 (big_uint, end_opt)
684 };
685 let (radix, (big_uint, end_opt)) = if digit == 0 {
686 let prefixed_int_lit = |l: &mut Lexer<'_>, radix| {
687 let _ = l.stream.next();
688 let d = l.stream.next();
689 let incomplete_int_lit = |end| {
690 let kind = match radix {
691 16 => LexErrorKind::IncompleteHexIntLiteral { position: index },
692 8 => LexErrorKind::IncompleteOctalIntLiteral { position: index },
693 2 => LexErrorKind::IncompleteBinaryIntLiteral { position: index },
694 _ => unreachable!(),
695 };
696 let span = span(l, index, end);
697 error(l.handler, LexError { kind, span })
698 };
699 let (digit_pos, digit) = d.ok_or_else(|| incomplete_int_lit(l.src.len()))?;
700 let radix_digit = digit
701 .to_digit(radix)
702 .ok_or_else(|| incomplete_int_lit(digit_pos))?;
703 let mut big_uint = BigUint::from(radix_digit);
704 let end_opt = parse_digits(&mut big_uint, l, radix);
705 Ok((big_uint, end_opt))
706 };
707
708 match l.stream.peek() {
709 Some((_, 'x')) => (16, prefixed_int_lit(l, 16)?),
710 Some((_, 'o')) => (8, prefixed_int_lit(l, 8)?),
711 Some((_, 'b')) => (2, prefixed_int_lit(l, 2)?),
712 Some((_, '_' | '0'..='9')) => (10, decimal_int_lit(l, 0)),
713 Some(&(next_index, _)) => (10, (BigUint::from(0u32), Some(next_index))),
714 None => (10, (BigUint::from(0u32), None)),
715 }
716 } else {
717 (10, decimal_int_lit(l, digit))
718 };
719
720 let ty_opt = lex_int_ty_opt(l)?;
721
722 if let Some((LitIntType::U256, span)) = &ty_opt {
724 if radix != 16 {
725 return Err(error(
726 l.handler,
727 LexError {
728 kind: LexErrorKind::U256NotInHex,
729 span: span.clone(),
730 },
731 ));
732 }
733 }
734
735 let literal = Literal::Int(LitInt {
736 span: span(l, index, end_opt.unwrap_or(l.src.len())),
737 parsed: big_uint,
738 ty_opt,
739 is_generated_b256: false,
740 });
741
742 Ok(Some(CommentedTokenTree::Tree(literal.into())))
743}
744
745fn lex_int_ty_opt(l: &mut Lexer<'_>) -> Result<Option<(LitIntType, Span)>> {
746 let (suffix_start_position, c) = match l.stream.next_if(|(_, c)| c.is_xid_continue()) {
747 None => return Ok(None),
748 Some(x) => x,
749 };
750 let mut suffix = String::from(c);
751 let suffix_end_position = loop {
752 match l.stream.peek() {
753 Some((_, c)) if c.is_xid_continue() => {
754 suffix.push(*c);
755 let _ = l.stream.next();
756 }
757 Some((pos, _)) => break *pos,
758 None => break l.src.len(),
759 }
760 };
761 let ty = match parse_int_suffix(&suffix) {
763 Some(s) => s,
764 None => {
765 let span = span(l, suffix_start_position, suffix_end_position);
766 let kind = LexErrorKind::InvalidIntSuffix {
767 suffix: Ident::new(span.clone()),
768 };
769 error(l.handler, LexError { kind, span });
770 return Ok(None);
771 }
772 };
773 let span = span_until(l, suffix_start_position);
774 Ok(Some((ty, span)))
775}
776
777pub fn parse_int_suffix(suffix: &str) -> Option<LitIntType> {
779 Some(match suffix {
780 "u8" => LitIntType::U8,
781 "u16" => LitIntType::U16,
782 "u32" => LitIntType::U32,
783 "u64" => LitIntType::U64,
784 "u256" => LitIntType::U256,
785 "i8" => LitIntType::I8,
786 "i16" => LitIntType::I16,
787 "i32" => LitIntType::I32,
788 "i64" => LitIntType::I64,
789 _ => return None,
790 })
791}
792
793fn parse_digits(big_uint: &mut BigUint, l: &mut Lexer<'_>, radix: u32) -> Option<usize> {
794 loop {
795 match l.stream.peek() {
796 None => break None,
797 Some((_, '_')) => {
798 let _ = l.stream.next();
799 }
800 Some(&(index, character)) => match character.to_digit(radix) {
801 None => break Some(index),
802 Some(digit) => {
803 let _ = l.stream.next();
804 *big_uint *= radix;
805 *big_uint += digit;
806 }
807 },
808 };
809 }
810}
811
812fn lex_punctuation(l: &mut Lexer<'_>, index: usize, character: char) -> Option<CommentedTokenTree> {
813 let punct = Punct {
814 kind: character.as_punct_kind()?,
815 spacing: match l.stream.peek() {
816 Some((_, next_character)) if next_character.as_punct_kind().is_some() => Spacing::Joint,
817 _ => Spacing::Alone,
818 },
819 span: span_until(l, index),
820 };
821 Some(CommentedTokenTree::Tree(punct.into()))
822}
823
824fn span_until(l: &mut Lexer<'_>, start: usize) -> Span {
825 let end = l.stream.peek().map_or(l.src.len(), |(end, _)| *end);
826 span(l, start, end)
827}
828
829fn span_one(l: &Lexer<'_>, start: usize, c: char) -> Span {
830 span(l, start, start + c.len_utf8())
831}
832
833fn span(l: &Lexer<'_>, start: usize, end: usize) -> Span {
834 Span::new(l.src.clone(), start, end, *l.source_id).unwrap()
835}
836
837fn error(handler: &Handler, error: LexError) -> ErrorEmitted {
839 handler.emit_err(CompileError::Lex { error })
840}
841
#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use std::sync::Arc;
    use sway_ast::{
        literal::{LitChar, Literal},
        token::{
            Comment, CommentKind, CommentedTokenTree, CommentedTree, DocComment, DocStyle,
            TokenTree,
        },
    };
    use sway_error::{
        error::CompileError,
        handler::Handler,
        lex_error::{LexError, LexErrorKind},
    };

    /// Every Unicode text-direction control character inside a string or char literal is
    /// reported as an error: four in the second string literal and one in the char literal.
    #[test]
    fn lex_bidi() {
        let source = "
 script;
 use std::string::String;
 fn main() {
 let a = String::from_ascii_str(\"fuel\");
 let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
 if a.as_bytes() == b.as_bytes() {
 log(\"same\");
 } else {
 log(\"different\");
 }
 let lrm = '\u{202E}';
 log(lrm);
 }
 ";
        let handler = Handler::default();
        let _stream =
            lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();

        let (errors, warnings) = handler.consume();
        assert_eq!(warnings.len(), 0);
        assert_eq!(errors.len(), 5);
        for err in errors {
            assert_matches!(
                err,
                CompileError::Lex {
                    error: LexError {
                        span: _,
                        kind: LexErrorKind::UnicodeTextDirInLiteral {
                            position: _,
                            character: _
                        }
                    }
                }
            );
        }
    }

    /// Comments are kept in the commented token stream, at top level and inside groups.
    #[test]
    fn lex_commented_token_stream() {
        let source = r#"
 //
 // Single-line comment.
 struct Foo {
 /* multi-
 * line-
 * comment */
 bar: i32, // trailing comment
 }
 "#;
        let handler = Handler::default();
        let stream = lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();
        assert!(handler.consume().0.is_empty());

        let mut top_level = stream.token_trees().iter();
        for expected in ["//", "// Single-line comment.", "struct", "Foo"] {
            assert_eq!(top_level.next().unwrap().span().as_str(), expected);
        }

        let Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) = top_level.next() else {
            panic!("expected group")
        };
        let mut in_group = group.token_stream.token_trees().iter();
        for expected in [
            "/* multi-\n * line-\n * comment */",
            "bar",
            ":",
            "i32",
            ",",
        ] {
            assert_eq!(in_group.next().unwrap().span().as_str(), expected);
        }
        assert_matches!(
            in_group.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Trailing,
            })) if span.as_str() == "// trailing comment"
        );
        assert!(in_group.next().is_none());

        assert!(top_level.next().is_none());
    }

    /// Line comments are classified as `Newlined` or `Trailing` depending on what precedes them
    /// on their line.
    #[test]
    fn lex_comments_check_comment_kind() {
        let source = r#"
 // CommentKind::Newlined
 abi Foo {
 // CommentKind::Newlined
 fn bar(); // CommentKind::Trailing
 // CommentKind::Newlined
 }
 "#;
        let handler = Handler::default();
        let stream = lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();
        assert!(handler.consume().0.is_empty());

        let mut top_level = stream.token_trees().iter();
        assert_matches!(
            top_level.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "// CommentKind::Newlined"
        );
        for expected in ["abi", "Foo"] {
            assert_eq!(top_level.next().unwrap().span().as_str(), expected);
        }

        let Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) = top_level.next() else {
            panic!("expected group")
        };
        let mut in_group = group.token_stream.token_trees().iter();

        assert_matches!(
            in_group.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "// CommentKind::Newlined"
        );
        for expected in ["fn", "bar", "()", ";"] {
            assert_eq!(in_group.next().unwrap().span().as_str(), expected);
        }
        assert_matches!(
            in_group.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Trailing,
            })) if span.as_str() == "// CommentKind::Trailing"
        );
        assert_matches!(
            in_group.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "// CommentKind::Newlined"
        );
        assert!(in_group.next().is_none());
    }

    /// `///` yields outer doc comments, while `//`, `////` and a `//!` that does not start the
    /// file stay plain comments.
    #[test]
    fn lex_doc_comments() {
        let source = r#"
 //none
 ////none
 //!inner
 //! inner
 ///outer
 /// outer
 "#;
        let handler = Handler::default();
        let stream = lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();
        assert!(handler.consume().0.is_empty());

        let mut tts = stream.token_trees().iter();
        for expected in ["//none", "////none", "//!inner", "//! inner"] {
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() == expected
            );
        }
        for (expected, expected_content) in [("///outer", "outer"), ("/// outer", " outer")] {
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                    doc_style: DocStyle::Outer,
                    span,
                    content_span
                }))) if span.as_str() == expected && content_span.as_str() == expected_content
            );
        }
        assert_eq!(tts.next(), None);
    }

    /// An escaped single quote is a valid char literal.
    #[test]
    fn lex_char_escaped_quote() {
        let source = r"
 '\''
 ";
        let handler = Handler::default();
        let stream = lex(&handler, &Arc::from(source), 0, source.len(), None).unwrap();
        assert!(handler.consume().0.is_empty());

        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(TokenTree::Literal(Literal::Char(LitChar {
                parsed: '\'',
                ..
            })))
        );
        assert_eq!(tts.next(), None);
    }
}