use core::mem;
use extension_trait::extension_trait;
use num_bigint::BigUint;
use sway_ast::literal::{LitChar, LitInt, LitIntType, LitString, Literal};
use sway_ast::token::{
    Comment, CommentKind, CommentedGroup, CommentedTokenStream, CommentedTokenTree, DocComment,
    DocStyle, Punct, Spacing, TokenStream,
};
use sway_error::error::CompileError;
use sway_error::handler::{ErrorEmitted, Handler};
use sway_error::lex_error::{LexError, LexErrorKind};
use sway_types::span::Source;
use sway_types::{
    ast::{Delimiter, PunctKind},
    Ident, SourceId, Span, Spanned,
};
use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
use unicode_xid::UnicodeXID;

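// `char` classification helpers used by the lexer: open/close delimiter and
// single-character punctuation recognition.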
#[extension_trait]
impl CharExt for char {
    fn as_open_delimiter(self) -> Option<Delimiter> {
        match self {
            '(' => Some(Delimiter::Parenthesis),
            '{' => Some(Delimiter::Brace),
            '[' => Some(Delimiter::Bracket),
            _ => None,
        }
    }

    fn as_close_delimiter(self) -> Option<Delimiter> {
        match self {
            ')' => Some(Delimiter::Parenthesis),
            '}' => Some(Delimiter::Brace),
            ']' => Some(Delimiter::Bracket),
            _ => None,
        }
    }

    fn as_punct_kind(self) -> Option<PunctKind> {
        match self {
            ';' => Some(PunctKind::Semicolon),
            ':' => Some(PunctKind::Colon),
            '/' => Some(PunctKind::ForwardSlash),
            ',' => Some(PunctKind::Comma),
            '*' => Some(PunctKind::Star),
            '+' => Some(PunctKind::Add),
            '-' => Some(PunctKind::Sub),
            '<' => Some(PunctKind::LessThan),
            '>' => Some(PunctKind::GreaterThan),
            '=' => Some(PunctKind::Equals),
            '.' => Some(PunctKind::Dot),
            '!' => Some(PunctKind::Bang),
            '%' => Some(PunctKind::Percent),
            '&' => Some(PunctKind::Ampersand),
            '^' => Some(PunctKind::Caret),
            '|' => Some(PunctKind::Pipe),
            '_' => Some(PunctKind::Underscore),
            '#' => Some(PunctKind::Sharp),
            _ => None,
        }
    }
}

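/// Like `str::char_indices`, but starts at an arbitrary byte `position` and
/// always yields offsets relative to the start of `src`.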
struct CharIndicesInner<'a> {
    src: &'a str,
    position: usize,
}

impl Iterator for CharIndicesInner<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<(usize, char)> {
        let mut char_indices = self.src[self.position..].char_indices();
        let (_, c) = char_indices.next()?;
        let ret = (self.position, c);
        match char_indices.next() {
            Some((char_width, _)) => self.position += char_width,
            None => self.position = self.src.len(),
        };
        Some(ret)
    }
}

type CharIndices<'a> = std::iter::Peekable<CharIndicesInner<'a>>;
type Result<T> = core::result::Result<T, ErrorEmitted>;

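/// Shared state threaded through the lexing helpers below: the error handler,
/// the source text and its id, and a peekable character stream.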
struct Lexer<'l> {
    handler: &'l Handler,
    src: &'l Source,
    source_id: &'l Option<SourceId>,
    stream: &'l mut CharIndices<'l>,
}

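/// Lexes `src[start..end]` into a [`TokenStream`], discarding comments.
///
/// This is a thin wrapper over [`lex_commented`] that strips comments from the
/// result. A minimal usage sketch (mirroring the tests below; `input` stands
/// for any source text):
///
/// ```ignore
/// let handler = Handler::default();
/// let stream = lex(&handler, input.into(), 0, input.len(), None)?;
/// ```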
pub fn lex(
    handler: &Handler,
    src: Source,
    start: usize,
    end: usize,
    source_id: Option<SourceId>,
) -> Result<TokenStream> {
    lex_commented(handler, src, start, end, &source_id).map(|stream| stream.strip_comments())
}

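/// Lexes `src[start..end]` into a [`CommentedTokenStream`], preserving
/// comments and doc comments as token trees and grouping tokens by matching
/// delimiters. Lexical errors are emitted through `handler`, and lexing
/// continues past recoverable errors.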
pub fn lex_commented(
    handler: &Handler,
    src: Source,
    start: usize,
    end: usize,
    source_id: &Option<SourceId>,
) -> Result<CommentedTokenStream> {
    let stream = &mut CharIndicesInner {
        src: &src.text[..end],
        position: start,
    }
    .peekable();
    let mut l = Lexer {
        handler,
        src: &src,
        source_id,
        stream,
    };
    let mut file_start_offset: usize = 0;

    let mut parent_token_trees = Vec::new();
    let mut token_trees = Vec::new();
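    // One iteration per token: whitespace, comments, identifiers, delimiters,
    // string/char/int literals, and punctuation are each tried in turn.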
    while let Some((mut index, mut character)) = l.stream.next() {
        if character.is_whitespace() {
            if index - file_start_offset == 0 {
                file_start_offset += character.len_utf8();
            }
            continue;
        }
        if character == '/' {
            match l.stream.peek() {
                Some((_, '/')) => {
                    let search_end = token_trees
                        .last()
                        .map(|tt| {
                            if let CommentedTokenTree::Tree(t) = tt {
                                t.span().end()
                            } else {
                                0
                            }
                        })
                        .unwrap_or_default();

                    let has_newline = src.text[search_end..index]
                        .chars()
                        .rev()
                        .take_while(|c| c.is_whitespace())
                        .filter(|&c| c == '\n')
                        .count()
                        > 0;
                    let start_of_file_found = search_end == 0 && index == 0;

                    let comment_kind = if has_newline || start_of_file_found {
                        CommentKind::Newlined
                    } else {
                        CommentKind::Trailing
                    };

                    let ctt = lex_line_comment(&mut l, end, index, comment_kind);
                    token_trees.push(ctt);
                    continue;
                }
                Some((_, '*')) => {
                    if let Some(token) = lex_block_comment(&mut l, index) {
                        token_trees.push(token);
                    }
                    continue;
                }
                Some(_) | None => {}
            }
        }

        if character.is_xid_start() || character == '_' {
            let is_raw_ident = character == 'r' && matches!(l.stream.peek(), Some((_, '#')));
            if is_raw_ident {
                l.stream.next();
                if let Some((next_index, next_character)) = l.stream.next() {
                    character = next_character;
                    index = next_index;
                }
                if !(character.is_xid_start() || character == '_') {
                    let kind = LexErrorKind::InvalidCharacter {
                        position: index,
                        character,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                    continue;
                }
            }

            let not_is_single_underscore = character != '_'
                || l.stream
                    .peek()
                    .is_some_and(|(_, next)| next.is_xid_continue());
            if not_is_single_underscore {
                while l.stream.next_if(|(_, c)| c.is_xid_continue()).is_some() {}
                let ident = Ident::new_with_raw(span_until(&mut l, index), is_raw_ident);
                token_trees.push(CommentedTokenTree::Tree(ident.into()));
                continue;
            }
        }
        if let Some(delimiter) = character.as_open_delimiter() {
            let token_trees = mem::take(&mut token_trees);
            parent_token_trees.push((token_trees, index, delimiter));
            continue;
        }
        if let Some(close_delimiter) = character.as_close_delimiter() {
            match parent_token_trees.pop() {
                None => {
                    let kind = LexErrorKind::UnexpectedCloseDelimiter {
                        position: index,
                        close_delimiter,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                }
                Some((parent, open_index, open_delimiter)) => {
                    if open_delimiter != close_delimiter {
                        let kind = LexErrorKind::MismatchedDelimiters {
                            open_position: open_index,
                            close_position: index,
                            open_delimiter,
                            close_delimiter,
                        };
                        let span = span_one(&l, index, character);
                        error(l.handler, LexError { kind, span });
                    }
                    token_trees = lex_close_delimiter(
                        &mut l,
                        index,
                        parent,
                        token_trees,
                        open_index,
                        open_delimiter,
                    );
                }
            }
            continue;
        }
        if let Some(token) = lex_string(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_char(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_int_lit(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_punctuation(&mut l, index, character) {
            token_trees.push(token);
            continue;
        }

        let kind = LexErrorKind::InvalidCharacter {
            position: index,
            character,
        };
        let span = span_one(&l, index, character);
        error(l.handler, LexError { kind, span });
        continue;
    }

    while let Some((parent, open_index, open_delimiter)) = parent_token_trees.pop() {
        let kind = LexErrorKind::UnclosedDelimiter {
            open_position: open_index,
            open_delimiter,
        };
        let span = span_one(&l, open_index, open_delimiter.as_open_char());
        error(l.handler, LexError { kind, span });

        token_trees = lex_close_delimiter(
            &mut l,
            src.text.len(),
            parent,
            token_trees,
            open_index,
            open_delimiter,
        );
    }
    Ok(CommentedTokenStream {
        token_trees,
        full_span: span(&l, start, end),
    })
}

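/// Closes the group opened at `open_index`: wraps the tokens collected since
/// the opening delimiter into a [`CommentedGroup`] and appends it to `parent`,
/// returning `parent` so the caller can resume collecting at the outer level.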
fn lex_close_delimiter(
    l: &mut Lexer<'_>,
    index: usize,
    mut parent: Vec<CommentedTokenTree>,
    token_trees: Vec<CommentedTokenTree>,
    open_index: usize,
    delimiter: Delimiter,
) -> Vec<CommentedTokenTree> {
    let start_index = open_index + delimiter.as_open_char().len_utf8();
    let full_span = span(l, start_index, index);
    let group = CommentedGroup {
        token_stream: CommentedTokenStream {
            token_trees,
            full_span,
        },
        delimiter,
        span: span_until(l, open_index),
    };
    parent.push(CommentedTokenTree::Tree(group.into()));
    parent
}

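/// Lexes a `//` comment starting at `index`, spanning to the end of the line.
/// `//!` produces an inner doc comment, `///` an outer doc comment, and
/// anything else (including `////`) a plain [`Comment`] with the given
/// [`CommentKind`].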
fn lex_line_comment(
    l: &mut Lexer<'_>,
    end: usize,
    index: usize,
    comment_kind: CommentKind,
) -> CommentedTokenTree {
    let _ = l.stream.next();

    let end = l
        .stream
        .find(|(_, character)| *character == '\n')
        .map_or(end, |(end, _)| end);
    let sp = span(l, index, end);

    let doc_style = match (sp.as_str().chars().nth(2), sp.as_str().chars().nth(3)) {
        (Some('!'), _) => Some(DocStyle::Inner),
        (Some('/'), Some('/')) => None,
        (Some('/'), _) => Some(DocStyle::Outer),
        _ => None,
    };

    if let Some(doc_style) = doc_style {
        let doc_comment = DocComment {
            span: sp,
            doc_style,
            content_span: span(l, index + 3, end),
        };
        CommentedTokenTree::Tree(doc_comment.into())
    } else {
        Comment {
            span: sp,
            comment_kind,
        }
        .into()
    }
}

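/// Lexes a `/* ... */` block comment starting at `index`, supporting nested
/// block comments. Returns `None` (after emitting an error) if the comment is
/// not closed before the end of the source. Comments containing a newline are
/// marked [`CommentKind::Multilined`], otherwise [`CommentKind::Inlined`].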
fn lex_block_comment(l: &mut Lexer<'_>, index: usize) -> Option<CommentedTokenTree> {
    let _ = l.stream.next();
    let mut unclosed_indices = vec![index];

    let unclosed_multiline_comment = |l: &Lexer<'_>, unclosed_indices: Vec<_>| {
        let span = span(l, *unclosed_indices.last().unwrap(), l.src.text.len() - 1);
        let kind = LexErrorKind::UnclosedMultilineComment { unclosed_indices };
        error(l.handler, LexError { kind, span });
        None
    };

    let mut comment_kind = CommentKind::Inlined;

    loop {
        match l.stream.next() {
            None => return unclosed_multiline_comment(l, unclosed_indices),
            Some((_, '*')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                Some((slash_ix, '/')) => {
                    let start = unclosed_indices.pop().unwrap();
                    if unclosed_indices.is_empty() {
                        let end = slash_ix + '/'.len_utf8();
                        let span = span(l, start, end);
                        return Some(Comment { span, comment_kind }.into());
                    }
                }
                Some(_) => {}
            },
            Some((next_index, '/')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                Some((_, '*')) => unclosed_indices.push(next_index),
                Some(_) => {}
            },
            Some((_, '\n')) => {
                comment_kind = CommentKind::Multilined;
            }
            Some(_) => {}
        }
    }
}

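/// Lexes a double-quoted string literal starting at `index`. Returns
/// `Ok(None)` if `character` is not `"`. Escape sequences are handled by
/// [`parse_escape_code`]; Unicode text-direction control characters are
/// rejected with an error.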
fn lex_string(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    if character != '"' {
        return Ok(None);
    }
    let mut parsed = String::new();
    loop {
        let unclosed_string_lit = |l: &Lexer<'_>, end| {
            error(
                l.handler,
                LexError {
                    kind: LexErrorKind::UnclosedStringLiteral { position: index },
                    span: span(l, index, end),
                },
            )
        };
        let (next_index, next_character) = l.stream.next().ok_or_else(|| {
            let mut end = l.src.text.len() - 1;
            while !l.src.text.is_char_boundary(end) {
                end -= 1;
            }
            unclosed_string_lit(l, end)
        })?;
        parsed.push(match next_character {
            '\\' => parse_escape_code(l)
                .map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.text.len())))?,
            '"' => break,
            ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
                let kind = LexErrorKind::UnicodeTextDirInLiteral {
                    position: next_index,
                    character: next_character,
                };
                let span = span_one(l, next_index, next_character);
                error(l.handler, LexError { span, kind });
                continue;
            }
            _ => next_character,
        });
    }
    let span = span_until(l, index);
    let literal = Literal::String(LitString { span, parsed });
    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

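/// Lexes a character literal starting at `index`. Returns `Ok(None)` if
/// `character` is not `'`. If more than one character appears before the
/// closing quote, an `ExpectedCloseQuote` error is emitted and the contents
/// are recovered as a string literal instead.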
fn lex_char(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let is_quote = |c| c == '\'';
    if !is_quote(character) {
        return Ok(None);
    }

    let unclosed_char_lit = |l: &Lexer<'_>| {
        let err = LexError {
            kind: LexErrorKind::UnclosedCharLiteral { position: index },
            span: span(l, index, l.src.text.len()),
        };
        error(l.handler, err)
    };
    let next = |l: &mut Lexer<'_>| l.stream.next().ok_or_else(|| unclosed_char_lit(l));
    let escape = |l: &mut Lexer<'_>, next_char| {
        if next_char == '\\' {
            parse_escape_code(l).map_err(|e| e.unwrap_or_else(|| unclosed_char_lit(l)))
        } else {
            Ok(next_char)
        }
    };

    let (next_index, next_char) = next(l)?;
    if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
        let kind = LexErrorKind::UnicodeTextDirInLiteral {
            position: next_index,
            character: next_char,
        };
        let span = span_one(l, next_index, next_char);
        error(l.handler, LexError { span, kind });
    }

    let parsed = escape(l, next_char)?;

    let (next_index, next_char) = next(l)?;
    let sp = span_until(l, index);

    let literal = if !is_quote(next_char) {
        let mut string = String::new();
        string.push(parsed);
        string.push(escape(l, next_char)?);
        loop {
            let (_, next_char) = next(l)?;
            if is_quote(next_char) {
                break;
            }
            string.push(next_char);
        }

        error(
            l.handler,
            LexError {
                kind: LexErrorKind::ExpectedCloseQuote {
                    position: next_index,
                },
                span: span(l, next_index, next_index + string.len()),
            },
        );

        Literal::String(LitString {
            span: sp,
            parsed: string,
        })
    } else {
        Literal::Char(LitChar { span: sp, parsed })
    };

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

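/// Parses the escape sequence following a `\` in a string or character
/// literal: `\"`, `\'`, `\n`, `\r`, `\t`, `\\`, `\0`, `\xHH`, and `\u{...}`.
/// Returns `Err(None)` if the input ends mid-escape, and `Err(Some(_))` if a
/// lexing error was already emitted.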
fn parse_escape_code(l: &mut Lexer<'_>) -> core::result::Result<char, Option<ErrorEmitted>> {
    let error = |kind, span| Err(Some(error(l.handler, LexError { kind, span })));

    match l.stream.next() {
        None => Err(None),
        Some((_, '"')) => Ok('"'),
        Some((_, '\'')) => Ok('\''),
        Some((_, 'n')) => Ok('\n'),
        Some((_, 'r')) => Ok('\r'),
        Some((_, 't')) => Ok('\t'),
        Some((_, '\\')) => Ok('\\'),
        Some((_, '0')) => Ok('\0'),
        Some((index, 'x')) => {
            let (high, low) = match (l.stream.next(), l.stream.next()) {
                (Some((_, high)), Some((_, low))) => (high, low),
                _ => return Err(None),
            };
            let (high, low) = match (high.to_digit(16), low.to_digit(16)) {
                (Some(high), Some(low)) => (high, low),
                _ => return error(LexErrorKind::InvalidHexEscape, span_until(l, index)),
            };
            let parsed_character = char::from_u32((high << 4) | low).unwrap();
            Ok(parsed_character)
        }
        Some((index, 'u')) => {
            match l.stream.next() {
                None => return Err(None),
                Some((_, '{')) => (),
                Some((_, unexpected_char)) => {
                    let span = span_one(l, index, unexpected_char);
                    let kind = LexErrorKind::UnicodeEscapeMissingBrace { position: index };
                    return error(kind, span);
                }
            }
            let mut digits_start_position_opt = None;
            let mut char_value = BigUint::from(0u32);
            let digits_end_position = loop {
                let (position, digit) = match l.stream.next() {
                    None => return Err(None),
                    Some((position, '}')) => break position,
                    Some((position, digit)) => (position, digit),
                };
                if digits_start_position_opt.is_none() {
                    digits_start_position_opt = Some(position);
                };
                let digit = match digit.to_digit(16) {
                    None => {
                        let span = span_one(l, position, digit);
                        let kind = LexErrorKind::InvalidUnicodeEscapeDigit { position };
                        return error(kind, span);
                    }
                    Some(digit) => digit,
                };
                char_value *= 16u32;
                char_value += digit;
            };
            let digits_start_position = digits_start_position_opt.unwrap_or(digits_end_position);
            let char_value = match u32::try_from(char_value) {
                Err(..) => {
                    let span = span(l, digits_start_position, digits_end_position);
                    let kind = LexErrorKind::UnicodeEscapeOutOfRange { position: index };
                    return error(kind, span);
                }
                Ok(char_value) => char_value,
            };
            let parsed_character = match char::from_u32(char_value) {
                None => {
                    let span_all = span_until(l, index);
                    let kind = LexErrorKind::UnicodeEscapeInvalidCharValue { span: span_all };
                    let span = span(l, digits_start_position, digits_end_position);
                    return error(kind, span);
                }
                Some(parsed_character) => parsed_character,
            };
            Ok(parsed_character)
        }
        Some((index, unexpected_char)) => error(
            LexErrorKind::InvalidEscapeCode { position: index },
            span_one(l, index, unexpected_char),
        ),
    }
}

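/// Lexes an integer literal starting at `index`. Returns `Ok(None)` if
/// `character` is not a decimal digit. A leading `0x`, `0o`, or `0b` selects
/// hexadecimal, octal, or binary; `_` separators are skipped, and an optional
/// type suffix (e.g. `u64`) is parsed by [`lex_int_ty_opt`].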
fn lex_int_lit(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let digit = match character.to_digit(10) {
        None => return Ok(None),
        Some(d) => d,
    };

    let decimal_int_lit = |l, digit: u32| {
        let mut big_uint = BigUint::from(digit);
        let end_opt = parse_digits(&mut big_uint, l, 10);
        (big_uint, end_opt)
    };
    let (big_uint, end_opt) = if digit == 0 {
        let prefixed_int_lit = |l: &mut Lexer<'_>, radix| {
            let _ = l.stream.next();
            let d = l.stream.next();
            let incomplete_int_lit = |end| {
                let kind = match radix {
                    16 => LexErrorKind::IncompleteHexIntLiteral { position: index },
                    8 => LexErrorKind::IncompleteOctalIntLiteral { position: index },
                    2 => LexErrorKind::IncompleteBinaryIntLiteral { position: index },
                    _ => unreachable!(),
                };
                let span = span(l, index, end);
                error(l.handler, LexError { kind, span })
            };
            let (digit_pos, digit) = d.ok_or_else(|| incomplete_int_lit(l.src.text.len()))?;
            let radix_digit = digit
                .to_digit(radix)
                .ok_or_else(|| incomplete_int_lit(digit_pos))?;
            let mut big_uint = BigUint::from(radix_digit);
            let end_opt = parse_digits(&mut big_uint, l, radix);
            Ok((big_uint, end_opt))
        };

        match l.stream.peek() {
            Some((_, 'x')) => prefixed_int_lit(l, 16)?,
            Some((_, 'o')) => prefixed_int_lit(l, 8)?,
            Some((_, 'b')) => prefixed_int_lit(l, 2)?,
            Some((_, '_' | '0'..='9')) => decimal_int_lit(l, 0),
            Some(&(next_index, _)) => (BigUint::from(0u32), Some(next_index)),
            None => (BigUint::from(0u32), None),
        }
    } else {
        decimal_int_lit(l, digit)
    };

    let ty_opt = lex_int_ty_opt(l)?;

    let literal = Literal::Int(LitInt {
        span: span(l, index, end_opt.unwrap_or(l.src.text.len())),
        parsed: big_uint,
        ty_opt,
        is_generated_b256: false,
    });

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

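/// Parses an optional integer type suffix immediately following the digits.
/// Unknown suffixes emit [`LexErrorKind::InvalidIntSuffix`] and are treated as
/// if no suffix were present.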
fn lex_int_ty_opt(l: &mut Lexer<'_>) -> Result<Option<(LitIntType, Span)>> {
    let (suffix_start_position, c) = match l.stream.next_if(|(_, c)| c.is_xid_continue()) {
        None => return Ok(None),
        Some(x) => x,
    };
    let mut suffix = String::from(c);
    let suffix_end_position = loop {
        match l.stream.peek() {
            Some((_, c)) if c.is_xid_continue() => {
                suffix.push(*c);
                let _ = l.stream.next();
            }
            Some((pos, _)) => break *pos,
            None => break l.src.text.len(),
        }
    };
    let ty = match parse_int_suffix(&suffix) {
        Some(s) => s,
        None => {
            let span = span(l, suffix_start_position, suffix_end_position);
            let kind = LexErrorKind::InvalidIntSuffix {
                suffix: Ident::new(span.clone()),
            };
            error(l.handler, LexError { kind, span });
            return Ok(None);
        }
    };
    let span = span_until(l, suffix_start_position);
    Ok(Some((ty, span)))
}

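/// Maps a literal suffix string to its [`LitIntType`], returning `None` for
/// unknown suffixes.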
pub fn parse_int_suffix(suffix: &str) -> Option<LitIntType> {
    Some(match suffix {
        "u8" => LitIntType::U8,
        "u16" => LitIntType::U16,
        "u32" => LitIntType::U32,
        "u64" => LitIntType::U64,
        "u256" => LitIntType::U256,
        "i8" => LitIntType::I8,
        "i16" => LitIntType::I16,
        "i32" => LitIntType::I32,
        "i64" => LitIntType::I64,
        _ => return None,
    })
}

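/// Accumulates digits of the given `radix` into `big_uint`, skipping `_`
/// separators. Returns the byte index of the first non-digit character, or
/// `None` if the source ends first.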
fn parse_digits(big_uint: &mut BigUint, l: &mut Lexer<'_>, radix: u32) -> Option<usize> {
    loop {
        match l.stream.peek() {
            None => break None,
            Some((_, '_')) => {
                let _ = l.stream.next();
            }
            Some(&(index, character)) => match character.to_digit(radix) {
                None => break Some(index),
                Some(digit) => {
                    let _ = l.stream.next();
                    *big_uint *= radix;
                    *big_uint += digit;
                }
            },
        };
    }
}

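/// Lexes a single punctuation character. Spacing is [`Spacing::Joint`] when
/// the next character is also punctuation, otherwise [`Spacing::Alone`].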
fn lex_punctuation(l: &mut Lexer<'_>, index: usize, character: char) -> Option<CommentedTokenTree> {
    let punct = Punct {
        kind: character.as_punct_kind()?,
        spacing: match l.stream.peek() {
            Some((_, next_character)) if next_character.as_punct_kind().is_some() => Spacing::Joint,
            _ => Spacing::Alone,
        },
        span: span_until(l, index),
    };
    Some(CommentedTokenTree::Tree(punct.into()))
}

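// Span construction helpers: `span_until` spans from `start` to the current
// peek position (or the end of the source), `span_one` covers a single
// character, and `span` wraps `Span::new` with the lexer's source and id.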
fn span_until(l: &mut Lexer<'_>, start: usize) -> Span {
    let end = l.stream.peek().map_or(l.src.text.len(), |(end, _)| *end);
    span(l, start, end)
}

fn span_one(l: &Lexer<'_>, start: usize, c: char) -> Span {
    span(l, start, start + c.len_utf8())
}

fn span(l: &Lexer<'_>, start: usize, end: usize) -> Span {
    Span::new(l.src.clone(), start, end, *l.source_id).unwrap()
}

fn error(handler: &Handler, error: LexError) -> ErrorEmitted {
    handler.emit_err(CompileError::Lex { error })
}

#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use sway_ast::{
        literal::{LitChar, Literal},
        token::{
            Comment, CommentKind, CommentedTokenTree, CommentedTree, DocComment, DocStyle,
            TokenTree,
        },
    };
    use sway_error::{
        error::CompileError,
        handler::Handler,
        lex_error::{LexError, LexErrorKind},
    };

    #[test]
    fn lex_bidi() {
        let input = "
 script;
 use std::string::String;
 fn main() {
 let a = String::from_ascii_str(\"fuel\");
 let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
 if a.as_bytes() == b.as_bytes() {
 log(\"same\");
 } else {
 log(\"different\");
 }
 let lrm = '\u{202E}';
 log(lrm);
 }
 ";
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let _stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        let (errors, warnings) = handler.consume();
        assert_eq!(warnings.len(), 0);
        assert_eq!(errors.len(), 5);
        for err in errors {
            assert_matches!(
                err,
                CompileError::Lex {
                    error: LexError {
                        span: _,
                        kind: LexErrorKind::UnicodeTextDirInLiteral {
                            position: _,
                            character: _
                        }
                    }
                }
            );
        }
    }

    #[test]
    fn lex_commented_token_stream() {
        let input = r#"
 //
 // Single-line comment.
 struct Foo {
 /* multi-
 * line-
 * comment */
 bar: i32, // trailing comment
 }
 "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_eq!(tts.next().unwrap().span().as_str(), "//");
        assert_eq!(
            tts.next().unwrap().span().as_str(),
            "// Single-line comment."
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "struct");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");
        {
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();
            assert_eq!(
                tts.next().unwrap().span().as_str(),
                "/* multi-\n * line-\n * comment */",
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            assert_eq!(tts.next().unwrap().span().as_str(), ":");
            assert_eq!(tts.next().unwrap().span().as_str(), "i32");
            assert_eq!(tts.next().unwrap().span().as_str(), ",");
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() == "// trailing comment"
            );
            assert!(tts.next().is_none());
        }
        assert!(tts.next().is_none());
    }

    #[test]
    fn lex_comments_check_comment_kind() {
        let input = r#"
 // CommentKind::Newlined
 abi Foo {
 // CommentKind::Newlined
 fn bar(); // CommentKind::Trailing
 // CommentKind::Newlined
 }
 "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();

        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "// CommentKind::Newlined"
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "abi");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");

        {
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();

            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() == "// CommentKind::Newlined"
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "fn");
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            assert_eq!(tts.next().unwrap().span().as_str(), "()");
            assert_eq!(tts.next().unwrap().span().as_str(), ";");
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() == "// CommentKind::Trailing"
            );
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() == "// CommentKind::Newlined"
            );
            assert!(tts.next().is_none());
        }
    }

    #[test]
    fn lex_doc_comments() {
        let input = r#"
 //none
 ////none
 //!inner
 //! inner
 ///outer
 /// outer
 "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "//none"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "////none"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() == "//!inner" && content_span.as_str() == "inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() == "//! inner" && content_span.as_str() == " inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() == "///outer" && content_span.as_str() == "outer"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() == "/// outer" && content_span.as_str() == " outer"
        );
        assert_eq!(tts.next(), None);
    }

    #[test]
    fn lex_char_escaped_quote() {
        let input = r"
 '\''
 ";
        let handler = Handler::default();
        let stream = lex(&handler, input.into(), 0, input.len(), None).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(TokenTree::Literal(Literal::Char(LitChar {
                parsed: '\'',
                ..
            })))
        );
        assert_eq!(tts.next(), None);
    }
}