1use std::mem::take;
2
3use smallvec::{smallvec, SmallVec};
4use swc_common::{BytePos, Span};
5use tracing::trace;
6
7use super::{
8 comments_buffer::{BufferedComment, BufferedCommentKind},
9 Context, Input, Lexer,
10};
11use crate::{
12 error::{Error, SyntaxError},
13 input::Tokens,
14 lexer::util::CharExt,
15 token::{BinOpToken, Keyword, Token, TokenAndSpan, TokenKind, WordKind},
16 EsVersion, Syntax,
17};
18
/// Mutable lexer state carried between successive `next_token` calls.
#[derive(Clone)]
pub(super) struct State {
    /// `true` when the next token may start an expression. This decides,
    /// for example, whether a `/` should be lexed as the start of a regex
    /// literal or as a division operator.
    pub is_expr_allowed: bool,
    /// When set, the next call to `next_token` re-lexes the input at this
    /// position as a regular expression (see `read_regexp`).
    pub next_regexp: Option<BytePos>,
    /// Whether a line break was seen before the current token. The start of
    /// the file counts as a line break (set from `is_first` in `next_token`).
    pub had_line_break: bool,
    /// Value of `had_line_break` captured before the most recent token was
    /// produced (updated in `Iterator::next`).
    pub had_line_break_before_last: bool,
    /// `true` until the first token has been read; enables shebang handling
    /// in `next_token`.
    is_first: bool,
    /// Start position of the current token (after skipped whitespace).
    pub start: BytePos,
    /// Current line number; starts at 1. NOTE(review): not updated in this
    /// file — presumably maintained by the whitespace/newline skipping code.
    pub cur_line: usize,
    /// Position where the current line starts. NOTE(review): not updated in
    /// this file — confirm against the line-tracking code.
    pub line_start: BytePos,
    /// End position of the previous token; used to attach buffered comments.
    pub prev_hi: BytePos,
    /// Start position of the template literal currently being lexed
    /// (set when a `` ` `` opens a template context).
    pub tpl_start: BytePos,

    /// Stack of syntactic contexts used to disambiguate tokens.
    context: TokenContexts,
    syntax: Syntax,

    /// Reduced kind of the most recently produced token, if any.
    token_type: Option<TokenType>,
}
43
/// Condensed representation of the previous token's kind, kept in [`State`]
/// to drive context-stack updates and expression-position decisions without
/// storing the full token.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum TokenType {
    Template,
    Dot,
    Colon,
    LBrace,
    RParen,
    Semi,
    BinOp(BinOpToken),
    Keyword(Keyword),
    JSXName,
    JSXText,
    JSXTagStart,
    JSXTagEnd,
    Arrow,
    /// Any other token kind, reduced to the two flags the state machine
    /// actually needs (see `From<TokenKind>` below).
    Other {
        before_expr: bool,
        can_have_trailing_comment: bool,
    },
}
64impl TokenType {
65 #[inline]
66 const fn before_expr(self) -> bool {
67 match self {
68 TokenType::JSXName
69 | TokenType::JSXTagStart
70 | TokenType::JSXTagEnd
71 | TokenType::Template
72 | TokenType::Dot
73 | TokenType::RParen => false,
74
75 TokenType::JSXText
76 | TokenType::Colon
77 | TokenType::LBrace
78 | TokenType::Semi
79 | TokenType::Arrow => true,
80
81 TokenType::BinOp(b) => b.before_expr(),
82 TokenType::Keyword(k) => k.before_expr(),
83 TokenType::Other { before_expr, .. } => before_expr,
84 }
85 }
86}
87
impl From<TokenKind> for TokenType {
    /// Reduces a full [`TokenKind`] to the compact [`TokenType`] used by the
    /// lexer's state machine.
    #[inline]
    fn from(t: TokenKind) -> Self {
        match t {
            TokenKind::Template { .. } => TokenType::Template,
            TokenKind::Dot => TokenType::Dot,
            TokenKind::Colon => TokenType::Colon,
            TokenKind::LBrace => TokenType::LBrace,
            TokenKind::RParen => TokenType::RParen,
            TokenKind::Semi => TokenType::Semi,
            TokenKind::JSXTagEnd => TokenType::JSXTagEnd,
            TokenKind::JSXTagStart => TokenType::JSXTagStart,
            TokenKind::JSXText { .. } => TokenType::JSXText,
            TokenKind::JSXName { .. } => TokenType::JSXName,
            TokenKind::BinOp(op) => TokenType::BinOp(op),
            TokenKind::Arrow => TokenType::Arrow,

            TokenKind::Word(WordKind::Keyword(k)) => TokenType::Keyword(k),
            // Everything else is folded into `Other`, keeping only the
            // `before_expr` flag and whether a comment after this token
            // should be treated as trailing it.
            _ => TokenType::Other {
                before_expr: t.before_expr(),
                can_have_trailing_comment: matches!(
                    t,
                    TokenKind::Num { .. }
                        | TokenKind::Str { .. }
                        | TokenKind::Word(WordKind::Ident(..))
                        | TokenKind::DollarLBrace
                        | TokenKind::Regex
                        | TokenKind::BigInt { .. }
                        | TokenKind::JSXText { .. }
                        | TokenKind::RBrace
                ),
            },
        }
    }
}
123
impl Tokens for Lexer<'_> {
    /// Replaces the lexer context. When switching into module mode, any
    /// buffered module-only errors are promoted to real errors.
    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        if ctx.module && !self.module_errors.borrow().is_empty() {
            let mut module_errors = self.module_errors.borrow_mut();
            self.errors.borrow_mut().append(&mut *module_errors);
        }
        self.ctx = ctx
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn syntax(&self) -> Syntax {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    /// Delegates to the inherent `Lexer::set_expr_allowed` (not a recursive
    /// call: inherent methods take precedence over trait methods).
    #[inline]
    fn set_expr_allowed(&mut self, allow: bool) {
        self.set_expr_allowed(allow)
    }

    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

    #[inline]
    fn token_context(&self) -> &TokenContexts {
        &self.state.context
    }

    #[inline]
    fn token_context_mut(&mut self) -> &mut TokenContexts {
        &mut self.state.context
    }

    #[inline]
    fn set_token_context(&mut self, c: TokenContexts) {
        self.state.context = c;
    }

    fn add_error(&self, error: Error) {
        self.errors.borrow_mut().push(error);
    }

    /// Records an error that only applies when the source is a module.
    /// If we already know we are in module mode it becomes a real error;
    /// otherwise it is buffered until `set_ctx` decides.
    fn add_module_mode_error(&self, error: Error) {
        if self.ctx.module {
            self.add_error(error);
            return;
        }
        self.module_errors.borrow_mut().push(error);
    }

    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors.borrow_mut())
    }

    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors.borrow_mut())
    }

    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }
}
203
impl Lexer<'_> {
    /// Flushes buffered comments once the end of input is reached.
    ///
    /// Pending comments are attached relative to the previous token: as
    /// leading comments when no token was ever produced (`prev_hi` still
    /// equals the file's start position), otherwise as trailing comments of
    /// the last token. Cold path: only runs once, at EOF.
    #[cold]
    #[inline(never)]
    fn consume_pending_comments(&mut self) {
        if let Some(comments) = self.comments.as_mut() {
            let comments_buffer = self.comments_buffer.as_mut().unwrap();
            let last = self.state.prev_hi;

            // Classify comments that were waiting for a "next" token that
            // will never come.
            for c in comments_buffer.take_pending_leading() {
                if last == self.start_pos {
                    // No token preceded these comments: leading.
                    comments_buffer.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: last,
                        comment: c,
                    });
                } else {
                    // A token came before: trailing.
                    comments_buffer.push(BufferedComment {
                        kind: BufferedCommentKind::Trailing,
                        pos: last,
                        comment: c,
                    });
                }
            }

            // Hand everything over to the user-provided `Comments` sink.
            for comment in comments_buffer.take_comments() {
                match comment.kind {
                    BufferedCommentKind::Leading => {
                        comments.add_leading(comment.pos, comment.comment);
                    }
                    BufferedCommentKind::Trailing => {
                        comments.add_trailing(comment.pos, comment.comment);
                    }
                }
            }
        }
    }

    /// Produces the next token, dispatching to the specialized readers:
    /// pending regex re-lex, shebang (first token only), JSX text / tag
    /// contents, template-literal continuations, and finally the generic
    /// `read_token`.
    ///
    /// `start` is advanced past skipped whitespace to the position where
    /// the token actually begins. Returns `Ok(None)` at end of input.
    fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
        // The parser may have requested that the upcoming input be
        // re-interpreted as a regular expression.
        if let Some(start) = self.state.next_regexp {
            return Ok(Some(self.read_regexp(start)?));
        }

        if self.state.is_first {
            if let Some(shebang) = self.read_shebang()? {
                return Ok(Some(Token::Shebang(shebang)));
            }
        }

        // The start of input counts as being preceded by a line break.
        self.state.had_line_break = self.state.is_first;
        self.state.is_first = false;

        // Skip whitespace unless the current context (template literal or
        // JSX text) must preserve it.
        if self.state.can_skip_space() {
            self.skip_space::<true>();
            *start = self.input.cur_pos();
        };

        match self.input.cur() {
            Some(..) => {}
            None => {
                // End of input: flush buffered comments and signal EOF.
                self.consume_pending_comments();

                return Ok(None);
            }
        };

        self.state.start = *start;

        // JSX-specific tokenization. Disabled inside property names and
        // type annotations, where `<` has non-JSX meanings.
        if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type {
            // Inside a JSX element body, read raw JSX text.
            if self.state.context.current() == Some(TokenContext::JSXExpr) {
                return self.read_jsx_token();
            }

            let c = self.cur();
            if let Some(c) = c {
                if self.state.context.current() == Some(TokenContext::JSXOpeningTag)
                    || self.state.context.current() == Some(TokenContext::JSXClosingTag)
                {
                    // Tag/attribute names.
                    if c.is_ident_start() {
                        return self.read_jsx_word().map(Some);
                    }

                    if c == '>' {
                        unsafe {
                            self.input.bump();
                        }
                        return Ok(Some(Token::JSXTagEnd));
                    }

                    // Attribute string values (only valid in opening tags).
                    if (c == '\'' || c == '"')
                        && self.state.context.current() == Some(TokenContext::JSXOpeningTag)
                    {
                        return self.read_jsx_str(c).map(Some);
                    }
                }

                // `<` in expression position starts a JSX tag (but `<!` is
                // left for other handling).
                if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') {
                    let had_line_break_before_last = self.had_line_break_before_last();
                    let cur_pos = self.input.cur_pos();

                    unsafe {
                        self.input.bump();
                    }

                    // Detect a git merge-conflict marker (`<<<<<<<` at the
                    // start of a line) and report TS1185 instead of trying
                    // to lex it as JSX.
                    if had_line_break_before_last && self.is_str("<<<<<< ") {
                        let span = Span::new(cur_pos, cur_pos + BytePos(7));

                        self.emit_error_span(span, SyntaxError::TS1185);
                        self.skip_line_comment(6);
                        self.skip_space::<true>();
                        return self.read_token();
                    }

                    return Ok(Some(Token::JSXTagStart));
                }
            }
        }

        // Continue an in-progress template literal.
        if let Some(TokenContext::Tpl {}) = self.state.context.current() {
            let start = self.state.tpl_start;
            return self.read_tmpl_token(start).map(Some);
        }

        self.read_token()
    }
}
348
impl Iterator for Lexer<'_> {
    type Item = TokenAndSpan;

    /// Yields the next token with its span, converting lexing errors into
    /// `Token::Error` items rather than terminating the stream.
    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.cur_pos();

        let res = self.next_token(&mut start);

        // Fold `Result<Option<Token>, Error>` into `Option<Token>`,
        // surfacing errors as an error token.
        let token = match res.map_err(Token::Error).map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };

        let span = self.span(start);
        if let Some(ref token) = token {
            // Comments buffered while lexing this token are its leading
            // comments.
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            // Update expression-position state for the token that follows.
            self.state.update(start, token.kind());
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }

        token.map(|token| {
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }
}
389
390impl State {
391 pub fn new(syntax: Syntax, start_pos: BytePos) -> Self {
392 let context = TokenContexts(smallvec![TokenContext::BraceStmt]);
393
394 State {
395 is_expr_allowed: true,
396 next_regexp: None,
397 had_line_break: false,
398 had_line_break_before_last: false,
399 is_first: true,
400 start: BytePos(0),
401 cur_line: 1,
402 line_start: BytePos(0),
403 prev_hi: start_pos,
404 tpl_start: BytePos::DUMMY,
405 context,
406 syntax,
407 token_type: None,
408 }
409 }
410}
411
impl State {
    /// Whitespace may be skipped unless the current context preserves it
    /// (template literals and JSX text).
    pub fn can_skip_space(&self) -> bool {
        !self
            .context
            .current()
            .map(|t| t.preserve_space())
            .unwrap_or(false)
    }

    /// A line comment may trail any token except a binary operator.
    pub fn can_have_trailing_line_comment(&self) -> bool {
        match self.token_type {
            Some(TokenType::BinOp(..)) => false,
            _ => true,
        }
    }

    /// Whether a block comment after the last token should be attached to
    /// it as a trailing comment.
    pub fn can_have_trailing_comment(&self) -> bool {
        match self.token_type {
            Some(TokenType::Keyword(..)) => false,
            Some(TokenType::Semi) | Some(TokenType::LBrace) => true,
            Some(TokenType::Other {
                can_have_trailing_comment,
                ..
            }) => can_have_trailing_comment,
            _ => false,
        }
    }

    /// `true` if the most recent token was part of a template literal.
    pub fn last_was_tpl_element(&self) -> bool {
        matches!(self.token_type, Some(TokenType::Template))
    }

    /// Records `next` as the latest token and recomputes whether the token
    /// after it may start an expression.
    fn update(&mut self, start: BytePos, next: TokenKind) {
        if cfg!(feature = "debug") {
            trace!(
                "updating state: next={:?}, had_line_break={} ",
                next,
                self.had_line_break
            );
        }

        let prev = self.token_type.take();
        self.token_type = Some(TokenType::from(next));

        self.is_expr_allowed = self.is_expr_allowed_on_next(prev, start, next);
    }

    /// Core of the lexer state machine: given the previous token type and
    /// the token just produced (`next`), pushes/pops token contexts and
    /// returns whether the *following* token may begin an expression.
    fn is_expr_allowed_on_next(
        &mut self,
        prev: Option<TokenType>,
        start: BytePos,
        next: TokenKind,
    ) -> bool {
        let State {
            ref mut context,
            had_line_break,
            had_line_break_before_last,
            is_expr_allowed,
            syntax,
            ..
        } = *self;

        let is_next_keyword = matches!(next, TokenKind::Word(WordKind::Keyword(..)));

        // A keyword after `.` is a property name (`a.if`), never a keyword
        // starting an expression.
        if is_next_keyword && prev == Some(TokenType::Dot) {
            false
        } else {
            match next {
                // Closing delimiters pop the context they opened.
                TokenKind::RParen | TokenKind::RBrace => {
                    // Never pop the outermost context.
                    if context.len() == 1 {
                        return true;
                    }

                    let out = context.pop().unwrap();

                    // End of a function/class *expression* body: pop the
                    // expression marker too; the whole thing was a value.
                    if out == TokenContext::BraceStmt
                        && matches!(
                            context.current(),
                            Some(TokenContext::FnExpr | TokenContext::ClassExpr)
                        )
                    {
                        context.pop();
                        return false;
                    }

                    // `}` ending a `${...}` substitution resumes the
                    // template, so no expression follows directly.
                    if out == TokenContext::TplQuasi {
                        match context.current() {
                            Some(TokenContext::Tpl { .. }) => return false,
                            _ => return true,
                        }
                    }

                    !out.is_expr()
                }

                // `function` in expression position marks an upcoming
                // function expression body.
                TokenKind::Word(WordKind::Keyword(Keyword::Function)) => {
                    if is_expr_allowed
                        && !context.is_brace_block(prev, had_line_break, is_expr_allowed)
                    {
                        context.push(TokenContext::FnExpr);
                    }
                    false
                }

                // Same for `class` expressions.
                TokenKind::Word(WordKind::Keyword(Keyword::Class)) => {
                    if is_expr_allowed
                        && !context.is_brace_block(prev, had_line_break, is_expr_allowed)
                    {
                        context.push(TokenContext::ClassExpr);
                    }
                    false
                }

                // A `:` while a fn/class-expression marker is on top means
                // the keyword was actually a label/property name, so the
                // marker is discarded. NOTE(review): inferred from the pop;
                // confirm the exact construct this targets.
                TokenKind::Colon
                    if matches!(
                        context.current(),
                        Some(TokenContext::FnExpr | TokenContext::ClassExpr)
                    ) =>
                {
                    context.pop(); true
                }

                // `of` inside `for (... of ...)`: only a binding position
                // if the previous token could not end an expression.
                known_ident_token!("of")
                    if Some(TokenContext::ParenStmt { is_for_loop: true }) == context.current() =>
                {
                    !prev
                        .expect("context.current() if ParenStmt, so prev token cannot be None")
                        .before_expr()
                }

                // An identifier right after `let`/`const`/`var` with a line
                // break before the declaration keyword.
                TokenKind::Word(WordKind::Ident(..)) => {
                    match prev {
                        Some(prev) => match prev {
                            TokenType::Keyword(Keyword::Let)
                            | TokenType::Keyword(Keyword::Const)
                            | TokenType::Keyword(Keyword::Var)
                                if had_line_break_before_last =>
                            {
                                true
                            }
                            _ => false,
                        },
                        _ => false,
                    }
                }

                // `{` opens either a statement block or an object-literal /
                // JSX expression container, depending on position.
                TokenKind::LBrace => {
                    let cur = context.current();
                    if syntax.jsx() && cur == Some(TokenContext::JSXOpeningTag) {
                        // `<a prop={...}`: attribute expression container.
                        context.push(TokenContext::BraceExpr)
                    } else if syntax.jsx() && cur == Some(TokenContext::JSXExpr) {
                        // `<a>{...}`: child expression container.
                        context.push(TokenContext::TplQuasi);
                    } else {
                        let next_ctxt =
                            if context.is_brace_block(prev, had_line_break, is_expr_allowed) {
                                TokenContext::BraceStmt
                            } else {
                                TokenContext::BraceExpr
                            };
                        context.push(next_ctxt);
                    }
                    true
                }

                // `</`: what was lexed as an opening tag is actually a
                // closing tag.
                TokenKind::BinOp(BinOpToken::Div)
                    if syntax.jsx() && prev == Some(TokenType::JSXTagStart) =>
                {
                    context.pop();
                    context.pop(); context.push(TokenContext::JSXClosingTag); false
                }

                // `${` starts a template substitution.
                TokenKind::DollarLBrace => {
                    context.push(TokenContext::TplQuasi);
                    true
                }

                // `(` after certain keywords is a statement header; the
                // `for` header is tracked separately for `of` handling.
                TokenKind::LParen => {
                    context.push(match prev {
                        Some(TokenType::Keyword(k)) => match k {
                            Keyword::If | Keyword::With | Keyword::While => {
                                TokenContext::ParenStmt { is_for_loop: false }
                            }
                            Keyword::For => TokenContext::ParenStmt { is_for_loop: true },
                            _ => TokenContext::ParenExpr,
                        },
                        _ => TokenContext::ParenExpr,
                    });
                    true
                }

                // `++`/`--` don't change whether an expression may follow.
                TokenKind::PlusPlus | TokenKind::MinusMinus => is_expr_allowed,

                // `` ` `` either closes the current template or opens a new
                // one (recording where it started for re-lexing).
                TokenKind::BackQuote => {
                    if let Some(TokenContext::Tpl { .. }) = context.current() {
                        context.pop();
                    } else {
                        self.tpl_start = start;
                        context.push(TokenContext::Tpl);
                    }
                    false
                }

                TokenKind::JSXTagStart => {
                    context.push(TokenContext::JSXExpr); context.push(TokenContext::JSXOpeningTag); false
                }

                // `>` closing a self-closing (`<a/>`) or closing tag pops
                // back out of the element; otherwise we are now inside the
                // element's children.
                TokenKind::JSXTagEnd => {
                    let out = context.pop();
                    if (out == Some(TokenContext::JSXOpeningTag)
                        && prev == Some(TokenType::BinOp(BinOpToken::Div)))
                        || out == Some(TokenContext::JSXClosingTag)
                    {
                        context.pop();
                        context.current() == Some(TokenContext::JSXExpr)
                    } else {
                        true
                    }
                }

                _ => next.before_expr(),
            }
        }
    }
}
665
/// Stack of [`TokenContext`]s describing the constructs the lexer is
/// currently nested inside. `SmallVec` keeps typical nesting depths
/// allocation-free.
#[derive(Clone, Default)]
pub struct TokenContexts(pub(crate) SmallVec<[TokenContext; 128]>);
668
669impl TokenContexts {
670 fn is_brace_block(
673 &self,
674 prev: Option<TokenType>,
675 had_line_break: bool,
676 is_expr_allowed: bool,
677 ) -> bool {
678 if let Some(TokenType::Colon) = prev {
679 match self.current() {
680 Some(TokenContext::BraceStmt) => return true,
681 Some(TokenContext::BraceExpr) => return false,
684 _ => {}
685 };
686 }
687
688 match prev {
689 Some(TokenType::Keyword(Keyword::Return))
699 | Some(TokenType::Keyword(Keyword::Yield)) => {
700 return had_line_break;
701 }
702
703 Some(TokenType::Keyword(Keyword::Else))
704 | Some(TokenType::Semi)
705 | None
706 | Some(TokenType::RParen) => {
707 return true;
708 }
709
710 Some(TokenType::LBrace) => {
712 if self.current() == Some(TokenContext::BraceExpr) {
715 let len = self.len();
716 if let Some(TokenContext::JSXOpeningTag) = self.0.get(len - 2) {
717 return true;
718 }
719 }
720
721 return self.current() == Some(TokenContext::BraceStmt);
722 }
723
724 Some(TokenType::BinOp(BinOpToken::Lt)) | Some(TokenType::BinOp(BinOpToken::Gt)) => {
726 return true
727 }
728
729 Some(TokenType::Arrow) => return true,
731 _ => {}
732 }
733
734 if had_line_break {
735 if let Some(TokenType::Other {
736 before_expr: false, ..
737 }) = prev
738 {
739 return true;
740 }
741 }
742
743 !is_expr_allowed
744 }
745
746 #[inline]
747 pub fn len(&self) -> usize {
748 self.0.len()
749 }
750
751 #[inline]
752 pub fn is_empty(&self) -> bool {
753 self.0.is_empty()
754 }
755
756 #[inline]
757 pub fn pop(&mut self) -> Option<TokenContext> {
758 let opt = self.0.pop();
759 if cfg!(feature = "debug") {
760 trace!("context.pop({:?}): {:?}", opt, self.0);
761 }
762 opt
763 }
764
765 #[inline]
766 pub fn current(&self) -> Option<TokenContext> {
767 self.0.last().cloned()
768 }
769
770 #[inline]
771 fn push(&mut self, t: TokenContext) {
772 self.0.push(t);
773
774 if cfg!(feature = "debug") {
775 trace!("context.push({:?}): {:?}", t, self.0);
776 }
777 }
778}
779
/// Kind of syntactic construct the lexer can be nested inside. A stack of
/// these (see `TokenContexts`) disambiguates characters such as `/`, `<`,
/// `{` and `}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenContext {
    BraceStmt,
    BraceExpr,
    TplQuasi,
    ParenStmt {
        is_for_loop: bool,
    },
    ParenExpr,
    Tpl,
    FnExpr,
    ClassExpr,
    JSXOpeningTag,
    JSXClosingTag,
    JSXExpr,
}

impl TokenContext {
    /// Whether this context corresponds to an expression position (a `}`
    /// closing it does not end a statement).
    pub(crate) const fn is_expr(&self) -> bool {
        match self {
            Self::BraceExpr
            | Self::TplQuasi
            | Self::ParenExpr
            | Self::Tpl
            | Self::FnExpr
            | Self::ClassExpr
            | Self::JSXExpr => true,

            Self::BraceStmt
            | Self::ParenStmt { .. }
            | Self::JSXOpeningTag
            | Self::JSXClosingTag => false,
        }
    }

    /// Whether whitespace is significant in this context (template literal
    /// contents and JSX children).
    pub(crate) const fn preserve_space(&self) -> bool {
        matches!(self, Self::Tpl | Self::JSXExpr)
    }
}
822
/// Test helper: builds a [`Lexer`] over `s` inside a test session, runs `f`
/// on it, and verifies (in debug builds) that the context stack returned to
/// its initial single-`BraceStmt` state — i.e. that pushes and pops were
/// balanced.
#[cfg(test)]
pub(crate) fn with_lexer<F, Ret>(
    syntax: Syntax,
    target: EsVersion,
    s: &str,
    f: F,
) -> Result<Ret, ::testing::StdErr>
where
    F: FnOnce(&mut Lexer<'_>) -> Result<Ret, ()>,
{
    crate::with_test_sess(s, |_, fm| {
        let mut l = Lexer::new(syntax, target, fm, None);
        let res = f(&mut l);

        // Unbalanced context push/pop indicates a lexer bug.
        #[cfg(debug_assertions)]
        let c = TokenContexts(smallvec![TokenContext::BraceStmt]);
        #[cfg(debug_assertions)]
        debug_assert_eq!(l.state.context.0, c.0);

        res
    })
}
845
/// Test helper: lexes `s` and returns every token together with its span.
#[cfg(test)]
pub(crate) fn lex(syntax: Syntax, s: &'static str) -> Vec<TokenAndSpan> {
    with_lexer(syntax, Default::default(), s, |lexer| Ok(lexer.collect()))
        .unwrap()
}
850
/// Test helper: lexes `s` in strict module mode and returns the errors that
/// were recorded.
#[cfg(test)]
pub(crate) fn lex_module_errors(syntax: Syntax, s: &'static str) -> Vec<Error> {
    with_lexer(syntax, Default::default(), s, |lexer| {
        lexer.ctx.strict = true;
        lexer.ctx.module = true;

        // Drain every token so all errors get recorded.
        while lexer.next().is_some() {}

        Ok(lexer.take_errors())
    })
    .unwrap()
}
864
/// Test helper: lexes `s` and returns the tokens only (spans discarded).
#[cfg(test)]
pub(crate) fn lex_tokens(syntax: Syntax, s: &'static str) -> Vec<Token> {
    with_lexer(syntax, Default::default(), s, |lexer| {
        let tokens: Vec<Token> = lexer.map(|t| t.token).collect();
        Ok(tokens)
    })
    .unwrap()
}
872
/// Test helper: lexes `s` targeting ES2020 and returns both the tokens and
/// any recorded errors.
#[cfg(test)]
pub(crate) fn lex_errors(syntax: Syntax, s: &'static str) -> (Vec<Token>, Vec<Error>) {
    with_lexer(syntax, EsVersion::Es2020, s, |lexer| {
        let tokens: Vec<Token> = lexer.map(|ts| ts.token).collect();
        let errors = lexer.take_errors();
        Ok((tokens, errors))
    })
    .unwrap()
}