// swc_ecma_parser/parser/input.rs

1use std::{cell::RefCell, mem, mem::take, rc::Rc};
2
3use debug_unreachable::debug_unreachable;
4use lexer::TokenContexts;
5use swc_common::{BytePos, Span};
6
7use super::Parser;
8use crate::{
9    error::Error,
10    lexer::{self},
11    token::*,
12    Context, EsVersion, Syntax,
13};
14
/// Abstraction over the parser's token source.
///
/// Clone should be cheap if you are parsing typescript because typescript
/// syntax requires backtracking.
pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
    /// Replaces the current parsing [Context].
    fn set_ctx(&mut self, ctx: Context);
    /// Returns the current parsing [Context].
    fn ctx(&self) -> Context;
    /// Returns the syntax variant being parsed.
    fn syntax(&self) -> Syntax;
    /// Returns the target ECMAScript version.
    fn target(&self) -> EsVersion;

    /// Byte position at which the token stream starts. Defaults to 0.
    fn start_pos(&self) -> BytePos {
        BytePos(0)
    }

    /// Hint for the lexer: whether an expression is allowed at this point.
    fn set_expr_allowed(&mut self, allow: bool);
    /// Hint for the lexer: the next token starting at `start` should be
    /// lexed as a regular expression.
    fn set_next_regexp(&mut self, start: Option<BytePos>);

    fn token_context(&self) -> &lexer::TokenContexts;
    fn token_context_mut(&mut self) -> &mut lexer::TokenContexts;
    fn set_token_context(&mut self, _c: lexer::TokenContexts);

    /// Implementors should use Rc<RefCell<Vec<Error>>>.
    ///
    /// It is required because parser should backtrack while parsing typescript
    /// code.
    fn add_error(&self, error: Error);

    /// Add an error which is valid syntax in script mode.
    ///
    /// These errors should be dropped if it's not a module.
    ///
    /// Implementors should check whether [Context].module is set, and buffer
    /// errors if module is false. Also, implementors should move errors to the
    /// error buffer on set_ctx if the parser mode becomes module mode.
    fn add_module_mode_error(&self, error: Error);

    /// Byte position at which the token stream ends.
    fn end_pos(&self) -> BytePos;

    /// Takes (and clears) the accumulated errors.
    fn take_errors(&mut self) -> Vec<Error>;

    /// If the program was parsed as a script, this contains the module
    /// errors should the program be identified as a module in the future.
    fn take_script_module_errors(&mut self) -> Vec<Error>;
}
57
/// A [Tokens] implementation backed by a pre-lexed, in-memory token vector.
#[derive(Clone)]
pub struct TokensInput {
    // Owning iterator over the token vector; `end_pos` inspects the
    // remaining tokens via `as_slice`.
    iter: <Vec<TokenAndSpan> as IntoIterator>::IntoIter,
    ctx: Context,
    syntax: Syntax,
    // Position of the first token (or 0 if the vector was empty).
    start_pos: BytePos,
    target: EsVersion,
    token_ctx: TokenContexts,
    // Shared (`Rc`) so clones made for backtracking report into the same
    // error buffer — see the `Tokens::add_error` doc.
    errors: Rc<RefCell<Vec<Error>>>,
    // Module-mode-only errors; flushed into `errors` by `set_ctx` when the
    // context becomes a module.
    module_errors: Rc<RefCell<Vec<Error>>>,
}
69
70impl TokensInput {
71    pub fn new(tokens: Vec<TokenAndSpan>, ctx: Context, syntax: Syntax, target: EsVersion) -> Self {
72        let start_pos = tokens.first().map(|t| t.span.lo).unwrap_or(BytePos(0));
73
74        TokensInput {
75            iter: tokens.into_iter(),
76            ctx,
77            syntax,
78            start_pos,
79            target,
80            token_ctx: Default::default(),
81            errors: Default::default(),
82            module_errors: Default::default(),
83        }
84    }
85}
86
87impl Iterator for TokensInput {
88    type Item = TokenAndSpan;
89
90    fn next(&mut self) -> Option<Self::Item> {
91        self.iter.next()
92    }
93}
94
95impl Tokens for TokensInput {
96    fn set_ctx(&mut self, ctx: Context) {
97        if ctx.module && !self.module_errors.borrow().is_empty() {
98            let mut module_errors = self.module_errors.borrow_mut();
99            self.errors.borrow_mut().append(&mut *module_errors);
100        }
101        self.ctx = ctx;
102    }
103
104    fn ctx(&self) -> Context {
105        self.ctx
106    }
107
108    fn syntax(&self) -> Syntax {
109        self.syntax
110    }
111
112    fn target(&self) -> EsVersion {
113        self.target
114    }
115
116    fn start_pos(&self) -> BytePos {
117        self.start_pos
118    }
119
120    fn set_expr_allowed(&mut self, _: bool) {}
121
122    fn set_next_regexp(&mut self, _: Option<BytePos>) {}
123
124    fn token_context(&self) -> &TokenContexts {
125        &self.token_ctx
126    }
127
128    fn token_context_mut(&mut self) -> &mut TokenContexts {
129        &mut self.token_ctx
130    }
131
132    fn set_token_context(&mut self, c: TokenContexts) {
133        self.token_ctx = c;
134    }
135
136    fn add_error(&self, error: Error) {
137        self.errors.borrow_mut().push(error);
138    }
139
140    fn add_module_mode_error(&self, error: Error) {
141        if self.ctx.module {
142            self.add_error(error);
143            return;
144        }
145        self.module_errors.borrow_mut().push(error);
146    }
147
148    fn take_errors(&mut self) -> Vec<Error> {
149        take(&mut self.errors.borrow_mut())
150    }
151
152    fn take_script_module_errors(&mut self) -> Vec<Error> {
153        take(&mut self.module_errors.borrow_mut())
154    }
155
156    fn end_pos(&self) -> BytePos {
157        self.iter
158            .as_slice()
159            .last()
160            .map(|t| t.span.hi)
161            .unwrap_or(self.start_pos)
162    }
163}
164
/// A token source that records every token it yields into a shared buffer
/// (see its `Iterator` impl), delegating everything else to `inner`.
///
/// Note: Lexer need access to parser's context to lex correctly.
#[derive(Debug)]
pub struct Capturing<I: Tokens> {
    inner: I,
    // Shared (`Rc`) so clones made for backtracking append to the same
    // capture buffer.
    captured: Rc<RefCell<Vec<TokenAndSpan>>>,
}
171
172impl<I: Tokens> Clone for Capturing<I> {
173    fn clone(&self) -> Self {
174        Capturing {
175            inner: self.inner.clone(),
176            captured: self.captured.clone(),
177        }
178    }
179}
180
181impl<I: Tokens> Capturing<I> {
182    pub fn new(input: I) -> Self {
183        Capturing {
184            inner: input,
185            captured: Default::default(),
186        }
187    }
188
189    pub fn tokens(&self) -> Rc<RefCell<Vec<TokenAndSpan>>> {
190        self.captured.clone()
191    }
192
193    /// Take captured tokens
194    pub fn take(&mut self) -> Vec<TokenAndSpan> {
195        mem::take(&mut *self.captured.borrow_mut())
196    }
197}
198
199impl<I: Tokens> Iterator for Capturing<I> {
200    type Item = TokenAndSpan;
201
202    fn next(&mut self) -> Option<Self::Item> {
203        let next = self.inner.next();
204
205        match next {
206            Some(ts) => {
207                let mut v = self.captured.borrow_mut();
208
209                // remove tokens that could change due to backtracing
210                while let Some(last) = v.last() {
211                    if last.span.lo >= ts.span.lo {
212                        v.pop();
213                    } else {
214                        break;
215                    }
216                }
217
218                v.push(ts.clone());
219
220                Some(ts)
221            }
222            None => None,
223        }
224    }
225}
226
227impl<I: Tokens> Tokens for Capturing<I> {
228    fn set_ctx(&mut self, ctx: Context) {
229        self.inner.set_ctx(ctx)
230    }
231
232    fn ctx(&self) -> Context {
233        self.inner.ctx()
234    }
235
236    fn syntax(&self) -> Syntax {
237        self.inner.syntax()
238    }
239
240    fn target(&self) -> EsVersion {
241        self.inner.target()
242    }
243
244    fn start_pos(&self) -> BytePos {
245        self.inner.start_pos()
246    }
247
248    fn set_expr_allowed(&mut self, allow: bool) {
249        self.inner.set_expr_allowed(allow)
250    }
251
252    fn set_next_regexp(&mut self, start: Option<BytePos>) {
253        self.inner.set_next_regexp(start);
254    }
255
256    fn token_context(&self) -> &TokenContexts {
257        self.inner.token_context()
258    }
259
260    fn token_context_mut(&mut self) -> &mut TokenContexts {
261        self.inner.token_context_mut()
262    }
263
264    fn set_token_context(&mut self, c: TokenContexts) {
265        self.inner.set_token_context(c)
266    }
267
268    fn add_error(&self, error: Error) {
269        self.inner.add_error(error);
270    }
271
272    fn add_module_mode_error(&self, error: Error) {
273        self.inner.add_module_mode_error(error)
274    }
275
276    fn take_errors(&mut self) -> Vec<Error> {
277        self.inner.take_errors()
278    }
279
280    fn take_script_module_errors(&mut self) -> Vec<Error> {
281        self.inner.take_script_module_errors()
282    }
283
284    fn end_pos(&self) -> BytePos {
285        self.inner.end_pos()
286    }
287}
288
/// This struct is responsible for managing current token and peeked token.
#[derive(Clone)]
pub(super) struct Buffer<I: Tokens> {
    // Underlying token source (a lexer or a pre-lexed token vector).
    iter: I,
    /// Span of the previous token.
    prev_span: Span,
    // Current token; `None` before the first `cur()` call, after `bump()`,
    // and at end of input.
    cur: Option<TokenAndSpan>,
    /// Peeked token
    next: Option<TokenAndSpan>,
}
299
impl<I: Tokens> Parser<I> {
    /// Mutable access to the token source underlying the parser's buffer.
    pub fn input(&mut self) -> &mut I {
        &mut self.input.iter
    }

    /// Shared access to the token source underlying the parser's buffer.
    pub(crate) fn input_ref(&self) -> &I {
        &self.input.iter
    }
}
309
impl<I: Tokens> Buffer<I> {
    /// Creates a buffer with both token slots empty. `prev_span` starts as an
    /// empty span at the lexer's start position so `last_pos()`/`prev_span()`
    /// are meaningful before any token has been consumed.
    pub fn new(lexer: I) -> Self {
        let start_pos = lexer.start_pos();
        Buffer {
            iter: lexer,
            cur: None,
            prev_span: Span::new(start_pos, start_pos),
            next: None,
        }
    }

    /// Re-installs `token` as the current token.
    ///
    /// Both slots must be empty. The stored token reuses `prev_span` and is
    /// marked as having no preceding line break.
    pub fn store(&mut self, token: Token) {
        debug_assert!(self.next.is_none());
        debug_assert!(self.cur.is_none());
        let span = self.prev_span;

        self.cur = Some(TokenAndSpan {
            span,
            token,
            had_line_break: false,
        });
    }

    /// Current token without advancing the iterator (debug helper).
    #[allow(dead_code)]
    pub fn cur_debug(&self) -> Option<&Token> {
        self.cur.as_ref().map(|it| &it.token)
    }

    /// Debug representation of the current token, `"<eof>"` at end of input.
    /// Cold: intended for error-reporting paths only.
    #[cold]
    #[inline(never)]
    pub fn dump_cur(&mut self) -> String {
        match self.cur() {
            Some(v) => format!("{:?}", v),
            None => "<eof>".to_string(),
        }
    }

    /// Returns current token.
    pub fn bump(&mut self) -> Token {
        let prev = match self.cur.take() {
            Some(t) => t,
            // SAFETY: callers must establish the current token (e.g. via
            // `cur()`) before calling `bump()`; checked in debug builds.
            None => unsafe {
                debug_unreachable!(
                    "Current token is `None`. Parser should not call bump() without knowing \
                     current token"
                )
            },
        };
        self.prev_span = prev.span;

        prev.token
    }

    /// True if the current-token slot is filled.
    pub fn knows_cur(&self) -> bool {
        self.cur.is_some()
    }

    /// Returns the token after the current one, filling the peek slot from
    /// the iterator on first use. Returns `None` at end of input.
    pub fn peek(&mut self) -> Option<&Token> {
        debug_assert!(
            self.cur.is_some(),
            "parser should not call peek() without knowing current token"
        );

        if self.next.is_none() {
            self.next = self.iter.next();
        }

        self.next.as_ref().map(|ts| &ts.token)
    }

    /// Returns true on eof.
    pub fn had_line_break_before_cur(&mut self) -> bool {
        // Ensure the current token is loaded before inspecting it.
        self.cur();

        self.cur
            .as_ref()
            .map(|it| it.had_line_break)
            .unwrap_or_else(|| true)
    }

    /// This returns true on eof.
    pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool {
        // Ensure the peek slot is filled before inspecting it.
        let _ = self.peek();
        self.next
            .as_ref()
            .map(|item| item.had_line_break)
            .unwrap_or({
                // return true on eof.
                true
            })
    }

    /// Get current token. Returns `None` only on eof.
    #[inline]
    pub fn cur(&mut self) -> Option<&Token> {
        if self.cur.is_none() {
            // If we have peeked a token, take it instead of calling lexer.next()
            self.cur = self.next.take().or_else(|| self.iter.next());
        }

        match &self.cur {
            Some(v) => Some(&v.token),
            None => None,
        }
    }

    /// Splits a `<<` current token into `<`, keeping only the second `<`
    /// (span advanced by one byte). Used when `<<` starts a type argument
    /// list, e.g. `A<<B>() => void>`.
    #[inline]
    pub fn cut_lshift(&mut self) {
        debug_assert!(
            self.is(&tok!("<<")),
            "parser should only call cut_lshift when encountering LShift token"
        );
        self.cur = Some(TokenAndSpan {
            token: tok!('<'),
            span: self.cur_span().with_lo(self.cur_span().lo + BytePos(1)),
            had_line_break: false,
        });
    }

    /// Merges the current `<`/`>` token with the peeked token into a single
    /// compound operator (e.g. `>` + `>=` => `>>=`), but only when the two
    /// tokens are directly adjacent (no gap between spans). Non-mergeable
    /// pairs are left untouched.
    pub fn merge_lt_gt(&mut self) {
        debug_assert!(
            self.is(&tok!('<')) || self.is(&tok!('>')),
            "parser should only call merge_lt_gt when encountering '<' or '>' token"
        );

        let span = self.cur_span();

        if self.peek().is_none() {
            return;
        }

        let next = self.next.as_ref().unwrap();

        // Only merge tokens that touch; a gap means separate operators.
        if span.hi != next.span.lo {
            return;
        }

        let cur = self.cur.take().unwrap();
        let next = self.next.take().unwrap();

        let token = match (&cur.token, &next.token) {
            (tok!('>'), tok!('>')) => tok!(">>"),
            (tok!('>'), tok!('=')) => tok!(">="),
            (tok!('>'), tok!(">>")) => tok!(">>>"),
            (tok!('>'), tok!(">=")) => tok!(">>="),
            (tok!('>'), tok!(">>=")) => tok!(">>>="),
            (tok!('<'), tok!('<')) => tok!("<<"),
            (tok!('<'), tok!('=')) => tok!("<="),
            (tok!('<'), tok!("<=")) => tok!("<<="),

            _ => {
                // Not a mergeable pair: restore both slots unchanged.
                self.cur = Some(cur);
                self.next = Some(next);
                return;
            }
        };
        let span = span.with_hi(next.span.hi);

        self.cur = Some(TokenAndSpan {
            token,
            span,
            had_line_break: cur.had_line_break,
        });
    }

    /// True if the current token equals `expected`. False on eof.
    #[inline]
    pub fn is(&mut self, expected: &Token) -> bool {
        match self.cur() {
            Some(t) => *expected == *t,
            _ => false,
        }
    }

    /// Consumes the current token if it equals `expected`; returns whether
    /// it was consumed.
    #[inline]
    pub fn eat(&mut self, expected: &Token) -> bool {
        let v = self.is(expected);
        if v {
            self.bump();
        }
        v
    }

    /// Returns start of current token.
    #[inline]
    pub fn cur_pos(&mut self) -> BytePos {
        let _ = self.cur();
        self.cur
            .as_ref()
            .map(|item| item.span.lo)
            .unwrap_or_else(|| {
                // eof
                self.last_pos()
            })
    }

    /// Span of the current token; falls back to the previous token's span
    /// when no current token is loaded.
    #[inline]
    pub fn cur_span(&self) -> Span {
        let data = self
            .cur
            .as_ref()
            .map(|item| item.span)
            .unwrap_or(self.prev_span);

        Span::new(data.lo, data.hi)
    }

    /// Returns last byte position of previous token.
    #[inline]
    pub fn last_pos(&self) -> BytePos {
        self.prev_span.hi
    }

    /// Returns span of the previous token.
    #[inline]
    pub fn prev_span(&self) -> Span {
        self.prev_span
    }

    // The remaining methods simply delegate to the underlying token source.

    #[inline]
    pub(crate) fn get_ctx(&self) -> Context {
        self.iter.ctx()
    }

    #[inline]
    pub(crate) fn set_ctx(&mut self, ctx: Context) {
        self.iter.set_ctx(ctx);
    }

    #[inline]
    pub fn syntax(&self) -> Syntax {
        self.iter.syntax()
    }

    #[inline]
    pub fn target(&self) -> EsVersion {
        self.iter.target()
    }

    #[inline]
    pub(crate) fn set_expr_allowed(&mut self, allow: bool) {
        self.iter.set_expr_allowed(allow)
    }

    #[inline]
    pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.iter.set_next_regexp(start);
    }

    #[inline]
    pub(crate) fn token_context(&self) -> &lexer::TokenContexts {
        self.iter.token_context()
    }

    #[inline]
    pub(crate) fn token_context_mut(&mut self) -> &mut lexer::TokenContexts {
        self.iter.token_context_mut()
    }

    #[inline]
    pub(crate) fn set_token_context(&mut self, c: lexer::TokenContexts) {
        self.iter.set_token_context(c)
    }

    #[inline]
    pub(crate) fn end_pos(&self) -> BytePos {
        self.iter.end_pos()
    }
}