wast/parser.rs
//! Traits for parsing the WebAssembly Text format
//!
//! This module contains the traits, abstractions, and utilities needed to
//! define custom parsers for WebAssembly text format items. This module exposes
//! a recursive descent parsing strategy and centers around the [`Parse`] trait
//! for defining new fragments of WebAssembly text syntax.
//!
//! The top-level [`parse`] function can be used to fully parse AST fragments:
//!
//! ```
//! use wast::Wat;
//! use wast::parser::{self, ParseBuffer};
//!
//! # fn foo() -> Result<(), wast::Error> {
//! let wat = "(module (func))";
//! let buf = ParseBuffer::new(wat)?;
//! let module = parser::parse::<Wat>(&buf)?;
//! # Ok(())
//! # }
//! ```
//!
//! and you can also define your own new syntax with the [`Parse`] trait:
//!
//! ```
//! use wast::kw;
//! use wast::core::{Import, Func};
//! use wast::parser::{Parser, Parse, Result};
//!
//! // Fields of a WebAssembly module which only allows imports and functions,
//! // where all imports must come before all the functions
//! struct OnlyImportsAndFunctions<'a> {
//!     imports: Vec<Import<'a>>,
//!     functions: Vec<Func<'a>>,
//! }
//!
//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
//!     fn parse(parser: Parser<'a>) -> Result<Self> {
//!         // While the second token is `import` (the first is `(`, so we care
//!         // about the second) we parse an `ast::ModuleImport` inside of
//!         // parentheses. The `parens` function here ensures that what we
//!         // parse inside of it is surrounded by `(` and `)`.
//!         let mut imports = Vec::new();
//!         while parser.peek2::<kw::import>()? {
//!             let import = parser.parens(|p| p.parse())?;
//!             imports.push(import);
//!         }
//!
//!         // Afterwards we assume everything else is a function. Note that
//!         // `parse` here is a generic function and type inference figures out
//!         // that we're parsing functions here and imports above.
//!         let mut functions = Vec::new();
//!         while !parser.is_empty() {
//!             let func = parser.parens(|p| p.parse())?;
//!             functions.push(func);
//!         }
//!
//!         Ok(OnlyImportsAndFunctions { imports, functions })
//!     }
//! }
//! ```
//!
//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can
//! likely also draw inspiration from the excellent examples in the `syn` crate.

use crate::lexer::{Float, Integer, Lexer, Token, TokenKind};
use crate::token::Span;
use crate::Error;
use bumpalo::Bump;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::fmt;
use std::usize;

/// The maximum recursive depth of parens to parse.
///
/// This is sort of a fundamental limitation of the way this crate is
/// designed. Everything is done through recursive descent parsing which
/// means, well, that we're recursively going down the stack as we parse
/// nested data structures. While we can handle this for wasm expressions
/// since that's a pretty local decision, handling this for nested
/// modules/components would be far trickier. For now we just say that when
/// the parser goes too deep we return an error saying there are too many
/// nested items. It would be great to not return an error here, though!
#[cfg(feature = "wasm-module")]
pub(crate) const MAX_PARENS_DEPTH: usize = 100;

/// A top-level convenience parsing function that parses a `T` from `buf` and
/// requires that all tokens in `buf` are consumed.
///
/// This generic parsing function can be used to parse any `T` implementing the
/// [`Parse`] trait. It is not intended to be used from within [`Parse`] trait
/// implementations.
///
/// # Examples
///
/// ```
/// use wast::Wat;
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "(module (func))";
/// let buf = ParseBuffer::new(wat)?;
/// let module = parser::parse::<Wat>(&buf)?;
/// # Ok(())
/// # }
/// ```
///
/// or parsing simply a fragment
///
/// ```
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "12";
/// let buf = ParseBuffer::new(wat)?;
/// let val = parser::parse::<u32>(&buf)?;
/// assert_eq!(val, 12);
/// # Ok(())
/// # }
/// ```
pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> {
    let parser = buf.parser();
    let result = parser.parse()?;
    if parser.cursor().token()?.is_none() {
        Ok(result)
    } else {
        Err(parser.error("extra tokens remaining after parse"))
    }
}

/// A trait for parsing a fragment of syntax in a recursive descent fashion.
///
/// The [`Parse`] trait is the main abstraction you'll be working with when
/// defining custom parsers or custom syntax for your WebAssembly text format
/// (or when using the official format items). Almost all items in the
/// [`core`](crate::core) module implement the [`Parse`] trait, and you'll
/// commonly use this with:
///
/// * The top-level [`parse`] function to parse an entire input.
/// * The intermediate [`Parser::parse`] function to parse an item out of an
///   input stream and then parse remaining items.
///
/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the
/// parser as they parse syntax. Once a token is consumed it cannot be
/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`]
/// can be used to determine what to parse next.
///
/// ## When to parse `(` and `)`?
///
/// Conventionally types are not responsible for parsing their own `(` and `)`
/// tokens which surround the type. For example WebAssembly imports look like:
///
/// ```text
/// (import "foo" "bar" (func (type 0)))
/// ```
///
/// but the [`Import`](crate::core::Import) type parser looks like:
///
/// ```
/// # use wast::kw;
/// # use wast::parser::{Parser, Parse, Result};
/// # struct Import<'a>(&'a str);
/// impl<'a> Parse<'a> for Import<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         parser.parse::<kw::import>()?;
///         // ...
/// #       panic!()
///     }
/// }
/// ```
///
/// It is assumed here that the `(` and `)` tokens which surround an `import`
/// statement in the WebAssembly text format are parsed by the parent item
/// parsing `Import`.
///
/// Note that this is just a convention, so it's not necessarily required for
/// all types. It's recommended that your types stick to this convention where
/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying
/// to parse too many parentheses.
///
/// # Examples
///
/// Let's say you want to define your own WebAssembly text format which only
/// contains imports and functions. You also require all imports to be listed
/// before all functions. An example [`Parse`] implementation might look like:
///
/// ```
/// use wast::core::{Import, Func};
/// use wast::kw;
/// use wast::parser::{Parser, Parse, Result};
///
/// // Fields of a WebAssembly module which only allows imports and functions,
/// // where all imports must come before all the functions
/// struct OnlyImportsAndFunctions<'a> {
///     imports: Vec<Import<'a>>,
///     functions: Vec<Func<'a>>,
/// }
///
/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         // While the second token is `import` (the first is `(`, so we care
///         // about the second) we parse an `ast::ModuleImport` inside of
///         // parentheses. The `parens` function here ensures that what we
///         // parse inside of it is surrounded by `(` and `)`.
///         let mut imports = Vec::new();
///         while parser.peek2::<kw::import>()? {
///             let import = parser.parens(|p| p.parse())?;
///             imports.push(import);
///         }
///
///         // Afterwards we assume everything else is a function. Note that
///         // `parse` here is a generic function and type inference figures out
///         // that we're parsing functions here and imports above.
///         let mut functions = Vec::new();
///         while !parser.is_empty() {
///             let func = parser.parens(|p| p.parse())?;
///             functions.push(func);
///         }
///
///         Ok(OnlyImportsAndFunctions { imports, functions })
///     }
/// }
/// ```
pub trait Parse<'a>: Sized {
    /// Attempts to parse `Self` from `parser`, returning an error if it could
    /// not be parsed.
    ///
    /// This method will mutate the state of `parser` after attempting to parse
    /// an instance of `Self`. If an error happens then it is likely fatal and
    /// there is no guarantee of how many tokens have been consumed from
    /// `parser`.
    ///
    /// As recommended in the documentation of [`Parse`], implementations of
    /// this function should not start out by parsing `(` and `)` tokens, but
    /// rather the parent calling a recursive parser should parse the `(` and
    /// `)` tokens for the child item that's being parsed.
    ///
    /// # Errors
    ///
    /// This function will return an error if `Self` could not be parsed. Note
    /// that creating an [`Error`] is not exactly a cheap operation, so
    /// [`Error`] is typically fatal and propagated all the way back to the top
    /// parse call site.
    fn parse(parser: Parser<'a>) -> Result<Self>;
}

impl<'a, T> Parse<'a> for Box<T>
where
    T: Parse<'a>,
{
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(Box::new(parser.parse()?))
    }
}

/// A trait for types which can be used to "peek" to see if they're the next
/// token in an input stream of a [`Parser`].
///
/// Often when implementing [`Parse`] you'll need to query what the next token
/// in the stream is to figure out what to parse next. This [`Peek`] trait
/// defines the set of types that can be tested whether they're the next token
/// in the input stream.
///
/// Implementations of [`Peek`] should only be present on types that consume
/// exactly one token (not zero, not more, exactly one). Types implementing
/// [`Peek`] should also typically implement [`Parse`].
///
/// See the documentation of [`Parser::peek`] for example usage.
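///
/// As an illustration, a hand-written [`Peek`] implementation might look like
/// the following sketch. The `FuncKeyword` marker type here is hypothetical
/// and not part of this crate; it simply peeks for the `func` keyword using
/// the low-level [`Cursor`] API.
///
/// ```
/// use wast::parser::{Cursor, Peek, Result};
///
/// struct FuncKeyword;
///
/// impl Peek for FuncKeyword {
///     fn peek(cursor: Cursor<'_>) -> Result<bool> {
///         // `Cursor::keyword` returns the keyword's text when the next
///         // token is a keyword, so check whether that text is `func`.
///         Ok(matches!(cursor.keyword()?, Some(("func", _))))
///     }
///
///     fn display() -> &'static str {
///         "`func` keyword"
///     }
/// }
/// ```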
pub trait Peek {
    /// Tests to see whether this token is the first token within the [`Cursor`]
    /// specified.
    ///
    /// Returns `true` if [`Parse`] for this type is highly likely to succeed,
    /// barring any other error conditions (like an integer literal being too
    /// big).
    fn peek(cursor: Cursor<'_>) -> Result<bool>;

    /// The same as `peek`, except it checks the token immediately following
    /// the current token.
    fn peek2(mut cursor: Cursor<'_>) -> Result<bool> {
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        Self::peek(cursor)
    }

    /// Returns a human-readable name of this token to display when generating
    /// errors about this token missing.
    fn display() -> &'static str;
}

/// A convenience type definition for `Result` where the error is hardwired to
/// [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// A low-level buffer of tokens which represents a completely lexed file.
///
/// A `ParseBuffer` will immediately lex an entire file and then store all
/// tokens internally. A `ParseBuffer` is only used to pass to the top-level
/// [`parse`] function.
pub struct ParseBuffer<'a> {
    lexer: Lexer<'a>,
    cur: Cell<Position>,
    known_annotations: RefCell<HashMap<String, usize>>,
    track_instr_spans: bool,
    depth: Cell<usize>,
    strings: Bump,
}

/// The current position within a `Lexer` that we're at. This simultaneously
/// stores the byte position that the lexer was last positioned at as well as
/// the next significant token.
///
/// Note that "significant" here does not mean that `token` is the next token
/// to be lexed at `offset`. Instead it's the next non-whitespace,
/// non-annotation, non-comment token. This simple cache-of-sorts avoids
/// re-parsing tokens the majority of the time, or at least that's the
/// intention.
///
/// If `token` is set to `None` then it means that either it hasn't been
/// calculated yet or the lexer is at EOF. Basically it means go talk to the
/// lexer.
#[derive(Copy, Clone)]
struct Position {
    offset: usize,
    token: Option<Token>,
}

/// An in-progress parser for the tokens of a WebAssembly text file.
///
/// A `Parser` is the argument to the [`Parse`] trait and is how the input
/// stream is interacted with to parse new items. Cloning or copying a
/// [`Parser`] refers to the same stream of tokens to parse; you cannot clone a
/// [`Parser`] to get two independent positions in the input.
///
/// For more information about a [`Parser`] see its methods.
#[derive(Copy, Clone)]
pub struct Parser<'a> {
    buf: &'a ParseBuffer<'a>,
}

/// A helpful structure to perform a lookahead of one token to determine what to
/// parse.
///
/// For more information see the [`Parser::lookahead1`] method.
pub struct Lookahead1<'a> {
    parser: Parser<'a>,
    attempts: Vec<&'static str>,
}

/// An immutable cursor into a list of tokens.
///
/// This cursor cannot be mutated but can be used to parse more tokens in a list
/// of tokens. Cursors are created from the [`Parser::step`] method. This is a
/// very low-level parsing structure and you likely won't use it much.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
    parser: Parser<'a>,
    pos: Position,
}

impl ParseBuffer<'_> {
    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
    pub fn new(input: &str) -> Result<ParseBuffer<'_>> {
        ParseBuffer::new_with_lexer(Lexer::new(input))
    }

    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
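    ///
    /// A minimal sketch of how this constructor might be used when the lexer
    /// is built separately from the buffer (the input string here is just an
    /// illustrative placeholder):
    ///
    /// ```
    /// # fn foo() -> Result<(), wast::Error> {
    /// use wast::lexer::Lexer;
    /// use wast::parser::ParseBuffer;
    ///
    /// // Construct the lexer first, then hand it to the parse buffer.
    /// let lexer = Lexer::new("(module)");
    /// let buf = ParseBuffer::new_with_lexer(lexer)?;
    /// # Ok(())
    /// # }
    /// ```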
    pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> {
        Ok(ParseBuffer {
            lexer,
            depth: Cell::new(0),
            cur: Cell::new(Position {
                offset: 0,
                token: None,
            }),
            known_annotations: Default::default(),
            strings: Default::default(),
            track_instr_spans: false,
        })
    }

    /// Indicates whether the [`Expression::instr_spans`] field will be filled
    /// in.
    ///
    /// This is useful when enabling DWARF debugging information via
    /// [`EncodeOptions::dwarf`], for example.
    ///
    /// [`Expression::instr_spans`]: crate::core::Expression::instr_spans
    /// [`EncodeOptions::dwarf`]: crate::core::EncodeOptions::dwarf
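    ///
    /// A short sketch of enabling span tracking before parsing (the input
    /// string is an illustrative placeholder):
    ///
    /// ```
    /// # fn foo() -> Result<(), wast::Error> {
    /// use wast::parser::ParseBuffer;
    ///
    /// let mut buf = ParseBuffer::new("(module (func))")?;
    /// // Request that instruction spans be recorded while parsing.
    /// buf.track_instr_spans(true);
    /// # Ok(())
    /// # }
    /// ```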
    pub fn track_instr_spans(&mut self, track: bool) -> &mut Self {
        self.track_instr_spans = track;
        self
    }

    fn parser(&self) -> Parser<'_> {
        Parser { buf: self }
    }

    /// Stores an owned allocation in this `ParseBuffer` to attach the lifetime
    /// of the vector to `self`.
    ///
    /// This will return a reference to `s`, but one that's safely rooted in the
    /// `ParseBuffer`.
    fn push_str(&self, s: Vec<u8>) -> &[u8] {
        self.strings.alloc_slice_copy(&s)
    }

    /// Lexes the next "significant" token from the `pos` specified.
    ///
    /// This will skip irrelevant tokens such as whitespace, comments, and
    /// unknown annotations.
    fn advance_token(&self, mut pos: usize) -> Result<Option<Token>> {
        let token = loop {
            let token = match self.lexer.parse(&mut pos)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                // Always skip whitespace and comments.
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment => {
                    continue
                }

                // If an lparen is seen then this may be skipped if it's an
                // annotation of the form `(@foo ...)`. In this situation
                // everything up to and including the closing rparen is skipped.
                //
                // Note that the annotation is only skipped if it's an unknown
                // annotation as known annotations are specifically registered
                // as "someone's gonna parse this".
                TokenKind::LParen => {
                    if let Some(annotation) = self.lexer.annotation(pos)? {
                        let text = annotation.annotation(self.lexer.input())?;
                        match self.known_annotations.borrow().get(&text[..]) {
                            Some(0) | None => {
                                self.skip_annotation(&mut pos)?;
                                continue;
                            }
                            Some(_) => {}
                        }
                    }
                    break token;
                }
                _ => break token,
            }
        };
        Ok(Some(token))
    }

    fn skip_annotation(&self, pos: &mut usize) -> Result<()> {
        let mut depth = 1;
        let span = Span { offset: *pos };
        loop {
            let token = match self.lexer.parse(pos)? {
                Some(token) => token,
                None => {
                    break Err(Error::new(span, "unclosed annotation".to_string()));
                }
            };
            match token.kind {
                TokenKind::LParen => depth += 1,
                TokenKind::RParen => {
                    depth -= 1;
                    if depth == 0 {
                        break Ok(());
                    }
                }
                _ => {}
            }
        }
    }
}

impl<'a> Parser<'a> {
    /// Returns whether there are no more `Token` tokens to parse from this
    /// [`Parser`].
    ///
    /// This indicates that either we've reached the end of the input, or we're
    /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the
    /// `)` token.
    ///
    /// Note that if `false` is returned there *may* be more comments. Comments
    /// and whitespace are not considered for whether this parser is empty.
    pub fn is_empty(self) -> bool {
        match self.cursor().token() {
            Ok(Some(token)) => matches!(token.kind, TokenKind::RParen),
            Ok(None) => true,
            Err(_) => false,
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn has_meaningful_tokens(self) -> bool {
        self.buf.lexer.iter(0).any(|t| match t {
            Ok(token) => !matches!(
                token.kind,
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
            ),
            Err(_) => true,
        })
    }

    /// Parses a `T` from this [`Parser`].
    ///
    /// This method has a trivial definition (it simply calls
    /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is
    /// what you'll call 99% of the time in a [`Parse`] implementation in order
    /// to parse sub-items.
    ///
    /// Typically you always want to use `?` with the result of this method;
    /// you should not handle errors to decide what else to parse. To handle
    /// branches in parsing, use [`Parser::peek`].
    ///
    /// # Examples
    ///
    /// A good example of using `parse` is to see how the [`TableType`] type is
    /// parsed in this crate. A [`TableType`] is defined in the official
    /// specification as [`tabletype`][spec] and is defined as:
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types
    ///
    /// ```text
    /// tabletype ::= lim:limits et:reftype
    /// ```
    ///
    /// so to parse a [`TableType`] we recursively need to parse a [`Limits`]
    /// and a [`RefType`]
    ///
    /// ```
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct TableType<'a> {
    ///     limits: Limits,
    ///     elem: RefType<'a>,
    /// }
    ///
    /// impl<'a> Parse<'a> for TableType<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // parse the `lim` then `et` in sequence
    ///         Ok(TableType {
    ///             limits: parser.parse()?,
    ///             elem: parser.parse()?,
    ///         })
    ///     }
    /// }
    /// ```
    ///
    /// [`Limits`]: crate::core::Limits
    /// [`TableType`]: crate::core::TableType
    /// [`RefType`]: crate::core::RefType
    pub fn parse<T: Parse<'a>>(self) -> Result<T> {
        T::parse(self)
    }

    /// Performs a cheap test to see whether the current token in this stream is
    /// `T`.
    ///
    /// This method can be used to efficiently determine what next to parse. The
    /// [`Peek`] trait is defined for types which can be used to test if they're
    /// the next item in the input stream.
    ///
    /// Nothing is actually parsed in this method, nor does this mutate the
    /// state of this [`Parser`]. Instead, this simply performs a check.
    ///
    /// This method is frequently combined with the [`Parser::lookahead1`]
    /// method to automatically produce nice error messages if some tokens
    /// aren't found.
    ///
    /// # Examples
    ///
    /// For an example of using the `peek` method let's take a look at parsing
    /// the [`Limits`] type. This is [defined in the official spec][spec] as:
    ///
    /// ```text
    /// limits ::= n:u32
    ///          | n:u32 m:u32
    /// ```
    ///
    /// which means that it's either one `u32` token or two, so we need to know
    /// whether to consume two tokens or one:
    ///
    /// ```
    /// # use wast::parser::*;
    /// struct Limits {
    ///     min: u32,
    ///     max: Option<u32>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Limits {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Always parse the first number...
    ///         let min = parser.parse()?;
    ///
    ///         // ... and then test if there's a second number before parsing
    ///         let max = if parser.peek::<u32>()? {
    ///             Some(parser.parse()?)
    ///         } else {
    ///             None
    ///         };
    ///
    ///         Ok(Limits { min, max })
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits
    /// [`Limits`]: crate::core::Limits
    pub fn peek<T: Peek>(self) -> Result<bool> {
        T::peek(self.cursor())
    }

    /// Same as the [`Parser::peek`] method, except checks the next token, not
    /// the current token.
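    ///
    /// A sketch of how `peek2` is typically used: look at the token *after* a
    /// `(` to decide which parenthesized item to parse next. The `Field` type
    /// here is hypothetical and only for illustration.
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::core::{Func, Global};
    /// # use wast::parser::{Parser, Parse, Result};
    /// enum Field<'a> {
    ///     Func(Func<'a>),
    ///     Global(Global<'a>),
    /// }
    ///
    /// impl<'a> Parse<'a> for Field<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         if parser.peek2::<kw::func>()? {
    ///             Ok(Field::Func(parser.parens(|p| p.parse())?))
    ///         } else if parser.peek2::<kw::global>()? {
    ///             Ok(Field::Global(parser.parens(|p| p.parse())?))
    ///         } else {
    ///             Err(parser.error("expected a function or a global"))
    ///         }
    ///     }
    /// }
    /// ```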
    pub fn peek2<T: Peek>(self) -> Result<bool> {
        T::peek2(self.cursor())
    }

    /// Same as the [`Parser::peek2`] method, except checks the next next token,
    /// not the next token.
    pub fn peek3<T: Peek>(self) -> Result<bool> {
        let mut cursor = self.cursor();
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        T::peek(cursor)
    }

    /// A helper structure to perform a sequence of `peek` operations and if
    /// they all fail produce a nice error message.
    ///
    /// This method purely exists for conveniently producing error messages and
    /// provides no functionality that [`Parser::peek`] doesn't already give.
    /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`],
    /// which is the same method as [`Parser::peek`]. The difference is that the
    /// [`Lookahead1::error`] method needs no arguments.
    ///
    /// # Examples
    ///
    /// Let's look at the parsing of [`Index`]. This type is either a `u32` or
    /// an [`Id`] and is used in name resolution primarily. The [official
    /// grammar for an index][spec] is:
    ///
    /// ```text
    /// idx ::= x:u32
    ///       | v:id
    /// ```
    ///
    /// Which is to say that an index is either a `u32` or an [`Id`]. When
    /// parsing an [`Index`] we can do:
    ///
    /// ```
    /// # use wast::token::*;
    /// # use wast::parser::*;
    /// enum Index<'a> {
    ///     Num(u32),
    ///     Id(Id<'a>),
    /// }
    ///
    /// impl<'a> Parse<'a> for Index<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         let mut l = parser.lookahead1();
    ///         if l.peek::<Id>()? {
    ///             Ok(Index::Id(parser.parse()?))
    ///         } else if l.peek::<u32>()? {
    ///             Ok(Index::Num(parser.parse()?))
    ///         } else {
    ///             // produces error message of `expected identifier or u32`
    ///             Err(l.error())
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices
    /// [`Index`]: crate::token::Index
    /// [`Id`]: crate::token::Id
    pub fn lookahead1(self) -> Lookahead1<'a> {
        Lookahead1 {
            attempts: Vec::new(),
            parser: self,
        }
    }

    /// Parse an item surrounded by parentheses.
    ///
    /// WebAssembly's text format is all based on s-expressions, so naturally
    /// you're going to want to parse a lot of parenthesized things! As noted in
    /// the documentation of [`Parse`] you typically don't parse your own
    /// surrounding `(` and `)` tokens, but the parser above you parsed them for
    /// you. This is the method the parser above you uses.
    ///
    /// This method will parse a `(` token, and then call `f` on a sub-parser
    /// which when finished asserts that a `)` token is the next token. This
    /// requires that `f` consumes all tokens leading up to the paired `)`.
    ///
    /// Usage will often simply be `parser.parens(|p| p.parse())?` to
    /// automatically parse a type within parentheses, but you can, as always,
    /// go crazy and do whatever you'd like too.
    ///
    /// # Examples
    ///
    /// A good example of this is to see how a `Module` is parsed. This isn't
    /// the exact definition, but it's close enough!
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // And then everything else is `(field ...)`, so while we've got
    ///         // items left we continuously parse parenthesized items.
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    /// ```
    pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> {
        self.buf.depth.set(self.buf.depth.get() + 1);
        let before = self.buf.cur.get();
        let res = self.step(|cursor| {
            let mut cursor = match cursor.lparen()? {
                Some(rest) => rest,
                None => return Err(cursor.error("expected `(`")),
            };
            cursor.parser.buf.cur.set(cursor.pos);
            let result = f(cursor.parser)?;

            // Reset our cursor's state to whatever the current state of the
            // parser is.
            cursor.pos = cursor.parser.buf.cur.get();

            match cursor.rparen()? {
                Some(rest) => Ok((result, rest)),
                None => Err(cursor.error("expected `)`")),
            }
        });
        self.buf.depth.set(self.buf.depth.get() - 1);
        if res.is_err() {
            self.buf.cur.set(before);
        }
        res
    }

    /// Return the depth of nested parens we've parsed so far.
    ///
    /// This is a low-level method that is only useful for implementing
    /// recursion limits in custom parsers.
    pub fn parens_depth(&self) -> usize {
        self.buf.depth.get()
    }

    /// Checks that the parser parens depth hasn't exceeded the maximum depth.
    #[cfg(feature = "wasm-module")]
    pub(crate) fn depth_check(&self) -> Result<()> {
        if self.parens_depth() > MAX_PARENS_DEPTH {
            Err(self.error("item nesting too deep"))
        } else {
            Ok(())
        }
    }

    fn cursor(self) -> Cursor<'a> {
        Cursor {
            parser: self,
            pos: self.buf.cur.get(),
        }
    }

    /// A low-level parsing method you probably won't use.
    ///
    /// This is used to implement parsing of the most primitive types in the
    /// [`core`](crate::core) module. You probably don't want to use this, but
    /// probably want to use something like [`Parser::parse`] or
    /// [`Parser::parens`].
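    ///
    /// As a sketch of how `step` drives a [`Cursor`], the hypothetical type
    /// below (not part of this crate) grabs the text of whatever keyword comes
    /// next in the stream:
    ///
    /// ```
    /// # use wast::parser::{Parser, Parse, Result};
    /// struct AnyKeyword<'a>(&'a str);
    ///
    /// impl<'a> Parse<'a> for AnyKeyword<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         parser.step(|cursor| match cursor.keyword()? {
    ///             // Return both the parsed value and the advanced cursor.
    ///             Some((kw, rest)) => Ok((AnyKeyword(kw), rest)),
    ///             None => Err(cursor.error("expected a keyword")),
    ///         })
    ///     }
    /// }
    /// ```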
    pub fn step<F, T>(self, f: F) -> Result<T>
    where
        F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>,
    {
        let (result, cursor) = f(self.cursor())?;
        self.buf.cur.set(cursor.pos);
        Ok(result)
    }

    /// Creates an error whose line/column information is pointing at the
    /// current token.
    ///
    /// This is used to produce human-readable error messages which point to the
    /// right location in the input stream, and the `msg` here is arbitrary text
    /// used to associate with the error and indicate why it was generated.
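    ///
    /// For example, a [`Parse`] implementation that rejects its input will
    /// often end with something along these lines (a sketch; the `Unsupported`
    /// type is hypothetical):
    ///
    /// ```
    /// # use wast::parser::{Parser, Parse, Result};
    /// # struct Unsupported;
    /// impl<'a> Parse<'a> for Unsupported {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Point the error at whatever token the parser is currently on.
    ///         Err(parser.error("unsupported field"))
    ///     }
    /// }
    /// ```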
    pub fn error(self, msg: impl fmt::Display) -> Error {
        self.error_at(self.cursor().cur_span(), msg)
    }

    /// Creates an error whose line/column information is pointing at the
    /// given span.
    pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error {
        Error::parse(span, self.buf.lexer.input(), msg.to_string())
    }

    /// Returns the span of the current token
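    ///
    /// A common pattern is to record the span before parsing an item so the
    /// resulting AST node remembers where it came from. A sketch (the
    /// `FuncStart` type is hypothetical):
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::token::Span;
    /// # use wast::parser::{Parser, Parse, Result};
    /// struct FuncStart {
    ///     span: Span,
    /// }
    ///
    /// impl<'a> Parse<'a> for FuncStart {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Grab the span first, then consume the `func` keyword.
    ///         let span = parser.cur_span();
    ///         parser.parse::<kw::func>()?;
    ///         Ok(FuncStart { span })
    ///     }
    /// }
    /// ```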
    pub fn cur_span(&self) -> Span {
        self.cursor().cur_span()
    }

    /// Returns the span of the previous token
    pub fn prev_span(&self) -> Span {
        self.cursor()
            .prev_span()
            .unwrap_or_else(|| Span::from_offset(0))
    }

    /// Registers a new known annotation with this parser to allow parsing
    /// annotations with this name.
    ///
    /// [WebAssembly annotations][annotation] are a proposal for the text format
    /// which allows decorating the text format with custom structured
    /// information. By default all annotations are ignored when parsing, but
    /// the whole purpose of them is to sometimes parse them!
    ///
    /// To support parsing text annotations this method is used to allow
    /// annotations and their tokens to *not* be skipped. Once an annotation is
    /// registered with this method, then while the return value has not been
    /// dropped (e.g. the scope of where this function is called) annotations
    /// with the name `annotation` will be part of the token stream and not
    /// implicitly skipped.
    ///
    /// # Skipping annotations
    ///
    /// The behavior of skipping unknown/unregistered annotations can be
    /// somewhat subtle and surprising, so if you're interested in parsing
    /// annotations it's important to point out the importance of this method
    /// and where to call it.
    ///
    /// Generally when parsing tokens you'll be bottoming out in various
    /// `Cursor` methods. These are all documented as advancing the stream as
    /// much as possible to the next token, skipping "irrelevant stuff" like
    /// comments, whitespace, etc. The `Cursor` methods will also skip unknown
    /// annotations. This means that if you parse *any* token, it will skip over
    /// any number of annotations that are unknown at all times.
    ///
    /// To parse an annotation you must, before parsing any token of the
    /// annotation, register the annotation via this method. This includes the
    /// beginning `(` token, which is otherwise skipped if the annotation isn't
    /// marked as registered. Typically parsers parse the *contents* of an
    /// s-expression, so this means that the outer parser of an s-expression
    /// must register the custom annotation name, rather than the inner parser.
    ///
    /// # Return
    ///
    /// This function returns an RAII guard which, when dropped, will unregister
    /// the `annotation` given. Parsing `annotation` is only supported while the
    /// returned value is still alive, and once dropped the parser will go back
    /// to skipping annotations with the name `annotation`.
    ///
    /// # Example
    ///
    /// Let's see an example of how the `@name` annotation is parsed for modules
    /// to get an idea of how this works:
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::token::NameAnnotation;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     name: Option<NameAnnotation<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // Next may be `(@name "foo")`. Typically this annotation would
    ///         // be skipped, but we don't want it skipped, so we register it.
    ///         // Note that the parse implementation of
    ///         // `Option<NameAnnotation>` is the one that consumes the
    ///         // parentheses here.
    ///         let _r = parser.register_annotation("name");
    ///         let name = parser.parse()?;
    ///
    ///         // ... and normally you'd otherwise parse module fields here ...
    ///
    ///         Ok(Module { name })
    ///     }
    /// }
    /// ```
    ///
    /// Another example is how we parse the `@custom` annotation. Note that this
    /// is parsed as part of `ModuleField`, so note how the annotation is
    /// registered *before* we parse the parentheses of the annotation.
    ///
    /// ```
    /// # use wast::{kw, annotation};
    /// # use wast::core::Custom;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // register the `@custom` annotation *first* before we start
    ///         // parsing fields, because each field is contained in
    ///         // parentheses and to parse the parentheses of an annotation we
    ///         // have to know not to skip it.
    ///         let _r = parser.register_annotation("custom");
    ///
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    ///
    /// enum ModuleField<'a> {
    ///     Custom(Custom<'a>),
    ///     // ...
    /// }
    ///
    /// impl<'a> Parse<'a> for ModuleField<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Note that because we have previously registered the `@custom`
    ///         // annotation with the parser we know that `peek` methods like
    ///         // this, working on the annotation token, are able to ever
    ///         // return `true`.
    ///         if parser.peek::<annotation::custom>()? {
    ///             return Ok(ModuleField::Custom(parser.parse()?));
    ///         }
    ///
    ///         // .. typically we'd parse other module fields here...
    ///
    ///         Err(parser.error("unknown module field"))
    ///     }
    /// }
    /// ```
    ///
    /// [annotation]: https://github.com/WebAssembly/annotations
    pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b
    where
        'a: 'b,
    {
        let mut annotations = self.buf.known_annotations.borrow_mut();
        if !annotations.contains_key(annotation) {
            annotations.insert(annotation.to_string(), 0);
        }
        *annotations.get_mut(annotation).unwrap() += 1;

        return RemoveOnDrop(self, annotation);

        struct RemoveOnDrop<'a>(Parser<'a>, &'a str);

        impl Drop for RemoveOnDrop<'_> {
            fn drop(&mut self) {
                let mut annotations = self.0.buf.known_annotations.borrow_mut();
                let slot = annotations.get_mut(self.1).unwrap();
                *slot -= 1;
            }
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn track_instr_spans(&self) -> bool {
        self.buf.track_instr_spans
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn with_standard_annotations_registered<R>(
        self,
        f: impl FnOnce(Self) -> Result<R>,
    ) -> Result<R> {
        let _r = self.register_annotation("custom");
        let _r = self.register_annotation("producers");
        let _r = self.register_annotation("name");
        let _r = self.register_annotation("dylink.0");
        let _r = self.register_annotation("metadata.code.branch_hint");
        f(self)
    }
}

impl<'a> Cursor<'a> {
    /// Returns the span of the next `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub fn cur_span(&self) -> Span {
        let offset = match self.token() {
            Ok(Some(t)) => t.offset,
            Ok(None) => self.parser.buf.lexer.input().len(),
            Err(_) => self.pos.offset,
        };
        Span { offset }
    }

    /// Returns the span of the previous `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub(crate) fn prev_span(&self) -> Option<Span> {
        // TODO
        Some(Span {
            offset: self.pos.offset,
        })
        // let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?;
        // Some(Span {
        //     offset: token.offset,
        // })
    }

    /// Same as [`Parser::error`], but works with the current token in this
    /// [`Cursor`] instead.
    pub fn error(&self, msg: impl fmt::Display) -> Error {
        self.parser.error_at(self.cur_span(), msg)
    }

    /// Tests whether the next token is an lparen
    pub fn peek_lparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::LParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an rparen
    pub fn peek_rparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::RParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an id
    pub fn peek_id(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Id,
                ..
            })
        ))
    }

    /// Tests whether the next token is reserved
    pub fn peek_reserved(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Reserved,
                ..
            })
        ))
    }

    /// Tests whether the next token is a keyword
    pub fn peek_keyword(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Keyword,
                ..
            })
        ))
    }

    /// Tests whether the next token is an integer
    pub fn peek_integer(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Integer(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a float
    pub fn peek_float(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Float(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a string
    pub fn peek_string(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::String,
                ..
            })
        ))
    }

    /// Attempts to advance this cursor if the current token is a `(`.
    ///
    /// If the current token is `(`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn lparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::LParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a `)`.
    ///
    /// If the current token is `)`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn rparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::RParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Id`](crate::lexer::Token)
    ///
    /// If the current token is `Id`, returns the identifier minus the leading
    /// `$` character as well as a new [`Cursor`] pointing at the rest of the
    /// tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn id(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Id => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let id = match token.id(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => std::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap(),
        };
        Ok(Some((id, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Keyword`](crate::lexer::Token)
    ///
    /// If the current token is `Keyword`, returns the keyword as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn keyword(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Keyword => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.keyword(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Annotation`](crate::lexer::Token)
    ///
    /// If the current token is `Annotation`, returns the annotation token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn annotation(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Annotation => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let annotation = match token.annotation(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => std::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap(),
        };
        Ok(Some((annotation, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Reserved`](crate::lexer::Token)
    ///
    /// If the current token is `Reserved`, returns the reserved token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn reserved(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Reserved => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.reserved(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Integer`](crate::lexer::Token)
    ///
    /// If the current token is `Integer`, returns the integer as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn integer(mut self) -> Result<Option<(Integer<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let i = match token.kind {
            TokenKind::Integer(i) => i,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((
            token.integer(self.parser.buf.lexer.input(), i),
            self,
        )))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Float`](crate::lexer::Token)
    ///
    /// If the current token is `Float`, returns the float as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn float(mut self) -> Result<Option<(Float<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let f = match token.kind {
            TokenKind::Float(f) => f,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((token.float(self.parser.buf.lexer.input(), f), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::String`](crate::lexer::Token)
    ///
    /// If the current token is `String`, returns the byte value of the string
    /// as well as a new [`Cursor`] pointing at the rest of the tokens in the
    /// stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn string(mut self) -> Result<Option<(&'a [u8], Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::String => {}
            _ => return Ok(None),
        }
        let string = match token.string(self.parser.buf.lexer.input()) {
            Cow::Borrowed(s) => s,
            Cow::Owned(s) => self.parser.buf.push_str(s),
        };
        self.advance_past(&token);
        Ok(Some((string, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::LineComment`](crate::lexer::Token) or a
    /// [`Token::BlockComment`](crate::lexer::Token)
    ///
    /// This function will only skip whitespace, no other tokens.
    pub fn comment(mut self) -> Result<Option<(&'a str, Self)>> {
        let start = self.pos.offset;
        self.pos.token = None;
        let comment = loop {
            let token = match self.parser.buf.lexer.parse(&mut self.pos.offset)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                TokenKind::LineComment | TokenKind::BlockComment => {
                    break token.src(self.parser.buf.lexer.input());
                }
                TokenKind::Whitespace => {}
                _ => {
                    self.pos.offset = start;
                    return Ok(None);
                }
            }
        };
        Ok(Some((comment, self)))
    }

    fn token(&self) -> Result<Option<Token>> {
        match self.pos.token {
            Some(token) => Ok(Some(token)),
            None => self.parser.buf.advance_token(self.pos.offset),
        }
    }

    fn advance_past(&mut self, token: &Token) {
        self.pos.offset = token.offset + (token.len as usize);
        self.pos.token = self
            .parser
            .buf
            .advance_token(self.pos.offset)
            .unwrap_or(None);
    }
}

impl<'a> Lookahead1<'a> {
    /// Attempts to see if `T` is the next token in the [`Parser`] this
    /// [`Lookahead1`] references.
    ///
    /// For more information see [`Parser::lookahead1`] and [`Parser::peek`]
    pub fn peek<T: Peek>(&mut self) -> Result<bool> {
        Ok(if self.parser.peek::<T>()? {
            true
        } else {
            self.attempts.push(T::display());
            false
        })
    }

    /// Returns the underlying parser that this lookahead is looking at.
    pub fn parser(&self) -> Parser<'a> {
        self.parser
    }

    /// Generates an error message saying that one of the tokens passed to the
    /// [`Lookahead1::peek`] method was expected.
    ///
    /// Before calling this method you should call [`Lookahead1::peek`] for all
    /// possible tokens you'd like to parse.
    pub fn error(self) -> Error {
        match self.attempts.len() {
            0 => {
                if self.parser.is_empty() {
                    self.parser.error("unexpected end of input")
                } else {
                    self.parser.error("unexpected token")
                }
            }
            1 => {
                let message = format!("unexpected token, expected {}", self.attempts[0]);
                self.parser.error(&message)
            }
            2 => {
                let message = format!(
                    "unexpected token, expected {} or {}",
                    self.attempts[0], self.attempts[1]
                );
                self.parser.error(&message)
            }
            _ => {
                let join = self.attempts.join(", ");
                let message = format!("unexpected token, expected one of: {}", join);
                self.parser.error(&message)
            }
        }
    }
}

impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> {
    fn parse(parser: Parser<'a>) -> Result<Option<T>> {
        if parser.peek::<T>()? {
            Ok(Some(parser.parse()?))
        } else {
            Ok(None)
        }
    }
}