wasmparser/
parser.rs

1use crate::binary_reader::WASM_MAGIC_NUMBER;
2use crate::prelude::*;
3#[cfg(feature = "features")]
4use crate::WasmFeatures;
5#[cfg(feature = "component-model")]
6use crate::{
7    limits::MAX_WASM_MODULE_SIZE, ComponentCanonicalSectionReader, ComponentExportSectionReader,
8    ComponentImportSectionReader, ComponentInstanceSectionReader, ComponentStartFunction,
9    ComponentTypeSectionReader, CoreTypeSectionReader, InstanceSectionReader, SectionLimited,
10};
11use crate::{
12    BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
13    ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
14    ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
15    TypeSectionReader,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number expected in the header of a core WebAssembly module.
///
/// The header word read by the parser carries the version in its low 16 bits
/// and the kind (`KIND_MODULE` / `KIND_COMPONENT`) in its high 16 bits.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

/// Version number expected in the header of a WebAssembly component.
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

/// Header "kind" value (high 16 bits of the header word) of a core module.
const KIND_MODULE: u16 = 0x00;
/// Header "kind" value (high 16 bits of the header word) of a component.
const KIND_COMPONENT: u16 = 0x01;
35
/// The binary encodings that the parser understands.
///
/// Which one applies is decided by the "kind" half of the binary's header.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
44
45/// An incremental parser of a binary WebAssembly module or component.
46///
47/// This type is intended to be used to incrementally parse a WebAssembly module
48/// or component as bytes become available for the module. This can also be used
49/// to parse modules or components that are already entirely resident within memory.
50///
51/// This primary function for a parser is the [`Parser::parse`] function which
52/// will incrementally consume input. You can also use the [`Parser::parse_all`]
53/// function to parse a module or component that is entirely resident in memory.
54#[derive(Debug, Clone)]
55pub struct Parser {
56    state: State,
57    offset: u64,
58    max_size: u64,
59    encoding: Encoding,
60    #[cfg(feature = "features")]
61    features: WasmFeatures,
62}
63
/// Position of a [`Parser`] within the overall structure of the binary.
#[derive(Debug, Clone)]
enum State {
    /// Nothing has been consumed yet; the header (magic, version and kind)
    /// is expected next. This is the state of a freshly created parser.
    Header,
    /// The header has been consumed; a section id followed by the section's
    /// length is expected next (or the end of the input).
    SectionStart,
    /// The parser is part-way through a code section.
    // NOTE(review): presumably `remaining` is the number of function bodies
    // still to be yielded and `len` the number of bytes left in the section;
    // confirm against the `FunctionBody` arm of `parse_reader`.
    FunctionBody { remaining: u32, len: u32 },
}
70
71/// A successful return payload from [`Parser::parse`].
72///
73/// On success one of two possible values can be returned, either that more data
74/// is needed to continue parsing or a chunk of the input was parsed, indicating
75/// how much of it was parsed.
76#[derive(Debug)]
77pub enum Chunk<'a> {
78    /// This can be returned at any time and indicates that more data is needed
79    /// to proceed with parsing. Zero bytes were consumed from the input to
80    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
81    /// bytes are needed to continue parsing.
82    NeedMoreData(u64),
83
84    /// A chunk was successfully parsed.
85    Parsed {
86        /// This many bytes of the `data` input to [`Parser::parse`] were
87        /// consumed to produce `payload`.
88        consumed: usize,
89        /// The value that we actually parsed.
90        payload: Payload<'a>,
91    },
92}
93
94/// Values that can be parsed from a WebAssembly module or component.
95///
96/// This enumeration is all possible chunks of pieces that can be parsed by a
97/// [`Parser`] from a binary WebAssembly module or component. Note that for many
98/// sections the entire section is parsed all at once, whereas other functions,
99/// like the code section, are parsed incrementally. This is a distinction where some
100/// sections, like the type section, are required to be fully resident in memory
101/// (fully downloaded) before proceeding. Other sections, like the code section,
102/// can be processed in a streaming fashion where each function is extracted
103/// individually so it can possibly be shipped to another thread while you wait
104/// for more functions to get downloaded.
105///
106/// Note that payloads, when returned, do not indicate that the module or component
107/// is valid. For example when you receive a `Payload::TypeSection` the type
108/// section itself has not yet actually been parsed. The reader returned will be
109/// able to parse it, but you'll have to actually iterate the reader to do the
110/// full parse. Each payload returned is intended to be a *window* into the
111/// original `data` passed to [`Parser::parse`] which can be further processed
112/// if necessary.
113#[non_exhaustive]
114pub enum Payload<'a> {
115    /// Indicates the header of a WebAssembly module or component.
116    Version {
117        /// The version number found in the header.
118        num: u16,
119        /// The encoding format being parsed.
120        encoding: Encoding,
121        /// The range of bytes that were parsed to consume the header of the
122        /// module or component. Note that this range is relative to the start
123        /// of the byte stream.
124        range: Range<usize>,
125    },
126
127    /// A module type section was received and the provided reader can be
128    /// used to parse the contents of the type section.
129    TypeSection(TypeSectionReader<'a>),
130    /// A module import section was received and the provided reader can be
131    /// used to parse the contents of the import section.
132    ImportSection(ImportSectionReader<'a>),
133    /// A module function section was received and the provided reader can be
134    /// used to parse the contents of the function section.
135    FunctionSection(FunctionSectionReader<'a>),
136    /// A module table section was received and the provided reader can be
137    /// used to parse the contents of the table section.
138    TableSection(TableSectionReader<'a>),
139    /// A module memory section was received and the provided reader can be
140    /// used to parse the contents of the memory section.
141    MemorySection(MemorySectionReader<'a>),
142    /// A module tag section was received, and the provided reader can be
143    /// used to parse the contents of the tag section.
144    TagSection(TagSectionReader<'a>),
145    /// A module global section was received and the provided reader can be
146    /// used to parse the contents of the global section.
147    GlobalSection(GlobalSectionReader<'a>),
148    /// A module export section was received, and the provided reader can be
149    /// used to parse the contents of the export section.
150    ExportSection(ExportSectionReader<'a>),
151    /// A module start section was received.
152    StartSection {
153        /// The start function index
154        func: u32,
155        /// The range of bytes that specify the `func` field, specified in
156        /// offsets relative to the start of the byte stream.
157        range: Range<usize>,
158    },
159    /// A module element section was received and the provided reader can be
160    /// used to parse the contents of the element section.
161    ElementSection(ElementSectionReader<'a>),
162    /// A module data count section was received.
163    DataCountSection {
164        /// The number of data segments.
165        count: u32,
166        /// The range of bytes that specify the `count` field, specified in
167        /// offsets relative to the start of the byte stream.
168        range: Range<usize>,
169    },
170    /// A module data section was received and the provided reader can be
171    /// used to parse the contents of the data section.
172    DataSection(DataSectionReader<'a>),
173    /// Indicator of the start of the code section of a WebAssembly module.
174    ///
175    /// This entry is returned whenever the code section starts. The `count`
176    /// field indicates how many entries are in this code section. After
177    /// receiving this start marker you're guaranteed that the next `count`
178    /// items will be either `CodeSectionEntry` or an error will be returned.
179    ///
180    /// This, unlike other sections, is intended to be used for streaming the
181    /// contents of the code section. The code section is not required to be
182    /// fully resident in memory when we parse it. Instead a [`Parser`] is
183    /// capable of parsing piece-by-piece of a code section.
184    CodeSectionStart {
185        /// The number of functions in this section.
186        count: u32,
187        /// The range of bytes that represent this section, specified in
188        /// offsets relative to the start of the byte stream.
189        range: Range<usize>,
190        /// The size, in bytes, of the remaining contents of this section.
191        ///
192        /// This can be used in combination with [`Parser::skip_section`]
193        /// where the caller will know how many bytes to skip before feeding
194        /// bytes into `Parser` again.
195        size: u32,
196    },
197    /// An entry of the code section, a function, was parsed from a WebAssembly
198    /// module.
199    ///
200    /// This entry indicates that a function was successfully received from the
201    /// code section, and the payload here is the window into the original input
202    /// where the function resides. Note that the function itself has not been
203    /// parsed, it's only been outlined. You'll need to process the
204    /// `FunctionBody` provided to test whether it parses and/or is valid.
205    CodeSectionEntry(FunctionBody<'a>),
206
207    /// A core module section was received and the provided parser can be
208    /// used to parse the nested module.
209    ///
210    /// This variant is special in that it returns a sub-`Parser`. Upon
211    /// receiving a `ModuleSection` it is expected that the returned
212    /// `Parser` will be used instead of the parent `Parser` until the parse has
213    /// finished. You'll need to feed data into the `Parser` returned until it
214    /// returns `Payload::End`. After that you'll switch back to the parent
215    /// parser to resume parsing the rest of the current component.
216    ///
217    /// Note that binaries will not be parsed correctly if you feed the data for
218    /// a nested module into the parent [`Parser`].
219    #[cfg(feature = "component-model")]
220    ModuleSection {
221        /// The parser for the nested module.
222        parser: Parser,
223        /// The range of bytes that represent the nested module in the
224        /// original byte stream.
225        ///
226        /// Note that, to better support streaming parsing and validation, the
227        /// validator does *not* check that this range is in bounds.
228        unchecked_range: Range<usize>,
229    },
230    /// A core instance section was received and the provided parser can be
231    /// used to parse the contents of the core instance section.
232    ///
233    /// Currently this section is only parsed in a component.
234    #[cfg(feature = "component-model")]
235    InstanceSection(InstanceSectionReader<'a>),
236    /// A core type section was received and the provided parser can be
237    /// used to parse the contents of the core type section.
238    ///
239    /// Currently this section is only parsed in a component.
240    #[cfg(feature = "component-model")]
241    CoreTypeSection(CoreTypeSectionReader<'a>),
242    /// A component section from a WebAssembly component was received and the
243    /// provided parser can be used to parse the nested component.
244    ///
245    /// This variant is special in that it returns a sub-`Parser`. Upon
246    /// receiving a `ComponentSection` it is expected that the returned
247    /// `Parser` will be used instead of the parent `Parser` until the parse has
248    /// finished. You'll need to feed data into the `Parser` returned until it
249    /// returns `Payload::End`. After that you'll switch back to the parent
250    /// parser to resume parsing the rest of the current component.
251    ///
252    /// Note that binaries will not be parsed correctly if you feed the data for
253    /// a nested component into the parent [`Parser`].
254    #[cfg(feature = "component-model")]
255    ComponentSection {
256        /// The parser for the nested component.
257        parser: Parser,
258        /// The range of bytes that represent the nested component in the
259        /// original byte stream.
260        ///
261        /// Note that, to better support streaming parsing and validation, the
262        /// validator does *not* check that this range is in bounds.
263        unchecked_range: Range<usize>,
264    },
265    /// A component instance section was received and the provided reader can be
266    /// used to parse the contents of the component instance section.
267    #[cfg(feature = "component-model")]
268    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
269    /// A component alias section was received and the provided reader can be
270    /// used to parse the contents of the component alias section.
271    #[cfg(feature = "component-model")]
272    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
273    /// A component type section was received and the provided reader can be
274    /// used to parse the contents of the component type section.
275    #[cfg(feature = "component-model")]
276    ComponentTypeSection(ComponentTypeSectionReader<'a>),
277    /// A component canonical section was received and the provided reader can be
278    /// used to parse the contents of the component canonical section.
279    #[cfg(feature = "component-model")]
280    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
281    /// A component start section was received.
282    #[cfg(feature = "component-model")]
283    ComponentStartSection {
284        /// The start function description.
285        start: ComponentStartFunction,
286        /// The range of bytes that specify the `start` field.
287        range: Range<usize>,
288    },
289    /// A component import section was received and the provided reader can be
290    /// used to parse the contents of the component import section.
291    #[cfg(feature = "component-model")]
292    ComponentImportSection(ComponentImportSectionReader<'a>),
293    /// A component export section was received, and the provided reader can be
294    /// used to parse the contents of the component export section.
295    #[cfg(feature = "component-model")]
296    ComponentExportSection(ComponentExportSectionReader<'a>),
297
298    /// A module or component custom section was received.
299    CustomSection(CustomSectionReader<'a>),
300
301    /// An unknown section was found.
302    ///
303    /// This variant is returned for all unknown sections encountered. This
304    /// likely wants to be interpreted as an error by consumers of the parser,
305    /// but this can also be used to parse sections currently unsupported by
306    /// the parser.
307    UnknownSection {
308        /// The 8-bit identifier for this section.
309        id: u8,
310        /// The contents of this section.
311        contents: &'a [u8],
312        /// The range of bytes, relative to the start of the original data
313        /// stream, that the contents of this section reside in.
314        range: Range<usize>,
315    },
316
317    /// The end of the WebAssembly module or component was reached.
318    ///
319    /// The value is the offset in the input byte stream where the end
320    /// was reached.
321    End(usize),
322}
323
// Section ids found in core WebAssembly modules.
//
// Component section ids (below) reuse the same numeric values with different
// meanings, so an id is only meaningful together with the binary's encoding.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids found in WebAssembly components; only compiled in when the
// `component-model` feature is enabled.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
361
362impl Parser {
363    /// Creates a new parser.
364    ///
365    /// Reports errors and ranges relative to `offset` provided, where `offset`
366    /// is some logical offset within the input stream that we're parsing.
367    pub fn new(offset: u64) -> Parser {
368        Parser {
369            state: State::Header,
370            offset,
371            max_size: u64::MAX,
372            // Assume the encoding is a module until we know otherwise
373            encoding: Encoding::Module,
374            #[cfg(feature = "features")]
375            features: WasmFeatures::all(),
376        }
377    }
378
379    /// Tests whether `bytes` looks like a core WebAssembly module.
380    ///
381    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
382    /// starts with the standard core WebAssembly header.
383    pub fn is_core_wasm(bytes: &[u8]) -> bool {
384        const HEADER: [u8; 8] = [
385            WASM_MAGIC_NUMBER[0],
386            WASM_MAGIC_NUMBER[1],
387            WASM_MAGIC_NUMBER[2],
388            WASM_MAGIC_NUMBER[3],
389            WASM_MODULE_VERSION.to_le_bytes()[0],
390            WASM_MODULE_VERSION.to_le_bytes()[1],
391            KIND_MODULE.to_le_bytes()[0],
392            KIND_MODULE.to_le_bytes()[1],
393        ];
394        bytes.starts_with(&HEADER)
395    }
396
397    /// Tests whether `bytes` looks like a WebAssembly component.
398    ///
399    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
400    /// starts with the standard WebAssembly component header.
401    pub fn is_component(bytes: &[u8]) -> bool {
402        const HEADER: [u8; 8] = [
403            WASM_MAGIC_NUMBER[0],
404            WASM_MAGIC_NUMBER[1],
405            WASM_MAGIC_NUMBER[2],
406            WASM_MAGIC_NUMBER[3],
407            WASM_COMPONENT_VERSION.to_le_bytes()[0],
408            WASM_COMPONENT_VERSION.to_le_bytes()[1],
409            KIND_COMPONENT.to_le_bytes()[0],
410            KIND_COMPONENT.to_le_bytes()[1],
411        ];
412        bytes.starts_with(&HEADER)
413    }
414
415    /// Returns the currently active set of wasm features that this parser is
416    /// using while parsing.
417    ///
418    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
419    ///
420    /// For more information see [`BinaryReader::new`].
421    #[cfg(feature = "features")]
422    pub fn features(&self) -> WasmFeatures {
423        self.features
424    }
425
426    /// Sets the wasm features active while parsing to the `features` specified.
427    ///
428    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
429    ///
430    /// For more information see [`BinaryReader::new`].
431    #[cfg(feature = "features")]
432    pub fn set_features(&mut self, features: WasmFeatures) {
433        self.features = features;
434    }
435
436    /// Returns the original offset that this parser is currently at.
437    pub fn offset(&self) -> u64 {
438        self.offset
439    }
440
441    /// Attempts to parse a chunk of data.
442    ///
443    /// This method will attempt to parse the next incremental portion of a
444    /// WebAssembly binary. Data available for the module or component is
445    /// provided as `data`, and the data can be incomplete if more data has yet
446    /// to arrive. The `eof` flag indicates whether more data will ever be received.
447    ///
448    /// There are two ways parsing can succeed with this method:
449    ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
    ///   in `data` to parse a payload. The caller needs to wait for more data to
    ///   be available in this situation before calling this method again. It is
    ///   guaranteed that this is only returned if `eof` is `false`.
454    ///
    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
    ///   successfully parsed. The payload that was parsed is available in this
    ///   variant, which also indicates how many bytes of `data` were
    ///   consumed. It's expected that the caller will not provide these bytes
    ///   back to the [`Parser`] again.
460    ///
461    /// Note that all `Chunk` return values are connected, with a lifetime, to
462    /// the input buffer. Each parsed chunk borrows the input buffer and is a
463    /// view into it for successfully parsed chunks.
464    ///
465    /// It is expected that you'll call this method until `Payload::End` is
466    /// reached, at which point you're guaranteed that the parse has completed.
467    /// Note that complete parsing, for the top-level module or component,
468    /// implies that `data` is empty and `eof` is `true`.
469    ///
470    /// # Errors
471    ///
472    /// Parse errors are returned as an `Err`. Errors can happen when the
473    /// structure of the data is unexpected or if sections are too large for
474    /// example. Note that errors are not returned for malformed *contents* of
475    /// sections here. Sections are generally not individually parsed and each
476    /// returned [`Payload`] needs to be iterated over further to detect all
477    /// errors.
478    ///
479    /// # Examples
480    ///
481    /// An example of reading a wasm file from a stream (`std::io::Read`) and
482    /// incrementally parsing it.
483    ///
484    /// ```
485    /// use std::io::Read;
486    /// use anyhow::Result;
487    /// use wasmparser::{Parser, Chunk, Payload::*};
488    ///
489    /// fn parse(mut reader: impl Read) -> Result<()> {
490    ///     let mut buf = Vec::new();
491    ///     let mut cur = Parser::new(0);
492    ///     let mut eof = false;
493    ///     let mut stack = Vec::new();
494    ///
495    ///     loop {
496    ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
497    ///             Chunk::NeedMoreData(hint) => {
498    ///                 assert!(!eof); // otherwise an error would be returned
499    ///
500    ///                 // Use the hint to preallocate more space, then read
501    ///                 // some more data into our buffer.
502    ///                 //
503    ///                 // Note that the buffer management here is not ideal,
504    ///                 // but it's compact enough to fit in an example!
505    ///                 let len = buf.len();
506    ///                 buf.extend((0..hint).map(|_| 0u8));
507    ///                 let n = reader.read(&mut buf[len..])?;
508    ///                 buf.truncate(len + n);
509    ///                 eof = n == 0;
510    ///                 continue;
511    ///             }
512    ///
513    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
514    ///         };
515    ///
516    ///         match payload {
517    ///             // Sections for WebAssembly modules
518    ///             Version { .. } => { /* ... */ }
519    ///             TypeSection(_) => { /* ... */ }
520    ///             ImportSection(_) => { /* ... */ }
521    ///             FunctionSection(_) => { /* ... */ }
522    ///             TableSection(_) => { /* ... */ }
523    ///             MemorySection(_) => { /* ... */ }
524    ///             TagSection(_) => { /* ... */ }
525    ///             GlobalSection(_) => { /* ... */ }
526    ///             ExportSection(_) => { /* ... */ }
527    ///             StartSection { .. } => { /* ... */ }
528    ///             ElementSection(_) => { /* ... */ }
529    ///             DataCountSection { .. } => { /* ... */ }
530    ///             DataSection(_) => { /* ... */ }
531    ///
532    ///             // Here we know how many functions we'll be receiving as
533    ///             // `CodeSectionEntry`, so we can prepare for that, and
534    ///             // afterwards we can parse and handle each function
535    ///             // individually.
536    ///             CodeSectionStart { .. } => { /* ... */ }
537    ///             CodeSectionEntry(body) => {
538    ///                 // here we can iterate over `body` to parse the function
539    ///                 // and its locals
540    ///             }
541    ///
542    ///             // Sections for WebAssembly components
543    ///             InstanceSection(_) => { /* ... */ }
544    ///             CoreTypeSection(_) => { /* ... */ }
545    ///             ComponentInstanceSection(_) => { /* ... */ }
546    ///             ComponentAliasSection(_) => { /* ... */ }
547    ///             ComponentTypeSection(_) => { /* ... */ }
548    ///             ComponentCanonicalSection(_) => { /* ... */ }
549    ///             ComponentStartSection { .. } => { /* ... */ }
550    ///             ComponentImportSection(_) => { /* ... */ }
551    ///             ComponentExportSection(_) => { /* ... */ }
552    ///
553    ///             ModuleSection { parser, .. }
554    ///             | ComponentSection { parser, .. } => {
555    ///                 stack.push(cur.clone());
556    ///                 cur = parser.clone();
557    ///             }
558    ///
559    ///             CustomSection(_) => { /* ... */ }
560    ///
561    ///             // Once we've reached the end of a parser we either resume
562    ///             // at the parent parser or we break out of the loop because
563    ///             // we're done.
564    ///             End(_) => {
565    ///                 if let Some(parent_parser) = stack.pop() {
566    ///                     cur = parent_parser;
567    ///                 } else {
568    ///                     break;
569    ///                 }
570    ///             }
571    ///
572    ///             // most likely you'd return an error here
573    ///             _ => { /* ... */ }
574    ///         }
575    ///
576    ///         // once we're done processing the payload we can forget the
577    ///         // original.
578    ///         buf.drain(..consumed);
579    ///     }
580    ///
581    ///     Ok(())
582    /// }
583    ///
584    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
585    /// ```
586    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
587        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
588            (&data[..(self.max_size as usize)], true)
589        } else {
590            (data, eof)
591        };
592        // TODO: thread through `offset: u64` to `BinaryReader`, remove
593        // the cast here.
594        let starting_offset = self.offset as usize;
595        let mut reader = BinaryReader::new(data, starting_offset);
596        #[cfg(feature = "features")]
597        {
598            reader.set_features(self.features);
599        }
600        match self.parse_reader(&mut reader, eof) {
601            Ok(payload) => {
602                // Be sure to update our offset with how far we got in the
603                // reader
604                let consumed = reader.original_position() - starting_offset;
605                self.offset += usize_to_u64(consumed);
606                self.max_size -= usize_to_u64(consumed);
607                Ok(Chunk::Parsed {
608                    consumed: consumed,
609                    payload,
610                })
611            }
612            Err(e) => {
613                // If we're at EOF then there's no way we can recover from any
614                // error, so continue to propagate it.
615                if eof {
616                    return Err(e);
617                }
618
619                // If our error doesn't look like it can be resolved with more
620                // data being pulled down, then propagate it, otherwise switch
621                // the error to "feed me please"
622                match e.inner.needed_hint {
623                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
624                    None => Err(e),
625                }
626            }
627        }
628    }
629
630    fn parse_reader<'a>(
631        &mut self,
632        reader: &mut BinaryReader<'a>,
633        eof: bool,
634    ) -> Result<Payload<'a>> {
635        use Payload::*;
636
637        match self.state {
638            State::Header => {
639                let start = reader.original_position();
640                let header_version = reader.read_header_version()?;
641                self.encoding = match (header_version >> 16) as u16 {
642                    KIND_MODULE => Encoding::Module,
643                    KIND_COMPONENT => Encoding::Component,
644                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
645                };
646                let num = header_version as u16;
647                self.state = State::SectionStart;
648                Ok(Version {
649                    num,
650                    encoding: self.encoding,
651                    range: start..reader.original_position(),
652                })
653            }
654            State::SectionStart => {
655                // If we're at eof and there are no bytes in our buffer, then
656                // that means we reached the end of the data since it's
657                // just a bunch of sections concatenated after the header.
658                if eof && reader.bytes_remaining() == 0 {
659                    return Ok(Payload::End(reader.original_position()));
660                }
661
662                let id_pos = reader.original_position();
663                let id = reader.read_u8()?;
664                if id & 0x80 != 0 {
665                    return Err(BinaryReaderError::new("malformed section id", id_pos));
666                }
667                let len_pos = reader.original_position();
668                let mut len = reader.read_var_u32()?;
669
670                // Test to make sure that this section actually fits within
671                // `Parser::max_size`. This doesn't matter for top-level modules
672                // but it is required for nested modules/components to correctly ensure
673                // that all sections live entirely within their section of the
674                // file.
675                let consumed = reader.original_position() - id_pos;
676                let section_overflow = self
677                    .max_size
678                    .checked_sub(usize_to_u64(consumed))
679                    .and_then(|s| s.checked_sub(len.into()))
680                    .is_none();
681                if section_overflow {
682                    return Err(BinaryReaderError::new("section too large", len_pos));
683                }
684
685                match (self.encoding, id) {
686                    // Sections for both modules and components.
687                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
688
689                    // Module sections
690                    (Encoding::Module, TYPE_SECTION) => {
691                        section(reader, len, TypeSectionReader::new, TypeSection)
692                    }
693                    (Encoding::Module, IMPORT_SECTION) => {
694                        section(reader, len, ImportSectionReader::new, ImportSection)
695                    }
696                    (Encoding::Module, FUNCTION_SECTION) => {
697                        section(reader, len, FunctionSectionReader::new, FunctionSection)
698                    }
699                    (Encoding::Module, TABLE_SECTION) => {
700                        section(reader, len, TableSectionReader::new, TableSection)
701                    }
702                    (Encoding::Module, MEMORY_SECTION) => {
703                        section(reader, len, MemorySectionReader::new, MemorySection)
704                    }
705                    (Encoding::Module, GLOBAL_SECTION) => {
706                        section(reader, len, GlobalSectionReader::new, GlobalSection)
707                    }
708                    (Encoding::Module, EXPORT_SECTION) => {
709                        section(reader, len, ExportSectionReader::new, ExportSection)
710                    }
711                    (Encoding::Module, START_SECTION) => {
712                        let (func, range) = single_item(reader, len, "start")?;
713                        Ok(StartSection { func, range })
714                    }
715                    (Encoding::Module, ELEMENT_SECTION) => {
716                        section(reader, len, ElementSectionReader::new, ElementSection)
717                    }
718                    (Encoding::Module, CODE_SECTION) => {
719                        let start = reader.original_position();
720                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
721                        let range = start..reader.original_position() + len as usize;
722                        self.state = State::FunctionBody {
723                            remaining: count,
724                            len,
725                        };
726                        Ok(CodeSectionStart {
727                            count,
728                            range,
729                            size: len,
730                        })
731                    }
732                    (Encoding::Module, DATA_SECTION) => {
733                        section(reader, len, DataSectionReader::new, DataSection)
734                    }
735                    (Encoding::Module, DATA_COUNT_SECTION) => {
736                        let (count, range) = single_item(reader, len, "data count")?;
737                        Ok(DataCountSection { count, range })
738                    }
739                    (Encoding::Module, TAG_SECTION) => {
740                        section(reader, len, TagSectionReader::new, TagSection)
741                    }
742
743                    // Component sections
744                    #[cfg(feature = "component-model")]
745                    (Encoding::Component, COMPONENT_MODULE_SECTION)
746                    | (Encoding::Component, COMPONENT_SECTION) => {
747                        if len as usize > MAX_WASM_MODULE_SIZE {
748                            bail!(
749                                len_pos,
750                                "{} section is too large",
751                                if id == 1 { "module" } else { "component " }
752                            );
753                        }
754
755                        let range = reader.original_position()
756                            ..reader.original_position() + usize::try_from(len).unwrap();
757                        self.max_size -= u64::from(len);
758                        self.offset += u64::from(len);
759                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
760                        #[cfg(feature = "features")]
761                        {
762                            parser.features = self.features;
763                        }
764                        parser.max_size = u64::from(len);
765
766                        Ok(match id {
767                            1 => ModuleSection {
768                                parser,
769                                unchecked_range: range,
770                            },
771                            4 => ComponentSection {
772                                parser,
773                                unchecked_range: range,
774                            },
775                            _ => unreachable!(),
776                        })
777                    }
778                    #[cfg(feature = "component-model")]
779                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
780                        section(reader, len, InstanceSectionReader::new, InstanceSection)
781                    }
782                    #[cfg(feature = "component-model")]
783                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
784                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
785                    }
786                    #[cfg(feature = "component-model")]
787                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
788                        reader,
789                        len,
790                        ComponentInstanceSectionReader::new,
791                        ComponentInstanceSection,
792                    ),
793                    #[cfg(feature = "component-model")]
794                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
795                        section(reader, len, SectionLimited::new, ComponentAliasSection)
796                    }
797                    #[cfg(feature = "component-model")]
798                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
799                        reader,
800                        len,
801                        ComponentTypeSectionReader::new,
802                        ComponentTypeSection,
803                    ),
804                    #[cfg(feature = "component-model")]
805                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
806                        reader,
807                        len,
808                        ComponentCanonicalSectionReader::new,
809                        ComponentCanonicalSection,
810                    ),
811                    #[cfg(feature = "component-model")]
812                    (Encoding::Component, COMPONENT_START_SECTION) => {
813                        let (start, range) = single_item(reader, len, "component start")?;
814                        Ok(ComponentStartSection { start, range })
815                    }
816                    #[cfg(feature = "component-model")]
817                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
818                        reader,
819                        len,
820                        ComponentImportSectionReader::new,
821                        ComponentImportSection,
822                    ),
823                    #[cfg(feature = "component-model")]
824                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
825                        reader,
826                        len,
827                        ComponentExportSectionReader::new,
828                        ComponentExportSection,
829                    ),
830                    (_, id) => {
831                        let offset = reader.original_position();
832                        let contents = reader.read_bytes(len as usize)?;
833                        let range = offset..offset + len as usize;
834                        Ok(UnknownSection {
835                            id,
836                            contents,
837                            range,
838                        })
839                    }
840                }
841            }
842
843            // Once we hit 0 remaining incrementally parsed items, with 0
844            // remaining bytes in each section, we're done and can switch back
845            // to parsing sections.
846            State::FunctionBody {
847                remaining: 0,
848                len: 0,
849            } => {
850                self.state = State::SectionStart;
851                self.parse_reader(reader, eof)
852            }
853
854            // ... otherwise trailing bytes with no remaining entries in these
855            // sections indicates an error.
856            State::FunctionBody { remaining: 0, len } => {
857                debug_assert!(len > 0);
858                let offset = reader.original_position();
859                Err(BinaryReaderError::new(
860                    "trailing bytes at end of section",
861                    offset,
862                ))
863            }
864
865            // Functions are relatively easy to parse when we know there's at
866            // least one remaining and at least one byte available to read
867            // things.
868            //
869            // We use the remaining length try to read a u32 size of the
870            // function, and using that size we require the entire function be
871            // resident in memory. This means that we're reading whole chunks of
872            // functions at a time.
873            //
874            // Limiting via `Parser::max_size` (nested parsing) happens above in
875            // `fn parse`, and limiting by our section size happens via
876            // `delimited`. Actual parsing of the function body is delegated to
877            // the caller to iterate over the `FunctionBody` structure.
878            State::FunctionBody { remaining, mut len } => {
879                let body = delimited(reader, &mut len, |r| {
880                    Ok(FunctionBody::new(r.read_reader()?))
881                })?;
882                self.state = State::FunctionBody {
883                    remaining: remaining - 1,
884                    len,
885                };
886                Ok(CodeSectionEntry(body))
887            }
888        }
889    }
890
891    /// Convenience function that can be used to parse a module or component
892    /// that is entirely resident in memory.
893    ///
894    /// This function will parse the `data` provided as a WebAssembly module
895    /// or component.
896    ///
897    /// Note that when this function yields sections that provide parsers,
898    /// no further action is required for those sections as payloads from
899    /// those parsers will be automatically returned.
900    ///
901    /// # Examples
902    ///
903    /// An example of reading a wasm file from a stream (`std::io::Read`) into
904    /// a buffer and then parsing it.
905    ///
906    /// ```
907    /// use std::io::Read;
908    /// use anyhow::Result;
909    /// use wasmparser::{Parser, Chunk, Payload::*};
910    ///
911    /// fn parse(mut reader: impl Read) -> Result<()> {
912    ///     let mut buf = Vec::new();
913    ///     reader.read_to_end(&mut buf)?;
914    ///     let parser = Parser::new(0);
915    ///
916    ///     for payload in parser.parse_all(&buf) {
917    ///         match payload? {
918    ///             // Sections for WebAssembly modules
919    ///             Version { .. } => { /* ... */ }
920    ///             TypeSection(_) => { /* ... */ }
921    ///             ImportSection(_) => { /* ... */ }
922    ///             FunctionSection(_) => { /* ... */ }
923    ///             TableSection(_) => { /* ... */ }
924    ///             MemorySection(_) => { /* ... */ }
925    ///             TagSection(_) => { /* ... */ }
926    ///             GlobalSection(_) => { /* ... */ }
927    ///             ExportSection(_) => { /* ... */ }
928    ///             StartSection { .. } => { /* ... */ }
929    ///             ElementSection(_) => { /* ... */ }
930    ///             DataCountSection { .. } => { /* ... */ }
931    ///             DataSection(_) => { /* ... */ }
932    ///
933    ///             // Here we know how many functions we'll be receiving as
934    ///             // `CodeSectionEntry`, so we can prepare for that, and
935    ///             // afterwards we can parse and handle each function
936    ///             // individually.
937    ///             CodeSectionStart { .. } => { /* ... */ }
938    ///             CodeSectionEntry(body) => {
939    ///                 // here we can iterate over `body` to parse the function
940    ///                 // and its locals
941    ///             }
942    ///
943    ///             // Sections for WebAssembly components
944    ///             ModuleSection { .. } => { /* ... */ }
945    ///             InstanceSection(_) => { /* ... */ }
946    ///             CoreTypeSection(_) => { /* ... */ }
947    ///             ComponentSection { .. } => { /* ... */ }
948    ///             ComponentInstanceSection(_) => { /* ... */ }
949    ///             ComponentAliasSection(_) => { /* ... */ }
950    ///             ComponentTypeSection(_) => { /* ... */ }
951    ///             ComponentCanonicalSection(_) => { /* ... */ }
952    ///             ComponentStartSection { .. } => { /* ... */ }
953    ///             ComponentImportSection(_) => { /* ... */ }
954    ///             ComponentExportSection(_) => { /* ... */ }
955    ///
956    ///             CustomSection(_) => { /* ... */ }
957    ///
958    ///             // Once we've reached the end of a parser we either resume
959    ///             // at the parent parser or the payload iterator is at its
960    ///             // end and we're done.
961    ///             End(_) => {}
962    ///
963    ///             // most likely you'd return an error here, but if you want
964    ///             // you can also inspect the raw contents of unknown sections
965    ///             other => {
966    ///                 match other.as_section() {
967    ///                     Some((id, range)) => { /* ... */ }
968    ///                     None => { /* ... */ }
969    ///                 }
970    ///             }
971    ///         }
972    ///     }
973    ///
974    ///     Ok(())
975    /// }
976    ///
977    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
978    /// ```
979    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
980        let mut stack = Vec::new();
981        let mut cur = self;
982        let mut done = false;
983        iter::from_fn(move || {
984            if done {
985                return None;
986            }
987            let payload = match cur.parse(data, true) {
988                // Propagate all errors
989                Err(e) => {
990                    done = true;
991                    return Some(Err(e));
992                }
993
994                // This isn't possible because `eof` is always true.
995                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
996
997                Ok(Chunk::Parsed { payload, consumed }) => {
998                    data = &data[consumed..];
999                    payload
1000                }
1001            };
1002
1003            match &payload {
1004                #[cfg(feature = "component-model")]
1005                Payload::ModuleSection { parser, .. }
1006                | Payload::ComponentSection { parser, .. } => {
1007                    stack.push(cur.clone());
1008                    cur = parser.clone();
1009                }
1010                Payload::End(_) => match stack.pop() {
1011                    Some(p) => cur = p,
1012                    None => done = true,
1013                },
1014
1015                _ => {}
1016            }
1017
1018            Some(Ok(payload))
1019        })
1020    }
1021
1022    /// Skip parsing the code section entirely.
1023    ///
1024    /// This function can be used to indicate, after receiving
1025    /// `CodeSectionStart`, that the section will not be parsed.
1026    ///
1027    /// The caller will be responsible for skipping `size` bytes (found in the
1028    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1029    /// after the `size` bytes have been skipped.
1030    ///
1031    /// # Panics
1032    ///
1033    /// This function will panic if the parser is not in a state where it's
1034    /// parsing the code section.
1035    ///
1036    /// # Examples
1037    ///
1038    /// ```
1039    /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1040    /// use core::ops::Range;
1041    ///
1042    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1043    ///     let mut parser = Parser::new(0);
1044    ///     loop {
1045    ///         let payload = match parser.parse(wasm, true)? {
1046    ///             Chunk::Parsed { consumed, payload } => {
1047    ///                 wasm = &wasm[consumed..];
1048    ///                 payload
1049    ///             }
1050    ///             // this state isn't possible with `eof = true`
1051    ///             Chunk::NeedMoreData(_) => unreachable!(),
1052    ///         };
1053    ///         match payload {
1054    ///             TypeSection(s) => print_range("type section", &s.range()),
1055    ///             ImportSection(s) => print_range("import section", &s.range()),
1056    ///             // .. other sections
1057    ///
1058    ///             // Print the range of the code section we see, but don't
1059    ///             // actually iterate over each individual function.
1060    ///             CodeSectionStart { range, size, .. } => {
1061    ///                 print_range("code section", &range);
1062    ///                 parser.skip_section();
1063    ///                 wasm = &wasm[size as usize..];
1064    ///             }
1065    ///             End(_) => break,
1066    ///             _ => {}
1067    ///         }
1068    ///     }
1069    ///     Ok(())
1070    /// }
1071    ///
1072    /// fn print_range(section: &str, range: &Range<usize>) {
1073    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1074    /// }
1075    /// ```
1076    pub fn skip_section(&mut self) {
1077        let skip = match self.state {
1078            State::FunctionBody { remaining: _, len } => len,
1079            _ => panic!("wrong state to call `skip_section`"),
1080        };
1081        self.offset += u64::from(skip);
1082        self.max_size -= u64::from(skip);
1083        self.state = State::SectionStart;
1084    }
1085}
1086
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1090
1091/// Parses an entire section resident in memory into a `Payload`.
1092///
1093/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1094/// to construct the section to return.
1095fn section<'a, T>(
1096    reader: &mut BinaryReader<'a>,
1097    len: u32,
1098    ctor: fn(BinaryReader<'a>) -> Result<T>,
1099    variant: fn(T) -> Payload<'a>,
1100) -> Result<Payload<'a>> {
1101    let reader = reader.skip(|r| {
1102        r.read_bytes(len as usize)?;
1103        Ok(())
1104    })?;
1105    // clear the hint for "need this many more bytes" here because we already
1106    // read all the bytes, so it's not possible to read more bytes if this
1107    // fails.
1108    let reader = ctor(reader).map_err(clear_hint)?;
1109    Ok(variant(reader))
1110}
1111
1112/// Reads a section that is represented by a single uleb-encoded `u32`.
1113fn single_item<'a, T>(
1114    reader: &mut BinaryReader<'a>,
1115    len: u32,
1116    desc: &str,
1117) -> Result<(T, Range<usize>)>
1118where
1119    T: FromReader<'a>,
1120{
1121    let range = reader.original_position()..reader.original_position() + len as usize;
1122    let mut content = reader.skip(|r| {
1123        r.read_bytes(len as usize)?;
1124        Ok(())
1125    })?;
1126    // We can't recover from "unexpected eof" here because our entire section is
1127    // already resident in memory, so clear the hint for how many more bytes are
1128    // expected.
1129    let ret = content.read().map_err(clear_hint)?;
1130    if !content.eof() {
1131        bail!(
1132            content.original_position(),
1133            "unexpected content in the {desc} section",
1134        );
1135    }
1136    Ok((ret, range))
1137}
1138
1139/// Attempts to parse using `f`.
1140///
1141/// This will update `*len` with the number of bytes consumed, and it will cause
1142/// a failure to be returned instead of the number of bytes consumed exceeds
1143/// what `*len` currently is.
1144fn delimited<'a, T>(
1145    reader: &mut BinaryReader<'a>,
1146    len: &mut u32,
1147    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1148) -> Result<T> {
1149    let start = reader.original_position();
1150    let ret = f(reader)?;
1151    *len = match (reader.original_position() - start)
1152        .try_into()
1153        .ok()
1154        .and_then(|i| len.checked_sub(i))
1155    {
1156        Some(i) => i,
1157        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1158    };
1159    Ok(ret)
1160}
1161
1162impl Default for Parser {
1163    fn default() -> Parser {
1164        Parser::new(0)
1165    }
1166}
1167
1168impl Payload<'_> {
1169    /// If this `Payload` represents a section in the original wasm module then
1170    /// the section's id and range within the original wasm binary are returned.
1171    ///
1172    /// Not all payloads refer to entire sections, such as the `Version` and
1173    /// `CodeSectionEntry` variants. These variants will return `None` from this
1174    /// function.
1175    ///
1176    /// Otherwise this function will return `Some` where the first element is
1177    /// the byte identifier for the section and the second element is the range
1178    /// of the contents of the section within the original wasm binary.
1179    ///
1180    /// The purpose of this method is to enable tools to easily iterate over
1181    /// entire sections if necessary and handle sections uniformly, for example
1182    /// dropping custom sections while preserving all other sections.
1183    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1184        use Payload::*;
1185
1186        match self {
1187            Version { .. } => None,
1188            TypeSection(s) => Some((TYPE_SECTION, s.range())),
1189            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1190            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1191            TableSection(s) => Some((TABLE_SECTION, s.range())),
1192            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1193            TagSection(s) => Some((TAG_SECTION, s.range())),
1194            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1195            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1196            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1197            DataSection(s) => Some((DATA_SECTION, s.range())),
1198            StartSection { range, .. } => Some((START_SECTION, range.clone())),
1199            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1200            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1201            CodeSectionEntry(_) => None,
1202
1203            #[cfg(feature = "component-model")]
1204            ModuleSection {
1205                unchecked_range: range,
1206                ..
1207            } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1208            #[cfg(feature = "component-model")]
1209            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1210            #[cfg(feature = "component-model")]
1211            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1212            #[cfg(feature = "component-model")]
1213            ComponentSection {
1214                unchecked_range: range,
1215                ..
1216            } => Some((COMPONENT_SECTION, range.clone())),
1217            #[cfg(feature = "component-model")]
1218            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1219            #[cfg(feature = "component-model")]
1220            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1221            #[cfg(feature = "component-model")]
1222            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1223            #[cfg(feature = "component-model")]
1224            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1225            #[cfg(feature = "component-model")]
1226            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1227            #[cfg(feature = "component-model")]
1228            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1229            #[cfg(feature = "component-model")]
1230            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1231
1232            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1233
1234            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1235
1236            End(_) => None,
1237        }
1238    }
1239}
1240
1241impl fmt::Debug for Payload<'_> {
1242    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1243        use Payload::*;
1244        match self {
1245            Version {
1246                num,
1247                encoding,
1248                range,
1249            } => f
1250                .debug_struct("Version")
1251                .field("num", num)
1252                .field("encoding", encoding)
1253                .field("range", range)
1254                .finish(),
1255
1256            // Module sections
1257            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1258            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1259            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1260            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1261            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1262            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1263            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1264            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1265            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1266            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1267            StartSection { func, range } => f
1268                .debug_struct("StartSection")
1269                .field("func", func)
1270                .field("range", range)
1271                .finish(),
1272            DataCountSection { count, range } => f
1273                .debug_struct("DataCountSection")
1274                .field("count", count)
1275                .field("range", range)
1276                .finish(),
1277            CodeSectionStart { count, range, size } => f
1278                .debug_struct("CodeSectionStart")
1279                .field("count", count)
1280                .field("range", range)
1281                .field("size", size)
1282                .finish(),
1283            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1284
1285            // Component sections
1286            #[cfg(feature = "component-model")]
1287            ModuleSection {
1288                parser: _,
1289                unchecked_range: range,
1290            } => f
1291                .debug_struct("ModuleSection")
1292                .field("range", range)
1293                .finish(),
1294            #[cfg(feature = "component-model")]
1295            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1296            #[cfg(feature = "component-model")]
1297            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1298            #[cfg(feature = "component-model")]
1299            ComponentSection {
1300                parser: _,
1301                unchecked_range: range,
1302            } => f
1303                .debug_struct("ComponentSection")
1304                .field("range", range)
1305                .finish(),
1306            #[cfg(feature = "component-model")]
1307            ComponentInstanceSection(_) => f
1308                .debug_tuple("ComponentInstanceSection")
1309                .field(&"...")
1310                .finish(),
1311            #[cfg(feature = "component-model")]
1312            ComponentAliasSection(_) => f
1313                .debug_tuple("ComponentAliasSection")
1314                .field(&"...")
1315                .finish(),
1316            #[cfg(feature = "component-model")]
1317            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1318            #[cfg(feature = "component-model")]
1319            ComponentCanonicalSection(_) => f
1320                .debug_tuple("ComponentCanonicalSection")
1321                .field(&"...")
1322                .finish(),
1323            #[cfg(feature = "component-model")]
1324            ComponentStartSection { .. } => f
1325                .debug_tuple("ComponentStartSection")
1326                .field(&"...")
1327                .finish(),
1328            #[cfg(feature = "component-model")]
1329            ComponentImportSection(_) => f
1330                .debug_tuple("ComponentImportSection")
1331                .field(&"...")
1332                .finish(),
1333            #[cfg(feature = "component-model")]
1334            ComponentExportSection(_) => f
1335                .debug_tuple("ComponentExportSection")
1336                .field(&"...")
1337                .finish(),
1338
1339            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1340
1341            UnknownSection { id, range, .. } => f
1342                .debug_struct("UnknownSection")
1343                .field("id", id)
1344                .field("range", range)
1345                .finish(),
1346
1347            End(offset) => f.debug_tuple("End").field(offset).finish(),
1348        }
1349    }
1350}
1351
1352fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1353    err.inner.needed_hint = None;
1354    err
1355}
1356
1357#[cfg(test)]
1358mod tests {
1359    use super::*;
1360
1361    macro_rules! assert_matches {
1362        ($a:expr, $b:pat $(,)?) => {
1363            match $a {
1364                $b => {}
1365                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1366            }
1367        };
1368    }
1369
1370    #[test]
1371    fn header() {
1372        assert!(Parser::default().parse(&[], true).is_err());
1373        assert_matches!(
1374            Parser::default().parse(&[], false),
1375            Ok(Chunk::NeedMoreData(4)),
1376        );
1377        assert_matches!(
1378            Parser::default().parse(b"\0", false),
1379            Ok(Chunk::NeedMoreData(3)),
1380        );
1381        assert_matches!(
1382            Parser::default().parse(b"\0asm", false),
1383            Ok(Chunk::NeedMoreData(4)),
1384        );
1385        assert_matches!(
1386            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1387            Ok(Chunk::Parsed {
1388                consumed: 8,
1389                payload: Payload::Version { num: 1, .. },
1390            }),
1391        );
1392    }
1393
1394    #[test]
1395    fn header_iter() {
1396        for _ in Parser::default().parse_all(&[]) {}
1397        for _ in Parser::default().parse_all(b"\0") {}
1398        for _ in Parser::default().parse_all(b"\0asm") {}
1399        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1400    }
1401
1402    fn parser_after_header() -> Parser {
1403        let mut p = Parser::default();
1404        assert_matches!(
1405            p.parse(b"\0asm\x01\0\0\0", false),
1406            Ok(Chunk::Parsed {
1407                consumed: 8,
1408                payload: Payload::Version {
1409                    num: WASM_MODULE_VERSION,
1410                    encoding: Encoding::Module,
1411                    ..
1412                },
1413            }),
1414        );
1415        p
1416    }
1417
1418    fn parser_after_component_header() -> Parser {
1419        let mut p = Parser::default();
1420        assert_matches!(
1421            p.parse(b"\0asm\x0d\0\x01\0", false),
1422            Ok(Chunk::Parsed {
1423                consumed: 8,
1424                payload: Payload::Version {
1425                    num: WASM_COMPONENT_VERSION,
1426                    encoding: Encoding::Component,
1427                    ..
1428                },
1429            }),
1430        );
1431        p
1432    }
1433
1434    #[test]
1435    fn start_section() {
1436        assert_matches!(
1437            parser_after_header().parse(&[], false),
1438            Ok(Chunk::NeedMoreData(1)),
1439        );
1440        assert!(parser_after_header().parse(&[8], true).is_err());
1441        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1442        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1443        assert_matches!(
1444            parser_after_header().parse(&[8], false),
1445            Ok(Chunk::NeedMoreData(1)),
1446        );
1447        assert_matches!(
1448            parser_after_header().parse(&[8, 1], false),
1449            Ok(Chunk::NeedMoreData(1)),
1450        );
1451        assert_matches!(
1452            parser_after_header().parse(&[8, 2], false),
1453            Ok(Chunk::NeedMoreData(2)),
1454        );
1455        assert_matches!(
1456            parser_after_header().parse(&[8, 1, 1], false),
1457            Ok(Chunk::Parsed {
1458                consumed: 3,
1459                payload: Payload::StartSection { func: 1, .. },
1460            }),
1461        );
1462        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1463        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1464    }
1465
1466    #[test]
1467    fn end_works() {
1468        assert_matches!(
1469            parser_after_header().parse(&[], true),
1470            Ok(Chunk::Parsed {
1471                consumed: 0,
1472                payload: Payload::End(8),
1473            }),
1474        );
1475    }
1476
1477    #[test]
1478    fn type_section() {
1479        assert!(parser_after_header().parse(&[1], true).is_err());
1480        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1481        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1482        assert_matches!(
1483            parser_after_header().parse(&[1], false),
1484            Ok(Chunk::NeedMoreData(1)),
1485        );
1486        assert_matches!(
1487            parser_after_header().parse(&[1, 1], false),
1488            Ok(Chunk::NeedMoreData(1)),
1489        );
1490        assert_matches!(
1491            parser_after_header().parse(&[1, 1, 1], false),
1492            Ok(Chunk::Parsed {
1493                consumed: 3,
1494                payload: Payload::TypeSection(_),
1495            }),
1496        );
1497        assert_matches!(
1498            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1499            Ok(Chunk::Parsed {
1500                consumed: 3,
1501                payload: Payload::TypeSection(_),
1502            }),
1503        );
1504    }
1505
1506    #[test]
1507    fn custom_section() {
1508        assert!(parser_after_header().parse(&[0], true).is_err());
1509        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1510        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1511        assert_matches!(
1512            parser_after_header().parse(&[0, 2, 1], false),
1513            Ok(Chunk::NeedMoreData(1)),
1514        );
1515        assert_custom(
1516            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1517            3,
1518            "",
1519            11,
1520            b"",
1521            Range { start: 10, end: 11 },
1522        );
1523        assert_custom(
1524            parser_after_header()
1525                .parse(&[0, 2, 1, b'a'], false)
1526                .unwrap(),
1527            4,
1528            "a",
1529            12,
1530            b"",
1531            Range { start: 10, end: 12 },
1532        );
1533        assert_custom(
1534            parser_after_header()
1535                .parse(&[0, 2, 0, b'a'], false)
1536                .unwrap(),
1537            4,
1538            "",
1539            11,
1540            b"a",
1541            Range { start: 10, end: 12 },
1542        );
1543    }
1544
1545    fn assert_custom(
1546        chunk: Chunk<'_>,
1547        expected_consumed: usize,
1548        expected_name: &str,
1549        expected_data_offset: usize,
1550        expected_data: &[u8],
1551        expected_range: Range<usize>,
1552    ) {
1553        let (consumed, s) = match chunk {
1554            Chunk::Parsed {
1555                consumed,
1556                payload: Payload::CustomSection(s),
1557            } => (consumed, s),
1558            _ => panic!("not a custom section payload"),
1559        };
1560        assert_eq!(consumed, expected_consumed);
1561        assert_eq!(s.name(), expected_name);
1562        assert_eq!(s.data_offset(), expected_data_offset);
1563        assert_eq!(s.data(), expected_data);
1564        assert_eq!(s.range(), expected_range);
1565    }
1566
1567    #[test]
1568    fn function_section() {
1569        assert!(parser_after_header().parse(&[10], true).is_err());
1570        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1571        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1572        assert_matches!(
1573            parser_after_header().parse(&[10], false),
1574            Ok(Chunk::NeedMoreData(1))
1575        );
1576        assert_matches!(
1577            parser_after_header().parse(&[10, 1], false),
1578            Ok(Chunk::NeedMoreData(1))
1579        );
1580        let mut p = parser_after_header();
1581        assert_matches!(
1582            p.parse(&[10, 1, 0], false),
1583            Ok(Chunk::Parsed {
1584                consumed: 3,
1585                payload: Payload::CodeSectionStart { count: 0, .. },
1586            }),
1587        );
1588        assert_matches!(
1589            p.parse(&[], true),
1590            Ok(Chunk::Parsed {
1591                consumed: 0,
1592                payload: Payload::End(11),
1593            }),
1594        );
1595        let mut p = parser_after_header();
1596        assert_matches!(
1597            p.parse(&[10, 2, 1, 0], false),
1598            Ok(Chunk::Parsed {
1599                consumed: 3,
1600                payload: Payload::CodeSectionStart { count: 1, .. },
1601            }),
1602        );
1603        assert_matches!(
1604            p.parse(&[0], false),
1605            Ok(Chunk::Parsed {
1606                consumed: 1,
1607                payload: Payload::CodeSectionEntry(_),
1608            }),
1609        );
1610        assert_matches!(
1611            p.parse(&[], true),
1612            Ok(Chunk::Parsed {
1613                consumed: 0,
1614                payload: Payload::End(12),
1615            }),
1616        );
1617
1618        // 1 byte section with 1 function can't read the function body because
1619        // the section is too small
1620        let mut p = parser_after_header();
1621        assert_matches!(
1622            p.parse(&[10, 1, 1], false),
1623            Ok(Chunk::Parsed {
1624                consumed: 3,
1625                payload: Payload::CodeSectionStart { count: 1, .. },
1626            }),
1627        );
1628        assert_eq!(
1629            p.parse(&[0], false).unwrap_err().message(),
1630            "unexpected end-of-file"
1631        );
1632
1633        // section with 2 functions but section is cut off
1634        let mut p = parser_after_header();
1635        assert_matches!(
1636            p.parse(&[10, 2, 2], false),
1637            Ok(Chunk::Parsed {
1638                consumed: 3,
1639                payload: Payload::CodeSectionStart { count: 2, .. },
1640            }),
1641        );
1642        assert_matches!(
1643            p.parse(&[0], false),
1644            Ok(Chunk::Parsed {
1645                consumed: 1,
1646                payload: Payload::CodeSectionEntry(_),
1647            }),
1648        );
1649        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1650        assert_eq!(
1651            p.parse(&[0], false).unwrap_err().message(),
1652            "unexpected end-of-file",
1653        );
1654
1655        // trailing data is bad
1656        let mut p = parser_after_header();
1657        assert_matches!(
1658            p.parse(&[10, 3, 1], false),
1659            Ok(Chunk::Parsed {
1660                consumed: 3,
1661                payload: Payload::CodeSectionStart { count: 1, .. },
1662            }),
1663        );
1664        assert_matches!(
1665            p.parse(&[0], false),
1666            Ok(Chunk::Parsed {
1667                consumed: 1,
1668                payload: Payload::CodeSectionEntry(_),
1669            }),
1670        );
1671        assert_eq!(
1672            p.parse(&[0], false).unwrap_err().message(),
1673            "trailing bytes at end of section",
1674        );
1675    }
1676
1677    #[test]
1678    fn single_module() {
1679        let mut p = parser_after_component_header();
1680        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1681
1682        // A module that's 8 bytes in length
1683        let mut sub = match p.parse(&[1, 8], false) {
1684            Ok(Chunk::Parsed {
1685                consumed: 2,
1686                payload: Payload::ModuleSection { parser, .. },
1687            }) => parser,
1688            other => panic!("bad parse {:?}", other),
1689        };
1690
1691        // Parse the header of the submodule with the sub-parser.
1692        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1693        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1694        assert_matches!(
1695            sub.parse(b"\0asm\x01\0\0\0", false),
1696            Ok(Chunk::Parsed {
1697                consumed: 8,
1698                payload: Payload::Version {
1699                    num: 1,
1700                    encoding: Encoding::Module,
1701                    ..
1702                },
1703            }),
1704        );
1705
1706        // The sub-parser should be byte-limited so the next byte shouldn't get
1707        // consumed, it's intended for the parent parser.
1708        assert_matches!(
1709            sub.parse(&[10], false),
1710            Ok(Chunk::Parsed {
1711                consumed: 0,
1712                payload: Payload::End(18),
1713            }),
1714        );
1715
1716        // The parent parser should now be back to resuming, and we simulate it
1717        // being done with bytes to ensure that it's safely at the end,
1718        // completing the module code section.
1719        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1720        assert_matches!(
1721            p.parse(&[], true),
1722            Ok(Chunk::Parsed {
1723                consumed: 0,
1724                payload: Payload::End(18),
1725            }),
1726        );
1727    }
1728
1729    #[test]
1730    fn nested_section_too_big() {
1731        let mut p = parser_after_component_header();
1732
1733        // A module that's 10 bytes in length
1734        let mut sub = match p.parse(&[1, 10], false) {
1735            Ok(Chunk::Parsed {
1736                consumed: 2,
1737                payload: Payload::ModuleSection { parser, .. },
1738            }) => parser,
1739            other => panic!("bad parse {:?}", other),
1740        };
1741
1742        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1743        // module.
1744        assert_matches!(
1745            sub.parse(b"\0asm\x01\0\0\0", false),
1746            Ok(Chunk::Parsed {
1747                consumed: 8,
1748                payload: Payload::Version { num: 1, .. },
1749            }),
1750        );
1751
1752        // We can't parse a section which declares its bigger than the outer
1753        // module. This is a custom section, one byte big, with one content byte. The
1754        // content byte, however, lives outside of the parent's module code
1755        // section.
1756        assert_eq!(
1757            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1758            "section too large",
1759        );
1760    }
1761}