wasmparser/parser.rs
1use crate::binary_reader::WASM_MAGIC_NUMBER;
2use crate::prelude::*;
3#[cfg(feature = "features")]
4use crate::WasmFeatures;
5#[cfg(feature = "component-model")]
6use crate::{
7 limits::MAX_WASM_MODULE_SIZE, ComponentCanonicalSectionReader, ComponentExportSectionReader,
8 ComponentImportSectionReader, ComponentInstanceSectionReader, ComponentStartFunction,
9 ComponentTypeSectionReader, CoreTypeSectionReader, InstanceSectionReader, SectionLimited,
10};
11use crate::{
12 BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
13 ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
14 ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
15 TypeSectionReader,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number expected in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x01;

/// Version number expected in the header of a WebAssembly component.
///
/// This started at `0xa` and is incremented from there. Once the component
/// model is stabilized it will become `0x1`. History of the value:
///
/// * [????-??-??] 0xa - original version
/// * [2023-01-05] 0xb - `export` introduces an alias
/// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
/// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
///   allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0x0d;

// Values of the "kind" half of the header word, which the parser reads from
// the upper 16 bits (the version number lives in the lower 16 bits).
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;
35
/// The binary formats this parser understands.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
44
45/// An incremental parser of a binary WebAssembly module or component.
46///
47/// This type is intended to be used to incrementally parse a WebAssembly module
48/// or component as bytes become available for the module. This can also be used
49/// to parse modules or components that are already entirely resident within memory.
50///
51/// This primary function for a parser is the [`Parser::parse`] function which
52/// will incrementally consume input. You can also use the [`Parser::parse_all`]
53/// function to parse a module or component that is entirely resident in memory.
54#[derive(Debug, Clone)]
55pub struct Parser {
56 state: State,
57 offset: u64,
58 max_size: u64,
59 encoding: Encoding,
60 #[cfg(feature = "features")]
61 features: WasmFeatures,
62}
63
// Internal position of a `Parser` within the binary.
#[derive(Clone, Debug)]
enum State {
    // The module/component header has not been read yet.
    Header,
    // The next bytes are expected to begin a section (or the input ends).
    SectionStart,
    // Inside a code section: `remaining` function bodies are still expected
    // and `len` bytes of the section are still unread.
    FunctionBody { remaining: u32, len: u32 },
}
70
71/// A successful return payload from [`Parser::parse`].
72///
73/// On success one of two possible values can be returned, either that more data
74/// is needed to continue parsing or a chunk of the input was parsed, indicating
75/// how much of it was parsed.
76#[derive(Debug)]
77pub enum Chunk<'a> {
78 /// This can be returned at any time and indicates that more data is needed
79 /// to proceed with parsing. Zero bytes were consumed from the input to
80 /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
81 /// bytes are needed to continue parsing.
82 NeedMoreData(u64),
83
84 /// A chunk was successfully parsed.
85 Parsed {
86 /// This many bytes of the `data` input to [`Parser::parse`] were
87 /// consumed to produce `payload`.
88 consumed: usize,
89 /// The value that we actually parsed.
90 payload: Payload<'a>,
91 },
92}
93
94/// Values that can be parsed from a WebAssembly module or component.
95///
96/// This enumeration is all possible chunks of pieces that can be parsed by a
97/// [`Parser`] from a binary WebAssembly module or component. Note that for many
98/// sections the entire section is parsed all at once, whereas other functions,
99/// like the code section, are parsed incrementally. This is a distinction where some
100/// sections, like the type section, are required to be fully resident in memory
101/// (fully downloaded) before proceeding. Other sections, like the code section,
102/// can be processed in a streaming fashion where each function is extracted
103/// individually so it can possibly be shipped to another thread while you wait
104/// for more functions to get downloaded.
105///
106/// Note that payloads, when returned, do not indicate that the module or component
107/// is valid. For example when you receive a `Payload::TypeSection` the type
108/// section itself has not yet actually been parsed. The reader returned will be
109/// able to parse it, but you'll have to actually iterate the reader to do the
110/// full parse. Each payload returned is intended to be a *window* into the
111/// original `data` passed to [`Parser::parse`] which can be further processed
112/// if necessary.
113#[non_exhaustive]
114pub enum Payload<'a> {
115 /// Indicates the header of a WebAssembly module or component.
116 Version {
117 /// The version number found in the header.
118 num: u16,
119 /// The encoding format being parsed.
120 encoding: Encoding,
121 /// The range of bytes that were parsed to consume the header of the
122 /// module or component. Note that this range is relative to the start
123 /// of the byte stream.
124 range: Range<usize>,
125 },
126
127 /// A module type section was received and the provided reader can be
128 /// used to parse the contents of the type section.
129 TypeSection(TypeSectionReader<'a>),
130 /// A module import section was received and the provided reader can be
131 /// used to parse the contents of the import section.
132 ImportSection(ImportSectionReader<'a>),
133 /// A module function section was received and the provided reader can be
134 /// used to parse the contents of the function section.
135 FunctionSection(FunctionSectionReader<'a>),
136 /// A module table section was received and the provided reader can be
137 /// used to parse the contents of the table section.
138 TableSection(TableSectionReader<'a>),
139 /// A module memory section was received and the provided reader can be
140 /// used to parse the contents of the memory section.
141 MemorySection(MemorySectionReader<'a>),
142 /// A module tag section was received, and the provided reader can be
143 /// used to parse the contents of the tag section.
144 TagSection(TagSectionReader<'a>),
145 /// A module global section was received and the provided reader can be
146 /// used to parse the contents of the global section.
147 GlobalSection(GlobalSectionReader<'a>),
148 /// A module export section was received, and the provided reader can be
149 /// used to parse the contents of the export section.
150 ExportSection(ExportSectionReader<'a>),
151 /// A module start section was received.
152 StartSection {
153 /// The start function index
154 func: u32,
155 /// The range of bytes that specify the `func` field, specified in
156 /// offsets relative to the start of the byte stream.
157 range: Range<usize>,
158 },
159 /// A module element section was received and the provided reader can be
160 /// used to parse the contents of the element section.
161 ElementSection(ElementSectionReader<'a>),
162 /// A module data count section was received.
163 DataCountSection {
164 /// The number of data segments.
165 count: u32,
166 /// The range of bytes that specify the `count` field, specified in
167 /// offsets relative to the start of the byte stream.
168 range: Range<usize>,
169 },
170 /// A module data section was received and the provided reader can be
171 /// used to parse the contents of the data section.
172 DataSection(DataSectionReader<'a>),
173 /// Indicator of the start of the code section of a WebAssembly module.
174 ///
175 /// This entry is returned whenever the code section starts. The `count`
176 /// field indicates how many entries are in this code section. After
177 /// receiving this start marker you're guaranteed that the next `count`
178 /// items will be either `CodeSectionEntry` or an error will be returned.
179 ///
180 /// This, unlike other sections, is intended to be used for streaming the
181 /// contents of the code section. The code section is not required to be
182 /// fully resident in memory when we parse it. Instead a [`Parser`] is
183 /// capable of parsing piece-by-piece of a code section.
184 CodeSectionStart {
185 /// The number of functions in this section.
186 count: u32,
187 /// The range of bytes that represent this section, specified in
188 /// offsets relative to the start of the byte stream.
189 range: Range<usize>,
190 /// The size, in bytes, of the remaining contents of this section.
191 ///
192 /// This can be used in combination with [`Parser::skip_section`]
193 /// where the caller will know how many bytes to skip before feeding
194 /// bytes into `Parser` again.
195 size: u32,
196 },
197 /// An entry of the code section, a function, was parsed from a WebAssembly
198 /// module.
199 ///
200 /// This entry indicates that a function was successfully received from the
201 /// code section, and the payload here is the window into the original input
202 /// where the function resides. Note that the function itself has not been
203 /// parsed, it's only been outlined. You'll need to process the
204 /// `FunctionBody` provided to test whether it parses and/or is valid.
205 CodeSectionEntry(FunctionBody<'a>),
206
207 /// A core module section was received and the provided parser can be
208 /// used to parse the nested module.
209 ///
210 /// This variant is special in that it returns a sub-`Parser`. Upon
211 /// receiving a `ModuleSection` it is expected that the returned
212 /// `Parser` will be used instead of the parent `Parser` until the parse has
213 /// finished. You'll need to feed data into the `Parser` returned until it
214 /// returns `Payload::End`. After that you'll switch back to the parent
215 /// parser to resume parsing the rest of the current component.
216 ///
217 /// Note that binaries will not be parsed correctly if you feed the data for
218 /// a nested module into the parent [`Parser`].
219 #[cfg(feature = "component-model")]
220 ModuleSection {
221 /// The parser for the nested module.
222 parser: Parser,
223 /// The range of bytes that represent the nested module in the
224 /// original byte stream.
225 ///
226 /// Note that, to better support streaming parsing and validation, the
227 /// validator does *not* check that this range is in bounds.
228 unchecked_range: Range<usize>,
229 },
230 /// A core instance section was received and the provided parser can be
231 /// used to parse the contents of the core instance section.
232 ///
233 /// Currently this section is only parsed in a component.
234 #[cfg(feature = "component-model")]
235 InstanceSection(InstanceSectionReader<'a>),
236 /// A core type section was received and the provided parser can be
237 /// used to parse the contents of the core type section.
238 ///
239 /// Currently this section is only parsed in a component.
240 #[cfg(feature = "component-model")]
241 CoreTypeSection(CoreTypeSectionReader<'a>),
242 /// A component section from a WebAssembly component was received and the
243 /// provided parser can be used to parse the nested component.
244 ///
245 /// This variant is special in that it returns a sub-`Parser`. Upon
246 /// receiving a `ComponentSection` it is expected that the returned
247 /// `Parser` will be used instead of the parent `Parser` until the parse has
248 /// finished. You'll need to feed data into the `Parser` returned until it
249 /// returns `Payload::End`. After that you'll switch back to the parent
250 /// parser to resume parsing the rest of the current component.
251 ///
252 /// Note that binaries will not be parsed correctly if you feed the data for
253 /// a nested component into the parent [`Parser`].
254 #[cfg(feature = "component-model")]
255 ComponentSection {
256 /// The parser for the nested component.
257 parser: Parser,
258 /// The range of bytes that represent the nested component in the
259 /// original byte stream.
260 ///
261 /// Note that, to better support streaming parsing and validation, the
262 /// validator does *not* check that this range is in bounds.
263 unchecked_range: Range<usize>,
264 },
265 /// A component instance section was received and the provided reader can be
266 /// used to parse the contents of the component instance section.
267 #[cfg(feature = "component-model")]
268 ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
269 /// A component alias section was received and the provided reader can be
270 /// used to parse the contents of the component alias section.
271 #[cfg(feature = "component-model")]
272 ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
273 /// A component type section was received and the provided reader can be
274 /// used to parse the contents of the component type section.
275 #[cfg(feature = "component-model")]
276 ComponentTypeSection(ComponentTypeSectionReader<'a>),
277 /// A component canonical section was received and the provided reader can be
278 /// used to parse the contents of the component canonical section.
279 #[cfg(feature = "component-model")]
280 ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
281 /// A component start section was received.
282 #[cfg(feature = "component-model")]
283 ComponentStartSection {
284 /// The start function description.
285 start: ComponentStartFunction,
286 /// The range of bytes that specify the `start` field.
287 range: Range<usize>,
288 },
289 /// A component import section was received and the provided reader can be
290 /// used to parse the contents of the component import section.
291 #[cfg(feature = "component-model")]
292 ComponentImportSection(ComponentImportSectionReader<'a>),
293 /// A component export section was received, and the provided reader can be
294 /// used to parse the contents of the component export section.
295 #[cfg(feature = "component-model")]
296 ComponentExportSection(ComponentExportSectionReader<'a>),
297
298 /// A module or component custom section was received.
299 CustomSection(CustomSectionReader<'a>),
300
301 /// An unknown section was found.
302 ///
303 /// This variant is returned for all unknown sections encountered. This
304 /// likely wants to be interpreted as an error by consumers of the parser,
305 /// but this can also be used to parse sections currently unsupported by
306 /// the parser.
307 UnknownSection {
308 /// The 8-bit identifier for this section.
309 id: u8,
310 /// The contents of this section.
311 contents: &'a [u8],
312 /// The range of bytes, relative to the start of the original data
313 /// stream, that the contents of this section reside in.
314 range: Range<usize>,
315 },
316
317 /// The end of the WebAssembly module or component was reached.
318 ///
319 /// The value is the offset in the input byte stream where the end
320 /// was reached.
321 End(usize),
322}
323
// Section ids as they appear in core WebAssembly modules.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids as they appear in WebAssembly components. The numeric values
// overlap with the module ids above; the parser tells them apart by matching
// on the detected encoding together with the id.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
361
362impl Parser {
363 /// Creates a new parser.
364 ///
365 /// Reports errors and ranges relative to `offset` provided, where `offset`
366 /// is some logical offset within the input stream that we're parsing.
367 pub fn new(offset: u64) -> Parser {
368 Parser {
369 state: State::Header,
370 offset,
371 max_size: u64::MAX,
372 // Assume the encoding is a module until we know otherwise
373 encoding: Encoding::Module,
374 #[cfg(feature = "features")]
375 features: WasmFeatures::all(),
376 }
377 }
378
379 /// Tests whether `bytes` looks like a core WebAssembly module.
380 ///
381 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
382 /// starts with the standard core WebAssembly header.
383 pub fn is_core_wasm(bytes: &[u8]) -> bool {
384 const HEADER: [u8; 8] = [
385 WASM_MAGIC_NUMBER[0],
386 WASM_MAGIC_NUMBER[1],
387 WASM_MAGIC_NUMBER[2],
388 WASM_MAGIC_NUMBER[3],
389 WASM_MODULE_VERSION.to_le_bytes()[0],
390 WASM_MODULE_VERSION.to_le_bytes()[1],
391 KIND_MODULE.to_le_bytes()[0],
392 KIND_MODULE.to_le_bytes()[1],
393 ];
394 bytes.starts_with(&HEADER)
395 }
396
397 /// Tests whether `bytes` looks like a WebAssembly component.
398 ///
399 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
400 /// starts with the standard WebAssembly component header.
401 pub fn is_component(bytes: &[u8]) -> bool {
402 const HEADER: [u8; 8] = [
403 WASM_MAGIC_NUMBER[0],
404 WASM_MAGIC_NUMBER[1],
405 WASM_MAGIC_NUMBER[2],
406 WASM_MAGIC_NUMBER[3],
407 WASM_COMPONENT_VERSION.to_le_bytes()[0],
408 WASM_COMPONENT_VERSION.to_le_bytes()[1],
409 KIND_COMPONENT.to_le_bytes()[0],
410 KIND_COMPONENT.to_le_bytes()[1],
411 ];
412 bytes.starts_with(&HEADER)
413 }
414
/// Returns the set of wasm features this parser currently applies while
/// parsing.
///
/// New parsers start out with [`WasmFeatures::all()`].
///
/// See [`BinaryReader::new`] for more information.
#[cfg(feature = "features")]
pub fn features(&self) -> WasmFeatures {
    self.features
}
425
/// Replaces the set of wasm features applied while parsing with
/// `features`.
///
/// New parsers start out with [`WasmFeatures::all()`].
///
/// See [`BinaryReader::new`] for more information.
#[cfg(feature = "features")]
pub fn set_features(&mut self, features: WasmFeatures) {
    self.features = features;
}
435
436 /// Returns the original offset that this parser is currently at.
437 pub fn offset(&self) -> u64 {
438 self.offset
439 }
440
441 /// Attempts to parse a chunk of data.
442 ///
443 /// This method will attempt to parse the next incremental portion of a
444 /// WebAssembly binary. Data available for the module or component is
445 /// provided as `data`, and the data can be incomplete if more data has yet
446 /// to arrive. The `eof` flag indicates whether more data will ever be received.
447 ///
448 /// There are two ways parsing can succeed with this method:
449 ///
450 /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
451 /// in `data` to parse a payload. The caller needs to wait for more data to
452 /// be available in this situation before calling this method again. It is
453 /// guaranteed that this is only returned if `eof` is `false`.
454 ///
455 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
456 /// successfully parsed. The payload is available in this variant of what
457 /// was parsed, and this also indicates how many bytes of `data` was
458 /// consumed. It's expected that the caller will not provide these bytes
459 /// back to the [`Parser`] again.
460 ///
461 /// Note that all `Chunk` return values are connected, with a lifetime, to
462 /// the input buffer. Each parsed chunk borrows the input buffer and is a
463 /// view into it for successfully parsed chunks.
464 ///
465 /// It is expected that you'll call this method until `Payload::End` is
466 /// reached, at which point you're guaranteed that the parse has completed.
467 /// Note that complete parsing, for the top-level module or component,
468 /// implies that `data` is empty and `eof` is `true`.
469 ///
470 /// # Errors
471 ///
472 /// Parse errors are returned as an `Err`. Errors can happen when the
473 /// structure of the data is unexpected or if sections are too large for
474 /// example. Note that errors are not returned for malformed *contents* of
475 /// sections here. Sections are generally not individually parsed and each
476 /// returned [`Payload`] needs to be iterated over further to detect all
477 /// errors.
478 ///
479 /// # Examples
480 ///
481 /// An example of reading a wasm file from a stream (`std::io::Read`) and
482 /// incrementally parsing it.
483 ///
484 /// ```
485 /// use std::io::Read;
486 /// use anyhow::Result;
487 /// use wasmparser::{Parser, Chunk, Payload::*};
488 ///
489 /// fn parse(mut reader: impl Read) -> Result<()> {
490 /// let mut buf = Vec::new();
491 /// let mut cur = Parser::new(0);
492 /// let mut eof = false;
493 /// let mut stack = Vec::new();
494 ///
495 /// loop {
496 /// let (payload, consumed) = match cur.parse(&buf, eof)? {
497 /// Chunk::NeedMoreData(hint) => {
498 /// assert!(!eof); // otherwise an error would be returned
499 ///
500 /// // Use the hint to preallocate more space, then read
501 /// // some more data into our buffer.
502 /// //
503 /// // Note that the buffer management here is not ideal,
504 /// // but it's compact enough to fit in an example!
505 /// let len = buf.len();
506 /// buf.extend((0..hint).map(|_| 0u8));
507 /// let n = reader.read(&mut buf[len..])?;
508 /// buf.truncate(len + n);
509 /// eof = n == 0;
510 /// continue;
511 /// }
512 ///
513 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
514 /// };
515 ///
516 /// match payload {
517 /// // Sections for WebAssembly modules
518 /// Version { .. } => { /* ... */ }
519 /// TypeSection(_) => { /* ... */ }
520 /// ImportSection(_) => { /* ... */ }
521 /// FunctionSection(_) => { /* ... */ }
522 /// TableSection(_) => { /* ... */ }
523 /// MemorySection(_) => { /* ... */ }
524 /// TagSection(_) => { /* ... */ }
525 /// GlobalSection(_) => { /* ... */ }
526 /// ExportSection(_) => { /* ... */ }
527 /// StartSection { .. } => { /* ... */ }
528 /// ElementSection(_) => { /* ... */ }
529 /// DataCountSection { .. } => { /* ... */ }
530 /// DataSection(_) => { /* ... */ }
531 ///
532 /// // Here we know how many functions we'll be receiving as
533 /// // `CodeSectionEntry`, so we can prepare for that, and
534 /// // afterwards we can parse and handle each function
535 /// // individually.
536 /// CodeSectionStart { .. } => { /* ... */ }
537 /// CodeSectionEntry(body) => {
538 /// // here we can iterate over `body` to parse the function
539 /// // and its locals
540 /// }
541 ///
542 /// // Sections for WebAssembly components
543 /// InstanceSection(_) => { /* ... */ }
544 /// CoreTypeSection(_) => { /* ... */ }
545 /// ComponentInstanceSection(_) => { /* ... */ }
546 /// ComponentAliasSection(_) => { /* ... */ }
547 /// ComponentTypeSection(_) => { /* ... */ }
548 /// ComponentCanonicalSection(_) => { /* ... */ }
549 /// ComponentStartSection { .. } => { /* ... */ }
550 /// ComponentImportSection(_) => { /* ... */ }
551 /// ComponentExportSection(_) => { /* ... */ }
552 ///
553 /// ModuleSection { parser, .. }
554 /// | ComponentSection { parser, .. } => {
555 /// stack.push(cur.clone());
556 /// cur = parser.clone();
557 /// }
558 ///
559 /// CustomSection(_) => { /* ... */ }
560 ///
561 /// // Once we've reached the end of a parser we either resume
562 /// // at the parent parser or we break out of the loop because
563 /// // we're done.
564 /// End(_) => {
565 /// if let Some(parent_parser) = stack.pop() {
566 /// cur = parent_parser;
567 /// } else {
568 /// break;
569 /// }
570 /// }
571 ///
572 /// // most likely you'd return an error here
573 /// _ => { /* ... */ }
574 /// }
575 ///
576 /// // once we're done processing the payload we can forget the
577 /// // original.
578 /// buf.drain(..consumed);
579 /// }
580 ///
581 /// Ok(())
582 /// }
583 ///
584 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
585 /// ```
586 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
587 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
588 (&data[..(self.max_size as usize)], true)
589 } else {
590 (data, eof)
591 };
592 // TODO: thread through `offset: u64` to `BinaryReader`, remove
593 // the cast here.
594 let starting_offset = self.offset as usize;
595 let mut reader = BinaryReader::new(data, starting_offset);
596 #[cfg(feature = "features")]
597 {
598 reader.set_features(self.features);
599 }
600 match self.parse_reader(&mut reader, eof) {
601 Ok(payload) => {
602 // Be sure to update our offset with how far we got in the
603 // reader
604 let consumed = reader.original_position() - starting_offset;
605 self.offset += usize_to_u64(consumed);
606 self.max_size -= usize_to_u64(consumed);
607 Ok(Chunk::Parsed {
608 consumed: consumed,
609 payload,
610 })
611 }
612 Err(e) => {
613 // If we're at EOF then there's no way we can recover from any
614 // error, so continue to propagate it.
615 if eof {
616 return Err(e);
617 }
618
619 // If our error doesn't look like it can be resolved with more
620 // data being pulled down, then propagate it, otherwise switch
621 // the error to "feed me please"
622 match e.inner.needed_hint {
623 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
624 None => Err(e),
625 }
626 }
627 }
628 }
629
630 fn parse_reader<'a>(
631 &mut self,
632 reader: &mut BinaryReader<'a>,
633 eof: bool,
634 ) -> Result<Payload<'a>> {
635 use Payload::*;
636
637 match self.state {
638 State::Header => {
639 let start = reader.original_position();
640 let header_version = reader.read_header_version()?;
641 self.encoding = match (header_version >> 16) as u16 {
642 KIND_MODULE => Encoding::Module,
643 KIND_COMPONENT => Encoding::Component,
644 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
645 };
646 let num = header_version as u16;
647 self.state = State::SectionStart;
648 Ok(Version {
649 num,
650 encoding: self.encoding,
651 range: start..reader.original_position(),
652 })
653 }
654 State::SectionStart => {
655 // If we're at eof and there are no bytes in our buffer, then
656 // that means we reached the end of the data since it's
657 // just a bunch of sections concatenated after the header.
658 if eof && reader.bytes_remaining() == 0 {
659 return Ok(Payload::End(reader.original_position()));
660 }
661
662 let id_pos = reader.original_position();
663 let id = reader.read_u8()?;
664 if id & 0x80 != 0 {
665 return Err(BinaryReaderError::new("malformed section id", id_pos));
666 }
667 let len_pos = reader.original_position();
668 let mut len = reader.read_var_u32()?;
669
670 // Test to make sure that this section actually fits within
671 // `Parser::max_size`. This doesn't matter for top-level modules
672 // but it is required for nested modules/components to correctly ensure
673 // that all sections live entirely within their section of the
674 // file.
675 let consumed = reader.original_position() - id_pos;
676 let section_overflow = self
677 .max_size
678 .checked_sub(usize_to_u64(consumed))
679 .and_then(|s| s.checked_sub(len.into()))
680 .is_none();
681 if section_overflow {
682 return Err(BinaryReaderError::new("section too large", len_pos));
683 }
684
685 match (self.encoding, id) {
686 // Sections for both modules and components.
687 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
688
689 // Module sections
690 (Encoding::Module, TYPE_SECTION) => {
691 section(reader, len, TypeSectionReader::new, TypeSection)
692 }
693 (Encoding::Module, IMPORT_SECTION) => {
694 section(reader, len, ImportSectionReader::new, ImportSection)
695 }
696 (Encoding::Module, FUNCTION_SECTION) => {
697 section(reader, len, FunctionSectionReader::new, FunctionSection)
698 }
699 (Encoding::Module, TABLE_SECTION) => {
700 section(reader, len, TableSectionReader::new, TableSection)
701 }
702 (Encoding::Module, MEMORY_SECTION) => {
703 section(reader, len, MemorySectionReader::new, MemorySection)
704 }
705 (Encoding::Module, GLOBAL_SECTION) => {
706 section(reader, len, GlobalSectionReader::new, GlobalSection)
707 }
708 (Encoding::Module, EXPORT_SECTION) => {
709 section(reader, len, ExportSectionReader::new, ExportSection)
710 }
711 (Encoding::Module, START_SECTION) => {
712 let (func, range) = single_item(reader, len, "start")?;
713 Ok(StartSection { func, range })
714 }
715 (Encoding::Module, ELEMENT_SECTION) => {
716 section(reader, len, ElementSectionReader::new, ElementSection)
717 }
718 (Encoding::Module, CODE_SECTION) => {
719 let start = reader.original_position();
720 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
721 let range = start..reader.original_position() + len as usize;
722 self.state = State::FunctionBody {
723 remaining: count,
724 len,
725 };
726 Ok(CodeSectionStart {
727 count,
728 range,
729 size: len,
730 })
731 }
732 (Encoding::Module, DATA_SECTION) => {
733 section(reader, len, DataSectionReader::new, DataSection)
734 }
735 (Encoding::Module, DATA_COUNT_SECTION) => {
736 let (count, range) = single_item(reader, len, "data count")?;
737 Ok(DataCountSection { count, range })
738 }
739 (Encoding::Module, TAG_SECTION) => {
740 section(reader, len, TagSectionReader::new, TagSection)
741 }
742
743 // Component sections
744 #[cfg(feature = "component-model")]
745 (Encoding::Component, COMPONENT_MODULE_SECTION)
746 | (Encoding::Component, COMPONENT_SECTION) => {
747 if len as usize > MAX_WASM_MODULE_SIZE {
748 bail!(
749 len_pos,
750 "{} section is too large",
751 if id == 1 { "module" } else { "component " }
752 );
753 }
754
755 let range = reader.original_position()
756 ..reader.original_position() + usize::try_from(len).unwrap();
757 self.max_size -= u64::from(len);
758 self.offset += u64::from(len);
759 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
760 #[cfg(feature = "features")]
761 {
762 parser.features = self.features;
763 }
764 parser.max_size = u64::from(len);
765
766 Ok(match id {
767 1 => ModuleSection {
768 parser,
769 unchecked_range: range,
770 },
771 4 => ComponentSection {
772 parser,
773 unchecked_range: range,
774 },
775 _ => unreachable!(),
776 })
777 }
778 #[cfg(feature = "component-model")]
779 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
780 section(reader, len, InstanceSectionReader::new, InstanceSection)
781 }
782 #[cfg(feature = "component-model")]
783 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
784 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
785 }
786 #[cfg(feature = "component-model")]
787 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
788 reader,
789 len,
790 ComponentInstanceSectionReader::new,
791 ComponentInstanceSection,
792 ),
793 #[cfg(feature = "component-model")]
794 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
795 section(reader, len, SectionLimited::new, ComponentAliasSection)
796 }
797 #[cfg(feature = "component-model")]
798 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
799 reader,
800 len,
801 ComponentTypeSectionReader::new,
802 ComponentTypeSection,
803 ),
804 #[cfg(feature = "component-model")]
805 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
806 reader,
807 len,
808 ComponentCanonicalSectionReader::new,
809 ComponentCanonicalSection,
810 ),
811 #[cfg(feature = "component-model")]
812 (Encoding::Component, COMPONENT_START_SECTION) => {
813 let (start, range) = single_item(reader, len, "component start")?;
814 Ok(ComponentStartSection { start, range })
815 }
816 #[cfg(feature = "component-model")]
817 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
818 reader,
819 len,
820 ComponentImportSectionReader::new,
821 ComponentImportSection,
822 ),
823 #[cfg(feature = "component-model")]
824 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
825 reader,
826 len,
827 ComponentExportSectionReader::new,
828 ComponentExportSection,
829 ),
830 (_, id) => {
831 let offset = reader.original_position();
832 let contents = reader.read_bytes(len as usize)?;
833 let range = offset..offset + len as usize;
834 Ok(UnknownSection {
835 id,
836 contents,
837 range,
838 })
839 }
840 }
841 }
842
843 // Once we hit 0 remaining incrementally parsed items, with 0
844 // remaining bytes in each section, we're done and can switch back
845 // to parsing sections.
846 State::FunctionBody {
847 remaining: 0,
848 len: 0,
849 } => {
850 self.state = State::SectionStart;
851 self.parse_reader(reader, eof)
852 }
853
854 // ... otherwise trailing bytes with no remaining entries in these
855 // sections indicates an error.
856 State::FunctionBody { remaining: 0, len } => {
857 debug_assert!(len > 0);
858 let offset = reader.original_position();
859 Err(BinaryReaderError::new(
860 "trailing bytes at end of section",
861 offset,
862 ))
863 }
864
865 // Functions are relatively easy to parse when we know there's at
866 // least one remaining and at least one byte available to read
867 // things.
868 //
869 // We use the remaining length try to read a u32 size of the
870 // function, and using that size we require the entire function be
871 // resident in memory. This means that we're reading whole chunks of
872 // functions at a time.
873 //
874 // Limiting via `Parser::max_size` (nested parsing) happens above in
875 // `fn parse`, and limiting by our section size happens via
876 // `delimited`. Actual parsing of the function body is delegated to
877 // the caller to iterate over the `FunctionBody` structure.
878 State::FunctionBody { remaining, mut len } => {
879 let body = delimited(reader, &mut len, |r| {
880 Ok(FunctionBody::new(r.read_reader()?))
881 })?;
882 self.state = State::FunctionBody {
883 remaining: remaining - 1,
884 len,
885 };
886 Ok(CodeSectionEntry(body))
887 }
888 }
889 }
890
891 /// Convenience function that can be used to parse a module or component
892 /// that is entirely resident in memory.
893 ///
894 /// This function will parse the `data` provided as a WebAssembly module
895 /// or component.
896 ///
897 /// Note that when this function yields sections that provide parsers,
898 /// no further action is required for those sections as payloads from
899 /// those parsers will be automatically returned.
900 ///
901 /// # Examples
902 ///
903 /// An example of reading a wasm file from a stream (`std::io::Read`) into
904 /// a buffer and then parsing it.
905 ///
906 /// ```
907 /// use std::io::Read;
908 /// use anyhow::Result;
909 /// use wasmparser::{Parser, Chunk, Payload::*};
910 ///
911 /// fn parse(mut reader: impl Read) -> Result<()> {
912 /// let mut buf = Vec::new();
913 /// reader.read_to_end(&mut buf)?;
914 /// let parser = Parser::new(0);
915 ///
916 /// for payload in parser.parse_all(&buf) {
917 /// match payload? {
918 /// // Sections for WebAssembly modules
919 /// Version { .. } => { /* ... */ }
920 /// TypeSection(_) => { /* ... */ }
921 /// ImportSection(_) => { /* ... */ }
922 /// FunctionSection(_) => { /* ... */ }
923 /// TableSection(_) => { /* ... */ }
924 /// MemorySection(_) => { /* ... */ }
925 /// TagSection(_) => { /* ... */ }
926 /// GlobalSection(_) => { /* ... */ }
927 /// ExportSection(_) => { /* ... */ }
928 /// StartSection { .. } => { /* ... */ }
929 /// ElementSection(_) => { /* ... */ }
930 /// DataCountSection { .. } => { /* ... */ }
931 /// DataSection(_) => { /* ... */ }
932 ///
933 /// // Here we know how many functions we'll be receiving as
934 /// // `CodeSectionEntry`, so we can prepare for that, and
935 /// // afterwards we can parse and handle each function
936 /// // individually.
937 /// CodeSectionStart { .. } => { /* ... */ }
938 /// CodeSectionEntry(body) => {
939 /// // here we can iterate over `body` to parse the function
940 /// // and its locals
941 /// }
942 ///
943 /// // Sections for WebAssembly components
944 /// ModuleSection { .. } => { /* ... */ }
945 /// InstanceSection(_) => { /* ... */ }
946 /// CoreTypeSection(_) => { /* ... */ }
947 /// ComponentSection { .. } => { /* ... */ }
948 /// ComponentInstanceSection(_) => { /* ... */ }
949 /// ComponentAliasSection(_) => { /* ... */ }
950 /// ComponentTypeSection(_) => { /* ... */ }
951 /// ComponentCanonicalSection(_) => { /* ... */ }
952 /// ComponentStartSection { .. } => { /* ... */ }
953 /// ComponentImportSection(_) => { /* ... */ }
954 /// ComponentExportSection(_) => { /* ... */ }
955 ///
956 /// CustomSection(_) => { /* ... */ }
957 ///
958 /// // Once we've reached the end of a parser we either resume
959 /// // at the parent parser or the payload iterator is at its
960 /// // end and we're done.
961 /// End(_) => {}
962 ///
963 /// // most likely you'd return an error here, but if you want
964 /// // you can also inspect the raw contents of unknown sections
965 /// other => {
966 /// match other.as_section() {
967 /// Some((id, range)) => { /* ... */ }
968 /// None => { /* ... */ }
969 /// }
970 /// }
971 /// }
972 /// }
973 ///
974 /// Ok(())
975 /// }
976 ///
977 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
978 /// ```
979 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
980 let mut stack = Vec::new();
981 let mut cur = self;
982 let mut done = false;
983 iter::from_fn(move || {
984 if done {
985 return None;
986 }
987 let payload = match cur.parse(data, true) {
988 // Propagate all errors
989 Err(e) => {
990 done = true;
991 return Some(Err(e));
992 }
993
994 // This isn't possible because `eof` is always true.
995 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
996
997 Ok(Chunk::Parsed { payload, consumed }) => {
998 data = &data[consumed..];
999 payload
1000 }
1001 };
1002
1003 match &payload {
1004 #[cfg(feature = "component-model")]
1005 Payload::ModuleSection { parser, .. }
1006 | Payload::ComponentSection { parser, .. } => {
1007 stack.push(cur.clone());
1008 cur = parser.clone();
1009 }
1010 Payload::End(_) => match stack.pop() {
1011 Some(p) => cur = p,
1012 None => done = true,
1013 },
1014
1015 _ => {}
1016 }
1017
1018 Some(Ok(payload))
1019 })
1020 }
1021
1022 /// Skip parsing the code section entirely.
1023 ///
1024 /// This function can be used to indicate, after receiving
1025 /// `CodeSectionStart`, that the section will not be parsed.
1026 ///
1027 /// The caller will be responsible for skipping `size` bytes (found in the
1028 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1029 /// after the `size` bytes have been skipped.
1030 ///
1031 /// # Panics
1032 ///
1033 /// This function will panic if the parser is not in a state where it's
1034 /// parsing the code section.
1035 ///
1036 /// # Examples
1037 ///
1038 /// ```
1039 /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1040 /// use core::ops::Range;
1041 ///
1042 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1043 /// let mut parser = Parser::new(0);
1044 /// loop {
1045 /// let payload = match parser.parse(wasm, true)? {
1046 /// Chunk::Parsed { consumed, payload } => {
1047 /// wasm = &wasm[consumed..];
1048 /// payload
1049 /// }
1050 /// // this state isn't possible with `eof = true`
1051 /// Chunk::NeedMoreData(_) => unreachable!(),
1052 /// };
1053 /// match payload {
1054 /// TypeSection(s) => print_range("type section", &s.range()),
1055 /// ImportSection(s) => print_range("import section", &s.range()),
1056 /// // .. other sections
1057 ///
1058 /// // Print the range of the code section we see, but don't
1059 /// // actually iterate over each individual function.
1060 /// CodeSectionStart { range, size, .. } => {
1061 /// print_range("code section", &range);
1062 /// parser.skip_section();
1063 /// wasm = &wasm[size as usize..];
1064 /// }
1065 /// End(_) => break,
1066 /// _ => {}
1067 /// }
1068 /// }
1069 /// Ok(())
1070 /// }
1071 ///
1072 /// fn print_range(section: &str, range: &Range<usize>) {
1073 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1074 /// }
1075 /// ```
1076 pub fn skip_section(&mut self) {
1077 let skip = match self.state {
1078 State::FunctionBody { remaining: _, len } => len,
1079 _ => panic!("wrong state to call `skip_section`"),
1080 };
1081 self.offset += u64::from(skip);
1082 self.max_size -= u64::from(skip);
1083 self.state = State::SectionStart;
1084 }
1085}
1086
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1090
1091/// Parses an entire section resident in memory into a `Payload`.
1092///
1093/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1094/// to construct the section to return.
1095fn section<'a, T>(
1096 reader: &mut BinaryReader<'a>,
1097 len: u32,
1098 ctor: fn(BinaryReader<'a>) -> Result<T>,
1099 variant: fn(T) -> Payload<'a>,
1100) -> Result<Payload<'a>> {
1101 let reader = reader.skip(|r| {
1102 r.read_bytes(len as usize)?;
1103 Ok(())
1104 })?;
1105 // clear the hint for "need this many more bytes" here because we already
1106 // read all the bytes, so it's not possible to read more bytes if this
1107 // fails.
1108 let reader = ctor(reader).map_err(clear_hint)?;
1109 Ok(variant(reader))
1110}
1111
1112/// Reads a section that is represented by a single uleb-encoded `u32`.
1113fn single_item<'a, T>(
1114 reader: &mut BinaryReader<'a>,
1115 len: u32,
1116 desc: &str,
1117) -> Result<(T, Range<usize>)>
1118where
1119 T: FromReader<'a>,
1120{
1121 let range = reader.original_position()..reader.original_position() + len as usize;
1122 let mut content = reader.skip(|r| {
1123 r.read_bytes(len as usize)?;
1124 Ok(())
1125 })?;
1126 // We can't recover from "unexpected eof" here because our entire section is
1127 // already resident in memory, so clear the hint for how many more bytes are
1128 // expected.
1129 let ret = content.read().map_err(clear_hint)?;
1130 if !content.eof() {
1131 bail!(
1132 content.original_position(),
1133 "unexpected content in the {desc} section",
1134 );
1135 }
1136 Ok((ret, range))
1137}
1138
1139/// Attempts to parse using `f`.
1140///
1141/// This will update `*len` with the number of bytes consumed, and it will cause
1142/// a failure to be returned instead of the number of bytes consumed exceeds
1143/// what `*len` currently is.
1144fn delimited<'a, T>(
1145 reader: &mut BinaryReader<'a>,
1146 len: &mut u32,
1147 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1148) -> Result<T> {
1149 let start = reader.original_position();
1150 let ret = f(reader)?;
1151 *len = match (reader.original_position() - start)
1152 .try_into()
1153 .ok()
1154 .and_then(|i| len.checked_sub(i))
1155 {
1156 Some(i) => i,
1157 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1158 };
1159 Ok(ret)
1160}
1161
1162impl Default for Parser {
1163 fn default() -> Parser {
1164 Parser::new(0)
1165 }
1166}
1167
1168impl Payload<'_> {
1169 /// If this `Payload` represents a section in the original wasm module then
1170 /// the section's id and range within the original wasm binary are returned.
1171 ///
1172 /// Not all payloads refer to entire sections, such as the `Version` and
1173 /// `CodeSectionEntry` variants. These variants will return `None` from this
1174 /// function.
1175 ///
1176 /// Otherwise this function will return `Some` where the first element is
1177 /// the byte identifier for the section and the second element is the range
1178 /// of the contents of the section within the original wasm binary.
1179 ///
1180 /// The purpose of this method is to enable tools to easily iterate over
1181 /// entire sections if necessary and handle sections uniformly, for example
1182 /// dropping custom sections while preserving all other sections.
1183 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1184 use Payload::*;
1185
1186 match self {
1187 Version { .. } => None,
1188 TypeSection(s) => Some((TYPE_SECTION, s.range())),
1189 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1190 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1191 TableSection(s) => Some((TABLE_SECTION, s.range())),
1192 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1193 TagSection(s) => Some((TAG_SECTION, s.range())),
1194 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1195 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1196 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1197 DataSection(s) => Some((DATA_SECTION, s.range())),
1198 StartSection { range, .. } => Some((START_SECTION, range.clone())),
1199 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1200 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1201 CodeSectionEntry(_) => None,
1202
1203 #[cfg(feature = "component-model")]
1204 ModuleSection {
1205 unchecked_range: range,
1206 ..
1207 } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1208 #[cfg(feature = "component-model")]
1209 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1210 #[cfg(feature = "component-model")]
1211 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1212 #[cfg(feature = "component-model")]
1213 ComponentSection {
1214 unchecked_range: range,
1215 ..
1216 } => Some((COMPONENT_SECTION, range.clone())),
1217 #[cfg(feature = "component-model")]
1218 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1219 #[cfg(feature = "component-model")]
1220 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1221 #[cfg(feature = "component-model")]
1222 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1223 #[cfg(feature = "component-model")]
1224 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1225 #[cfg(feature = "component-model")]
1226 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1227 #[cfg(feature = "component-model")]
1228 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1229 #[cfg(feature = "component-model")]
1230 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1231
1232 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1233
1234 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1235
1236 End(_) => None,
1237 }
1238 }
1239}
1240
1241impl fmt::Debug for Payload<'_> {
1242 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1243 use Payload::*;
1244 match self {
1245 Version {
1246 num,
1247 encoding,
1248 range,
1249 } => f
1250 .debug_struct("Version")
1251 .field("num", num)
1252 .field("encoding", encoding)
1253 .field("range", range)
1254 .finish(),
1255
1256 // Module sections
1257 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1258 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1259 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1260 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1261 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1262 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1263 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1264 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1265 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1266 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1267 StartSection { func, range } => f
1268 .debug_struct("StartSection")
1269 .field("func", func)
1270 .field("range", range)
1271 .finish(),
1272 DataCountSection { count, range } => f
1273 .debug_struct("DataCountSection")
1274 .field("count", count)
1275 .field("range", range)
1276 .finish(),
1277 CodeSectionStart { count, range, size } => f
1278 .debug_struct("CodeSectionStart")
1279 .field("count", count)
1280 .field("range", range)
1281 .field("size", size)
1282 .finish(),
1283 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1284
1285 // Component sections
1286 #[cfg(feature = "component-model")]
1287 ModuleSection {
1288 parser: _,
1289 unchecked_range: range,
1290 } => f
1291 .debug_struct("ModuleSection")
1292 .field("range", range)
1293 .finish(),
1294 #[cfg(feature = "component-model")]
1295 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1296 #[cfg(feature = "component-model")]
1297 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1298 #[cfg(feature = "component-model")]
1299 ComponentSection {
1300 parser: _,
1301 unchecked_range: range,
1302 } => f
1303 .debug_struct("ComponentSection")
1304 .field("range", range)
1305 .finish(),
1306 #[cfg(feature = "component-model")]
1307 ComponentInstanceSection(_) => f
1308 .debug_tuple("ComponentInstanceSection")
1309 .field(&"...")
1310 .finish(),
1311 #[cfg(feature = "component-model")]
1312 ComponentAliasSection(_) => f
1313 .debug_tuple("ComponentAliasSection")
1314 .field(&"...")
1315 .finish(),
1316 #[cfg(feature = "component-model")]
1317 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1318 #[cfg(feature = "component-model")]
1319 ComponentCanonicalSection(_) => f
1320 .debug_tuple("ComponentCanonicalSection")
1321 .field(&"...")
1322 .finish(),
1323 #[cfg(feature = "component-model")]
1324 ComponentStartSection { .. } => f
1325 .debug_tuple("ComponentStartSection")
1326 .field(&"...")
1327 .finish(),
1328 #[cfg(feature = "component-model")]
1329 ComponentImportSection(_) => f
1330 .debug_tuple("ComponentImportSection")
1331 .field(&"...")
1332 .finish(),
1333 #[cfg(feature = "component-model")]
1334 ComponentExportSection(_) => f
1335 .debug_tuple("ComponentExportSection")
1336 .field(&"...")
1337 .finish(),
1338
1339 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1340
1341 UnknownSection { id, range, .. } => f
1342 .debug_struct("UnknownSection")
1343 .field("id", id)
1344 .field("range", range)
1345 .finish(),
1346
1347 End(offset) => f.debug_tuple("End").field(offset).finish(),
1348 }
1349 }
1350}
1351
1352fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1353 err.inner.needed_hint = None;
1354 err
1355}
1356
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that the expression `$a` matches the pattern `$b`; on a
    // mismatch, panics showing the debug form of the value and the pattern.
    macro_rules! assert_matches {
        ($a:expr, $b:pat $(,)?) => {
            match $a {
                $b => {}
                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
            }
        };
    }

    // Parsing of the 8-byte header: incomplete input asks for more data
    // unless `eof` is set, in which case it is an error.
    #[test]
    fn header() {
        assert!(Parser::default().parse(&[], true).is_err());
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }

    // `parse_all` must run to completion (no panic) on empty input,
    // truncated headers, and a header whose trailing four bytes are all 0x01.
    #[test]
    fn header_iter() {
        for _ in Parser::default().parse_all(&[]) {}
        for _ in Parser::default().parse_all(b"\0") {}
        for _ in Parser::default().parse_all(b"\0asm") {}
        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
    }

    // Returns a parser that has already consumed a core module header.
    fn parser_after_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_MODULE_VERSION,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );
        p
    }

    // Returns a parser that has already consumed a component header.
    fn parser_after_component_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(b"\0asm\x0d\0\x01\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_COMPONENT_VERSION,
                    encoding: Encoding::Component,
                    ..
                },
            }),
        );
        p
    }

    // Section id 8 (start section): truncated input, a well-formed section,
    // and sections with too much or no content.
    #[test]
    fn start_section() {
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert!(parser_after_header().parse(&[8], true).is_err());
        assert!(parser_after_header().parse(&[8, 1], true).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );
        // A 2-byte section holding more than the single item is rejected, as
        // is an empty section.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }

    // With no more input and `eof` set, the parser reports `End` at the
    // offset just past the 8-byte header.
    #[test]
    fn end_works() {
        assert_matches!(
            parser_after_header().parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }

    // Section id 1 (type section).
    #[test]
    fn type_section() {
        assert!(parser_after_header().parse(&[1], true).is_err());
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        // NOTE(review): this input uses section id 8, the same bytes as in
        // `start_section`; possibly a copy-paste leftover -- confirm intent.
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes past the declared section length are left unconsumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }

    // Section id 0 (custom section): malformed inputs, then name/data
    // splitting checked through `assert_custom`.
    #[test]
    fn custom_section() {
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Expected offsets below are absolute: the header occupies bytes
        // 0..8 and the section id and length bytes occupy 8..10.
        assert_custom(
            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
            3,
            "",
            11,
            b"",
            Range { start: 10, end: 11 },
        );
        assert_custom(
            parser_after_header()
                .parse(&[0, 2, 1, b'a'], false)
                .unwrap(),
            4,
            "a",
            12,
            b"",
            Range { start: 10, end: 12 },
        );
        assert_custom(
            parser_after_header()
                .parse(&[0, 2, 0, b'a'], false)
                .unwrap(),
            4,
            "",
            11,
            b"a",
            Range { start: 10, end: 12 },
        );
    }

    // Checks that `chunk` is a parsed custom section with the expected
    // consumed byte count, name, data offset, data bytes and range.
    fn assert_custom(
        chunk: Chunk<'_>,
        expected_consumed: usize,
        expected_name: &str,
        expected_data_offset: usize,
        expected_data: &[u8],
        expected_range: Range<usize>,
    ) {
        let (consumed, s) = match chunk {
            Chunk::Parsed {
                consumed,
                payload: Payload::CustomSection(s),
            } => (consumed, s),
            _ => panic!("not a custom section payload"),
        };
        assert_eq!(consumed, expected_consumed);
        assert_eq!(s.name(), expected_name);
        assert_eq!(s.data_offset(), expected_data_offset);
        assert_eq!(s.data(), expected_data);
        assert_eq!(s.range(), expected_range);
    }

    // NOTE: despite its name, this test feeds section id 10 and checks the
    // `CodeSectionStart` / `CodeSectionEntry` payloads.
    #[test]
    fn function_section() {
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );
        // 1-byte section declaring zero functions, followed by end of input
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );
        // 2-byte section declaring one function whose body is a single byte
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }

    // A component containing one nested module: the nested parser handed
    // out in `ModuleSection` is driven separately from the parent parser.
    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = match p.parse(&[1, 8], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }

    // A section inside a nested module must not extend past the end of the
    // enclosing module section.
    #[test]
    fn nested_section_too_big() {
        let mut p = parser_after_component_header();

        // A module that's 10 bytes in length
        let mut sub = match p.parse(&[1, 10], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
        // module.
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // We can't parse a section which declares it's bigger than the outer
        // module. This is a custom section, one byte big, with one content byte. The
        // content byte, however, lives outside of the parent's module code
        // section.
        assert_eq!(
            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
            "section too large",
        );
    }
}
1761}