golem_wasm_ast/
lib.rs

1// Copyright 2024-2025 Golem Cloud
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use mappable_rc::Mrc;
16use std::cell::RefCell;
17use std::collections::HashMap;
18use std::fmt::Debug;
19use std::hash::Hash;
20use std::marker::PhantomData;
21
22/// The analysis module contains functionality built on top of the WASM AST to analyze components.
23///
24/// Currently the only functionality it provides is gathering a concise representation of all the
25/// exported instances and functions with their signatures.
26///
27/// For this analysis first parse a [component::Component] with [component::Component::from_bytes],
28/// then create an [analysis::AnalysisContext] with [analysis::AnalysisContext::new] and finally call
29/// [analysis::AnalysisContext::get_top_level_exports] on the newly created context.
30///
31/// This module is optional and can be enabled with the `analysis` feature flag. It is enabled by default.
32#[cfg(feature = "analysis")]
33pub mod analysis;
34
35/// The component module contains the AST definition of the [WASM Component Model](https://github.com/WebAssembly/component-model).
36///
37/// This module is optional and can be enabled with the `component` feature flag. It is enabled by default.
38/// When disabled the library can only work with core WASM modules.
39#[cfg(feature = "component")]
40pub mod component;
41
42/// The core module contains the AST definition of a core WebAssembly module.
43pub mod core;
44
45/// The customization module defines a type for customizing various parts of the WASM AST.
46///
47/// There are three parts of the AST defined in the [core] module that can be replaced by user
48/// defined types:
49/// - [core::Expr], the node holding a WASM expression (sequence of instructions)
50/// - [core::Data], the node holding a WASM data segment
51/// - [core::Custom], the node holding a custom section
52///
53/// Replacing these with custom nodes can reduce the memory footprint of the AST if there is no need to
54/// write it back to a WASM binary.
55///
56/// There are three predefined modes, each type can be used in the `Ast` type parameter of both [core::Module] and [component::Component]:
57/// - [DefaultAst] uses the default types for all three nodes
/// - [IgnoreAll] replaces all three nodes with empty structures, losing all information
/// - [IgnoreAllButMetadata] replaces all three nodes with empty structures, except those [core::Custom] nodes which are interpreted as metadata.
60mod customization;
61
62/// The metadata module defines data structures for representing various metadata extracted from WASM binaries.
63///
64/// This module is optional and can be enabled with the `metadata` feature flag. It is enabled by default.
65#[cfg(feature = "metadata")]
66pub mod metadata;
67
68pub use customization::*;
69
70#[cfg(test)]
71test_r::enable!();
72
/// Identifies one of the index spaces a WASM node can be numbered in.
///
/// In the core WASM AST most kinds of top-level node (data, memory, type, etc.) have an index space
/// of their own. Nodes are referred to by unsigned integer indexes, and every index space is
/// numbered independently of the others.
///
/// In the component model a single top-level AST node may belong to different index spaces depending
/// on its contents. For example a [component::ComponentImport] node can import a module, a function,
/// a value, a type, an instance or a component - each of these defines an entity in a different index space.
pub trait IndexSpace: Debug + Eq + PartialEq + Ord + PartialOrd {
    /// The index type of this space: copyable, hashable and convertible to and from a raw `u32`.
    type Index: Copy + Eq + Hash + From<u32> + Into<u32>;
}
83
/// Describes the type of a section in a WASM binary.
///
/// The section type is used to group sections (for example to get all the functions in a module) and
/// also to decide whether sections of that type support grouping at all.
pub trait SectionType: Debug + Eq + PartialEq + Ord + PartialOrd {
    /// Returns whether this section type supports grouping.
    ///
    /// If it does, sections of the same type next to each other are serialized into a single WASM
    /// section containing multiple elements when writing out a WASM binary.
    ///
    /// Some section types do not support this encoding (such as the [core::Start] or [core::Custom]
    /// sections); in these cases each of them is serialized into its own section.
    fn allow_grouping(&self) -> bool;
}
96
97/// A section is one top level element of a WASM binary, each having an associated [IndexSpace] and [SectionType].
98///
99/// There are two families of sections, core WASM module sections are defined in the [core] module, while component model sections are defined in the [component] module.
100pub trait Section<IS: IndexSpace, ST: SectionType>: Debug + Clone + PartialEq {
101    fn index_space(&self) -> IS;
102    fn section_type(&self) -> ST;
103}
104
105/// Internal representation of modules and components as a sequence of sections
106#[derive(Debug, Clone, PartialEq, Eq)]
107pub(crate) struct Sections<IS: IndexSpace, ST: SectionType, S: Section<IS, ST> + 'static> {
108    sections: Vec<Mrc<S>>,
109    phantom_is: PhantomData<IS>,
110    phantom_st: PhantomData<ST>,
111}
112
113impl<IS: IndexSpace, ST: SectionType, S: Section<IS, ST>> Default for Sections<IS, ST, S> {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119#[allow(unused)]
120impl<IS: IndexSpace, ST: SectionType, S: Section<IS, ST>> Sections<IS, ST, S> {
121    pub fn new() -> Self {
122        Self {
123            sections: Vec::new(),
124            phantom_is: PhantomData,
125            phantom_st: PhantomData,
126        }
127    }
128
129    pub fn from_flat(sections: Vec<S>) -> Self {
130        Self {
131            sections: sections.into_iter().map(|s| Mrc::new(s)).collect(),
132            phantom_is: PhantomData,
133            phantom_st: PhantomData,
134        }
135    }
136
137    pub fn from_grouped(groups: Vec<(ST, Vec<S>)>) -> Self {
138        Self {
139            sections: groups
140                .into_iter()
141                .flat_map(|(_, sections)| sections)
142                .map(|s| Mrc::new(s))
143                .collect(),
144            phantom_is: PhantomData,
145            phantom_st: PhantomData,
146        }
147    }
148
149    pub fn add_to_beginning(&mut self, section: S) {
150        self.sections.insert(0, Mrc::new(section));
151    }
152
153    pub fn add_to_end(&mut self, section: S) {
154        self.sections.push(Mrc::new(section));
155    }
156
157    pub fn add_to_first_group_start(&mut self, section: S) {
158        let mut i = 0;
159        while i < self.sections.len() {
160            if self.sections[i].section_type() == section.section_type() {
161                break;
162            }
163            i += 1;
164        }
165        self.sections.insert(i, Mrc::new(section));
166    }
167
168    pub fn add_to_last_group(&mut self, section: S) {
169        if !self.sections.is_empty() {
170            let mut i = self.sections.len() - 1;
171            while i > 0 {
172                if self.sections[i].section_type() == section.section_type() {
173                    break;
174                }
175                i -= 1;
176            }
177            if i == 0 && self.sections[0].section_type() != section.section_type() {
178                self.add_to_end(section);
179            } else {
180                self.sections.insert(i + 1, Mrc::new(section));
181            }
182        } else {
183            self.add_to_end(section);
184        }
185    }
186
187    pub fn clear_group(&mut self, section_type: &ST) {
188        self.sections.retain(|s| s.section_type() != *section_type);
189    }
190
191    pub fn indexed(&self, index_space: &IS) -> HashMap<IS::Index, Mrc<S>> {
192        self.filter_by_index_space(index_space)
193            .into_iter()
194            .enumerate()
195            .map(|(idx, section)| (IS::Index::from(idx as u32), section))
196            .collect()
197    }
198
199    pub fn filter_by_index_space(&self, index_space: &IS) -> Vec<Mrc<S>> {
200        self.sections
201            .iter()
202            .filter(|&section| section.index_space() == *index_space)
203            .cloned()
204            .collect()
205    }
206
207    pub fn filter_by_section_type(&self, section_type: &ST) -> Vec<Mrc<S>> {
208        self.sections
209            .iter()
210            .filter(|&section| section.section_type() == *section_type)
211            .cloned()
212            .collect()
213    }
214
215    pub fn move_to_end(&mut self, section: S) {
216        self.sections.retain(|s| **s != section);
217        self.sections.push(Mrc::new(section));
218    }
219
220    pub fn replace(&mut self, index_space: IS, idx: IS::Index, section: S) {
221        let mut curr_idx = 0;
222        let mut i = 0;
223        while i < self.sections.len() {
224            if self.sections[i].index_space() == index_space {
225                if curr_idx == idx.into() {
226                    break;
227                }
228                curr_idx += 1;
229            }
230            i += 1;
231        }
232        self.sections[i] = Mrc::new(section);
233    }
234
235    pub fn into_grouped(self) -> Vec<(ST, Vec<Mrc<S>>)> {
236        if self.sections.is_empty() {
237            Vec::new()
238        } else {
239            let mut grouped = Vec::new();
240            let mut current_type = self.sections[0].section_type();
241            let mut current_sections = Vec::new();
242            for section in self.sections {
243                if section.section_type() == current_type {
244                    current_sections.push(section);
245                } else {
246                    grouped.push((current_type, current_sections));
247                    current_sections = Vec::new();
248                    current_type = section.section_type();
249                    current_sections.push(section);
250                }
251            }
252            grouped.push((current_type, current_sections));
253
254            grouped
255        }
256    }
257
258    pub fn take_all(&mut self) -> Vec<Mrc<S>> {
259        std::mem::take(&mut self.sections)
260    }
261}
262
263/// Internal structure holding references to all the items of a given section type in a [Sections] structure.
264struct SectionCache<T: 'static, IS: IndexSpace, ST: SectionType, S: Section<IS, ST>> {
265    cell: RefCell<Option<Vec<Mrc<T>>>>,
266    section_type: ST,
267    get: fn(&S) -> &T,
268    index_space: PhantomData<IS>,
269}
270
271impl<T, IS: IndexSpace, ST: SectionType, S: Section<IS, ST>> SectionCache<T, IS, ST, S> {
272    // TODO: helper macro
273    pub fn new(section_type: ST, get: fn(&S) -> &T) -> Self {
274        Self {
275            cell: RefCell::new(None),
276            section_type,
277            get,
278            index_space: PhantomData,
279        }
280    }
281
282    pub fn count(&self) -> usize {
283        self.cell
284            .borrow()
285            .as_ref()
286            .map_or(0, |sections| sections.len())
287    }
288
289    pub fn invalidate(&self) {
290        self.cell.replace(None);
291    }
292
293    pub fn all(&self) -> Vec<Mrc<T>> {
294        self.cell
295            .borrow()
296            .as_ref()
297            .map_or_else(Vec::new, |sections| sections.clone())
298    }
299
300    pub fn populate(&self, sections: &Sections<IS, ST, S>) {
301        let mut cell = self.cell.borrow_mut();
302        match cell.take() {
303            Some(inner) => {
304                *cell = Some(inner);
305            }
306            None => {
307                let inner = sections
308                    .filter_by_section_type(&self.section_type)
309                    .into_iter()
310                    .map(|section| Mrc::map(section, self.get))
311                    .collect();
312                *cell = Some(inner);
313            }
314        }
315    }
316}
317
318/// Internal structure holding indexed references to all the items of a [Sections] structure belonging to a given [IndexSpace].
319#[derive(Clone)]
320struct SectionIndex<IS: IndexSpace, ST: SectionType, S: Section<IS, ST> + 'static> {
321    cell: RefCell<Option<HashMap<IS::Index, Mrc<S>>>>,
322    index_space: IS,
323    section_type: PhantomData<ST>,
324}
325
326impl<IS: IndexSpace, ST: SectionType, S: Section<IS, ST>> SectionIndex<IS, ST, S> {
327    pub fn new(index_space: IS) -> Self {
328        Self {
329            cell: RefCell::new(None),
330            index_space,
331            section_type: PhantomData,
332        }
333    }
334
335    #[allow(unused)]
336    pub fn count(&self) -> usize {
337        self.cell
338            .borrow()
339            .as_ref()
340            .map_or(0, |sections| sections.len())
341    }
342
343    pub fn get(&self, index: &IS::Index) -> Option<Mrc<S>> {
344        self.cell
345            .borrow()
346            .as_ref()
347            .and_then(|sections| sections.get(index).cloned())
348    }
349
350    pub fn invalidate(&self) {
351        self.cell.replace(None);
352    }
353
354    pub fn populate(&self, sections: &Sections<IS, ST, S>) {
355        let mut cell = self.cell.borrow_mut();
356        match cell.take() {
357            Some(inner) => {
358                *cell = Some(inner);
359            }
360            None => {
361                let inner = sections.indexed(&self.index_space);
362                *cell = Some(inner);
363            }
364        }
365    }
366}
367
/// Creates a `SectionCache` for the `CoreSection` variant named `$tpe`.
///
/// The cache is keyed by `CoreSectionType::$tpe` and its accessor unwraps the matching
/// `CoreSection::$tpe` variant. The accessor hits `unreachable!()` if it is ever handed a
/// different variant.
#[macro_export]
macro_rules! new_core_section_cache {
    ($tpe:ident) => {
        $crate::SectionCache::new(
            $crate::core::CoreSectionType::$tpe,
            |section| match section {
                $crate::core::CoreSection::$tpe(inner) => inner,
                _ => unreachable!(),
            },
        )
    };
}
380
/// Creates a `SectionCache` for the `ComponentSection` variant named `$tpe`.
///
/// The cache is keyed by `ComponentSectionType::$tpe` and its accessor unwraps the matching
/// `ComponentSection::$tpe` variant. The accessor hits `unreachable!()` if it is ever handed a
/// different variant. Only available with the `component` feature.
#[cfg(feature = "component")]
#[macro_export]
macro_rules! new_component_section_cache {
    ($tpe:ident) => {
        $crate::SectionCache::new(
            $crate::component::ComponentSectionType::$tpe,
            |section| match section {
                $crate::component::ComponentSection::$tpe(inner) => inner,
                _ => unreachable!(),
            },
        )
    };
}