cranelift_wasm/
state.rs

1//! WebAssembly module and function translation state.
2//!
3//! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
4//! value and control stacks during the translation of a single function.
5
6use crate::environ::{FuncEnvironment, GlobalVariable};
7use crate::{FuncIndex, GlobalIndex, Heap, MemoryIndex, TypeIndex, WasmResult};
8use crate::{HashMap, Occupied, Vacant};
9use cranelift_codegen::ir::{self, Block, Inst, Value};
10use std::vec::Vec;
11
12/// Information about the presence of an associated `else` for an `if`, or the
13/// lack thereof.
14#[derive(Debug)]
15pub enum ElseData {
16    /// The `if` does not already have an `else` block.
17    ///
18    /// This doesn't mean that it will never have an `else`, just that we
19    /// haven't seen it yet.
20    NoElse {
21        /// If we discover that we need an `else` block, this is the jump
22        /// instruction that needs to be fixed up to point to the new `else`
23        /// block rather than the destination block after the `if...end`.
24        branch_inst: Inst,
25
26        /// The placeholder block we're replacing.
27        placeholder: Block,
28    },
29
30    /// We have already allocated an `else` block.
31    ///
32    /// Usually we don't know whether we will hit an `if .. end` or an `if
33    /// .. else .. end`, but sometimes we can tell based on the block's type
34    /// signature that the signature is not valid if there isn't an `else`. In
35    /// these cases, we pre-allocate the `else` block.
36    WithElse {
37        /// This is the `else` block.
38        else_block: Block,
39    },
40}
41
42/// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43/// fields:
44///
45/// - `destination`: reference to the `Block` that will hold the code after the control block;
46/// - `num_return_values`: number of values returned by the control block;
47/// - `original_stack_size`: size of the value stack at the beginning of the control block.
48///
49/// The `loop` frame has a `header` field that references the `Block` that contains the beginning
50/// of the body of the loop.
51#[derive(Debug)]
52pub enum ControlStackFrame {
53    If {
54        destination: Block,
55        else_data: ElseData,
56        num_param_values: usize,
57        num_return_values: usize,
58        original_stack_size: usize,
59        exit_is_branched_to: bool,
60        blocktype: wasmparser::BlockType,
61        /// Was the head of the `if` reachable?
62        head_is_reachable: bool,
63        /// What was the reachability at the end of the consequent?
64        ///
65        /// This is `None` until we're finished translating the consequent, and
66        /// is set to `Some` either by hitting an `else` when we will begin
67        /// translating the alternative, or by hitting an `end` in which case
68        /// there is no alternative.
69        consequent_ends_reachable: Option<bool>,
70        // Note: no need for `alternative_ends_reachable` because that is just
71        // `state.reachable` when we hit the `end` in the `if .. else .. end`.
72    },
73    Block {
74        destination: Block,
75        num_param_values: usize,
76        num_return_values: usize,
77        original_stack_size: usize,
78        exit_is_branched_to: bool,
79    },
80    Loop {
81        destination: Block,
82        header: Block,
83        num_param_values: usize,
84        num_return_values: usize,
85        original_stack_size: usize,
86    },
87}
88
89/// Helper methods for the control stack objects.
90impl ControlStackFrame {
91    pub fn num_return_values(&self) -> usize {
92        match *self {
93            Self::If {
94                num_return_values, ..
95            }
96            | Self::Block {
97                num_return_values, ..
98            }
99            | Self::Loop {
100                num_return_values, ..
101            } => num_return_values,
102        }
103    }
104    pub fn num_param_values(&self) -> usize {
105        match *self {
106            Self::If {
107                num_param_values, ..
108            }
109            | Self::Block {
110                num_param_values, ..
111            }
112            | Self::Loop {
113                num_param_values, ..
114            } => num_param_values,
115        }
116    }
117    pub fn following_code(&self) -> Block {
118        match *self {
119            Self::If { destination, .. }
120            | Self::Block { destination, .. }
121            | Self::Loop { destination, .. } => destination,
122        }
123    }
124    pub fn br_destination(&self) -> Block {
125        match *self {
126            Self::If { destination, .. } | Self::Block { destination, .. } => destination,
127            Self::Loop { header, .. } => header,
128        }
129    }
130    /// Private helper. Use `truncate_value_stack_to_else_params()` or
131    /// `truncate_value_stack_to_original_size()` to restore value-stack state.
132    fn original_stack_size(&self) -> usize {
133        match *self {
134            Self::If {
135                original_stack_size,
136                ..
137            }
138            | Self::Block {
139                original_stack_size,
140                ..
141            }
142            | Self::Loop {
143                original_stack_size,
144                ..
145            } => original_stack_size,
146        }
147    }
148    pub fn is_loop(&self) -> bool {
149        match *self {
150            Self::If { .. } | Self::Block { .. } => false,
151            Self::Loop { .. } => true,
152        }
153    }
154
155    pub fn exit_is_branched_to(&self) -> bool {
156        match *self {
157            Self::If {
158                exit_is_branched_to,
159                ..
160            }
161            | Self::Block {
162                exit_is_branched_to,
163                ..
164            } => exit_is_branched_to,
165            Self::Loop { .. } => false,
166        }
167    }
168
169    pub fn set_branched_to_exit(&mut self) {
170        match *self {
171            Self::If {
172                ref mut exit_is_branched_to,
173                ..
174            }
175            | Self::Block {
176                ref mut exit_is_branched_to,
177                ..
178            } => *exit_is_branched_to = true,
179            Self::Loop { .. } => {}
180        }
181    }
182
183    /// Pop values from the value stack so that it is left at the
184    /// input-parameters to an else-block.
185    pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
186        debug_assert!(matches!(self, &ControlStackFrame::If { .. }));
187        stack.truncate(self.original_stack_size());
188    }
189
190    /// Pop values from the value stack so that it is left at the state it was
191    /// before this control-flow frame.
192    pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
193        // The "If" frame pushes its parameters twice, so they're available to the else block
194        // (see also `FuncTranslationState::push_if`).
195        // Yet, the original_stack_size member accounts for them only once, so that the else
196        // block can see the same number of parameters as the consequent block. As a matter of
197        // fact, we need to subtract an extra number of parameter values for if blocks.
198        let num_duplicated_params = match self {
199            &ControlStackFrame::If {
200                num_param_values, ..
201            } => {
202                debug_assert!(num_param_values <= self.original_stack_size());
203                num_param_values
204            }
205            _ => 0,
206        };
207        stack.truncate(self.original_stack_size() - num_duplicated_params);
208    }
209}
210
211/// Contains information passed along during a function's translation and that records:
212///
213/// - The current value and control stacks.
214/// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
215///   unreachable code;
216pub struct FuncTranslationState {
217    /// A stack of values corresponding to the active values in the input wasm function at this
218    /// point.
219    pub(crate) stack: Vec<Value>,
220    /// A stack of active control flow operations at this point in the input wasm function.
221    pub(crate) control_stack: Vec<ControlStackFrame>,
222    /// Is the current translation state still reachable? This is false when translating operators
223    /// like End, Return, or Unreachable.
224    pub(crate) reachable: bool,
225
226    // Map of global variables that have already been created by `FuncEnvironment::make_global`.
227    globals: HashMap<GlobalIndex, GlobalVariable>,
228
229    // Map of heaps that have been created by `FuncEnvironment::make_heap`.
230    memory_to_heap: HashMap<MemoryIndex, Heap>,
231
232    // Map of indirect call signatures that have been created by
233    // `FuncEnvironment::make_indirect_sig()`.
234    // Stores both the signature reference and the number of WebAssembly arguments
235    signatures: HashMap<TypeIndex, (ir::SigRef, usize)>,
236
237    // Imported and local functions that have been created by
238    // `FuncEnvironment::make_direct_func()`.
239    // Stores both the function reference and the number of WebAssembly arguments
240    functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
241}
242
243// Public methods that are exposed to non-`cranelift_wasm` API consumers.
244impl FuncTranslationState {
245    /// True if the current translation state expresses reachable code, false if it is unreachable.
246    #[inline]
247    pub fn reachable(&self) -> bool {
248        self.reachable
249    }
250}
251
252impl FuncTranslationState {
253    /// Construct a new, empty, `FuncTranslationState`
254    pub(crate) fn new() -> Self {
255        Self {
256            stack: Vec::new(),
257            control_stack: Vec::new(),
258            reachable: true,
259            globals: HashMap::new(),
260            memory_to_heap: HashMap::new(),
261            signatures: HashMap::new(),
262            functions: HashMap::new(),
263        }
264    }
265
266    fn clear(&mut self) {
267        debug_assert!(self.stack.is_empty());
268        debug_assert!(self.control_stack.is_empty());
269        self.reachable = true;
270        self.globals.clear();
271        self.memory_to_heap.clear();
272        self.signatures.clear();
273        self.functions.clear();
274    }
275
276    /// Initialize the state for compiling a function with the given signature.
277    ///
278    /// This resets the state to containing only a single block representing the whole function.
279    /// The exit block is the last block in the function which will contain the return instruction.
280    pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
281        self.clear();
282        self.push_block(
283            exit_block,
284            0,
285            sig.returns
286                .iter()
287                .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
288                .count(),
289        );
290    }
291
292    /// Push a value.
293    pub(crate) fn push1(&mut self, val: Value) {
294        self.stack.push(val);
295    }
296
297    /// Push multiple values.
298    pub(crate) fn pushn(&mut self, vals: &[Value]) {
299        self.stack.extend_from_slice(vals);
300    }
301
302    /// Pop one value.
303    pub(crate) fn pop1(&mut self) -> Value {
304        self.stack
305            .pop()
306            .expect("attempted to pop a value from an empty stack")
307    }
308
309    /// Peek at the top of the stack without popping it.
310    pub(crate) fn peek1(&self) -> Value {
311        *self
312            .stack
313            .last()
314            .expect("attempted to peek at a value on an empty stack")
315    }
316
317    /// Pop two values. Return them in the order they were pushed.
318    pub(crate) fn pop2(&mut self) -> (Value, Value) {
319        let v2 = self.stack.pop().unwrap();
320        let v1 = self.stack.pop().unwrap();
321        (v1, v2)
322    }
323
324    /// Pop three values. Return them in the order they were pushed.
325    pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
326        let v3 = self.stack.pop().unwrap();
327        let v2 = self.stack.pop().unwrap();
328        let v1 = self.stack.pop().unwrap();
329        (v1, v2, v3)
330    }
331
332    /// Helper to ensure the stack size is at least as big as `n`; note that due to
333    /// `debug_assert` this will not execute in non-optimized builds.
334    #[inline]
335    fn ensure_length_is_at_least(&self, n: usize) {
336        debug_assert!(
337            n <= self.stack.len(),
338            "attempted to access {} values but stack only has {} values",
339            n,
340            self.stack.len()
341        )
342    }
343
344    /// Pop the top `n` values on the stack.
345    ///
346    /// The popped values are not returned. Use `peekn` to look at them before popping.
347    pub(crate) fn popn(&mut self, n: usize) {
348        self.ensure_length_is_at_least(n);
349        let new_len = self.stack.len() - n;
350        self.stack.truncate(new_len);
351    }
352
353    /// Peek at the top `n` values on the stack in the order they were pushed.
354    pub(crate) fn peekn(&self, n: usize) -> &[Value] {
355        self.ensure_length_is_at_least(n);
356        &self.stack[self.stack.len() - n..]
357    }
358
359    /// Peek at the top `n` values on the stack in the order they were pushed.
360    pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
361        self.ensure_length_is_at_least(n);
362        let len = self.stack.len();
363        &mut self.stack[len - n..]
364    }
365
366    /// Push a block on the control stack.
367    pub(crate) fn push_block(
368        &mut self,
369        following_code: Block,
370        num_param_types: usize,
371        num_result_types: usize,
372    ) {
373        debug_assert!(num_param_types <= self.stack.len());
374        self.control_stack.push(ControlStackFrame::Block {
375            destination: following_code,
376            original_stack_size: self.stack.len() - num_param_types,
377            num_param_values: num_param_types,
378            num_return_values: num_result_types,
379            exit_is_branched_to: false,
380        });
381    }
382
383    /// Push a loop on the control stack.
384    pub(crate) fn push_loop(
385        &mut self,
386        header: Block,
387        following_code: Block,
388        num_param_types: usize,
389        num_result_types: usize,
390    ) {
391        debug_assert!(num_param_types <= self.stack.len());
392        self.control_stack.push(ControlStackFrame::Loop {
393            header,
394            destination: following_code,
395            original_stack_size: self.stack.len() - num_param_types,
396            num_param_values: num_param_types,
397            num_return_values: num_result_types,
398        });
399    }
400
401    /// Push an if on the control stack.
402    pub(crate) fn push_if(
403        &mut self,
404        destination: Block,
405        else_data: ElseData,
406        num_param_types: usize,
407        num_result_types: usize,
408        blocktype: wasmparser::BlockType,
409    ) {
410        debug_assert!(num_param_types <= self.stack.len());
411
412        // Push a second copy of our `if`'s parameters on the stack. This lets
413        // us avoid saving them on the side in the `ControlStackFrame` for our
414        // `else` block (if it exists), which would require a second heap
415        // allocation. See also the comment in `translate_operator` for
416        // `Operator::Else`.
417        self.stack.reserve(num_param_types);
418        for i in (self.stack.len() - num_param_types)..self.stack.len() {
419            let val = self.stack[i];
420            self.stack.push(val);
421        }
422
423        self.control_stack.push(ControlStackFrame::If {
424            destination,
425            else_data,
426            original_stack_size: self.stack.len() - num_param_types,
427            num_param_values: num_param_types,
428            num_return_values: num_result_types,
429            exit_is_branched_to: false,
430            head_is_reachable: self.reachable,
431            consequent_ends_reachable: None,
432            blocktype,
433        });
434    }
435}
436
437/// Methods for handling entity references.
438impl FuncTranslationState {
439    /// Get the `GlobalVariable` reference that should be used to access the global variable
440    /// `index`. Create the reference if necessary.
441    /// Also return the WebAssembly type of the global.
442    pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
443        &mut self,
444        func: &mut ir::Function,
445        index: u32,
446        environ: &mut FE,
447    ) -> WasmResult<GlobalVariable> {
448        let index = GlobalIndex::from_u32(index);
449        match self.globals.entry(index) {
450            Occupied(entry) => Ok(*entry.get()),
451            Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
452        }
453    }
454
455    /// Get the `Heap` reference that should be used to access linear memory `index`.
456    /// Create the reference if necessary.
457    pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
458        &mut self,
459        func: &mut ir::Function,
460        index: u32,
461        environ: &mut FE,
462    ) -> WasmResult<Heap> {
463        let index = MemoryIndex::from_u32(index);
464        match self.memory_to_heap.entry(index) {
465            Occupied(entry) => Ok(*entry.get()),
466            Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
467        }
468    }
469
470    /// Get the `SigRef` reference that should be used to make an indirect call with signature
471    /// `index`. Also return the number of WebAssembly arguments in the signature.
472    ///
473    /// Create the signature if necessary.
474    pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
475        &mut self,
476        func: &mut ir::Function,
477        index: u32,
478        environ: &mut FE,
479    ) -> WasmResult<(ir::SigRef, usize)> {
480        let index = TypeIndex::from_u32(index);
481        match self.signatures.entry(index) {
482            Occupied(entry) => Ok(*entry.get()),
483            Vacant(entry) => {
484                let sig = environ.make_indirect_sig(func, index)?;
485                Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
486            }
487        }
488    }
489
490    /// Get the `FuncRef` reference that should be used to make a direct call to function
491    /// `index`. Also return the number of WebAssembly arguments in the signature.
492    ///
493    /// Create the function reference if necessary.
494    pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
495        &mut self,
496        func: &mut ir::Function,
497        index: u32,
498        environ: &mut FE,
499    ) -> WasmResult<(ir::FuncRef, usize)> {
500        let index = FuncIndex::from_u32(index);
501        match self.functions.entry(index) {
502            Occupied(entry) => Ok(*entry.get()),
503            Vacant(entry) => {
504                let fref = environ.make_direct_func(func, index)?;
505                let sig = func.dfg.ext_funcs[fref].signature;
506                Ok(*entry.insert((
507                    fref,
508                    num_wasm_parameters(environ, &func.dfg.signatures[sig]),
509                )))
510            }
511        }
512    }
513}
514
515fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
516    environ: &FE,
517    signature: &ir::Signature,
518) -> usize {
519    (0..signature.params.len())
520        .filter(|index| environ.is_wasm_parameter(signature, *index))
521        .count()
522}