wasmtime_runtime/gc/gc_runtime.rs

//! Traits for abstracting over our different garbage collectors.

use crate::{
    ExternRefHostDataId, ExternRefHostDataTable, SendSyncPtr, VMExternRef, VMGcHeader, VMGcRef,
};
use anyhow::Result;
use std::{any::Any, num::NonZeroUsize};

/// Trait for integrating a garbage collector with the runtime.
///
/// This trait is responsible for:
///
/// * GC barriers used by runtime code (as opposed to compiled Wasm code)
///
/// * Creating and managing GC heaps for individual stores
///
/// * Running garbage collection
///
/// # Safety
///
/// The collector, its GC heaps, and GC barriers when taken together as a whole
/// must be safe. Additionally, they must work with the GC barriers emitted into
/// compiled Wasm code via the collector's corresponding `GcCompiler`
/// implementation. That is, if callers only call safe methods on this trait
/// (while pairing it with its associated `GcCompiler`, `GcHeap`, etc.) and
/// uphold all the documented safety invariants of this trait's unsafe methods,
/// then it must be impossible for callers to violate memory safety.
/// Implementations of this trait may not add new safety invariants beyond
/// those already documented in this trait's interface that callers would need
/// to uphold.
pub unsafe trait GcRuntime: 'static + Send + Sync {
    /// Construct a new GC heap.
    fn new_gc_heap(&self) -> Result<Box<dyn GcHeap>>;
}

/// A heap that manages garbage-collected objects.
///
/// Each `wasmtime::Store` is associated with a single `GcHeap`, and a `GcHeap`
/// is only ever used with one store at a time, but a `GcHeap` may be reused
/// with a new store after its original store is dropped. The `reset` method
/// will be called in between each such reuse. (This reuse allows for better
/// integration with the pooling allocator.)
///
/// If a `GcHeap` mapped any memory, its `Drop` implementation should unmap that
/// memory.
///
/// # Safety
///
/// The trait methods below are all safe: implementations of this trait must
/// ensure that these methods cannot be misused to create memory unsafety. The
/// expectation is that -- given that `VMGcRef` is a newtype over an index --
/// implementations perform tricks similar to those of Wasm linear memory
/// implementations. The heap should internally be a contiguous region of memory
/// and `VMGcRef` indices into the heap must be bounds checked (explicitly or
/// implicitly via virtual memory tricks).
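///
/// For example, a bounds-checked access might look like the following sketch
/// (`as_heap_index` and the `heap` field are hypothetical, not part of this
/// trait):
///
/// ```ignore
/// // `VMGcRef` is an index into a contiguous heap region, so every access
/// // is `base + index` with an explicit bounds check.
/// fn object_data(&self, gc_ref: &VMGcRef, len: usize) -> &[u8] {
///     let start = gc_ref.as_heap_index(); // hypothetical accessor
///     let end = start.checked_add(len).expect("length overflow");
///     assert!(end <= self.heap.len(), "out-of-bounds GC heap access");
///     &self.heap[start..end]
/// }
/// ```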
///
/// Furthermore, if heap corruption occurs because (for example) a `VMGcRef`
/// from a different heap is used with this heap, then that corruption must be
/// limited to within this heap. Every heap is a mini sandbox. It follows that
/// native pointers should never be written into or read out from the GC heap,
/// since that could spread corruption from inside the GC heap out to the native
/// host heap. The host data for an `externref`, therefore, is stored in a side
/// table (`ExternRefHostDataTable`) and never inside the heap. Only an id
/// referencing a slot in that table should ever be written into the GC heap.
///
/// These constraints give us great amounts of safety compared to working with
/// raw pointers. The worst that could happen is corruption local to the heap
/// and a panic, or perhaps reading stale heap data from a previous Wasm
/// instance. A corrupt `GcHeap` can *never* result in corruption of the native
/// host's heap.
///
/// The downside is that we are introducing `heap_base + index` computations and
/// bounds checking to access GC memory, adding performance overhead. This is
/// deemed to be a worthy trade off. Furthermore, it isn't even a clear cut
/// performance degradation since this allows us to use 32-bit "pointers",
/// giving us more compact data representations and the improved cache
/// utilization that implies.
pub unsafe trait GcHeap: 'static + Send + Sync {
    ////////////////////////////////////////////////////////////////////////////
    // `Any` methods

    /// Get this heap as an `&Any`.
    fn as_any(&self) -> &dyn Any;

    /// Get this heap as an `&mut Any`.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    ////////////////////////////////////////////////////////////////////////////
    // No-GC Scope Methods

    /// Enter a no-GC scope.
    ///
    /// Calling the `gc` method when we are inside a no-GC scope should panic.
    ///
    /// We can enter multiple, nested no-GC scopes and this method should
    /// account for that.
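    ///
    /// A minimal sketch, assuming the implementation tracks nesting with a
    /// hypothetical `no_gc_scope_depth` counter:
    ///
    /// ```ignore
    /// fn enter_no_gc_scope(&mut self) {
    ///     self.no_gc_scope_depth += 1;
    /// }
    ///
    /// fn exit_no_gc_scope(&mut self) {
    ///     self.no_gc_scope_depth -= 1;
    /// }
    ///
    /// // ...and `gc` asserts that the depth is zero before collecting.
    /// ```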
    fn enter_no_gc_scope(&mut self);

    /// Exit a no-GC scope.
    ///
    /// Dual to `enter_no_gc_scope`.
    fn exit_no_gc_scope(&mut self);

    ////////////////////////////////////////////////////////////////////////////
    // GC Object Header Methods

    /// Get a shared borrow of the `VMGcHeader` that this GC reference is
    /// pointing to.
    fn header(&self, gc_ref: &VMGcRef) -> &VMGcHeader;

    ////////////////////////////////////////////////////////////////////////////
    // GC Barriers

    /// Read barrier called every time the runtime clones a GC reference.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn clone_gc_ref(&mut self, gc_ref: &VMGcRef) -> VMGcRef;

    /// Write barrier called whenever the runtime is nulling out a GC reference.
    ///
    /// Default implemented in terms of the `write_gc_ref` barrier.
    ///
    /// If an `externref` is reclaimed, then its associated entry in the
    /// `host_data_table` should be removed.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    ///
    /// The given `gc_ref` should not be used again.
    fn drop_gc_ref(&mut self, host_data_table: &mut ExternRefHostDataTable, gc_ref: VMGcRef) {
        let mut dest = Some(gc_ref);
        self.write_gc_ref(host_data_table, &mut dest, None);
    }

    /// Write barrier called every time the runtime overwrites a GC reference.
    ///
    /// The `source` is a borrowed GC reference, and should not have been cloned
    /// already for this write operation. This allows implementations to fuse
    /// the `source`'s read barrier into this write barrier.
    ///
    /// If an `externref` is reclaimed, then its associated entry in the
    /// `host_data_table` should be removed.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given heap for
    /// both the `source` and `destination`. Failure to do so is memory safe,
    /// but may result in general failures such as panics or incorrect results.
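    ///
    /// A reference-counting sketch of the fused barrier (`inc_ref` and
    /// `dec_ref_and_maybe_dealloc` are hypothetical helpers, not part of this
    /// trait):
    ///
    /// ```ignore
    /// fn write_gc_ref(
    ///     &mut self,
    ///     host_data_table: &mut ExternRefHostDataTable,
    ///     destination: &mut Option<VMGcRef>,
    ///     source: Option<&VMGcRef>,
    /// ) {
    ///     // Fused read barrier: bump the source's count rather than
    ///     // requiring the caller to clone it up front.
    ///     if let Some(src) = source {
    ///         self.inc_ref(src);
    ///     }
    ///     // If the old destination was the last reference to an
    ///     // `externref`, free its entry in the host data table.
    ///     if let Some(old) = destination.take() {
    ///         self.dec_ref_and_maybe_dealloc(host_data_table, old);
    ///     }
    ///     *destination = source.map(|r| r.unchecked_copy());
    /// }
    /// ```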
    fn write_gc_ref(
        &mut self,
        host_data_table: &mut ExternRefHostDataTable,
        destination: &mut Option<VMGcRef>,
        source: Option<&VMGcRef>,
    );

    /// Read barrier called whenever a GC reference is passed from the runtime
    /// to Wasm: an argument to a host-to-Wasm call, or a return from a
    /// Wasm-to-host call.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn expose_gc_ref_to_wasm(&mut self, gc_ref: VMGcRef);

    /// Predicate invoked before calling into or returning to Wasm to determine
    /// whether we should GC first.
    ///
    /// `num_gc_refs` is the number of non-`i31ref` GC references that will be
    /// passed into Wasm.
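    ///
    /// A hypothetical heuristic, assuming the heap tracks how many free
    /// allocation slots remain (`free_slots` is not part of this trait):
    ///
    /// ```ignore
    /// fn need_gc_before_entering_wasm(&self, num_gc_refs: NonZeroUsize) -> bool {
    ///     // Collect first if we cannot guarantee space to root every
    ///     // reference we are about to pass into Wasm.
    ///     num_gc_refs.get() > self.free_slots()
    /// }
    /// ```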
    fn need_gc_before_entering_wasm(&self, num_gc_refs: NonZeroUsize) -> bool;

    ////////////////////////////////////////////////////////////////////////////
    // `externref` Methods

    /// Allocate a `VMExternRef` associated with the given host data ID.
    ///
    /// Return values:
    ///
    /// * `Ok(Some(_))`: The allocation was successful.
    ///
    /// * `Ok(None)`: There is currently no available space for this
    ///   allocation. The caller should call `self.gc()`, run the GC to
    ///   completion so the collector can reclaim space, and then try allocating
    ///   again, as in the sketch below.
    ///
    /// * `Err(_)`: The collector cannot satisfy this allocation request, and
    ///   would not be able to even after the caller were to trigger a
    ///   collection. This could be because, for example, the requested
    ///   allocation is larger than this collector's implementation limit for
    ///   object size.
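    ///
    /// A caller-side retry sketch (`heap`, `roots`, and `host_data_table` are
    /// assumed to come from the surrounding store):
    ///
    /// ```ignore
    /// let externref = match heap.alloc_externref(host_data_id)? {
    ///     // Fast path: the allocation succeeded.
    ///     Some(x) => x,
    ///     // Slow path: run a full GC to reclaim space, then retry once.
    ///     None => {
    ///         heap.gc(roots, host_data_table).collect();
    ///         heap.alloc_externref(host_data_id)?
    ///             .ok_or_else(|| anyhow::anyhow!("out of memory"))?
    ///     }
    /// };
    /// ```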
    fn alloc_externref(&mut self, host_data: ExternRefHostDataId) -> Result<Option<VMExternRef>>;

    /// Get the host data ID associated with the given `externref`.
    ///
    /// Callers should pass a valid `externref` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn externref_host_data(&self, externref: &VMExternRef) -> ExternRefHostDataId;

    ////////////////////////////////////////////////////////////////////////////
    // Garbage Collection Methods

    /// Start a new garbage collection process.
    ///
    /// The given `roots` are GC roots and should not be collected (nor anything
    /// transitively reachable from them).
    ///
    /// Upon reclaiming an `externref`, its associated entry in the
    /// `host_data_table` is removed.
    ///
    /// Callers should pass valid GC roots that belong to this heap, and the
    /// host data table associated with this heap's `externref`s. Failure to do
    /// so is memory safe, but may result in general failures such as panics or
    /// incorrect results.
    ///
    /// This method should panic if we are in a no-GC scope.
    fn gc<'a>(
        &'a mut self,
        roots: GcRootsIter<'a>,
        host_data_table: &'a mut ExternRefHostDataTable,
    ) -> Box<dyn GarbageCollection<'a> + 'a>;

    ////////////////////////////////////////////////////////////////////////////
    // JIT-Code Interaction Methods

    /// Get the GC heap's base pointer.
    ///
    /// # Safety
    ///
    /// The memory region
    ///
    /// ```ignore
    /// self.vmctx_gc_heap_base..self.vmctx_gc_heap_base + self.vmctx_gc_heap_bound
    /// ```
    ///
    /// must be the GC heap region, and must remain valid for JIT code as long
    /// as `self` is not dropped.
    unsafe fn vmctx_gc_heap_base(&self) -> *mut u8;

    /// Get the GC heap's bound.
    ///
    /// # Safety
    ///
    /// The memory region
    ///
    /// ```ignore
    /// self.vmctx_gc_heap_base..self.vmctx_gc_heap_base + self.vmctx_gc_heap_bound
    /// ```
    ///
    /// must be the GC heap region, and must remain valid for JIT code as long
    /// as `self` is not dropped.
    unsafe fn vmctx_gc_heap_bound(&self) -> usize;

    /// Get the pointer that will be stored in the `VMContext::gc_heap_data`
    /// field and be accessible from JIT code via collaboration with the
    /// corresponding `GcCompiler` trait.
    ///
    /// # Safety
    ///
    /// The returned pointer, if any, must remain valid as long as `self` is not
    /// dropped.
    unsafe fn vmctx_gc_heap_data(&self) -> *mut u8;

    ////////////////////////////////////////////////////////////////////////////
    // Recycling GC Heap Methods

    /// Reset this heap.
    ///
    /// Calling this method disassociates this heap from the store that it has
    /// been associated with, making it available to be associated with a new
    /// store.
    ///
    /// This should refill free lists, reset bump pointers, etc., as if nothing
    /// were allocated in this heap (because nothing is allocated in this heap
    /// anymore).
    ///
    /// This should retain any allocated memory from the global allocator and
    /// any virtual memory mappings.
    ///
    /// This method is only used with the pooling allocator.
    #[cfg(feature = "pooling-allocator")]
    fn reset(&mut self);
}

/// A list of GC roots.
///
/// This is effectively a builder for a `GcRootsIter` that will be given to a GC
/// heap when it is time to perform garbage collection.
#[derive(Default)]
pub struct GcRootsList(Vec<RawGcRoot>);

// Ideally these `*mut`s would be `&mut`s and we wouldn't need as much of this
// machinery around `GcRootsList`, `RawGcRoot`, `GcRoot`, and `GcRootsIter` but
// if we try that then we run into two different kinds of lifetime issues:
//
// 1. When collecting the various roots from a `&mut StoreOpaque`, we borrow
//    from `self` to push new GC roots onto the roots list. But then we want to
//    call helper methods like `self.for_each_global(...)`, but we can't because
//    there are active borrows of `self` preventing it.
//
// 2. We want to reuse the roots list and its backing storage across GCs, rather
//    than reallocate on every GC. But the only place for the roots list to live
//    such that it is easily reusable across GCs is in the store itself. But the
//    contents of the roots list (when it is non-empty, during GCs) borrow from
//    the store, which creates self-references.
#[derive(Clone, Copy)]
enum RawGcRoot {
    Stack(SendSyncPtr<u64>),
    NonStack(SendSyncPtr<VMGcRef>),
}

impl GcRootsList {
    /// Add a GC root that is inside a Wasm stack frame to this list.
    #[inline]
    pub unsafe fn add_wasm_stack_root(&mut self, ptr_to_root: SendSyncPtr<u64>) {
        log::trace!(
            "Adding Wasm stack root: {:#p}",
            VMGcRef::from_r64(*ptr_to_root.as_ref()).unwrap().unwrap()
        );
        self.0.push(RawGcRoot::Stack(ptr_to_root));
    }

    /// Add a GC root to this list.
    #[inline]
    pub unsafe fn add_root(&mut self, ptr_to_root: SendSyncPtr<VMGcRef>) {
        log::trace!(
            "Adding non-stack root: {:#p}",
            ptr_to_root.as_ref().unchecked_copy()
        );
        self.0.push(RawGcRoot::NonStack(ptr_to_root))
    }

    /// Get an iterator over all roots in this list.
    ///
    /// # Safety
    ///
    /// Callers must ensure that all the pointers to GC roots that have been
    /// added to this list are valid for the duration of the `'a` lifetime.
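    ///
    /// A sketch of the expected call pattern (`roots_list`, `heap`,
    /// `ptr_to_global_ref`, and `host_data_table` are hypothetical values
    /// from the surrounding store):
    ///
    /// ```ignore
    /// // Push a pointer to every root the store knows about, then hand the
    /// // iterator to the heap for collection.
    /// unsafe {
    ///     roots_list.add_root(ptr_to_global_ref);
    ///     heap.gc(roots_list.iter(), host_data_table).collect();
    /// }
    /// roots_list.clear();
    /// ```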
    #[inline]
    pub unsafe fn iter<'a>(&'a mut self) -> GcRootsIter<'a> {
        GcRootsIter {
            list: self,
            index: 0,
        }
    }

    /// Is this list empty?
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Clear this GC roots list.
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear();
    }
}

/// An iterator over all the roots in a `GcRootsList`.
pub struct GcRootsIter<'a> {
    list: &'a mut GcRootsList,
    index: usize,
}

impl<'a> Iterator for GcRootsIter<'a> {
    type Item = GcRoot<'a>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let root = GcRoot {
            raw: self.list.0.get(self.index).copied()?,
            _phantom: std::marker::PhantomData,
        };
        self.index += 1;
        Some(root)
    }
}

/// A GC root.
///
/// This is, effectively, a mutable reference to a `VMGcRef`.
///
/// Collector implementations should update the `VMGcRef` if they move the
/// `VMGcRef`'s referent during the course of a GC.
pub struct GcRoot<'a> {
    raw: RawGcRoot,
    _phantom: std::marker::PhantomData<&'a mut VMGcRef>,
}

impl GcRoot<'_> {
    /// Is this root from inside a Wasm stack frame?
    #[inline]
    pub fn is_on_wasm_stack(&self) -> bool {
        matches!(self.raw, RawGcRoot::Stack(_))
    }

    /// Get this GC root.
    ///
    /// Does NOT run GC barriers.
    #[inline]
    pub fn get(&self) -> VMGcRef {
        match self.raw {
            RawGcRoot::NonStack(ptr) => unsafe { std::ptr::read(ptr.as_ptr()) },
            RawGcRoot::Stack(ptr) => unsafe {
                let r64 = std::ptr::read(ptr.as_ptr());
                VMGcRef::from_r64(r64)
                    .expect("valid r64")
                    .expect("non-null")
            },
        }
    }

    /// Set this GC root.
    ///
    /// Does NOT run GC barriers.
    ///
    /// Collector implementations should use this method to update GC root
    /// pointers after the collector moves the GC object that the root is
    /// referencing.
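    ///
    /// A moving-collector sketch (`forwarded_address_of` is a hypothetical
    /// forwarding-table lookup, not part of this module):
    ///
    /// ```ignore
    /// for mut root in roots {
    ///     let old = root.get();
    ///     if let Some(new) = self.forwarded_address_of(&old) {
    ///         // The referent moved during this GC; point the root at its
    ///         // new location.
    ///         root.set(new);
    ///     }
    /// }
    /// ```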
    pub fn set(&mut self, new_ref: VMGcRef) {
        match self.raw {
            RawGcRoot::NonStack(ptr) => unsafe {
                std::ptr::write(ptr.as_ptr(), new_ref);
            },
            RawGcRoot::Stack(ptr) => unsafe {
                let r64 = new_ref.into_r64();
                std::ptr::write(ptr.as_ptr(), r64);
            },
        }
    }
}

/// A garbage collection process.
///
/// Implementations define the `collect_increment` method, and then consumers
/// can either use
///
/// * `GarbageCollection::collect` for synchronous code, or
///
/// * `collect_async(Box<dyn GarbageCollection>)` for async code.
///
/// When using fuel and/or epochs, consumers can also use `collect_increment`
/// directly and choose to abandon further execution in this GC's heap's whole
/// store if the GC is taking too long to complete.
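///
/// A sketch of that last pattern, assuming a hypothetical `out_of_budget`
/// predicate supplied by the embedder:
///
/// ```ignore
/// loop {
///     match gc.collect_increment() {
///         GcProgress::Complete => break,
///         // Give up on this store rather than letting the GC run
///         // unboundedly.
///         GcProgress::Continue if out_of_budget() => bail!("GC budget exceeded"),
///         GcProgress::Continue => continue,
///     }
/// }
/// ```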
pub trait GarbageCollection<'a>: Send + Sync {
    /// Perform an incremental slice of this garbage collection process.
    ///
    /// Upon completion of the slice, a `GcProgress` is returned which informs
    /// the caller whether to continue driving this GC process forward and
    /// executing more slices (`GcProgress::Continue`) or whether the GC process
    /// has finished (`GcProgress::Complete`).
    ///
    /// The mutator does *not* run in between increments. This method exists
    /// solely to allow cooperative yielding.
    fn collect_increment(&mut self) -> GcProgress;

    /// Run this GC process to completion.
    ///
    /// Keeps calling `collect_increment` in a loop until the GC process is
    /// complete.
    fn collect(&mut self) {
        loop {
            match self.collect_increment() {
                GcProgress::Continue => continue,
                GcProgress::Complete => return,
            }
        }
    }
}

/// The result of doing an incremental amount of GC.
pub enum GcProgress {
    /// There is still more work to do.
    Continue,
    /// The GC is complete.
    Complete,
}

/// Asynchronously run the given garbage collection process to completion,
/// cooperatively yielding back to the event loop after each increment of work.
#[cfg(feature = "async")]
pub async fn collect_async<'a>(mut collection: Box<dyn GarbageCollection<'a> + 'a>) {
    loop {
        match collection.collect_increment() {
            GcProgress::Continue => crate::Yield::new().await,
            GcProgress::Complete => return,
        }
    }
}

#[cfg(all(test, feature = "async"))]
mod collect_async_tests {
    use super::*;

    #[test]
    fn is_send_and_sync() {
        fn _assert_send_sync<T: Send + Sync>(_: T) {}

        fn _foo<'a>(collection: Box<dyn GarbageCollection<'a>>) {
            _assert_send_sync(collect_async(collection));
        }
    }
}
503}