wasmtime_runtime/gc/gc_runtime.rs
//! Traits for abstracting over our different garbage collectors.

use crate::{
    ExternRefHostDataId, ExternRefHostDataTable, SendSyncPtr, VMExternRef, VMGcHeader, VMGcRef,
};
use anyhow::Result;
use std::{any::Any, num::NonZeroUsize};

/// Trait for integrating a garbage collector with the runtime.
///
/// This trait is responsible for:
///
/// * GC barriers used by runtime code (as opposed to compiled Wasm code)
///
/// * Creating and managing GC heaps for individual stores
///
/// * Running garbage collection
///
/// # Safety
///
/// The collector, its GC heaps, and GC barriers, when taken together as a
/// whole, must be safe. Additionally, they must work with the GC barriers
/// emitted into compiled Wasm code via the collector's corresponding
/// `GcCompiler` implementation. That is, if callers only call safe methods on
/// this trait (while pairing it with its associated `GcCompiler`, `GcHeap`,
/// etc.) and uphold all the documented safety invariants of this trait's
/// unsafe methods, then it must be impossible for callers to violate memory
/// safety. Implementations of this trait must not add new safety invariants,
/// beyond those already documented in this trait's interface, that callers
/// need to uphold.
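///
/// For example, a collector might be plugged into the runtime like this (a
/// minimal sketch; `MyCollector` and `MyHeap` are hypothetical implementations,
/// not types provided by this crate):
///
/// ```ignore
/// struct MyCollector;
///
/// unsafe impl GcRuntime for MyCollector {
///     fn new_gc_heap(&self) -> Result<Box<dyn GcHeap>> {
///         // Reserve and map a fresh heap region for a new store.
///         let heap = MyHeap::new()?;
///         Ok(Box::new(heap))
///     }
/// }
/// ```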
pub unsafe trait GcRuntime: 'static + Send + Sync {
    /// Construct a new GC heap.
    fn new_gc_heap(&self) -> Result<Box<dyn GcHeap>>;
}

/// A heap that manages garbage-collected objects.
///
/// Each `wasmtime::Store` is associated with a single `GcHeap`, and a `GcHeap`
/// is only ever used with one store at a time, but `GcHeap`s may be reused with
/// new stores after their original stores are dropped. The `reset` method will
/// be called in between each such reuse. (This reuse allows for better
/// integration with the pooling allocator.)
///
/// If a `GcHeap` mapped any memory, its `Drop` implementation should unmap that
/// memory.
///
/// # Safety
///
/// The trait methods below are all safe: implementations of this trait must
/// ensure that these methods cannot be misused to create memory unsafety. The
/// expectation is that -- given that `VMGcRef` is a newtype over an index --
/// implementations perform similar tricks as Wasm linear memory
/// implementations. The heap should internally be a contiguous region of memory
/// and `VMGcRef` indices into the heap must be bounds checked (explicitly or
/// implicitly via virtual memory tricks).
///
/// Furthermore, if heap corruption occurs because (for example) a `VMGcRef`
/// from a different heap is used with this heap, then that corruption must be
/// limited to within this heap. Every heap is a mini sandbox. It follows that
/// native pointers should never be written into or read out from the GC heap,
/// since that could spread corruption from inside the GC heap out to the native
/// host heap. The host data for an `externref`, therefore, is stored in a side
/// table (`ExternRefHostDataTable`) and never inside the heap. Only an id
/// referencing a slot in that table should ever be written into the GC heap.
///
/// These constraints give us great amounts of safety compared to working with
/// raw pointers. The worst that could happen is corruption local to the heap
/// and a panic, or perhaps reading stale heap data from a previous Wasm
/// instance. A corrupt `GcHeap` can *never* result in the native host's
/// corruption.
///
/// The downside is that we are introducing `heap_base + index` computations and
/// bounds checking to access GC memory, adding performance overhead. This is
/// deemed to be a worthwhile trade-off. Furthermore, it isn't even a clear-cut
/// performance degradation, since this allows us to use 32-bit "pointers",
/// giving us more compact data representations and the improved cache
/// utilization that implies.
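///
/// For illustration, a bounds-checked read out of the heap might look like the
/// following sketch (the `data` field and `VMGcRef::index` accessor are
/// hypothetical, not part of this trait):
///
/// ```ignore
/// fn read_u32(&self, gc_ref: &VMGcRef, offset: usize) -> u32 {
///     // An out-of-bounds index can only panic here; it can never read
///     // memory outside of this heap.
///     let start = gc_ref.index() + offset;
///     let bytes = &self.data[start..start + 4];
///     u32::from_le_bytes(bytes.try_into().unwrap())
/// }
/// ```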
pub unsafe trait GcHeap: 'static + Send + Sync {
    ////////////////////////////////////////////////////////////////////////////
    // `Any` methods

    /// Get this heap as an `&Any`.
    fn as_any(&self) -> &dyn Any;

    /// Get this heap as an `&mut Any`.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    ////////////////////////////////////////////////////////////////////////////
    // No-GC Scope Methods

    /// Enter a no-GC scope.
    ///
    /// Calling the `gc` method when we are inside a no-GC scope should panic.
    ///
    /// We can enter multiple, nested no-GC scopes and this method should
    /// account for that.
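    ///
    /// A simple implementation might track nesting with a depth counter that
    /// `gc` then asserts is zero (a sketch; the `no_gc_scope_depth` field is
    /// hypothetical):
    ///
    /// ```ignore
    /// fn enter_no_gc_scope(&mut self) {
    ///     self.no_gc_scope_depth += 1;
    /// }
    ///
    /// fn exit_no_gc_scope(&mut self) {
    ///     self.no_gc_scope_depth -= 1;
    /// }
    /// ```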
    fn enter_no_gc_scope(&mut self);

    /// Exit a no-GC scope.
    ///
    /// Dual to `enter_no_gc_scope`.
    fn exit_no_gc_scope(&mut self);

    ////////////////////////////////////////////////////////////////////////////
    // GC Object Header Methods

    /// Get a shared borrow of the `VMGcHeader` that this GC reference is
    /// pointing to.
    fn header(&self, gc_ref: &VMGcRef) -> &VMGcHeader;

    ////////////////////////////////////////////////////////////////////////////
    // GC Barriers

    /// Read barrier called every time the runtime clones a GC reference.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn clone_gc_ref(&mut self, gc_ref: &VMGcRef) -> VMGcRef;

    /// Write barrier called whenever the runtime is nulling out a GC reference.
    ///
    /// Default implemented in terms of the `write_gc_ref` barrier.
    ///
    /// If an `externref` is reclaimed, then its associated entry in the
    /// `host_data_table` should be removed.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    ///
    /// The given `gc_ref` should not be used again.
    fn drop_gc_ref(&mut self, host_data_table: &mut ExternRefHostDataTable, gc_ref: VMGcRef) {
        let mut dest = Some(gc_ref);
        self.write_gc_ref(host_data_table, &mut dest, None);
    }

    /// Write barrier called every time the runtime overwrites a GC reference.
    ///
    /// The `source` is a borrowed GC reference, and should not have been cloned
    /// already for this write operation. This allows implementations to fuse
    /// the `source`'s read barrier into this write barrier.
    ///
    /// If an `externref` is reclaimed, then its associated entry in the
    /// `host_data_table` should be removed.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given heap for
    /// both the `source` and `destination`. Failure to do so is memory safe,
    /// but may result in general failures such as panics or incorrect results.
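    ///
    /// For a reference-counting collector, the fused barrier might look
    /// roughly like this sketch (`inc_ref` and `dec_ref_and_maybe_reclaim` are
    /// hypothetical helpers, with reclamation removing any `host_data_table`
    /// entry):
    ///
    /// ```ignore
    /// fn write_gc_ref(
    ///     &mut self,
    ///     host_data_table: &mut ExternRefHostDataTable,
    ///     destination: &mut Option<VMGcRef>,
    ///     source: Option<&VMGcRef>,
    /// ) {
    ///     // Increment the source's count first, in case `source` and
    ///     // `destination` refer to the same object.
    ///     if let Some(src) = source {
    ///         self.inc_ref(src);
    ///     }
    ///     if let Some(old) = destination.take() {
    ///         self.dec_ref_and_maybe_reclaim(host_data_table, old);
    ///     }
    ///     *destination = source.map(|src| src.unchecked_copy());
    /// }
    /// ```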
    fn write_gc_ref(
        &mut self,
        host_data_table: &mut ExternRefHostDataTable,
        destination: &mut Option<VMGcRef>,
        source: Option<&VMGcRef>,
    );

    /// Read barrier called whenever a GC reference is passed from the runtime
    /// to Wasm: an argument to a host-to-Wasm call, or a return from a
    /// Wasm-to-host call.
    ///
    /// Callers should pass a valid `VMGcRef` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn expose_gc_ref_to_wasm(&mut self, gc_ref: VMGcRef);

    /// Predicate invoked before calling into or returning to Wasm to determine
    /// whether we should GC first.
    ///
    /// `num_gc_refs` is the number of non-`i31ref` GC references that will be
    /// passed into Wasm.
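    ///
    /// For example, a collector that buffers exposed references in an
    /// over-approximated root set might answer based on the remaining capacity
    /// of that set (a sketch; the `exposed` set is hypothetical):
    ///
    /// ```ignore
    /// fn need_gc_before_entering_wasm(&self, num_gc_refs: NonZeroUsize) -> bool {
    ///     num_gc_refs.get() > self.exposed.capacity() - self.exposed.len()
    /// }
    /// ```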
    fn need_gc_before_entering_wasm(&self, num_gc_refs: NonZeroUsize) -> bool;

    ////////////////////////////////////////////////////////////////////////////
    // `externref` Methods

    /// Allocate a `VMExternRef` associated with the given host data id.
    ///
    /// Return values:
    ///
    /// * `Ok(Some(_))`: The allocation was successful.
    ///
    /// * `Ok(None)`: There is currently no available space for this
    ///   allocation. The caller should call `self.gc()`, run the GC to
    ///   completion so the collector can reclaim space, and then try allocating
    ///   again (see the sketch below).
    ///
    /// * `Err(_)`: The collector cannot satisfy this allocation request, and
    ///   would not be able to even after the caller were to trigger a
    ///   collection. This could be because, for example, the requested
    ///   allocation is larger than this collector's implementation limit for
    ///   object size.
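    ///
    /// A caller-side retry loop might look like this sketch (assuming a
    /// `roots` list and `host_data_table` are in scope):
    ///
    /// ```ignore
    /// let externref = match heap.alloc_externref(host_data_id)? {
    ///     Some(x) => x,
    ///     None => {
    ///         // No space: run a full GC, then retry once.
    ///         heap.gc(unsafe { roots.iter() }, host_data_table).collect();
    ///         heap.alloc_externref(host_data_id)?
    ///             .ok_or_else(|| anyhow::anyhow!("out of GC heap space"))?
    ///     }
    /// };
    /// ```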
    fn alloc_externref(&mut self, host_data: ExternRefHostDataId) -> Result<Option<VMExternRef>>;

    /// Get the host data ID associated with the given `externref`.
    ///
    /// Callers should pass a valid `externref` that belongs to the given
    /// heap. Failure to do so is memory safe, but may result in general
    /// failures such as panics or incorrect results.
    fn externref_host_data(&self, externref: &VMExternRef) -> ExternRefHostDataId;

    ////////////////////////////////////////////////////////////////////////////
    // Garbage Collection Methods

    /// Start a new garbage collection process.
    ///
    /// The given `roots` are GC roots and should not be collected (nor should
    /// anything transitively reachable from them).
    ///
    /// Upon reclaiming an `externref`, its associated entry in the
    /// `host_data_table` is removed.
    ///
    /// Callers should pass valid GC roots that belong to this heap, and the
    /// host data table associated with this heap's `externref`s. Failure to do
    /// so is memory safe, but may result in general failures such as panics or
    /// incorrect results.
    ///
    /// This method should panic if we are in a no-GC scope.
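    ///
    /// The returned process can be driven to completion synchronously via
    /// `GarbageCollection::collect`, incrementally via `collect_increment`, or
    /// cooperatively via `collect_async`. For example (a sketch):
    ///
    /// ```ignore
    /// let mut collection = heap.gc(unsafe { roots.iter() }, host_data_table);
    /// collection.collect();
    /// ```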
    fn gc<'a>(
        &'a mut self,
        roots: GcRootsIter<'a>,
        host_data_table: &'a mut ExternRefHostDataTable,
    ) -> Box<dyn GarbageCollection<'a> + 'a>;

    ////////////////////////////////////////////////////////////////////////////
    // JIT-Code Interaction Methods

    /// Get the GC heap's base pointer.
    ///
    /// # Safety
    ///
    /// The memory region
    ///
    /// ```ignore
    /// self.vmctx_gc_heap_base..self.vmctx_gc_heap_base + self.vmctx_gc_heap_bound
    /// ```
    ///
    /// must be the GC heap region, and must remain valid for JIT code as long
    /// as `self` is not dropped.
    unsafe fn vmctx_gc_heap_base(&self) -> *mut u8;

    /// Get the GC heap's bound.
    ///
    /// # Safety
    ///
    /// The memory region
    ///
    /// ```ignore
    /// self.vmctx_gc_heap_base..self.vmctx_gc_heap_base + self.vmctx_gc_heap_bound
    /// ```
    ///
    /// must be the GC heap region, and must remain valid for JIT code as long
    /// as `self` is not dropped.
    unsafe fn vmctx_gc_heap_bound(&self) -> usize;

    /// Get the pointer that will be stored in the `VMContext::gc_heap_data`
    /// field and be accessible from JIT code via collaboration with the
    /// corresponding `GcCompiler` trait.
    ///
    /// # Safety
    ///
    /// The returned pointer, if any, must remain valid as long as `self` is not
    /// dropped.
    unsafe fn vmctx_gc_heap_data(&self) -> *mut u8;

    ////////////////////////////////////////////////////////////////////////////
    // Recycling GC Heap Methods

    /// Reset this heap.
    ///
    /// Calling this method disassociates this heap from the store that it has
    /// been associated with, making it available to be associated with a new
    /// store.
    ///
    /// This should refill free lists, reset bump pointers, etc., as if nothing
    /// were allocated in this heap (because nothing is allocated in this heap
    /// anymore).
    ///
    /// This should retain any memory allocated from the global allocator and
    /// any virtual memory mappings.
    ///
    /// This method is only used with the pooling allocator.
    #[cfg(feature = "pooling-allocator")]
    fn reset(&mut self);
}

/// A list of GC roots.
///
/// This is effectively a builder for a `GcRootsIter` that will be given to a GC
/// heap when it is time to perform garbage collection.
#[derive(Default)]
pub struct GcRootsList(Vec<RawGcRoot>);

// Ideally these `*mut`s would be `&mut`s and we wouldn't need as much of this
// machinery around `GcRootsList`, `RawGcRoot`, `GcRoot`, and `GcRootsIter`, but
// if we try that then we run into two different kinds of lifetime issues:
//
// 1. When collecting the various roots from a `&mut StoreOpaque`, we borrow
//    from `self` to push new GC roots onto the roots list. But then we want to
//    call helper methods like `self.for_each_global(...)`, but we can't because
//    there are active borrows of `self` preventing it.
//
// 2. We want to reuse the roots list and its backing storage across GCs, rather
//    than reallocate on every GC. But the only place for the roots list to live
//    such that it is easily reusable across GCs is in the store itself. But the
//    contents of the roots list (when it is non-empty, during GCs) borrow from
//    the store, which creates self-references.
#[derive(Clone, Copy)]
enum RawGcRoot {
    Stack(SendSyncPtr<u64>),
    NonStack(SendSyncPtr<VMGcRef>),
}

impl GcRootsList {
    /// Add a GC root that is inside a Wasm stack frame to this list.
    ///
    /// # Safety
    ///
    /// The given pointer must point to a valid, non-null `r64` GC reference on
    /// the Wasm stack, and must remain valid for as long as this root is in
    /// the list.
    #[inline]
    pub unsafe fn add_wasm_stack_root(&mut self, ptr_to_root: SendSyncPtr<u64>) {
        log::trace!(
            "Adding Wasm stack root: {:#p}",
            VMGcRef::from_r64(*ptr_to_root.as_ref()).unwrap().unwrap()
        );
        self.0.push(RawGcRoot::Stack(ptr_to_root));
    }

    /// Add a GC root to this list.
    ///
    /// # Safety
    ///
    /// The given pointer must point to a valid `VMGcRef` and must remain valid
    /// for as long as this root is in the list.
    #[inline]
    pub unsafe fn add_root(&mut self, ptr_to_root: SendSyncPtr<VMGcRef>) {
        log::trace!(
            "Adding non-stack root: {:#p}",
            ptr_to_root.as_ref().unchecked_copy()
        );
        self.0.push(RawGcRoot::NonStack(ptr_to_root));
    }

    /// Get an iterator over all roots in this list.
    ///
    /// # Safety
    ///
    /// Callers must ensure that all the pointers to GC roots that have been
    /// added to this list are valid for the duration of the `'a` lifetime.
    #[inline]
    pub unsafe fn iter<'a>(&'a mut self) -> GcRootsIter<'a> {
        GcRootsIter {
            list: self,
            index: 0,
        }
    }

    /// Is this list empty?
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Clear this GC roots list.
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear();
    }
}

/// An iterator over all the roots in a `GcRootsList`.
pub struct GcRootsIter<'a> {
    list: &'a mut GcRootsList,
    index: usize,
}

impl<'a> Iterator for GcRootsIter<'a> {
    type Item = GcRoot<'a>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let root = GcRoot {
            raw: self.list.0.get(self.index).copied()?,
            _phantom: std::marker::PhantomData,
        };
        self.index += 1;
        Some(root)
    }
}

/// A GC root.
///
/// This is, effectively, a mutable reference to a `VMGcRef`.
///
/// Collector implementations should update the `VMGcRef` if they move the
/// `VMGcRef`'s referent during the course of a GC.
pub struct GcRoot<'a> {
    raw: RawGcRoot,
    _phantom: std::marker::PhantomData<&'a mut VMGcRef>,
}

impl GcRoot<'_> {
    /// Is this root from inside a Wasm stack frame?
    #[inline]
    pub fn is_on_wasm_stack(&self) -> bool {
        matches!(self.raw, RawGcRoot::Stack(_))
    }

    /// Get this GC root.
    ///
    /// Does NOT run GC barriers.
    #[inline]
    pub fn get(&self) -> VMGcRef {
        match self.raw {
            RawGcRoot::NonStack(ptr) => unsafe { std::ptr::read(ptr.as_ptr()) },
            RawGcRoot::Stack(ptr) => unsafe {
                let r64 = std::ptr::read(ptr.as_ptr());
                VMGcRef::from_r64(r64)
                    .expect("valid r64")
                    .expect("non-null")
            },
        }
    }

    /// Set this GC root.
    ///
    /// Does NOT run GC barriers.
    ///
    /// Collector implementations should use this method to update GC root
    /// pointers after the collector moves the GC object that the root is
    /// referencing.
    pub fn set(&mut self, new_ref: VMGcRef) {
        match self.raw {
            RawGcRoot::NonStack(ptr) => unsafe {
                std::ptr::write(ptr.as_ptr(), new_ref);
            },
            RawGcRoot::Stack(ptr) => unsafe {
                let r64 = new_ref.into_r64();
                std::ptr::write(ptr.as_ptr(), r64);
            },
        }
    }
}

/// A garbage collection process.
///
/// Implementations define the `collect_increment` method, and then consumers
/// can either use
///
/// * `GarbageCollection::collect` for synchronous code, or
///
/// * `collect_async(Box<dyn GarbageCollection>)` for async code.
///
/// When using fuel and/or epochs, consumers can also call `collect_increment`
/// directly and choose to abandon further execution in the store that owns
/// this GC's heap if the GC is taking too long to complete (see the sketch
/// below).
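///
/// For example, a fuel-aware driver might look like this (a sketch; the
/// `out_of_fuel` check is hypothetical):
///
/// ```ignore
/// while let GcProgress::Continue = collection.collect_increment() {
///     if out_of_fuel() {
///         // Abandon this GC (and its store) rather than keep burning fuel.
///         break;
///     }
/// }
/// ```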
pub trait GarbageCollection<'a>: Send + Sync {
    /// Perform an incremental slice of this garbage collection process.
    ///
    /// Upon completion of the slice, a `GcProgress` is returned which informs
    /// the caller whether to continue driving this GC process forward and
    /// executing more slices (`GcProgress::Continue`) or whether the GC process
    /// has finished (`GcProgress::Complete`).
    ///
    /// The mutator does *not* run in between increments. This method exists
    /// solely to allow cooperative yielding.
    fn collect_increment(&mut self) -> GcProgress;

    /// Run this GC process to completion.
    ///
    /// Keeps calling `collect_increment` in a loop until the GC process is
    /// complete.
    fn collect(&mut self) {
        loop {
            match self.collect_increment() {
                GcProgress::Continue => continue,
                GcProgress::Complete => return,
            }
        }
    }
}

/// The result of doing an incremental amount of GC.
pub enum GcProgress {
    /// There is still more work to do.
    Continue,
    /// The GC is complete.
    Complete,
}

/// Asynchronously run the given garbage collection process to completion,
/// cooperatively yielding back to the event loop after each increment of work.
#[cfg(feature = "async")]
pub async fn collect_async<'a>(mut collection: Box<dyn GarbageCollection<'a> + 'a>) {
    loop {
        match collection.collect_increment() {
            GcProgress::Continue => crate::Yield::new().await,
            GcProgress::Complete => return,
        }
    }
}

#[cfg(all(test, feature = "async"))]
mod collect_async_tests {
    use super::*;

    #[test]
    fn is_send_and_sync() {
        fn _assert_send_sync<T: Send + Sync>(_: T) {}

        fn _foo<'a>(collection: Box<dyn GarbageCollection<'a>>) {
            _assert_send_sync(collect_async(collection));
        }
    }
}
503}