polkavm_common/zygote.rs

//! This module defines the ABI boundary between the host and the zygote.
//!
//! In general everything here can be modified at will, provided the zygote
//! is recompiled.

use core::cell::UnsafeCell;
use core::sync::atomic::{AtomicBool, AtomicI64, AtomicU32, AtomicU64};

// Due to the limitations of Rust's compile time constant evaluation machinery
// we need to define this struct multiple times.
macro_rules! define_address_table {
    (
        $name_raw:ident, $name_packed:ident, $name_table:ident,
        $($name:ident: $type:ty,)+
    ) => {
        #[repr(C)]
        pub struct $name_raw {
            $(pub $name: $type),+
        }

        #[derive(Copy, Clone)]
        #[repr(packed)]
        pub struct $name_packed {
            $(pub $name: u64),+
        }

        #[derive(Copy, Clone)]
        pub struct $name_table {
            $(pub $name: u64),+
        }

        impl $name_table {
            #[inline]
            pub fn from_raw(table: $name_raw) -> Self {
                Self {
                    $(
                        $name: table.$name as u64
                    ),+
                }
            }

            pub const fn from_packed(table: &$name_packed) -> Self {
                Self {
                    $(
                        $name: table.$name
                    ),+
                }
            }
        }

        static_assert!(core::mem::size_of::<$name_raw>() == core::mem::size_of::<$name_packed>());
        static_assert!(core::mem::size_of::<$name_raw>() == core::mem::size_of::<$name_table>());
    }
}
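
// The macro generates three mirror structs: a `*Raw` table holding real function
// pointers, a `#[repr(packed)]` form suitable for storing the addresses as plain
// unaligned bytes, and an unpacked table of `u64` addresses. Because `from_packed`
// is a `const fn`, a packed table can be converted at compile time. A minimal
// sketch (the `PACKED` constant here is hypothetical, not part of this module):
//
//     const TABLE: AddressTable = AddressTable::from_packed(&PACKED);
//     // At runtime, `from_raw` performs the same conversion from function pointers.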

// These are the addresses exported from the zygote.
define_address_table! {
    AddressTableRaw, AddressTablePacked, AddressTable,
    syscall_hostcall: unsafe extern "C" fn() -> !,
    syscall_trap: unsafe extern "C" fn() -> !,
    syscall_return: unsafe extern "C" fn() -> !,
    syscall_step: unsafe extern "C" fn() -> !,
    syscall_sbrk: unsafe extern "C" fn(u64) -> u32,
    syscall_not_enough_gas: unsafe extern "C" fn() -> !,
}

define_address_table! {
    ExtTableRaw, ExtTablePacked, ExtTable,
    ext_sbrk: unsafe extern "C" fn() -> !,
    ext_reset_memory: unsafe extern "C" fn() -> !,
    ext_zero_memory_chunk: unsafe extern "C" fn() -> !,
    ext_load_program: unsafe extern "C" fn() -> !,
    ext_recycle: unsafe extern "C" fn() -> !,
    ext_set_accessible_aux_size: unsafe extern "C" fn() -> !,
}

pub const FD_DUMMY_STDIN: i32 = 0;
pub const FD_LOGGER_STDOUT: i32 = 1;
pub const FD_LOGGER_STDERR: i32 = 2;
pub const FD_SHM: i32 = 3;
pub const FD_MEM: i32 = 4;
pub const FD_SOCKET: i32 = 5;
pub const FD_VMCTX: i32 = 6;
pub const FD_LIFETIME_PIPE: i32 = 7;
pub const LAST_USED_FD: i32 = FD_LIFETIME_PIPE;

/// The address where the native code starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_NATIVE_CODE: u64 = 0x100000000;

/// The address where the indirect jump table starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_JUMP_TABLE: u64 = 0x800000000;

/// The address where the return-to-host jump table vector physically resides.
pub const VM_ADDR_JUMP_TABLE_RETURN_TO_HOST: u64 = VM_ADDR_JUMP_TABLE + ((crate::abi::VM_ADDR_RETURN_TO_HOST as u64) << 3);
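// (Each jump table slot is a `u64`, so shifting the slot index left by 3
// multiplies it by 8 bytes to get its byte offset within the table.)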

/// The address of the global per-VM context struct.
pub const VM_ADDR_VMCTX: u64 = 0x400000000;

/// The address of the signal stack.
pub const VM_ADDR_SIGSTACK: u64 = 0x500000000;

/// The address of the native stack.
pub const VM_ADDR_NATIVE_STACK_LOW: u64 = 0x600000000;

/// The size of the native stack.
pub const VM_ADDR_NATIVE_STACK_SIZE: u64 = 0x4000;

/// The address of the top of the native stack.
pub const VM_ADDR_NATIVE_STACK_HIGH: u64 = VM_ADDR_NATIVE_STACK_LOW + VM_ADDR_NATIVE_STACK_SIZE;

/// Address where the shared memory is mapped.
pub const VM_ADDR_SHARED_MEMORY: u64 = 0x700000000;

/// The size of the shared memory region.
pub const VM_SHARED_MEMORY_SIZE: u64 = u32::MAX as u64;

/// The maximum number of native code bytes that can be emitted by a single VM instruction.
///
/// This does *not* affect the VM ABI and can be changed at will,
/// but should be high enough that it's never hit.
pub const VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH: u32 = 67;

/// The maximum size of the jump table, in bytes.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE: u64 = (crate::abi::VM_MAXIMUM_JUMP_TABLE_ENTRIES as u64 + 1)
    * core::mem::size_of::<u64>() as u64
    * crate::abi::VM_CODE_ADDRESS_ALIGNMENT as u64;

/// The maximum number of bytes the jump table can span in virtual memory.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE: u64 = 0x100000000 * core::mem::size_of::<u64>() as u64;
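// (That is 0x100000000 slots of 8 bytes each, i.e. enough virtual space for
// one slot per possible 32-bit guest address.)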

// TODO: Make this smaller.
/// The maximum size of the native code, in bytes.
pub const VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE: u32 = 2176 * 1024 * 1024 - 1;

#[repr(C)]
pub struct JmpBuf {
    pub rip: AtomicU64,
    pub rbx: AtomicU64,
    pub rsp: AtomicU64,
    pub rbp: AtomicU64,
    pub r12: AtomicU64,
    pub r13: AtomicU64,
    pub r14: AtomicU64,
    pub r15: AtomicU64,
}
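
// `JmpBuf` holds `rip`, `rsp`, and the callee-saved general-purpose registers
// of the System V AMD64 ABI (`rbx`, `rbp`, `r12` through `r15`): the minimal
// state needed to restore a previously saved execution context, in the spirit
// of `setjmp`/`longjmp`.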

#[repr(C)]
pub struct VmInit {
    pub stack_address: AtomicU64,
    pub stack_length: AtomicU64,
    pub vdso_address: AtomicU64,
    pub vdso_length: AtomicU64,
    pub vvar_address: AtomicU64,
    pub vvar_length: AtomicU64,

    /// Whether userfaultfd-based memory management is available.
    pub uffd_available: AtomicBool,

    /// Whether sandboxing is disabled.
    pub sandbox_disabled: AtomicBool,

    /// Whether the logger is enabled.
    pub logging_enabled: AtomicBool,
}

const MESSAGE_BUFFER_SIZE: usize = 512;

#[repr(align(64))]
pub struct CacheAligned<T>(pub T);

impl<T> core::ops::Deref for CacheAligned<T> {
    type Target = T;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> core::ops::DerefMut for CacheAligned<T> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
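
// A 64-byte alignment matches the cache line size of typical x86-64 CPUs, so
// wrapping a field in `CacheAligned` pushes it onto its own cache line and
// avoids false sharing with its neighbours; this is why `VmCtx` below places
// `CacheAligned<()>` markers in front of its hottest atomics.
static_assert!(core::mem::align_of::<CacheAligned<()>>() == 64);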

#[repr(C)]
pub struct VmCtxHeapInfo {
    pub heap_top: UnsafeCell<u64>,
    pub heap_threshold: UnsafeCell<u64>,
}

const REG_COUNT: usize = crate::program::Reg::ALL.len();

#[repr(C)]
pub struct VmCtxCounters {
    pub syscall_wait_loop_start: UnsafeCell<u64>,
    pub syscall_futex_wait: UnsafeCell<u64>,
}

#[repr(C)]
pub enum VmFd {
    None,
    Shm,
    Mem,
}

#[repr(C)]
pub struct VmMap {
    pub address: u64,
    pub length: u64,
    pub protection: u32,
    pub flags: u32,
    pub fd: VmFd,
    pub fd_offset: u64,
}

/// The virtual machine context.
///
/// This is mapped in shared memory and used by the sandbox to keep its state in,
/// as well as by the host to communicate with the sandbox.
#[allow(clippy::partial_pub_fields)]
#[repr(C)]
pub struct VmCtx {
    // NOTE: The order of fields here can matter for performance!
    _align_1: CacheAligned<()>,

    /// The current gas counter.
    pub gas: AtomicI64,

    _align_2: CacheAligned<()>,

    /// The futex used to synchronize the sandbox with the host process.
    pub futex: AtomicU32,

    /// The address to jump to.
    pub jump_into: AtomicU64,

    /// The address of the instruction currently being executed.
    pub program_counter: AtomicU32,

    /// The address of the next instruction to be executed.
    pub next_program_counter: AtomicU32,

    /// A multipurpose field:
    ///   - the hostcall number that was triggered,
    ///   - the sbrk argument,
    ///   - the sbrk return value.
    pub arg: AtomicU32,

    /// A dump of all of the registers of the VM.
    pub regs: [AtomicU64; REG_COUNT],

    /// The address of the native code to call inside of the VM process, if non-zero.
    pub next_native_program_counter: AtomicU64,

    /// The state of the program's heap.
    pub heap_info: VmCtxHeapInfo,

    pub arg2: AtomicU32,
    pub arg3: AtomicU32,

    pub tmp_reg: AtomicU64,
    pub rip: AtomicU64,

    /// Offset in shared memory to this sandbox's memory map.
    pub shm_memory_map_offset: AtomicU64,
    /// Number of maps to map.
    pub shm_memory_map_count: AtomicU64,
    /// Offset in shared memory to this sandbox's code.
    pub shm_code_offset: AtomicU64,
    /// Length of this sandbox's code.
    pub shm_code_length: AtomicU64,
    /// Offset in shared memory to this sandbox's jump table.
    pub shm_jump_table_offset: AtomicU64,
    /// Length of this sandbox's jump table, in bytes.
    pub shm_jump_table_length: AtomicU64,

    /// Address of the sysreturn routine.
    pub sysreturn_address: AtomicU64,

    /// Whether userfaultfd-based memory management is enabled.
    pub uffd_enabled: AtomicBool,

    /// Address of the base of the heap.
    pub heap_base: UnsafeCell<u32>,

    /// The initial heap growth threshold.
    pub heap_initial_threshold: UnsafeCell<u32>,

    /// The maximum heap size.
    pub heap_max_size: UnsafeCell<u32>,

    /// The page size.
    pub page_size: UnsafeCell<u32>,

    /// Performance counters. Only for debugging.
    pub counters: CacheAligned<VmCtxCounters>,

    /// One-time args used during initialization.
    pub init: VmInit,

    /// Length of the message in the message buffer.
    pub message_length: UnsafeCell<u32>,
    /// A buffer used to marshal error messages.
    pub message_buffer: UnsafeCell<[u8; MESSAGE_BUFFER_SIZE]>,
}

// Make sure it fits within a single page on amd64.
static_assert!(core::mem::size_of::<VmCtx>() <= 4096);

/// The VM is busy.
pub const VMCTX_FUTEX_BUSY: u32 = 0;

/// The VM is idle.
pub const VMCTX_FUTEX_IDLE: u32 = 1;

/// The VM has triggered a host call and is idle.
pub const VMCTX_FUTEX_GUEST_ECALLI: u32 = VMCTX_FUTEX_IDLE | (1 << 1);

/// The VM has triggered a trap and is idle.
pub const VMCTX_FUTEX_GUEST_TRAP: u32 = VMCTX_FUTEX_IDLE | (2 << 1);

/// The VM's signal handler was triggered.
pub const VMCTX_FUTEX_GUEST_SIGNAL: u32 = VMCTX_FUTEX_IDLE | (3 << 1);

/// The VM has stepped through a single instruction and is idle.
pub const VMCTX_FUTEX_GUEST_STEP: u32 = VMCTX_FUTEX_IDLE | (4 << 1);

/// The VM has run out of gas and is idle.
pub const VMCTX_FUTEX_GUEST_NOT_ENOUGH_GAS: u32 = VMCTX_FUTEX_IDLE | (5 << 1);

/// The VM has triggered a page fault.
pub const VMCTX_FUTEX_GUEST_PAGEFAULT: u32 = VMCTX_FUTEX_IDLE | (6 << 1);
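
// The futex value packs the VM's state into a single `u32`: bit 0 is the "idle"
// flag and the higher bits encode why the VM went idle. A minimal sketch of how
// a host-side loop might decode it (illustrative only; the actual host
// implementation lives elsewhere):
//
//     fn decode_futex(value: u32) -> Option<u32> {
//         // Returns the idle reason (0 = plain idle, 1 = ecalli, 2 = trap, ...),
//         // or `None` while the VM is still busy.
//         if value & VMCTX_FUTEX_IDLE != 0 {
//             Some(value >> 1)
//         } else {
//             None
//         }
//     }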

#[allow(clippy::declare_interior_mutable_const)]
const ATOMIC_U64_ZERO: AtomicU64 = AtomicU64::new(0);

#[allow(clippy::new_without_default)]
impl VmCtx {
    /// Creates a zeroed VM context.
    pub const fn zeroed() -> Self {
        VmCtx {
            _align_1: CacheAligned(()),
            _align_2: CacheAligned(()),

            gas: AtomicI64::new(0),
            program_counter: AtomicU32::new(0),
            next_program_counter: AtomicU32::new(0),
            arg: AtomicU32::new(0),
            arg2: AtomicU32::new(0),
            arg3: AtomicU32::new(0),
            tmp_reg: AtomicU64::new(0),
            rip: AtomicU64::new(0),
            regs: [ATOMIC_U64_ZERO; REG_COUNT],
            jump_into: AtomicU64::new(0),
            next_native_program_counter: AtomicU64::new(0),

            futex: AtomicU32::new(VMCTX_FUTEX_BUSY),

            shm_memory_map_offset: AtomicU64::new(0),
            shm_memory_map_count: AtomicU64::new(0),
            shm_code_offset: AtomicU64::new(0),
            shm_code_length: AtomicU64::new(0),
            shm_jump_table_offset: AtomicU64::new(0),
            shm_jump_table_length: AtomicU64::new(0),
            uffd_enabled: AtomicBool::new(false),
            sysreturn_address: AtomicU64::new(0),
            heap_base: UnsafeCell::new(0),
            heap_initial_threshold: UnsafeCell::new(0),
            heap_max_size: UnsafeCell::new(0),
            page_size: UnsafeCell::new(0),

            heap_info: VmCtxHeapInfo {
                heap_top: UnsafeCell::new(0),
                heap_threshold: UnsafeCell::new(0),
            },

            counters: CacheAligned(VmCtxCounters {
                syscall_wait_loop_start: UnsafeCell::new(0),
                syscall_futex_wait: UnsafeCell::new(0),
            }),

            init: VmInit {
                stack_address: AtomicU64::new(0),
                stack_length: AtomicU64::new(0),
                vdso_address: AtomicU64::new(0),
                vdso_length: AtomicU64::new(0),
                vvar_address: AtomicU64::new(0),
                vvar_length: AtomicU64::new(0),
                uffd_available: AtomicBool::new(false),
                sandbox_disabled: AtomicBool::new(false),
                logging_enabled: AtomicBool::new(false),
            },

            message_length: UnsafeCell::new(0),
            message_buffer: UnsafeCell::new([0; MESSAGE_BUFFER_SIZE]),
        }
    }

    /// Creates a fresh VM context.
    pub const fn new() -> Self {
        Self::zeroed()
    }
}
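
// Both constructors are `const fn`, so a context can be built in a constant
// context as well as at runtime. For example (illustrative only):
//
//     const _: VmCtx = VmCtx::zeroed();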

static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST > VM_ADDR_JUMP_TABLE);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST % 0x4000 == 0);
static_assert!(VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE <= VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE < VM_ADDR_JUMP_TABLE_RETURN_TO_HOST);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST < VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE.count_ones() == 1);
static_assert!((1 << VM_ADDR_JUMP_TABLE.trailing_zeros()) == VM_ADDR_JUMP_TABLE);

static_assert!(VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE >= crate::abi::VM_MAXIMUM_CODE_SIZE * VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH);
static_assert!(VM_ADDR_NATIVE_CODE > 0xffffffff);
static_assert!(VM_ADDR_VMCTX > 0xffffffff);
static_assert!(VM_ADDR_NATIVE_STACK_LOW > 0xffffffff);