polkavm_linux_raw/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3#![deny(clippy::panic)]
4#![deny(clippy::unwrap_used)]
5#![deny(clippy::expect_used)]
6#![deny(clippy::unreachable)]
7#![deny(clippy::indexing_slicing)]
8#![allow(clippy::collapsible_else_if)]
9#![allow(clippy::len_without_is_empty)]
10#![allow(clippy::manual_range_contains)]
11// This crate mostly contains syscall wrappers. If you use them you should know what you're doing.
12#![allow(clippy::missing_safety_doc)]
13#![allow(clippy::undocumented_unsafe_blocks)]
14#![cfg(all(target_os = "linux", target_arch = "x86_64"))]
15
16#[cfg(feature = "std")]
17extern crate std;
18
19mod syscall;
20
21#[cfg(target_arch = "x86_64")]
22#[doc(hidden)]
23pub mod arch_amd64_syscall;
24
25#[cfg(target_arch = "x86_64")]
26#[allow(dead_code)]
27#[allow(non_upper_case_globals)]
28#[allow(non_camel_case_types)]
29#[allow(non_snake_case)]
30#[allow(clippy::ptr_as_ptr)]
31#[allow(clippy::used_underscore_binding)]
32#[allow(clippy::transmute_ptr_to_ptr)]
33mod arch_amd64_bindings;
34
35mod io_uring;
36mod mmap;
37
38pub use io_uring::IoUring;
39pub use mmap::Mmap;
40
41#[cfg(target_arch = "x86_64")]
42#[doc(hidden)]
43pub use arch_amd64_syscall as syscall_impl;
44
45pub use core::ffi::{c_int, c_long, c_uchar, c_uint, c_ulong, c_ushort, c_void};
46
47use core::ffi::CStr;
48use core::marker::PhantomData;
49use core::mem::MaybeUninit;
50use core::sync::atomic::AtomicU32;
51use core::time::Duration;
52
53#[cfg(feature = "std")]
54use std::borrow::Cow;
55
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/88345
/// Crate-local spelling of C's `size_t`; an alias of `usize`.
#[allow(non_camel_case_types)]
type c_size_t = usize;

/// Publicly exported name for the crate-local `c_size_t` alias.
#[allow(non_camel_case_types)]
pub type size_t = c_size_t;

// Doesn't appear in public headers.
// Each flag occupies its own bit, so they can be OR-ed together.
pub const MNT_FORCE: u32 = 1 << 0;
pub const MNT_DETACH: u32 = 1 << 1;
pub const MNT_EXPIRE: u32 = 1 << 2;

// Special signal handler values (default action / ignore the signal).
pub const SIG_DFL: usize = 0;
pub const SIG_IGN: usize = 1;

// Bindgen seems to not want to emit this constant,
// so let's define it manually.
pub const HWCAP2_FSGSBASE: usize = 0b10;
74
75pub(crate) use crate::arch_amd64_bindings as arch_bindings;
76
77#[rustfmt::skip]
78pub use crate::arch_bindings::{
79    __kernel_gid_t as gid_t,
80    __kernel_pid_t as pid_t,
81    __kernel_uid_t as uid_t,
82    __NR_arch_prctl as SYS_arch_prctl,
83    __NR_capset as SYS_capset,
84    __NR_chdir as SYS_chdir,
85    __NR_clock_gettime as SYS_clock_gettime,
86    __NR_clone as SYS_clone,
87    __NR_clone3 as SYS_clone3,
88    __NR_close as SYS_close,
89    __NR_close_range as SYS_close_range,
90    __NR_dup3 as SYS_dup3,
91    __NR_execveat as SYS_execveat,
92    __NR_exit as SYS_exit,
93    __NR_fallocate as SYS_fallocate,
94    __NR_fchdir as SYS_fchdir,
95    __NR_fcntl as SYS_fcntl,
96    __NR_ftruncate as SYS_ftruncate,
97    __NR_futex as SYS_futex,
98    __NR_getdents64 as SYS_getdents64,
99    __NR_getgid as SYS_getgid,
100    __NR_getpid as SYS_getpid,
101    __NR_getuid as SYS_getuid,
102    __NR_io_uring_enter as SYS_io_uring_enter,
103    __NR_io_uring_register as SYS_io_uring_register,
104    __NR_io_uring_setup as SYS_io_uring_setup,
105    __NR_ioctl as SYS_ioctl,
106    __NR_kill as SYS_kill,
107    __NR_lseek as SYS_lseek,
108    __NR_nanosleep as SYS_nanosleep,
109    __NR_madvise as SYS_madvise,
110    __NR_memfd_create as SYS_memfd_create,
111    __NR_mmap as SYS_mmap,
112    __NR_mlock as SYS_mlock,
113    __NR_mlockall as SYS_mlockall,
114    __NR_mount as SYS_mount,
115    __NR_mprotect as SYS_mprotect,
116    __NR_mremap as SYS_mremap,
117    __NR_munmap as SYS_munmap,
118    __NR_open as SYS_open,
119    __NR_openat as SYS_openat,
120    __NR_perf_event_open as SYS_perf_event_open,
121    __NR_pidfd_send_signal as SYS_pidfd_send_signal,
122    __NR_pipe2 as SYS_pipe2,
123    __NR_pivot_root as SYS_pivot_root,
124    __NR_prctl as SYS_prctl,
125    __NR_process_vm_readv as SYS_process_vm_readv,
126    __NR_process_vm_writev as SYS_process_vm_writev,
127    __NR_ptrace as SYS_ptrace,
128    __NR_read as SYS_read,
129    __NR_recvmsg as SYS_recvmsg,
130    __NR_rseq as SYS_rseq,
131    __NR_rt_sigaction as SYS_rt_sigaction,
132    __NR_rt_sigprocmask as SYS_rt_sigprocmask,
133    __NR_rt_sigreturn as SYS_rt_sigreturn,
134    __NR_sched_yield as SYS_sched_yield,
135    __NR_seccomp as SYS_seccomp,
136    __NR_sendmsg as SYS_sendmsg,
137    __NR_set_tid_address as SYS_set_tid_address,
138    __NR_setdomainname as SYS_setdomainname,
139    __NR_sethostname as SYS_sethostname,
140    __NR_setrlimit as SYS_setrlimit,
141    __NR_sigaltstack as SYS_sigaltstack,
142    __NR_socketpair as SYS_socketpair,
143    __NR_umount2 as SYS_umount2,
144    __NR_uname as SYS_uname,
145    __NR_unshare as SYS_unshare,
146    __NR_userfaultfd as SYS_userfaultfd,
147    __NR_waitid as SYS_waitid,
148    __NR_write as SYS_write,
149    __NR_writev as SYS_writev,
150    __user_cap_data_struct,
151    __user_cap_header_struct,
152    __WALL,
153    _LINUX_CAPABILITY_VERSION_3,
154    ARCH_GET_FS,
155    ARCH_GET_GS,
156    ARCH_SET_FS,
157    ARCH_SET_GS,
158    AT_EMPTY_PATH,
159    AT_HWCAP2,
160    AT_MINSIGSTKSZ,
161    AT_NULL,
162    AT_PAGESZ,
163    AT_SYSINFO_EHDR,
164    CLD_CONTINUED,
165    CLD_DUMPED,
166    CLD_EXITED,
167    CLD_KILLED,
168    CLD_STOPPED,
169    CLD_TRAPPED,
170    CLOCK_MONOTONIC_RAW,
171    CLONE_CLEAR_SIGHAND,
172    CLONE_NEWCGROUP,
173    CLONE_NEWIPC,
174    CLONE_NEWNET,
175    CLONE_NEWNS,
176    CLONE_NEWPID,
177    CLONE_NEWUSER,
178    CLONE_NEWUTS,
179    CLONE_PIDFD,
180    E2BIG,
181    EACCES,
182    EAGAIN,
183    EBADF,
184    EBUSY,
185    ECHILD,
186    EDOM,
187    EEXIST,
188    EFAULT,
189    EFBIG,
190    EINTR,
191    EINVAL,
192    EIO,
193    EISDIR,
194    EMFILE,
195    EMLINK,
196    ENFILE,
197    ENODEV,
198    ENOENT,
199    ENOEXEC,
200    ENOMEM,
201    ENOSPC,
202    ENOTBLK,
203    ENOTDIR,
204    ENOTTY,
205    ENXIO,
206    EOPNOTSUPP,
207    EPERM,
208    EPIPE,
209    ERANGE,
210    EROFS,
211    ESPIPE,
212    ESRCH,
213    ETIMEDOUT,
214    ETOOMANYREFS,
215    ETXTBSY,
216    EXDEV,
217    ERESTARTSYS,
218    F_ADD_SEALS,
219    F_DUPFD,
220    F_GETFD,
221    F_SEAL_EXEC,
222    F_SEAL_FUTURE_WRITE,
223    F_SEAL_GROW,
224    F_SEAL_SEAL,
225    F_SEAL_SHRINK,
226    F_SEAL_WRITE,
227    F_SETFD,
228    F_SETFL,
229    F_SETOWN,
230    F_SETSIG,
231    FALLOC_FL_COLLAPSE_RANGE,
232    FALLOC_FL_INSERT_RANGE,
233    FALLOC_FL_KEEP_SIZE,
234    FALLOC_FL_NO_HIDE_STALE,
235    FALLOC_FL_PUNCH_HOLE,
236    FALLOC_FL_UNSHARE_RANGE,
237    FALLOC_FL_ZERO_RANGE,
238    FUTEX_BITSET_MATCH_ANY,
239    FUTEX_WAIT,
240    FUTEX_WAKE,
241    FUTEX2_SIZE_U32,
242    io_cqring_offsets,
243    io_sqring_offsets,
244    io_uring_buf_reg,
245    io_uring_buf_ring,
246    io_uring_buf_status,
247    io_uring_buf,
248    io_uring_cqe,
249    io_uring_file_index_range,
250    io_uring_files_update,
251    io_uring_getevents_arg,
252    io_uring_napi,
253    io_uring_op_IORING_OP_ACCEPT,
254    io_uring_op_IORING_OP_ASYNC_CANCEL,
255    io_uring_op_IORING_OP_CLOSE,
256    io_uring_op_IORING_OP_CONNECT,
257    io_uring_op_IORING_OP_EPOLL_CTL,
258    io_uring_op_IORING_OP_FADVISE,
259    io_uring_op_IORING_OP_FALLOCATE,
260    io_uring_op_IORING_OP_FGETXATTR,
261    io_uring_op_IORING_OP_FILES_UPDATE,
262    io_uring_op_IORING_OP_FIXED_FD_INSTALL,
263    io_uring_op_IORING_OP_FSETXATTR,
264    io_uring_op_IORING_OP_FSYNC,
265    io_uring_op_IORING_OP_FTRUNCATE,
266    io_uring_op_IORING_OP_FUTEX_WAIT,
267    io_uring_op_IORING_OP_FUTEX_WAITV,
268    io_uring_op_IORING_OP_FUTEX_WAKE,
269    io_uring_op_IORING_OP_GETXATTR,
270    io_uring_op_IORING_OP_LAST,
271    io_uring_op_IORING_OP_LINK_TIMEOUT,
272    io_uring_op_IORING_OP_LINKAT,
273    io_uring_op_IORING_OP_MADVISE,
274    io_uring_op_IORING_OP_MKDIRAT,
275    io_uring_op_IORING_OP_MSG_RING,
276    io_uring_op_IORING_OP_NOP,
277    io_uring_op_IORING_OP_OPENAT,
278    io_uring_op_IORING_OP_OPENAT2,
279    io_uring_op_IORING_OP_POLL_ADD,
280    io_uring_op_IORING_OP_POLL_REMOVE,
281    io_uring_op_IORING_OP_PROVIDE_BUFFERS,
282    io_uring_op_IORING_OP_READ_FIXED,
283    io_uring_op_IORING_OP_READ_MULTISHOT,
284    io_uring_op_IORING_OP_READ,
285    io_uring_op_IORING_OP_READV,
286    io_uring_op_IORING_OP_RECV,
287    io_uring_op_IORING_OP_RECVMSG,
288    io_uring_op_IORING_OP_REMOVE_BUFFERS,
289    io_uring_op_IORING_OP_RENAMEAT,
290    io_uring_op_IORING_OP_SEND_ZC,
291    io_uring_op_IORING_OP_SEND,
292    io_uring_op_IORING_OP_SENDMSG_ZC,
293    io_uring_op_IORING_OP_SENDMSG,
294    io_uring_op_IORING_OP_SETXATTR,
295    io_uring_op_IORING_OP_SHUTDOWN,
296    io_uring_op_IORING_OP_SOCKET,
297    io_uring_op_IORING_OP_SPLICE,
298    io_uring_op_IORING_OP_STATX,
299    io_uring_op_IORING_OP_SYMLINKAT,
300    io_uring_op_IORING_OP_SYNC_FILE_RANGE,
301    io_uring_op_IORING_OP_TEE,
302    io_uring_op_IORING_OP_TIMEOUT_REMOVE,
303    io_uring_op_IORING_OP_TIMEOUT,
304    io_uring_op_IORING_OP_UNLINKAT,
305    io_uring_op_IORING_OP_URING_CMD,
306    io_uring_op_IORING_OP_WAITID,
307    io_uring_op_IORING_OP_WRITE_FIXED,
308    io_uring_op_IORING_OP_WRITE,
309    io_uring_op_IORING_OP_WRITEV,
310    io_uring_params,
311    io_uring_probe_op,
312    io_uring_probe,
313    io_uring_recvmsg_out,
314    io_uring_restriction,
315    io_uring_rsrc_register,
316    io_uring_rsrc_update,
317    io_uring_rsrc_update2,
318    io_uring_sqe,
319    io_uring_sync_cancel_reg,
320    IORING_ACCEPT_MULTISHOT,
321    IORING_ASYNC_CANCEL_ALL,
322    IORING_ASYNC_CANCEL_ANY,
323    IORING_ASYNC_CANCEL_FD_FIXED,
324    IORING_ASYNC_CANCEL_FD,
325    IORING_ASYNC_CANCEL_OP,
326    IORING_ASYNC_CANCEL_USERDATA,
327    IORING_CQ_EVENTFD_DISABLED,
328    IORING_CQE_BUFFER_SHIFT,
329    IORING_CQE_F_BUFFER,
330    IORING_CQE_F_MORE,
331    IORING_CQE_F_NOTIF,
332    IORING_CQE_F_SOCK_NONEMPTY,
333    IORING_ENTER_EXT_ARG,
334    IORING_ENTER_GETEVENTS,
335    IORING_ENTER_REGISTERED_RING,
336    IORING_ENTER_SQ_WAIT,
337    IORING_ENTER_SQ_WAKEUP,
338    IORING_FEAT_CQE_SKIP,
339    IORING_FEAT_CUR_PERSONALITY,
340    IORING_FEAT_EXT_ARG,
341    IORING_FEAT_FAST_POLL,
342    IORING_FEAT_LINKED_FILE,
343    IORING_FEAT_NATIVE_WORKERS,
344    IORING_FEAT_NODROP,
345    IORING_FEAT_POLL_32BITS,
346    IORING_FEAT_REG_REG_RING,
347    IORING_FEAT_RSRC_TAGS,
348    IORING_FEAT_RW_CUR_POS,
349    IORING_FEAT_SINGLE_MMAP,
350    IORING_FEAT_SQPOLL_NONFIXED,
351    IORING_FEAT_SUBMIT_STABLE,
352    IORING_FILE_INDEX_ALLOC,
353    IORING_FIXED_FD_NO_CLOEXEC,
354    IORING_FSYNC_DATASYNC,
355    IORING_LINK_TIMEOUT_UPDATE,
356    IORING_MSG_DATA,
357    IORING_MSG_RING_CQE_SKIP,
358    IORING_MSG_RING_FLAGS_PASS,
359    IORING_MSG_SEND_FD,
360    IORING_NOTIF_USAGE_ZC_COPIED,
361    IORING_OFF_CQ_RING,
362    IORING_OFF_MMAP_MASK,
363    IORING_OFF_PBUF_RING,
364    IORING_OFF_PBUF_SHIFT,
365    IORING_OFF_SQ_RING,
366    IORING_OFF_SQES,
367    IORING_POLL_ADD_LEVEL,
368    IORING_POLL_ADD_MULTI,
369    IORING_POLL_UPDATE_EVENTS,
370    IORING_POLL_UPDATE_USER_DATA,
371    IORING_RECV_MULTISHOT,
372    IORING_RECVSEND_FIXED_BUF,
373    IORING_RECVSEND_POLL_FIRST,
374    IORING_REGISTER_BUFFERS_UPDATE,
375    IORING_REGISTER_BUFFERS,
376    IORING_REGISTER_BUFFERS2,
377    IORING_REGISTER_ENABLE_RINGS,
378    IORING_REGISTER_EVENTFD_ASYNC,
379    IORING_REGISTER_EVENTFD,
380    IORING_REGISTER_FILE_ALLOC_RANGE,
381    IORING_REGISTER_FILES_SKIP,
382    IORING_REGISTER_FILES_UPDATE,
383    IORING_REGISTER_FILES_UPDATE2,
384    IORING_REGISTER_FILES,
385    IORING_REGISTER_FILES2,
386    IORING_REGISTER_IOWQ_AFF,
387    IORING_REGISTER_IOWQ_MAX_WORKERS,
388    IORING_REGISTER_LAST,
389    IORING_REGISTER_NAPI,
390    IORING_REGISTER_PBUF_RING,
391    IORING_REGISTER_PBUF_STATUS,
392    IORING_REGISTER_PERSONALITY,
393    IORING_REGISTER_PROBE,
394    IORING_REGISTER_RESTRICTIONS,
395    IORING_REGISTER_RING_FDS,
396    IORING_REGISTER_SYNC_CANCEL,
397    IORING_REGISTER_USE_REGISTERED_RING,
398    IORING_RESTRICTION_LAST,
399    IORING_RESTRICTION_REGISTER_OP,
400    IORING_RESTRICTION_SQE_FLAGS_ALLOWED,
401    IORING_RESTRICTION_SQE_FLAGS_REQUIRED,
402    IORING_RESTRICTION_SQE_OP,
403    IORING_RSRC_REGISTER_SPARSE,
404    IORING_SEND_ZC_REPORT_USAGE,
405    IORING_SETUP_ATTACH_WQ,
406    IORING_SETUP_CLAMP,
407    IORING_SETUP_COOP_TASKRUN,
408    IORING_SETUP_CQE32,
409    IORING_SETUP_CQSIZE,
410    IORING_SETUP_DEFER_TASKRUN,
411    IORING_SETUP_IOPOLL,
412    IORING_SETUP_NO_MMAP,
413    IORING_SETUP_NO_SQARRAY,
414    IORING_SETUP_R_DISABLED,
415    IORING_SETUP_REGISTERED_FD_ONLY,
416    IORING_SETUP_SINGLE_ISSUER,
417    IORING_SETUP_SQ_AFF,
418    IORING_SETUP_SQE128,
419    IORING_SETUP_SQPOLL,
420    IORING_SETUP_SUBMIT_ALL,
421    IORING_SETUP_TASKRUN_FLAG,
422    IORING_SQ_CQ_OVERFLOW,
423    IORING_SQ_NEED_WAKEUP,
424    IORING_SQ_TASKRUN,
425    IORING_TIMEOUT_ABS,
426    IORING_TIMEOUT_BOOTTIME,
427    IORING_TIMEOUT_CLOCK_MASK,
428    IORING_TIMEOUT_ETIME_SUCCESS,
429    IORING_TIMEOUT_MULTISHOT,
430    IORING_TIMEOUT_REALTIME,
431    IORING_TIMEOUT_UPDATE_MASK,
432    IORING_TIMEOUT_UPDATE,
433    IORING_UNREGISTER_BUFFERS,
434    IORING_UNREGISTER_EVENTFD,
435    IORING_UNREGISTER_FILES,
436    IORING_UNREGISTER_IOWQ_AFF,
437    IORING_UNREGISTER_NAPI,
438    IORING_UNREGISTER_PBUF_RING,
439    IORING_UNREGISTER_PERSONALITY,
440    IORING_UNREGISTER_RING_FDS,
441    IORING_URING_CMD_FIXED,
442    IORING_URING_CMD_MASK,
443    IOSQE_ASYNC_BIT,
444    IOSQE_BUFFER_SELECT_BIT,
445    IOSQE_CQE_SKIP_SUCCESS_BIT,
446    IOSQE_FIXED_FILE_BIT,
447    IOSQE_IO_DRAIN_BIT,
448    IOSQE_IO_HARDLINK_BIT,
449    IOSQE_IO_LINK_BIT,
450    iovec,
451    linux_dirent64,
452    MADV_COLD,
453    MADV_COLLAPSE,
454    MADV_DODUMP,
455    MADV_DOFORK,
456    MADV_DONTDUMP,
457    MADV_DONTFORK,
458    MADV_DONTNEED_LOCKED,
459    MADV_DONTNEED,
460    MADV_FREE,
461    MADV_HUGEPAGE,
462    MADV_HWPOISON,
463    MADV_KEEPONFORK,
464    MADV_MERGEABLE,
465    MADV_NOHUGEPAGE,
466    MADV_NORMAL,
467    MADV_PAGEOUT,
468    MADV_POPULATE_READ,
469    MADV_POPULATE_WRITE,
470    MADV_RANDOM,
471    MADV_REMOVE,
472    MADV_SEQUENTIAL,
473    MADV_SOFT_OFFLINE,
474    MADV_UNMERGEABLE,
475    MADV_WILLNEED,
476    MADV_WIPEONFORK,
477    MAP_ANONYMOUS,
478    MAP_FIXED,
479    MAP_POPULATE,
480    MAP_PRIVATE,
481    MAP_SHARED,
482    MCL_CURRENT,
483    MCL_FUTURE,
484    MCL_ONFAULT,
485    MFD_ALLOW_SEALING,
486    MFD_CLOEXEC,
487    MINSIGSTKSZ,
488    MREMAP_FIXED,
489    MREMAP_MAYMOVE,
490    MS_BIND,
491    MS_NODEV,
492    MS_NOEXEC,
493    MS_NOSUID,
494    MS_PRIVATE,
495    MS_RDONLY,
496    MS_REC,
497    new_utsname,
498    O_CLOEXEC,
499    O_DIRECTORY,
500    O_NONBLOCK,
501    O_PATH,
502    O_RDONLY,
503    O_RDWR,
504    O_WRONLY,
505    P_ALL,
506    P_PGID,
507    P_PID,
508    P_PIDFD,
509    PROT_EXEC,
510    PROT_READ,
511    PROT_WRITE,
512    RLIMIT_DATA,
513    RLIMIT_FSIZE,
514    RLIMIT_LOCKS,
515    RLIMIT_MEMLOCK,
516    RLIMIT_MSGQUEUE,
517    RLIMIT_NOFILE,
518    RLIMIT_NPROC,
519    RLIMIT_STACK,
520    rlimit,
521    rseq_flags_RSEQ_FLAG_UNREGISTER as RSEQ_FLAG_UNREGISTER,
522    rseq,
523    rseq_cs,
524    rusage,
525    SA_NODEFER,
526    SA_ONSTACK,
527    SA_RESTORER,
528    SA_SIGINFO,
529    SECCOMP_RET_ALLOW,
530    SECCOMP_RET_ERRNO,
531    SECCOMP_RET_KILL_THREAD,
532    SECCOMP_SET_MODE_FILTER,
533    SIG_BLOCK,
534    SIG_SETMASK,
535    SIG_UNBLOCK,
536    SIGABRT,
537    sigaction as kernel_sigaction,
538    SIGBUS,
539    SIGCHLD,
540    SIGCONT,
541    SIGFPE,
542    SIGHUP,
543    SIGILL,
544    siginfo_t,
545    SIGINT,
546    SIGIO,
547    SIGKILL,
548    SIGPIPE,
549    SIGSEGV,
550    sigset_t as kernel_sigset_t,
551    SIGSTOP,
552    SIGSYS,
553    SIGTERM,
554    SIGTRAP,
555    timespec,
556    UFFD_EVENT_FORK,
557    UFFD_EVENT_PAGEFAULT,
558    UFFD_EVENT_REMAP,
559    UFFD_EVENT_REMOVE,
560    UFFD_EVENT_UNMAP,
561    UFFD_FEATURE_EVENT_FORK,
562    UFFD_FEATURE_EVENT_REMAP,
563    UFFD_FEATURE_EVENT_REMOVE,
564    UFFD_FEATURE_EVENT_UNMAP,
565    UFFD_FEATURE_EXACT_ADDRESS,
566    UFFD_FEATURE_MINOR_HUGETLBFS,
567    UFFD_FEATURE_MINOR_SHMEM,
568    UFFD_FEATURE_MISSING_HUGETLBFS,
569    UFFD_FEATURE_MISSING_SHMEM,
570    UFFD_FEATURE_MOVE,
571    UFFD_FEATURE_PAGEFAULT_FLAG_WP,
572    UFFD_FEATURE_POISON,
573    UFFD_FEATURE_SIGBUS,
574    UFFD_FEATURE_THREAD_ID,
575    UFFD_FEATURE_WP_ASYNC,
576    UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
577    UFFD_FEATURE_WP_UNPOPULATED,
578    uffd_msg,
579    UFFD_PAGEFAULT_FLAG_MINOR,
580    UFFD_PAGEFAULT_FLAG_WP,
581    UFFD_PAGEFAULT_FLAG_WRITE,
582    UFFD_USER_MODE_ONLY,
583    uffdio_api,
584    uffdio_continue,
585    uffdio_copy,
586    uffdio_move,
587    uffdio_poison,
588    uffdio_range,
589    uffdio_register,
590    uffdio_writeprotect,
591    uffdio_zeropage,
592    WEXITED,
593    WNOHANG,
594};
595
// For some reason bindgen just refuses to emit these.

/// API version value for userfaultfd.
pub const UFFD_API: u64 = 0xaa;

// Mode flags for the register command.
pub const UFFDIO_REGISTER_MODE_MISSING: u64 = 0x1;
pub const UFFDIO_REGISTER_MODE_WP: u64 = 0x2;
pub const UFFDIO_REGISTER_MODE_MINOR: u64 = 0x4;

// Mode flags for the copy command.
pub const UFFDIO_COPY_MODE_DONTWAKE: u64 = 0x1;
pub const UFFDIO_COPY_MODE_WP: u64 = 0x2;

// Mode flag for the zeropage command.
pub const UFFDIO_ZEROPAGE_MODE_DONTWAKE: u64 = 0x1;

// Mode flags for the writeprotect command.
// Note that here WP is bit 0 and DONTWAKE is bit 1, unlike the other commands.
pub const UFFDIO_WRITEPROTECT_MODE_WP: u64 = 0x1;
pub const UFFDIO_WRITEPROTECT_MODE_DONTWAKE: u64 = 0x2;

// Mode flags for the continue command.
pub const UFFDIO_CONTINUE_MODE_DONTWAKE: u64 = 0x1;
pub const UFFDIO_CONTINUE_MODE_WP: u64 = 0x2;
608
// Assembles an ioctl request number out of its four fields.
//
// Each field is shifted into place using the `_IOC_*SHIFT` constants from the
// generated bindings and the results are OR-ed together. The arguments are
// captured as whole expressions, so something like `A | B` passed as the
// direction is shifted as a unit.
macro_rules! ioc {
    ($direction:expr, $kind:expr, $number:expr, $arg_size:expr) => {
        ($direction << $crate::arch_bindings::_IOC_DIRSHIFT)
            | ($kind << $crate::arch_bindings::_IOC_TYPESHIFT)
            | ($number << $crate::arch_bindings::_IOC_NRSHIFT)
            | ($arg_size << $crate::arch_bindings::_IOC_SIZESHIFT)
    };
}

// Request number whose direction field is `_IOC_READ`.
//
// The size field is taken from `size_of` of the given argument type.
macro_rules! ior {
    ($kind:expr, $number:expr, $arg_ty:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ,
            $kind,
            $number,
            core::mem::size_of::<$arg_ty>() as $crate::c_uint
        )
    };
}

// Request number whose direction field is `_IOC_READ | _IOC_WRITE`.
//
// The size field is taken from `size_of` of the given argument type.
macro_rules! iowr {
    ($kind:expr, $number:expr, $arg_ty:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ | $crate::arch_bindings::_IOC_WRITE,
            $kind,
            $number,
            core::mem::size_of::<$arg_ty>() as $crate::c_uint
        )
    };
}
639
640use crate::arch_bindings::UFFDIO;
641
642const UFFDIO_API: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_API, uffdio_api);
643const UFFDIO_REGISTER: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_REGISTER, uffdio_register);
644const UFFDIO_UNREGISTER: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_UNREGISTER, uffdio_range);
645const UFFDIO_WAKE: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_WAKE, uffdio_range);
646const UFFDIO_COPY: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_COPY, uffdio_copy);
647const UFFDIO_ZEROPAGE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_ZEROPAGE, uffdio_zeropage);
648const UFFDIO_MOVE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_MOVE, uffdio_move);
649const UFFDIO_WRITEPROTECT: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_WRITEPROTECT, uffdio_writeprotect);
650const UFFDIO_CONTINUE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_CONTINUE, uffdio_continue);
651const UFFDIO_POISON: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_POISON, uffdio_poison);
652
653macro_rules! ioctl_wrapper {
654    ($(
655        ($name:ident, $command:ident, $struct:ident),
656    )*) => {
657        $(
658            pub fn $name(fd: FdRef, arg: &mut $struct) -> Result<(), Error> {
659                sys_ioctl(fd, $command, arg as *mut _ as c_ulong)?;
660                Ok(())
661            }
662        )*
663    }
664}
665
666ioctl_wrapper! {
667    (sys_uffdio_api, UFFDIO_API, uffdio_api),
668    (sys_uffdio_register, UFFDIO_REGISTER, uffdio_register),
669    (sys_uffdio_unregister, UFFDIO_UNREGISTER, uffdio_range),
670    (sys_uffdio_wake, UFFDIO_WAKE, uffdio_range),
671    (sys_uffdio_copy, UFFDIO_COPY, uffdio_copy),
672    (sys_uffdio_zeropage, UFFDIO_ZEROPAGE, uffdio_zeropage),
673    (sys_uffdio_move, UFFDIO_MOVE, uffdio_move),
674    (sys_uffdio_writeprotect, UFFDIO_WRITEPROTECT, uffdio_writeprotect),
675    (sys_uffdio_continue, UFFDIO_CONTINUE, uffdio_continue),
676    (sys_uffdio_poison, UFFDIO_POISON, uffdio_poison),
677}
678
// Implements `Default` for every listed type by returning a value whose bytes
// are all zero.
//
// The macro cannot check that an all-zero bit pattern is a valid value of the
// type; whoever lists a type here is responsible for that.
macro_rules! unsafe_impl_zeroed_default {
    ($(
        $zeroable:ident,
    )*) => {
        $(
            impl Default for $zeroable {
                #[inline]
                fn default() -> Self {
                    // Validity of the all-zero value is vouched for by the macro's user.
                    unsafe { core::mem::zeroed::<Self>() }
                }
            }
        )*
    }
}
693
694unsafe_impl_zeroed_default! {
695    uffdio_api,
696    uffdio_register,
697    uffdio_range,
698    uffdio_copy,
699    uffdio_zeropage,
700    uffdio_move,
701    uffdio_writeprotect,
702    uffdio_continue,
703    uffdio_poison,
704    uffd_msg,
705    io_uring_params,
706    io_uring_sqe,
707}
708
709impl siginfo_t {
710    pub unsafe fn si_signo(&self) -> c_int {
711        self.__bindgen_anon_1.__bindgen_anon_1.si_signo
712    }
713
714    pub unsafe fn si_code(&self) -> c_int {
715        self.__bindgen_anon_1.__bindgen_anon_1.si_code
716    }
717
718    pub unsafe fn si_pid(&self) -> pid_t {
719        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._pid
720    }
721
722    pub unsafe fn si_status(&self) -> c_int {
723        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._status
724    }
725}
726
/// Returns `true` if the low seven bits of `status` hold a value strictly
/// between `0` and `0x7f`.
///
/// In the wait-status encoding those bits hold the terminating signal; `0`
/// means a normal exit and `0x7f` is not a terminating signal.
#[allow(non_snake_case)]
pub const fn WIFSIGNALED(status: c_int) -> bool {
    matches!(status & 0x7f, 1..=0x7e)
}
731
/// Extracts the low seven bits of `status`, which hold the number of the
/// signal that terminated the process.
#[allow(non_snake_case)]
pub const fn WTERMSIG(status: c_int) -> c_int {
    const TERMSIG_MASK: c_int = 0x7f;
    status & TERMSIG_MASK
}
736
/// Returns `true` if the low seven bits of `status` are all zero, i.e. no
/// terminating signal is recorded in it.
#[allow(non_snake_case)]
pub const fn WIFEXITED(status: c_int) -> bool {
    matches!(status & 0x7f, 0)
}
741
/// Extracts bits 8..=15 of `status`, which hold the exit code of a process
/// that exited normally.
#[allow(non_snake_case)]
pub const fn WEXITSTATUS(status: c_int) -> c_int {
    // Masking first keeps the intermediate value non-negative, so the shift
    // yields the same eight bits as shifting first and masking afterwards.
    (status & 0xff00) >> 8
}
746
/// Length type used for socket address sizes; a 32-bit unsigned integer.
#[allow(non_camel_case_types)]
pub type socklen_t = c_uint;
749
// Source: linux/arch/x86/include/uapi/asm/signal.h
/// Description of a signal stack: base pointer, flags, and size.
#[repr(C)]
#[derive(Debug)]
pub struct stack_t {
    /// Pointer to the stack memory.
    pub ss_sp: *mut c_void,
    /// Flags for the stack.
    pub ss_flags: c_int,
    /// Size of the stack.
    pub ss_size: usize,
}
758
759// Source: linux/include/uapi/asm-generic/ucontext.h
760#[derive(Debug)]
761#[repr(C)]
762pub struct ucontext {
763    pub uc_flags: c_ulong,
764    pub uc_link: *mut ucontext,
765    pub uc_stack: stack_t,
766    pub uc_mcontext: sigcontext,
767    pub uc_sigmask: kernel_sigset_t,
768}
769
770// Source: linux/arch/x86/include/uapi/asm/sigcontext.h
771#[derive(Debug)]
772#[repr(C)]
773pub struct sigcontext {
774    pub r8: u64,
775    pub r9: u64,
776    pub r10: u64,
777    pub r11: u64,
778    pub r12: u64,
779    pub r13: u64,
780    pub r14: u64,
781    pub r15: u64,
782    pub rdi: u64,
783    pub rsi: u64,
784    pub rbp: u64,
785    pub rbx: u64,
786    pub rdx: u64,
787    pub rax: u64,
788    pub rcx: u64,
789    pub rsp: u64,
790    pub rip: u64,
791    pub eflags: u64,
792    pub cs: u16,
793    pub gs: u16,
794    pub fs: u16,
795    pub ss: u16,
796    pub err: u64,
797    pub trapno: u64,
798    pub oldmask: u64,
799    pub cr2: u64,
800    pub fpstate: *mut fpstate,
801    pub reserved: [u64; 8],
802}
803
/// Floating point state pointed to by `sigcontext::fpstate`.
///
/// Derives `Debug` like the other signal-context structures in this file so
/// that it can be printed when inspecting a signal frame.
#[derive(Debug)]
#[repr(C)]
pub struct fpstate {
    pub cwd: u16,
    pub swd: u16,
    pub twd: u16,
    pub fop: u16,
    pub rip: u64,
    pub rdp: u64,
    pub mxcsr: u32,
    pub mxcsr_mask: u32,
    /// 8x FP registers, 16 bytes each.
    pub st_space: [u32; 32],
    /// 16x XMM registers, 16 bytes each.
    pub xmm_space: [u32; 64],
    pub reserved_1: [u32; 12],
    /// Trailing software-reserved area; see `fpx_sw_bytes`.
    pub sw_reserved: fpx_sw_bytes,
}

/// Contents of the software-reserved area at the end of `fpstate`.
#[derive(Debug)]
#[repr(C)]
pub struct fpx_sw_bytes {
    pub magic1: u32,
    pub extended_size: u32,
    pub xfeatures: u64,
    pub xstate_size: u32,
    pub padding: [u32; 7],
}
828
829#[repr(C)]
830pub struct msghdr {
831    pub msg_name: *mut c_void,
832    pub msg_namelen: socklen_t,
833    pub msg_iov: *mut iovec,
834    pub msg_iovlen: c_size_t,
835    pub msg_control: *mut c_void,
836    pub msg_controllen: c_size_t,
837    pub msg_flags: c_int,
838}
839
840#[repr(C)]
841pub struct cmsghdr {
842    pub cmsg_len: c_size_t,
843    pub cmsg_level: c_int,
844    pub cmsg_type: c_int,
845}
846
847#[repr(C)]
848struct sock_fprog {
849    pub length: c_ushort,
850    pub filter: *const sock_filter,
851}
852
/// A single BPF instruction, as produced by the `bpf!` macro.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct sock_filter {
    /// Operation code, assembled from the `BPF_*` constants.
    pub code: u16,
    /// Jump offset used when a conditional jump's test is true.
    pub jt: u8,
    /// Jump offset used when a conditional jump's test is false.
    pub jf: u8,
    /// Generic 32-bit operand.
    pub k: u32,
}
861
// BPF instruction classes (3 bits, mask: 0x07)
pub const BPF_LD: u16 = 0x00;
pub const BPF_LDX: u16 = 0x01;
pub const BPF_ST: u16 = 0x02;
pub const BPF_STX: u16 = 0x03;
pub const BPF_ALU: u16 = 0x04;
pub const BPF_JMP: u16 = 0x05;
pub const BPF_RET: u16 = 0x06;
pub const BPF_MISC: u16 = 0x07;

// BPF LD/LDX/ST/STX width (2 bits, mask: 0x18)
pub const BPF_W: u16 = 0x00; // 32-bit
pub const BPF_H: u16 = 0x08; // 16-bit
pub const BPF_B: u16 = 0x10; // 8-bit

// BPF LD/LDX/ST/STX addressing mode (3 bits, mask: 0xe0)
pub const BPF_IMM: u16 = 0x00;
pub const BPF_ABS: u16 = 0x20;
pub const BPF_IND: u16 = 0x40;
pub const BPF_MEM: u16 = 0x60;
pub const BPF_LEN: u16 = 0x80;
pub const BPF_MSH: u16 = 0xa0;

// BPF ALU operations (4 bits, mask: 0xf0)
pub const BPF_ADD: u16 = 0x00;
pub const BPF_SUB: u16 = 0x10;
pub const BPF_MUL: u16 = 0x20;
pub const BPF_DIV: u16 = 0x30;
pub const BPF_OR: u16 = 0x40;
pub const BPF_AND: u16 = 0x50;
pub const BPF_LSH: u16 = 0x60;
pub const BPF_RSH: u16 = 0x70;
pub const BPF_NEG: u16 = 0x80;
pub const BPF_MOD: u16 = 0x90;
pub const BPF_XOR: u16 = 0xa0;

// BPF JMP operations (4 bits, mask: 0xf0)
pub const BPF_JA: u16 = 0x00;
pub const BPF_JEQ: u16 = 0x10;
pub const BPF_JGT: u16 = 0x20;
pub const BPF_JGE: u16 = 0x30;
pub const BPF_JSET: u16 = 0x40;

// BPF ALU/JMP source (1 bit, mask: 0x08)
pub const BPF_K: u16 = 0x00;
pub const BPF_X: u16 = 0x08;
908
// Secure-bits flags. Every flag is immediately followed by its `_LOCKED`
// companion, which occupies the next higher bit.
pub const SECBIT_NOROOT: u32 = 1 << 0;
pub const SECBIT_NOROOT_LOCKED: u32 = 1 << 1;
pub const SECBIT_NO_SETUID_FIXUP: u32 = 1 << 2;
pub const SECBIT_NO_SETUID_FIXUP_LOCKED: u32 = 1 << 3;
pub const SECBIT_KEEP_CAPS: u32 = 1 << 4;
pub const SECBIT_KEEP_CAPS_LOCKED: u32 = 1 << 5;
pub const SECBIT_NO_CAP_AMBIENT_RAISE: u32 = 1 << 6;
pub const SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED: u32 = 1 << 7;
917
/// Assembles a BPF program into an array of [`sock_filter`] instructions.
///
/// The input is a list of parenthesized instructions, each followed by a
/// comma. An instruction may be prefixed with `[N]:` to attach the numeric
/// label `N` to it; jumps refer to labels as `@N`.
///
/// Supported instructions:
///
/// * `a = *abs[ADDR]`
/// * `a = syscall_nr` (shorthand for `a = *abs[0]`)
/// * `a = syscall_arg[N]` (shorthand for `a = *abs[16 + N * 8]`)
/// * `a &= VALUE`
/// * `if a == VALUE => jump @LABEL`
/// * `if a != VALUE => jump @LABEL`
/// * `jump @LABEL`
/// * `return VALUE`
/// * `seccomp_allow`, `seccomp_kill_thread`, `seccomp_return_eperm`,
///   `seccomp_return_error(ERRNO)`
///
/// # Panics
///
/// The expanded code panics with `"invalid jump"` if a jump target lies before
/// the jump or more than 255 instructions after it, and with
/// `"out of range value"` if a comparison value does not fit in the range
/// `i32::MIN..=u32::MAX`.
///
/// # Examples
///
/// ```ignore
/// let filter = bpf! {
///     (a = syscall_nr),
///     (if a == 1234 => jump @0),
///     (return 10),
///     ([0]: return 20),
/// };
/// ```
#[macro_export]
macro_rules! bpf {
    // Maps any single token tree to the literal `1`; used for counting.
    (@const_one $tok:tt) => {
        1
    };

    // Yields the label of a labelled instruction...
    (@get_label_or_zero ([$label:expr]: $($tok:tt)+)) => {
        $label
    };

    // ...and `0` for an instruction without a label.
    (@get_label_or_zero ($($tok:tt)+)) => {
        0
    };

    // Number of instructions in the program.
    (@count_instructions
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut count = 0;
        $(
            count += $crate::bpf!(@const_one ($($tok)+));
        )+

        count
    }};

    // Size needed for the label table: the highest label plus one.
    (@max_label_plus_one
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut max = -1;
        $(
            let label = $crate::bpf!(@get_label_or_zero ($($tok)+));
            if label > max {
                max = label;
            }
        )+

        if max < 0 {
            0
        } else {
            (max + 1) as usize
        }
    }};

    // Records the position of a labelled instruction in the label table.
    (@fill_label $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => {
        $labels[$label] = $nth_instruction;
    };

    // Instructions without a label leave the label table untouched.
    (@fill_label $labels:expr, $nth_instruction:expr, $($tok:tt)+) => {};

    // Walks over all instructions and fills in the label table.
    (@fill_labels
        $labels:expr,
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut nth_instruction = 0;
        $(
            $crate::bpf!(@fill_label $labels, nth_instruction, $($tok)+);
            #[allow(unused_assignments)]
            {
                nth_instruction += 1;
            }
        )+
    }};

    // Converts a label into a jump offset relative to the instruction that
    // follows the jump. Only forward jumps of at most 255 are accepted.
    (@target $labels:expr, $nth_instruction:expr, $target:expr) => {{
        let target = ($labels[$target] as i32 - $nth_instruction as i32 - 1);
        if target < 0 || target > 255 {
            panic!("invalid jump");
        }

        target as u8
    }};

    // Converts a comparison value to `u32`, accepting anything that fits in
    // either an `i32` or a `u32`.
    (@into_u32 $value:expr) => {{
        let value = $value;
        if value as i128 > u32::MAX as i128 || (value as i128) < i32::MIN as i128 {
            panic!("out of range value");
        }
        value as u32
    }};

    // A labelled instruction is encoded exactly like the instruction after the label.
    (@op $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => { $crate::bpf!(@op $labels, $nth_instruction, $($tok)+) };

    (@op $labels:expr, $nth_instruction:expr, a = *abs[$addr:expr]) => { $crate::sock_filter { code: $crate::BPF_LD | $crate::BPF_W | $crate::BPF_ABS, jt: 0, jf: 0, k: $addr } };
    (@op $labels:expr, $nth_instruction:expr, a &= $value:expr) => { $crate::sock_filter { code: $crate::BPF_ALU | $crate::BPF_AND | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    (@op $labels:expr, $nth_instruction:expr, if a == $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: $crate::bpf!(@target $labels, $nth_instruction, $target), jf: 0, k: $crate::bpf!(@into_u32 $value) } };
    (@op $labels:expr, $nth_instruction:expr, if a != $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: 0, jf: $crate::bpf!(@target $labels, $nth_instruction, $target), k: $crate::bpf!(@into_u32 $value) } };
    (@op $labels:expr, $nth_instruction:expr, jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JA, jt: 0, jf: 0, k: $crate::bpf!(@target $labels, $nth_instruction, $target) as u32 } };
    (@op $labels:expr, $nth_instruction:expr, return $value:expr) => { $crate::sock_filter { code: $crate::BPF_RET | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    (@op $labels:expr, $nth_instruction:expr, seccomp_allow) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ALLOW) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_kill_thread) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_KILL_THREAD) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_error($errno:expr)) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ERRNO | { let errno: u16 = $errno; errno as u32 }) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_eperm) => { $crate::bpf!(@op $labels, $nth_instruction, seccomp_return_error($crate::EPERM as u16)) };
    (@op $labels:expr, $nth_instruction:expr, a = syscall_nr) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[0]) };
    (@op $labels:expr, $nth_instruction:expr, a = syscall_arg[$nth_arg:expr]) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[16 + $nth_arg * 8]) };

    // Entry point: allocate the output array and the label table, resolve all
    // labels first, then encode the instructions one by one.
    (
        $(
            ($($tok:tt)+),
        )+
    ) => {{
        let mut filter = [
            $crate::sock_filter { code: 0, jt: 0, jf: 0, k: 0 };
            { $crate::bpf!(@count_instructions $(($($tok)+)),+) }
        ];

        let mut labels = [
            0;
            { $crate::bpf!(@max_label_plus_one $(($($tok)+)),+) }
        ];

        $crate::bpf!(@fill_labels labels, $(($($tok)+)),+);

        {
            let mut nth_instruction = 0;

            $(
                #[allow(clippy::indexing_slicing)]
                {
                    filter[nth_instruction] = $crate::bpf!(@op labels, nth_instruction, $($tok)+);
                }
                nth_instruction += 1;
            )+

            let _ = nth_instruction;
        }

        filter
    }};
}
1053
/// Checks that `bpf!` resolves `jump @label` targets into relative jump offsets.
#[test]
fn test_bpf_jump() {
    // A single forward jump: the label sits on instruction 2 and the jump is
    // instruction 0, so one instruction has to be skipped (`jt: 1`).
    //
    // The expected `return` instructions spell their opcode as plain `BPF_RET`;
    // this matches what the macro emits only as long as `BPF_K` is zero.
    assert_eq!(
        bpf! {
            (if a == 1234 => jump @0),
            (return 10),
            ([0]: return 20),
        },
        [
            sock_filter {
                code: BPF_JMP | BPF_JEQ | BPF_K,
                jt: 1,
                jf: 0,
                k: 1234
            },
            sock_filter {
                code: BPF_RET,
                jt: 0,
                jf: 0,
                k: 10
            },
            sock_filter {
                code: BPF_RET,
                jt: 0,
                jf: 0,
                k: 20
            },
        ]
    );

    // Two jumps to the same label from different positions: the offsets differ
    // (`jt: 3` from instruction 0, `jt: 2` from instruction 1) because they are
    // relative to the jumping instruction. Labels 0 and 1 are declared but unused.
    assert_eq!(
        bpf! {
            (if a == 20 => jump @2),
            (if a == 10 => jump @2),
            ([0]: return 0),
            ([1]: return 1),
            ([2]: return 2),
        },
        [
            sock_filter {
                code: BPF_JMP | BPF_JEQ | BPF_K,
                jt: 3,
                jf: 0,
                k: 20
            },
            sock_filter {
                code: BPF_JMP | BPF_JEQ | BPF_K,
                jt: 2,
                jf: 0,
                k: 10
            },
            sock_filter {
                code: BPF_RET,
                jt: 0,
                jf: 0,
                k: 0
            },
            sock_filter {
                code: BPF_RET,
                jt: 0,
                jf: 0,
                k: 1
            },
            sock_filter {
                code: BPF_RET,
                jt: 0,
                jf: 0,
                k: 2
            },
        ]
    );
}
1126
// File descriptor numbers of the three standard streams.
pub const STDIN_FILENO: c_int = 0;
pub const STDOUT_FILENO: c_int = 1;
pub const STDERR_FILENO: c_int = 2;

// Socket related constants (address family, socket types and flags,
// control message level/type, send flags).
pub const AF_UNIX: u32 = 1;
pub const SOCK_STREAM: u32 = 1;
pub const SOCK_SEQPACKET: u32 = 5;
pub const SOCK_CLOEXEC: u32 = 0x80000;
pub const SOL_SOCKET: c_int = 1;
pub const SCM_RIGHTS: c_int = 1;
pub const MSG_NOSIGNAL: u32 = 0x4000;

// `whence` values accepted by `sys_lseek`.
pub const SEEK_SET: u32 = 0;
pub const SEEK_CUR: u32 = 1;
pub const SEEK_END: u32 = 2;

// File status flag; presumably meant for `sys_open`/`sys_fcntl` - confirm against callers.
pub const O_ASYNC: u32 = 0x2000;
1144
/// Rounds `len` up to the next multiple of `size_of::<usize>()`.
#[allow(non_snake_case)]
const fn CMSG_ALIGN(len: usize) -> usize {
    const WORD: usize = core::mem::size_of::<usize>();

    // `WORD` is a power of two, so masking off the low bits rounds down;
    // adding `WORD - 1` first turns that into rounding up.
    let padded = len + WORD - 1;
    padded & !(WORD - 1)
}
1149
1150#[allow(non_snake_case)]
1151pub unsafe fn CMSG_FIRSTHDR(mhdr: *const msghdr) -> *mut cmsghdr {
1152    if (*mhdr).msg_controllen >= core::mem::size_of::<cmsghdr>() {
1153        (*mhdr).msg_control.cast::<cmsghdr>()
1154    } else {
1155        core::ptr::null_mut()
1156    }
1157}
1158
1159#[allow(non_snake_case)]
1160pub unsafe fn CMSG_DATA(cmsg: *mut cmsghdr) -> *mut c_uchar {
1161    cmsg.add(1).cast::<c_uchar>()
1162}
1163
1164#[allow(non_snake_case)]
1165pub const fn CMSG_SPACE(length: usize) -> usize {
1166    CMSG_ALIGN(length) + CMSG_ALIGN(core::mem::size_of::<cmsghdr>())
1167}
1168
1169#[allow(non_snake_case)]
1170pub const fn CMSG_LEN(length: usize) -> usize {
1171    CMSG_ALIGN(core::mem::size_of::<cmsghdr>()) + length
1172}
1173
// The following was copied from the `cstr_core` crate.
//
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723

/// Returns `true` if `bytes` is a well-formed C string: non-empty, terminated
/// by a NUL byte, and without any other NUL byte before the terminator.
///
/// Written with plain loops and indexing so that it can run in `const` context.
#[inline]
#[doc(hidden)]
#[allow(clippy::indexing_slicing)]
pub const fn cstr_is_valid(bytes: &[u8]) -> bool {
    let length = bytes.len();
    if length == 0 {
        return false;
    }

    // Everything before the final byte must be non-zero...
    let terminator = length - 1;
    let mut position = 0;
    while position < terminator {
        if bytes[position] == 0 {
            return false;
        }
        position += 1;
    }

    // ...and the final byte must be the NUL terminator.
    bytes[terminator] == 0
}
1194
/// Creates a `CStr` out of a string literal at compile time.
///
/// A NUL terminator is appended to the literal and the result is checked with
/// `cstr_is_valid`; a literal which fails the check (e.g. one which contains
/// a NUL byte itself) makes the macro fail to compile.
#[macro_export]
macro_rules! cstr {
    ($e:expr) => {{
        const STR: &[u8] = concat!($e, "\0").as_bytes();
        const STR_VALID: bool = $crate::cstr_is_valid(STR);
        // Compile-time assertion: for an invalid string the array length
        // becomes `0 - 1`, which cannot be evaluated.
        let _ = [(); 0 - (!(STR_VALID) as usize)];
        // The unchecked constructor is fine here since the bytes were validated above.
        #[allow(unused_unsafe)]
        unsafe {
            core::ffi::CStr::from_bytes_with_nul_unchecked(STR)
        }
    }}
}
1207
/// The error type returned by the wrappers in this crate.
///
/// Carries a message describing what failed and an errno value.
#[derive(Clone)]
pub struct Error {
    // Without `std` only static strings can be stored...
    #[cfg(not(feature = "std"))]
    message: &'static str,
    // ...while with `std` the message can also be an owned `String`.
    #[cfg(feature = "std")]
    message: Cow<'static, str>,
    // Zero means "no errno attached"; it is then left out when formatting.
    errno: c_int,
}
1216
1217impl core::fmt::Debug for Error {
1218    #[cold]
1219    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1220        core::fmt::Display::fmt(self, fmt)
1221    }
1222}
1223
1224impl core::fmt::Display for Error {
1225    #[cold]
1226    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1227        let mut is_err = false;
1228        self.fmt_to_string(|chunk| {
1229            if fmt.write_str(chunk).is_err() {
1230                is_err = true;
1231            }
1232        });
1233
1234        if is_err {
1235            Err(core::fmt::Error)
1236        } else {
1237            Ok(())
1238        }
1239    }
1240}
1241
// Only available with `std`; relies on the `Debug` and `Display` impls of `Error`.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
1244
#[cfg(feature = "std")]
impl From<std::string::String> for Error {
    /// Wraps an owned message into an error without an errno.
    fn from(message: std::string::String) -> Self {
        Self {
            message: Cow::Owned(message),
            errno: 0,
        }
    }
}
1254
1255impl From<&'static str> for Error {
1256    fn from(message: &'static str) -> Self {
1257        Error::from_str(message)
1258    }
1259}
1260
/// Writes `value` in decimal by handing each digit to `write_str` as its own
/// one-character string, most significant digit first.
fn write_number(value: u32, write_str: &mut dyn FnMut(&str)) {
    // `u32::MAX` has ten decimal digits, so this is always big enough.
    let mut digits = [b'0'; 10];
    let mut remaining = value;
    let mut used = 0;

    // Fill the buffer from the back so that the digits end up in reading order.
    for slot in digits.iter_mut().rev() {
        *slot = b'0' + (remaining % 10) as u8;
        remaining /= 10;
        used += 1;
        if remaining == 0 {
            break;
        }
    }

    for &digit in digits.iter().skip(digits.len() - used) {
        let s = [digit];
        // An ASCII digit is always valid UTF-8.
        let s = unsafe { core::str::from_utf8_unchecked(&s) };
        write_str(s);
    }
}
1273
1274impl Error {
1275    pub fn fmt_to_string(&self, mut write_str: impl FnMut(&str)) {
1276        self.fmt_to_string_impl(&mut write_str);
1277    }
1278
1279    // Avoid pulling in core::fmt machinery to keep the code size low.
1280    #[cold]
1281    fn fmt_to_string_impl(&self, write_str: &mut dyn FnMut(&str)) {
1282        write_str(&self.message);
1283
1284        if self.errno == 0 {
1285            return;
1286        }
1287
1288        write_str(" (errno = ");
1289        write_number(self.errno as u32, write_str);
1290
1291        let errno = match self.errno as u32 {
1292            EPERM => Some("EPERM"),
1293            ENOENT => Some("ENOENT"),
1294            ESRCH => Some("ESRCH"),
1295            EINTR => Some("EINTR"),
1296            EIO => Some("EIO"),
1297            ENXIO => Some("ENXIO"),
1298            E2BIG => Some("E2BIG"),
1299            ENOEXEC => Some("ENOEXEC"),
1300            EBADF => Some("EBADF"),
1301            ECHILD => Some("ECHILD"),
1302            EAGAIN => Some("EAGAIN"),
1303            ENOMEM => Some("ENOMEM"),
1304            EACCES => Some("EACCES"),
1305            EFAULT => Some("EFAULT"),
1306            ENOTBLK => Some("ENOTBLK"),
1307            EBUSY => Some("EBUSY"),
1308            EEXIST => Some("EEXIST"),
1309            EXDEV => Some("EXDEV"),
1310            ENODEV => Some("ENODEV"),
1311            ENOTDIR => Some("ENOTDIR"),
1312            EISDIR => Some("EISDIR"),
1313            EINVAL => Some("EINVAL"),
1314            ENFILE => Some("ENFILE"),
1315            EMFILE => Some("EMFILE"),
1316            ENOTTY => Some("ENOTTY"),
1317            ETXTBSY => Some("ETXTBSY"),
1318            EFBIG => Some("EFBIG"),
1319            ENOSPC => Some("ENOSPC"),
1320            ESPIPE => Some("ESPIPE"),
1321            EROFS => Some("EROFS"),
1322            EMLINK => Some("EMLINK"),
1323            EPIPE => Some("EPIPE"),
1324            EDOM => Some("EDOM"),
1325            ERANGE => Some("ERANGE"),
1326            EOPNOTSUPP => Some("EOPNOTSUPP"),
1327            ETOOMANYREFS => Some("ETOOMANYREFS"),
1328            ERESTARTSYS => Some("ERESTARTSYS"),
1329            _ => None,
1330        };
1331
1332        if let Some(errno) = errno {
1333            write_str(" (");
1334            write_str(errno);
1335            write_str(")");
1336        }
1337
1338        write_str(")");
1339    }
1340
1341    #[cfg(feature = "std")]
1342    #[cold]
1343    pub fn from_os_error(message: &'static str, error: std::io::Error) -> Self {
1344        Self {
1345            message: message.into(),
1346            errno: error.raw_os_error().unwrap_or(0),
1347        }
1348    }
1349
1350    #[cfg(feature = "std")]
1351    #[cold]
1352    pub fn from_last_os_error(message: &'static str) -> Self {
1353        Self {
1354            message: message.into(),
1355            errno: std::io::Error::last_os_error().raw_os_error().unwrap_or(0),
1356        }
1357    }
1358
1359    #[cold]
1360    pub const fn from_errno(message: &'static str, errno: i32) -> Self {
1361        Self {
1362            #[cfg(not(feature = "std"))]
1363            message,
1364            #[cfg(feature = "std")]
1365            message: Cow::Borrowed(message),
1366
1367            errno,
1368        }
1369    }
1370
1371    #[cold]
1372    pub const fn from_str(message: &'static str) -> Self {
1373        Self {
1374            #[cfg(not(feature = "std"))]
1375            message,
1376            #[cfg(feature = "std")]
1377            message: Cow::Borrowed(message),
1378
1379            errno: 0,
1380        }
1381    }
1382
1383    #[inline]
1384    pub fn from_syscall(message: &'static str, result: i64) -> Result<(), Self> {
1385        if result >= -4095 && result < 0 {
1386            Err(Self::from_syscall_unchecked(message, result))
1387        } else {
1388            Ok(())
1389        }
1390    }
1391
1392    #[cold]
1393    #[inline]
1394    const fn from_syscall_unchecked(message: &'static str, result: i64) -> Self {
1395        Self {
1396            #[cfg(not(feature = "std"))]
1397            message,
1398            #[cfg(feature = "std")]
1399            message: Cow::Borrowed(message),
1400
1401            errno: -result as i32,
1402        }
1403    }
1404
1405    #[inline]
1406    pub fn errno(&self) -> u32 {
1407        self.errno as u32
1408    }
1409}
1410
/// Aborts execution by running the `ud2` instruction. Never returns.
#[cfg(target_arch = "x86_64")]
#[inline(never)]
#[cold]
pub fn abort() -> ! {
    // `core::hint::unreachable_unchecked` ends up emitting the very same
    // instruction in practice, but reaching it is undefined behavior which
    // could affect unrelated code, hence the explicit `asm!`.
    unsafe { core::arch::asm!("ud2", options(noreturn, nostack)) }
}
1423
/// An owned file descriptor. Will be automatically closed on drop.
///
/// A negative inner value marks a descriptor which is not open (anymore);
/// closing or dropping such a value does nothing.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct Fd(c_int);
1428
/// An unowned file descriptor.
///
/// Cheap to copy and never closes the descriptor; the lifetime ties it
/// to the `Fd` it was borrowed from.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct FdRef<'a>(c_int, PhantomData<&'a Fd>);
1433
1434impl Fd {
1435    pub fn raw(&self) -> c_int {
1436        self.0
1437    }
1438
1439    #[inline]
1440    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1441        Self(fd)
1442    }
1443
1444    pub fn borrow(&self) -> FdRef {
1445        FdRef(self.0, PhantomData)
1446    }
1447
1448    pub fn close(mut self) -> Result<(), Error> {
1449        self.close_inplace()?;
1450        Ok(())
1451    }
1452
1453    pub fn leak(mut self) -> c_int {
1454        core::mem::replace(&mut self.0, -1)
1455    }
1456
1457    fn close_inplace(&mut self) -> Result<(), Error> {
1458        if self.raw() < 0 {
1459            return Ok(());
1460        }
1461
1462        let fd = core::mem::replace(&mut self.0, -1);
1463        let result = unsafe { syscall_readonly!(SYS_close, fd) };
1464        Error::from_syscall("close", result)
1465    }
1466}
1467
1468impl Drop for Fd {
1469    fn drop(&mut self) {
1470        let _ = self.close_inplace();
1471    }
1472}
1473
1474impl<'a> FdRef<'a> {
1475    pub fn raw(&self) -> c_int {
1476        self.0
1477    }
1478
1479    #[inline]
1480    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1481        Self(fd, PhantomData)
1482    }
1483}
1484
1485impl<'a> From<&'a Fd> for FdRef<'a> {
1486    fn from(fd: &'a Fd) -> Self {
1487        FdRef(fd.0, PhantomData)
1488    }
1489}
1490
1491impl<'a> From<&'a mut Fd> for FdRef<'a> {
1492    fn from(fd: &'a mut Fd) -> Self {
1493        FdRef(fd.0, PhantomData)
1494    }
1495}
1496
1497impl core::fmt::Write for Fd {
1498    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1499        FdRef::from(self).write_str(string)
1500    }
1501}
1502
1503impl<'a> core::fmt::Write for FdRef<'a> {
1504    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1505        let mut bytes = string.as_bytes();
1506        while !bytes.is_empty() {
1507            let count = sys_write(*self, bytes).map_err(|_| core::fmt::Error)?;
1508            if count == 0 {
1509                return Err(core::fmt::Error);
1510            }
1511            bytes = bytes.get(count..).ok_or(core::fmt::Error)?;
1512        }
1513
1514        Ok(())
1515    }
1516}
1517
1518pub fn sys_uname() -> Result<new_utsname, Error> {
1519    let mut out: new_utsname = unsafe { core::mem::zeroed() };
1520    let result = unsafe { syscall!(SYS_uname, core::ptr::addr_of_mut!(out)) };
1521    Error::from_syscall("uname", result)?;
1522    Ok(out)
1523}
1524
1525pub fn sys_io_uring_setup(entries: u32, params: &mut io_uring_params) -> Result<Fd, Error> {
1526    let fd = unsafe { syscall!(SYS_io_uring_setup, entries, params as *mut io_uring_params) };
1527    Error::from_syscall("io_uring_setup", fd)?;
1528    Ok(Fd::from_raw_unchecked(fd as c_int))
1529}
1530
1531pub fn sys_io_uring_register(fd: FdRef, opcode: u32, arg: *const c_void, arg_count: u32) -> Result<(), Error> {
1532    let result = unsafe { syscall!(SYS_io_uring_register, fd, opcode, arg, arg_count) };
1533    Error::from_syscall("io_uring_register", result)?;
1534    Ok(())
1535}
1536
1537pub unsafe fn sys_io_uring_enter(
1538    fd: FdRef,
1539    to_submit: u32,
1540    min_complete: u32,
1541    flags: u32,
1542    arg: *const c_void,
1543    argsz: usize,
1544) -> Result<u32, Error> {
1545    let result = unsafe { syscall!(SYS_io_uring_enter, fd, to_submit, min_complete, flags, arg, argsz) };
1546    Error::from_syscall("io_uring_enter", result)?;
1547    Ok(result as u32)
1548}
1549
1550pub fn sys_ioctl(fd: FdRef, cmd: c_uint, arg: c_ulong) -> Result<c_int, Error> {
1551    let result = unsafe { syscall!(SYS_ioctl, fd, cmd, arg) };
1552    Error::from_syscall("ioctl", result)?;
1553    Ok(result as c_int)
1554}
1555
1556pub fn sys_userfaultfd(flags: c_uint) -> Result<Fd, Error> {
1557    let fd = unsafe { syscall_readonly!(SYS_userfaultfd, flags) };
1558    Error::from_syscall("userfaultfd", fd)?;
1559    Ok(Fd::from_raw_unchecked(fd as c_int))
1560}
1561
1562fn sys_getdents64(fd: FdRef, buffer: &mut [u8]) -> Result<Option<usize>, Error> {
1563    let length = buffer.len();
1564    let bytes_read = unsafe { syscall!(SYS_getdents64, fd.raw(), buffer, length) };
1565    Error::from_syscall("getdents64", bytes_read)?;
1566
1567    if bytes_read == 0 {
1568        Ok(None)
1569    } else {
1570        Ok(Some(bytes_read as usize))
1571    }
1572}
1573
1574pub unsafe fn sys_arch_prctl_set_gs(value: usize) -> Result<(), Error> {
1575    let result = syscall_readonly!(SYS_arch_prctl, ARCH_SET_GS, value);
1576    Error::from_syscall("arch_prctl(ARCH_SET_GS)", result)?;
1577    Ok(())
1578}
1579
1580pub fn sys_sched_yield() -> Result<(), Error> {
1581    // On Linux this always succeeds, although technically it could fail
1582    // due to a seccomp sandbox, so let's return an error anyway.
1583    let result = unsafe { syscall_readonly!(SYS_sched_yield) };
1584    Error::from_syscall("sched_yield", result)?;
1585    Ok(())
1586}
1587
1588pub fn sys_socketpair(domain: u32, kind: u32, protocol: u32) -> Result<(Fd, Fd), Error> {
1589    let mut output: [c_int; 2] = [-1, -1];
1590    let fd = unsafe { syscall_readonly!(SYS_socketpair, domain, kind, protocol, &mut output[..]) };
1591    Error::from_syscall("socketpair", fd)?;
1592    Ok((Fd(output[0] as c_int), Fd(output[1] as c_int)))
1593}
1594
1595pub fn sys_pipe2(flags: c_uint) -> Result<(Fd, Fd), Error> {
1596    let mut pipes: [c_int; 2] = [-1, -1];
1597    let result = unsafe { syscall_readonly!(SYS_pipe2, pipes.as_mut_ptr(), flags) };
1598    Error::from_syscall("pipe2", result)?;
1599    Ok((Fd::from_raw_unchecked(pipes[0]), Fd::from_raw_unchecked(pipes[1])))
1600}
1601
1602pub fn sys_open(path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1603    let fd = unsafe { syscall_readonly!(SYS_open, path.as_ptr(), flags, 0) };
1604    Error::from_syscall("open", fd)?;
1605    Ok(Fd(fd as c_int))
1606}
1607
1608pub fn sys_openat(dir: FdRef, path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1609    let fd = unsafe { syscall_readonly!(SYS_openat, dir, path.as_ptr(), flags, 0) };
1610    Error::from_syscall("openat", fd)?;
1611    Ok(Fd(fd as c_int))
1612}
1613
1614pub fn sys_memfd_create(name: &CStr, flags: c_uint) -> Result<Fd, Error> {
1615    let fd = unsafe { syscall_readonly!(SYS_memfd_create, name.as_ptr(), flags) };
1616    Error::from_syscall("memfd_create", fd)?;
1617    Ok(Fd(fd as c_int))
1618}
1619
1620pub fn sys_fcntl(fd: FdRef, cmd: u32, arg: u32) -> Result<i32, Error> {
1621    let result = unsafe { syscall_readonly!(SYS_fcntl, fd, cmd, arg) };
1622    Error::from_syscall("fcntl", result)?;
1623    Ok(result as i32)
1624}
1625
1626pub fn sys_fcntl_dupfd(fd: FdRef, min: c_int) -> Result<Fd, Error> {
1627    let fd = sys_fcntl(fd, F_DUPFD, min as u32)?;
1628    Ok(Fd::from_raw_unchecked(fd))
1629}
1630
1631pub fn sys_close_range(first_fd: c_int, last_fd: c_int, flags: c_uint) -> Result<(), Error> {
1632    let result = unsafe { syscall_readonly!(SYS_close_range, first_fd, last_fd, flags) };
1633    Error::from_syscall("close_range", result)
1634}
1635
1636pub fn sys_fallocate(fd: FdRef, mode: c_uint, offset: u64, length: u64) -> Result<(), Error> {
1637    let result = unsafe { syscall!(SYS_fallocate, fd, mode, offset, length) };
1638    Error::from_syscall("fallocate", result)
1639}
1640
1641pub fn sys_ftruncate(fd: FdRef, length: c_ulong) -> Result<(), Error> {
1642    let result = unsafe { syscall!(SYS_ftruncate, fd, length) };
1643    Error::from_syscall("ftruncate", result)
1644}
1645
1646pub fn sys_chdir(path: &CStr) -> Result<(), Error> {
1647    let result = unsafe { syscall_readonly!(SYS_chdir, path.as_ptr()) };
1648    Error::from_syscall("chdir", result)
1649}
1650
1651pub fn sys_fchdir(fd: FdRef) -> Result<(), Error> {
1652    let result = unsafe { syscall_readonly!(SYS_fchdir, fd) };
1653    Error::from_syscall("fchdir", result)
1654}
1655
1656pub unsafe fn sys_mmap(
1657    address: *mut c_void,
1658    length: c_size_t,
1659    protection: c_uint,
1660    flags: c_uint,
1661    fd: Option<FdRef>,
1662    offset: c_ulong,
1663) -> Result<*mut c_void, Error> {
1664    let result = syscall!(SYS_mmap, address, length, protection, flags, fd, offset);
1665    Error::from_syscall("mmap", result)?;
1666    Ok(result as *mut c_void)
1667}
1668
1669pub unsafe fn sys_munmap(address: *mut c_void, length: c_size_t) -> Result<(), Error> {
1670    let result = syscall!(SYS_munmap, address, length);
1671    Error::from_syscall("munmap", result)
1672}
1673
1674pub unsafe fn sys_mremap(
1675    address: *mut c_void,
1676    old_length: c_size_t,
1677    new_length: c_size_t,
1678    flags: c_uint,
1679    new_address: *mut c_void,
1680) -> Result<*mut c_void, Error> {
1681    let result = syscall!(SYS_mremap, address, old_length, new_length, flags, new_address);
1682    Error::from_syscall("mremap", result)?;
1683    Ok(result as *mut c_void)
1684}
1685
1686pub unsafe fn sys_mprotect(address: *mut c_void, length: c_size_t, protection: c_uint) -> Result<(), Error> {
1687    let result = syscall!(SYS_mprotect, address, length, protection);
1688    Error::from_syscall("mprotect", result)
1689}
1690
1691pub unsafe fn sys_madvise(address: *mut c_void, length: c_size_t, advice: c_uint) -> Result<(), Error> {
1692    let result = syscall!(SYS_madvise, address, length, advice);
1693    Error::from_syscall("madvise", result)
1694}
1695
1696pub fn sys_getpid() -> Result<pid_t, Error> {
1697    let result = unsafe { syscall_readonly!(SYS_getpid) };
1698    Error::from_syscall("getpid", result)?;
1699    Ok(result as pid_t)
1700}
1701
1702pub fn sys_getuid() -> Result<uid_t, Error> {
1703    let result = unsafe { syscall_readonly!(SYS_getuid) };
1704    Error::from_syscall("getuid", result)?;
1705    Ok(result as u32)
1706}
1707
1708pub fn sys_getgid() -> Result<gid_t, Error> {
1709    let result = unsafe { syscall_readonly!(SYS_getgid) };
1710    Error::from_syscall("getgid", result)?;
1711    Ok(result as u32)
1712}
1713
1714pub fn sys_kill(pid: pid_t, signal: c_uint) -> Result<(), Error> {
1715    let result = unsafe { syscall_readonly!(SYS_kill, pid, signal) };
1716    Error::from_syscall("kill", result)?;
1717    Ok(())
1718}
1719
1720pub unsafe fn sys_read_raw(fd: FdRef, buffer: *mut u8, length: usize) -> Result<c_size_t, Error> {
1721    let result = unsafe { syscall!(SYS_read, fd.raw(), buffer, length) };
1722    Error::from_syscall("read", result)?;
1723    Ok(result as c_size_t)
1724}
1725
1726pub fn sys_read(fd: FdRef, buffer: &mut [u8]) -> Result<c_size_t, Error> {
1727    unsafe { sys_read_raw(fd, buffer.as_mut_ptr(), buffer.len()) }
1728}
1729
1730pub fn sys_write(fd: FdRef, buffer: &[u8]) -> Result<c_size_t, Error> {
1731    let result = unsafe { syscall_readonly!(SYS_write, fd.raw(), buffer.as_ptr(), buffer.len()) };
1732    Error::from_syscall("write", result)?;
1733    Ok(result as c_size_t)
1734}
1735
1736pub fn sys_lseek(fd: FdRef, offset: i64, whence: u32) -> Result<u64, Error> {
1737    let result = unsafe { syscall_readonly!(SYS_lseek, fd.raw(), offset, whence) };
1738    Error::from_syscall("lseek", result)?;
1739    Ok(result as u64)
1740}
1741
1742pub unsafe fn sys_process_vm_readv(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1743    let result = unsafe {
1744        syscall!(
1745            SYS_process_vm_readv,
1746            pid,
1747            local_iovec,
1748            local_iovec.len(),
1749            remote_iovec,
1750            remote_iovec.len(),
1751            0
1752        )
1753    };
1754    Error::from_syscall("process_vm_readv", result)?;
1755    Ok(result as usize)
1756}
1757
1758pub unsafe fn sys_process_vm_writev(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1759    let result = unsafe {
1760        syscall!(
1761            SYS_process_vm_writev,
1762            pid,
1763            local_iovec,
1764            local_iovec.len(),
1765            remote_iovec,
1766            remote_iovec.len(),
1767            0
1768        )
1769    };
1770    Error::from_syscall("process_vm_writev", result)?;
1771    Ok(result as usize)
1772}
1773
1774pub unsafe fn sys_writev(fd: FdRef, iv: &[iovec]) -> Result<usize, Error> {
1775    let result = unsafe { syscall!(SYS_writev, fd, iv, iv.len()) };
1776    Error::from_syscall("writev", result)?;
1777    Ok(result as usize)
1778}
1779
1780pub fn sys_sendmsg(fd: FdRef, message: &msghdr, flags: u32) -> Result<usize, Error> {
1781    let result = unsafe { syscall_readonly!(SYS_sendmsg, fd.raw(), message as *const msghdr, flags) };
1782    Error::from_syscall("sendmsg", result)?;
1783    Ok(result as usize)
1784}
1785
1786pub fn sys_recvmsg(fd: FdRef, message: &mut msghdr, flags: u32) -> Result<usize, Error> {
1787    let result = unsafe { syscall!(SYS_recvmsg, fd.raw(), message as *mut msghdr, flags) };
1788    Error::from_syscall("recvmsg", result)?;
1789    Ok(result as usize)
1790}
1791
1792pub fn sys_exit(errcode: c_int) -> Result<(), Error> {
1793    let result = unsafe { syscall_readonly!(SYS_exit, errcode) };
1794    Error::from_syscall("exit", result)?;
1795    Ok(())
1796}
1797
1798pub fn sys_dup3(old_fd: c_int, new_fd: c_int, flags: c_uint) -> Result<(), Error> {
1799    let result = unsafe { syscall_readonly!(SYS_dup3, old_fd, new_fd, flags) };
1800    Error::from_syscall("dup3", result)?;
1801    Ok(())
1802}
1803
1804pub unsafe fn sys_execveat(
1805    dirfd: Option<FdRef>,
1806    path: &CStr,
1807    argv: &[*const c_uchar],
1808    envp: &[*const c_uchar],
1809    flags: c_uint,
1810) -> Result<(), Error> {
1811    let result = unsafe { syscall_readonly!(SYS_execveat, dirfd, path.as_ptr(), argv, envp, flags) };
1812    Error::from_syscall("execveat", result)?;
1813    Ok(())
1814}
1815
1816pub fn sys_ptrace_traceme() -> Result<(), Error> {
1817    let result = unsafe { syscall_readonly!(SYS_ptrace, 0, 0, 0) };
1818    Error::from_syscall("ptrace (PTRACE_TRACEME)", result)?;
1819    Ok(())
1820}
1821
1822pub fn sys_ptrace_interrupt(pid: pid_t) -> Result<(), Error> {
1823    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_INTERRUPT, pid, 0, 0) };
1824    Error::from_syscall("ptrace (PTRACE_INTERRUPT)", result)?;
1825    Ok(())
1826}
1827
1828pub fn sys_ptrace_attach(pid: pid_t) -> Result<(), Error> {
1829    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_ATTACH, pid, 0, 0) };
1830    Error::from_syscall("ptrace (PTRACE_ATTACH)", result)?;
1831    Ok(())
1832}
1833
1834pub fn sys_ptrace_seize(pid: pid_t) -> Result<(), Error> {
1835    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SEIZE, pid, 0, 0) };
1836    Error::from_syscall("ptrace (PTRACE_SEIZE)", result)?;
1837    Ok(())
1838}
1839
1840pub fn sys_ptrace_continue(pid: pid_t, signal: Option<u32>) -> Result<(), Error> {
1841    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_CONT, pid, 0, signal.unwrap_or(0)) };
1842    Error::from_syscall("ptrace (PTRACE_CONT)", result)?;
1843    Ok(())
1844}
1845
1846pub fn sys_ptrace_detach(pid: pid_t) -> Result<(), Error> {
1847    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_DETACH, pid, 0, 0) };
1848    Error::from_syscall("ptrace (PTRACE_DETACH)", result)?;
1849    Ok(())
1850}
1851
1852pub fn sys_ptrace_get_siginfo(pid: pid_t) -> Result<siginfo_t, Error> {
1853    let mut siginfo: siginfo_t = unsafe { core::mem::zeroed() };
1854    let result = unsafe {
1855        syscall!(
1856            SYS_ptrace,
1857            crate::arch_bindings::PTRACE_GETSIGINFO,
1858            pid,
1859            0,
1860            core::ptr::addr_of_mut!(siginfo)
1861        )
1862    };
1863    Error::from_syscall("ptrace (PTRACE_GETSIGINFO)", result)?;
1864    Ok(siginfo)
1865}
1866
/// The register set read by `sys_ptrace_getregs` and written by `sys_ptrace_setregs`.
///
/// A pointer to this structure is handed directly to the kernel, so the
/// `#[repr(C)]` layout and the order of the fields must not be changed.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
#[derive(Default, Debug)]
pub struct user_regs_struct {
    pub r15: c_ulong,
    pub r14: c_ulong,
    pub r13: c_ulong,
    pub r12: c_ulong,
    pub rbp: c_ulong,
    pub rbx: c_ulong,
    pub r11: c_ulong,
    pub r10: c_ulong,
    pub r9: c_ulong,
    pub r8: c_ulong,
    pub rax: c_ulong,
    pub rcx: c_ulong,
    pub rdx: c_ulong,
    pub rsi: c_ulong,
    pub rdi: c_ulong,
    pub orig_rax: c_ulong,
    pub rip: c_ulong,
    pub cs: c_ulong,
    pub flags: c_ulong,
    pub sp: c_ulong,
    pub ss: c_ulong,
    pub fs_base: c_ulong,
    pub gs_base: c_ulong,
    pub ds: c_ulong,
    pub es: c_ulong,
    pub fs: c_ulong,
    pub gs: c_ulong,
}
1899
1900pub fn sys_ptrace_getregs(pid: pid_t) -> Result<user_regs_struct, Error> {
1901    let mut output: MaybeUninit<user_regs_struct> = MaybeUninit::uninit();
1902    let result = unsafe { syscall!(SYS_ptrace, crate::arch_bindings::PTRACE_GETREGS, pid, 0, output.as_mut_ptr()) };
1903    Error::from_syscall("ptrace (PTRACE_GETREGS)", result)?;
1904
1905    unsafe { Ok(output.assume_init()) }
1906}
1907
1908pub fn sys_ptrace_setregs(pid: pid_t, regs: &user_regs_struct) -> Result<(), Error> {
1909    let regs: *const user_regs_struct = regs;
1910    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SETREGS, pid, 0, regs) };
1911    Error::from_syscall("ptrace (PTRACE_SETREGS)", result)?;
1912    Ok(())
1913}
1914
1915pub fn sys_prctl_set_no_new_privs() -> Result<(), Error> {
1916    const PR_SET_NO_NEW_PRIVS: usize = 38;
1917    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
1918    Error::from_syscall("prctl(PR_SET_NO_NEW_PRIVS)", result)
1919}
1920
1921pub fn sys_prctl_cap_ambient_clear_all() -> Result<(), Error> {
1922    const PR_CAP_AMBIENT: usize = 47;
1923    const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
1924    let result = unsafe { syscall_readonly!(SYS_prctl, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) };
1925    Error::from_syscall("prctl(PR_CAP_AMBIENT)", result)
1926}
1927
1928pub fn sys_prctl_set_securebits(bits: u32) -> Result<(), Error> {
1929    const PR_SET_SECUREBITS: usize = 28;
1930    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_SECUREBITS, bits, 0, 0, 0) };
1931    Error::from_syscall("prctl(PR_SET_SECUREBITS)", result)
1932}
1933
1934pub fn sys_prctl_set_name(name: &[u8; 16]) -> Result<(), Error> {
1935    const PR_SET_NAME: usize = 15;
1936    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NAME, name.as_ptr(), 0, 0, 0) };
1937    Error::from_syscall("prctl(PR_SET_NAME)", result)
1938}
1939
1940pub fn sys_prctl_set_dumpable(value: bool) -> Result<(), Error> {
1941    const PR_SET_DUMPABLE: usize = 4;
1942    let value = usize::from(value);
1943    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_DUMPABLE, value, 0, 0, 0) };
1944    Error::from_syscall("prctl(PR_SET_DUMPABLE)", result)
1945}
1946
1947pub fn sys_prctl_get_dumpable() -> Result<bool, Error> {
1948    const PR_GET_DUMPABLE: usize = 3;
1949    let result = unsafe { syscall_readonly!(SYS_prctl, PR_GET_DUMPABLE, 0, 0, 0, 0) };
1950    Error::from_syscall("prctl(PR_GET_DUMPABLE)", result)?;
1951    if result == 0 {
1952        Ok(false)
1953    } else {
1954        Ok(true)
1955    }
1956}
1957
1958pub fn sys_capset(header: &__user_cap_header_struct, data: &[__user_cap_data_struct; 2]) -> Result<(), Error> {
1959    let result = unsafe {
1960        syscall_readonly!(
1961            SYS_capset,
1962            header as *const __user_cap_header_struct,
1963            data as *const __user_cap_data_struct
1964        )
1965    };
1966    Error::from_syscall("capset", result)
1967}
1968
1969pub fn sys_capset_drop_all() -> Result<(), Error> {
1970    let cap_user_header = __user_cap_header_struct {
1971        version: _LINUX_CAPABILITY_VERSION_3,
1972        pid: 0,
1973    };
1974    let cap_user_data = [__user_cap_data_struct {
1975        effective: 0,
1976        inheritable: 0,
1977        permitted: 0,
1978    }; 2];
1979
1980    sys_capset(&cap_user_header, &cap_user_data)
1981}
1982
1983pub fn sys_seccomp_set_mode_filter(filter: &[sock_filter]) -> Result<(), Error> {
1984    let filter = sock_fprog {
1985        length: if let Ok(length) = c_ushort::try_from(filter.len()) {
1986            length
1987        } else {
1988            return Err(Error::from_errno("seccomp(SECCOMP_SET_MODE_FILTER)", EINVAL as i32));
1989        },
1990        filter: filter.as_ptr(),
1991    };
1992
1993    let result = unsafe { syscall_readonly!(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, core::ptr::addr_of!(filter)) };
1994    Error::from_syscall("seccomp(SECCOMP_SET_MODE_FILTER)", result)
1995}
1996
1997pub fn sys_setrlimit(resource: u32, limit: &rlimit) -> Result<(), Error> {
1998    let result = unsafe { syscall_readonly!(SYS_setrlimit, resource, limit as *const rlimit) };
1999    Error::from_syscall("setrlimit", result)
2000}
2001
2002pub fn sys_sethostname(name: &str) -> Result<(), Error> {
2003    let result = unsafe { syscall_readonly!(SYS_sethostname, name.as_ptr(), name.len()) };
2004    Error::from_syscall("sethostname", result)
2005}
2006
2007pub fn sys_setdomainname(name: &str) -> Result<(), Error> {
2008    let result = unsafe { syscall_readonly!(SYS_setdomainname, name.as_ptr(), name.len()) };
2009    Error::from_syscall("setdomainname", result)
2010}
2011
2012pub fn sys_mount(dev_name: &CStr, dir_name: &CStr, kind: &CStr, flags: u32, data: Option<&CStr>) -> Result<(), Error> {
2013    let result = unsafe {
2014        syscall_readonly!(
2015            SYS_mount,
2016            dev_name.as_ptr(),
2017            dir_name.as_ptr(),
2018            kind.as_ptr(),
2019            flags,
2020            data.map_or(core::ptr::null(), |data| data.as_ptr())
2021        )
2022    };
2023    Error::from_syscall("mount", result)
2024}
2025
2026pub fn sys_umount2(target: &CStr, flags: u32) -> Result<(), Error> {
2027    let result = unsafe { syscall_readonly!(SYS_umount2, target.as_ptr(), flags) };
2028    Error::from_syscall("umount2", result)
2029}
2030
2031pub fn sys_pivot_root(new_root: &CStr, old_root: &CStr) -> Result<(), Error> {
2032    let result = unsafe { syscall_readonly!(SYS_pivot_root, new_root.as_ptr(), old_root.as_ptr()) };
2033    Error::from_syscall("pivot_root", result)
2034}
2035
2036pub fn sys_unshare(flags: u32) -> Result<(), Error> {
2037    let result = unsafe { syscall_readonly!(SYS_unshare, flags) };
2038    Error::from_syscall("unshare", result)
2039}
2040
2041/// Calls the `futex` syscall with `FUTEX_WAIT` operation.
2042///
2043/// This will block *if* the value of the `futex` is equal to the `expected_value`.
2044///
2045/// Possible non-fatal errors:
2046///   - `EAGAIN`: the value of `futex` is not equal to `expected_value`
2047///   - `EINTR`: the syscall was interrupted by a signal
2048///   - `ETIMEDOUT`: the specified timeout has elapsed without the futex being woken up
2049pub fn sys_futex_wait(futex: &AtomicU32, expected_value: u32, timeout: Option<Duration>) -> Result<(), Error> {
2050    let ts: Option<timespec> = timeout.map(|timeout| timespec {
2051        tv_sec: timeout.as_secs() as i64,
2052        tv_nsec: u64::from(timeout.subsec_nanos()) as i64,
2053    });
2054
2055    let result = unsafe {
2056        syscall!(
2057            SYS_futex,
2058            futex as *const AtomicU32,
2059            FUTEX_WAIT,
2060            expected_value,
2061            ts.as_ref().map_or(core::ptr::null(), |ts| ts as *const timespec)
2062        )
2063    };
2064    Error::from_syscall("futex (wait)", result)
2065}
2066
2067/// Wakes up at most one thread waiting on `futex`.
2068///
2069/// Will return `true` if anybody was woken up.
2070pub fn sys_futex_wake_one(futex: &AtomicU32) -> Result<bool, Error> {
2071    let result = unsafe { syscall_readonly!(SYS_futex, futex as *const AtomicU32, FUTEX_WAKE, 1) };
2072    Error::from_syscall("futex (wake)", result)?;
2073    Ok(result == 1)
2074}
2075
2076pub fn sys_set_tid_address(address: *const u32) -> Result<(), Error> {
2077    let result = unsafe { syscall_readonly!(SYS_set_tid_address, address) };
2078    Error::from_syscall("set_tid_address", result)?;
2079    Ok(())
2080}
2081
2082pub unsafe fn sys_rt_sigaction(signal: u32, new_action: &kernel_sigaction, old_action: Option<&mut kernel_sigaction>) -> Result<(), Error> {
2083    let result = unsafe {
2084        syscall_readonly!(
2085            SYS_rt_sigaction,
2086            signal,
2087            new_action as *const kernel_sigaction,
2088            old_action.map_or(core::ptr::null_mut(), |old_action| old_action as *mut kernel_sigaction),
2089            core::mem::size_of::<kernel_sigset_t>()
2090        )
2091    };
2092    Error::from_syscall("rt_sigaction", result)?;
2093    Ok(())
2094}
2095
2096pub unsafe fn sys_rt_sigprocmask(how: u32, new_sigset: &kernel_sigset_t, old_sigset: Option<&mut kernel_sigset_t>) -> Result<(), Error> {
2097    let result = unsafe {
2098        syscall_readonly!(
2099            SYS_rt_sigprocmask,
2100            how,
2101            new_sigset as *const kernel_sigset_t,
2102            old_sigset.map_or(core::ptr::null_mut(), |old_sigset| old_sigset as *mut kernel_sigset_t),
2103            core::mem::size_of::<kernel_sigset_t>()
2104        )
2105    };
2106    Error::from_syscall("rt_sigprocmask", result)?;
2107    Ok(())
2108}
2109
2110pub unsafe fn sys_sigaltstack(new_stack: &stack_t, old_stack: Option<&mut stack_t>) -> Result<(), Error> {
2111    let result = unsafe {
2112        syscall_readonly!(
2113            SYS_sigaltstack,
2114            new_stack as *const stack_t,
2115            old_stack.map_or(core::ptr::null_mut(), |old_stack| old_stack as *mut stack_t)
2116        )
2117    };
2118    Error::from_syscall("sigaltstack", result)?;
2119    Ok(())
2120}
2121
2122pub fn sys_clock_gettime(clock_id: u32) -> Result<Duration, Error> {
2123    let mut output = timespec { tv_sec: 0, tv_nsec: 0 };
2124    let result = unsafe { syscall_readonly!(SYS_clock_gettime, clock_id, core::ptr::addr_of_mut!(output)) };
2125    Error::from_syscall("clock_gettime", result)?;
2126
2127    let duration = Duration::new(output.tv_sec as u64, output.tv_nsec as u32);
2128    Ok(duration)
2129}
2130
2131pub fn sys_nanosleep(duration: Duration) -> Result<Option<Duration>, Error> {
2132    let duration = timespec {
2133        tv_sec: duration.as_secs() as i64,
2134        tv_nsec: u64::from(duration.subsec_nanos()) as i64,
2135    };
2136
2137    let mut remaining = timespec { tv_sec: 0, tv_nsec: 0 };
2138    let result = unsafe { syscall_readonly!(SYS_nanosleep, core::ptr::addr_of!(duration), core::ptr::addr_of_mut!(remaining)) };
2139    let error = Error::from_syscall("nanosleep", result);
2140    if let Err(error) = error {
2141        if error.errno() == EINTR {
2142            let remaining = Duration::new(remaining.tv_sec as u64, remaining.tv_nsec as u32);
2143            Ok(Some(remaining))
2144        } else {
2145            Err(error)
2146        }
2147    } else {
2148        Ok(None)
2149    }
2150}
2151
2152pub fn sys_waitid(which: u32, pid: pid_t, info: &mut siginfo_t, options: u32, usage: Option<&mut rusage>) -> Result<(), Error> {
2153    let result = unsafe {
2154        syscall_readonly!(
2155            SYS_waitid,
2156            which,
2157            pid,
2158            info as *mut siginfo_t,
2159            options,
2160            usage.map_or(core::ptr::null_mut(), |usage| usage as *mut rusage)
2161        )
2162    };
2163
2164    Error::from_syscall("waitid", result)?;
2165    Ok(())
2166}
2167
2168pub fn vm_read_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2169    pid: pid_t,
2170    local: [&mut [MaybeUninit<u8>]; N_LOCAL],
2171    remote: [(usize, usize); N_REMOTE],
2172) -> Result<usize, Error> {
2173    let local_iovec = local.map(|slice| iovec {
2174        iov_base: slice.as_mut_ptr().cast(),
2175        iov_len: slice.len() as u64,
2176    });
2177    let remote_iovec = remote.map(|(address, length)| iovec {
2178        iov_base: address as *mut c_void,
2179        iov_len: length as u64,
2180    });
2181    unsafe { sys_process_vm_readv(pid, &local_iovec, &remote_iovec) }
2182}
2183
2184pub fn vm_write_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2185    pid: pid_t,
2186    local: [&[u8]; N_LOCAL],
2187    remote: [(usize, usize); N_REMOTE],
2188) -> Result<usize, Error> {
2189    let local_iovec = local.map(|slice| iovec {
2190        iov_base: slice.as_ptr().cast_mut().cast(),
2191        iov_len: slice.len() as u64,
2192    });
2193    let remote_iovec = remote.map(|(address, length)| iovec {
2194        iov_base: address as *mut c_void,
2195        iov_len: length as u64,
2196    });
2197    unsafe { sys_process_vm_writev(pid, &local_iovec, &remote_iovec) }
2198}
2199
2200pub fn writev<const N: usize>(fd: FdRef, list: [&[u8]; N]) -> Result<usize, Error> {
2201    let iv = list.map(|slice| iovec {
2202        iov_base: slice.as_ptr().cast_mut().cast(),
2203        iov_len: slice.len() as u64,
2204    });
2205    unsafe { sys_writev(fd, &iv) }
2206}
2207
2208#[inline(always)] // To prevent the buffer from being copied.
2209pub fn readdir(dirfd: FdRef) -> Dirent64Iter {
2210    Dirent64Iter {
2211        dirfd,
2212        buffer: [0; 1024], // TODO: Use MaybeUninit.
2213        bytes_available: 0,
2214        position: 0,
2215    }
2216}
2217
2218#[repr(transparent)]
2219pub struct Dirent64<'a> {
2220    raw: linux_dirent64,
2221    _lifetime: core::marker::PhantomData<&'a [u8]>,
2222}
2223
2224impl<'a> Dirent64<'a> {
2225    pub fn d_type(&self) -> c_uchar {
2226        self.raw.d_type
2227    }
2228
2229    pub fn d_name(&self) -> &'a [u8] {
2230        unsafe {
2231            let name = self.raw.d_name.as_ptr();
2232            let length = {
2233                let mut p = self.raw.d_name.as_ptr();
2234                while *p != 0 {
2235                    p = p.add(1);
2236                }
2237
2238                p as usize - name as usize
2239            };
2240
2241            core::slice::from_raw_parts(name.cast(), length)
2242        }
2243    }
2244}
2245
2246pub struct Dirent64Iter<'a> {
2247    dirfd: FdRef<'a>,
2248    buffer: [u8; 1024],
2249    bytes_available: usize,
2250    position: usize,
2251}
2252
2253impl<'a> Iterator for Dirent64Iter<'a> {
2254    type Item = Result<Dirent64<'a>, Error>;
2255    fn next(&mut self) -> Option<Self::Item> {
2256        loop {
2257            if self.position < self.bytes_available {
2258                let dirent = unsafe { core::ptr::read_unaligned(self.buffer.as_ptr().add(self.position).cast::<Dirent64>()) };
2259
2260                self.position += usize::from(dirent.raw.d_reclen);
2261                return Some(Ok(dirent));
2262            }
2263
2264            match sys_getdents64(self.dirfd, &mut self.buffer) {
2265                Ok(Some(bytes_available)) => self.bytes_available = bytes_available,
2266                Ok(None) => return None,
2267                Err(error) => return Some(Err(error)),
2268            };
2269        }
2270    }
2271}
2272
2273pub fn sendfd(socket: FdRef, fd: FdRef) -> Result<(), Error> {
2274    let mut dummy: c_int = 0;
2275    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2276
2277    let mut iov = iovec {
2278        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2279        iov_len: core::mem::size_of_val(&dummy) as u64,
2280    };
2281
2282    let mut header = msghdr {
2283        msg_name: core::ptr::null_mut(),
2284        msg_namelen: 0,
2285        msg_iov: &mut iov,
2286        msg_iovlen: 1,
2287        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2288        msg_controllen: core::mem::size_of_val(&buffer),
2289        msg_flags: 0,
2290    };
2291
2292    let control_header = cmsghdr {
2293        cmsg_len: CMSG_LEN(core::mem::size_of::<c_int>()),
2294        cmsg_level: SOL_SOCKET,
2295        cmsg_type: SCM_RIGHTS,
2296    };
2297
2298    #[allow(clippy::cast_ptr_alignment)]
2299    unsafe {
2300        core::ptr::write_unaligned(CMSG_FIRSTHDR(&header), control_header);
2301        core::ptr::write_unaligned(CMSG_DATA(buffer.as_mut_ptr().cast::<cmsghdr>()).cast::<c_int>(), fd.raw());
2302    }
2303
2304    header.msg_controllen = CMSG_LEN(core::mem::size_of::<c_int>());
2305    sys_sendmsg(socket, &header, MSG_NOSIGNAL)?;
2306
2307    Ok(())
2308}
2309
2310pub fn recvfd(socket: FdRef) -> Result<Fd, Error> {
2311    let mut dummy: c_int = 0;
2312    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2313
2314    let mut iov = iovec {
2315        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2316        iov_len: core::mem::size_of_val(&dummy) as u64,
2317    };
2318
2319    let mut header = msghdr {
2320        msg_name: core::ptr::null_mut(),
2321        msg_namelen: 0,
2322        msg_iov: &mut iov,
2323        msg_iovlen: 1,
2324        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2325        msg_controllen: core::mem::size_of_val(&buffer),
2326        msg_flags: 0,
2327    };
2328
2329    let count = sys_recvmsg(socket, &mut header, 0)?;
2330    if count == 0 {
2331        return Err(Error::from_str("recvfd failed: received zero bytes"));
2332    }
2333
2334    if count != core::mem::size_of::<c_int>() {
2335        return Err(Error::from_str("recvfd failed: received unexpected number of bytes"));
2336    }
2337
2338    if header.msg_controllen != CMSG_SPACE(core::mem::size_of::<c_int>()) {
2339        return Err(Error::from_str("recvfd failed: invalid control message size"));
2340    }
2341
2342    let control_header = unsafe { &mut *header.msg_control.cast::<cmsghdr>() };
2343
2344    if control_header.cmsg_level != SOL_SOCKET {
2345        return Err(Error::from_str("recvfd failed: invalid control message level"));
2346    }
2347
2348    if control_header.cmsg_type != SCM_RIGHTS {
2349        return Err(Error::from_str("recvfd failed: invalid control message type"));
2350    }
2351
2352    let fd = unsafe { core::ptr::read_unaligned(CMSG_DATA(control_header).cast::<c_int>()) };
2353
2354    Ok(Fd::from_raw_unchecked(fd))
2355}