io_uring/types.rs

//! Common Linux types not provided by libc.

pub(crate) mod sealed {
    use super::{Fd, Fixed};
    use std::os::unix::io::RawFd;

    #[derive(Debug)]
    pub enum Target {
        Fd(RawFd),
        Fixed(u32),
    }

    pub trait UseFd: Sized {
        fn into(self) -> RawFd;
    }

    pub trait UseFixed: Sized {
        fn into(self) -> Target;
    }

    impl UseFd for Fd {
        #[inline]
        fn into(self) -> RawFd {
            self.0
        }
    }

    impl UseFixed for Fd {
        #[inline]
        fn into(self) -> Target {
            Target::Fd(self.0)
        }
    }

    impl UseFixed for Fixed {
        #[inline]
        fn into(self) -> Target {
            Target::Fixed(self.0)
        }
    }
}

use crate::sys;
use crate::util::{cast_ptr, unwrap_nonzero, unwrap_u32};
use bitflags::bitflags;
use std::convert::TryFrom;
use std::marker::PhantomData;
use std::num::NonZeroU32;
use std::os::unix::io::RawFd;

pub use sys::__kernel_rwf_t as RwFlags;

/// An opaque type; you should use [`statx`](struct@libc::statx) instead.
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct statx {
    _priv: (),
}

/// An opaque type; you should use [`epoll_event`](libc::epoll_event) instead.
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct epoll_event {
    _priv: (),
}

/// A file descriptor that has not been registered with io_uring.
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct Fd(pub RawFd);

/// A file descriptor that has been registered with io_uring using
/// [`Submitter::register_files`](crate::Submitter::register_files) or
/// [`Submitter::register_files_sparse`](crate::Submitter::register_files_sparse).
/// This can reduce overhead compared to using [`Fd`] in some cases.
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct Fixed(pub u32);

bitflags! {
    /// Options for [`Timeout`](super::Timeout).
    ///
    /// The default behavior is to treat the timespec as a relative time interval. `flags` may
    /// contain [`types::TimeoutFlags::ABS`] to indicate the timespec represents an absolute
    /// time. When an absolute time is being specified, the kernel will use its monotonic clock
    /// unless one of the following flags is set (they may not both be set):
    /// [`types::TimeoutFlags::BOOTTIME`] or [`types::TimeoutFlags::REALTIME`].
    ///
    /// The default behavior when the timeout expires is to return a CQE with -libc::ETIME in
    /// the res field. To change this behavior to have zero returned, include
    /// [`types::TimeoutFlags::ETIME_SUCCESS`].
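    ///
    /// A minimal sketch of combining flags for an absolute, realtime-clock timeout, following
    /// the rules described above:
    ///
    /// ```
    /// use io_uring::types::TimeoutFlags;
    ///
    /// let flags = TimeoutFlags::ABS | TimeoutFlags::REALTIME;
    /// assert!(flags.contains(TimeoutFlags::ABS));
    /// ```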
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct TimeoutFlags: u32 {
        const ABS = sys::IORING_TIMEOUT_ABS;

        const BOOTTIME = sys::IORING_TIMEOUT_BOOTTIME;

        const REALTIME = sys::IORING_TIMEOUT_REALTIME;

        const LINK_TIMEOUT_UPDATE = sys::IORING_LINK_TIMEOUT_UPDATE;

        const ETIME_SUCCESS = sys::IORING_TIMEOUT_ETIME_SUCCESS;
    }
}

bitflags! {
    /// Options for [`Fsync`](super::Fsync).
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct FsyncFlags: u32 {
        const DATASYNC = sys::IORING_FSYNC_DATASYNC;
    }
}

bitflags! {
    /// Options for [`AsyncCancel`](super::AsyncCancel) and
    /// [`Submitter::register_sync_cancel`](super::Submitter::register_sync_cancel).
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub(crate) struct AsyncCancelFlags: u32 {
        /// Cancel all requests that match the given criteria, rather
        /// than just canceling the first one found.
        ///
        /// Available since 5.19.
        const ALL = sys::IORING_ASYNC_CANCEL_ALL;

        /// Match based on the file descriptor used in the original
        /// request rather than the user_data.
        ///
        /// Available since 5.19.
        const FD = sys::IORING_ASYNC_CANCEL_FD;

        /// Match any request in the ring, regardless of user_data or
        /// file descriptor. Can be used to cancel any pending
        /// request in the ring.
        ///
        /// Available since 5.19.
        const ANY = sys::IORING_ASYNC_CANCEL_ANY;

        /// Match based on the fixed file descriptor used in the original
        /// request rather than the user_data.
        ///
        /// Available since 6.0.
        const FD_FIXED = sys::IORING_ASYNC_CANCEL_FD_FIXED;
    }
}

/// Wrapper around `open_how` as used in [the `openat2(2)` system
/// call](https://man7.org/linux/man-pages/man2/openat2.2.html).
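///
/// A minimal sketch of building the `open_how` arguments for a read-only open; the
/// `O_RDONLY` and `RESOLVE_BENEATH` constants come from libc and are only illustrative:
///
/// ```
/// use io_uring::types::OpenHow;
///
/// let how = OpenHow::new()
///     .flags(libc::O_RDONLY as u64)
///     .resolve(libc::RESOLVE_BENEATH);
/// ```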
#[derive(Default, Debug, Clone, Copy)]
#[repr(transparent)]
pub struct OpenHow(sys::open_how);

impl OpenHow {
    pub const fn new() -> Self {
        OpenHow(sys::open_how {
            flags: 0,
            mode: 0,
            resolve: 0,
        })
    }

    pub const fn flags(mut self, flags: u64) -> Self {
        self.0.flags = flags;
        self
    }

    pub const fn mode(mut self, mode: u64) -> Self {
        self.0.mode = mode;
        self
    }

    pub const fn resolve(mut self, resolve: u64) -> Self {
        self.0.resolve = resolve;
        self
    }
}

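/// Wrapper around the kernel's `__kernel_timespec`, used for example by
/// [`SubmitArgs::timespec`](SubmitArgs::timespec) and timeout operations.
///
/// A minimal usage sketch of the builder API and the `From<Duration>` conversion:
///
/// ```
/// use io_uring::types::Timespec;
/// use std::time::Duration;
///
/// let ts = Timespec::new().sec(2).nsec(500_000_000);
/// let from_duration = Timespec::from(Duration::new(2, 500_000_000));
/// ```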
#[derive(Default, Debug, Clone, Copy)]
#[repr(transparent)]
pub struct Timespec(pub(crate) sys::__kernel_timespec);

impl Timespec {
    #[inline]
    pub const fn new() -> Self {
        Timespec(sys::__kernel_timespec {
            tv_sec: 0,
            tv_nsec: 0,
        })
    }

    #[inline]
    pub const fn sec(mut self, sec: u64) -> Self {
        self.0.tv_sec = sec as _;
        self
    }

    #[inline]
    pub const fn nsec(mut self, nsec: u32) -> Self {
        self.0.tv_nsec = nsec as _;
        self
    }
}

impl From<std::time::Duration> for Timespec {
    fn from(value: std::time::Duration) -> Self {
        Timespec::new()
            .sec(value.as_secs())
            .nsec(value.subsec_nanos())
    }
}

/// Submit arguments
///
/// Note that arguments which do not outlive the `SubmitArgs` will fail to compile.
///
/// ```compile_fail
/// use io_uring::types::{ SubmitArgs, Timespec };
///
/// let sigmask: libc::sigset_t = unsafe { std::mem::zeroed() };
///
/// let mut args = SubmitArgs::new();
///
/// {
///     let ts = Timespec::new();
///     args = args.timespec(&ts);
///     args = args.sigmask(&sigmask);
/// }
///
/// drop(args);
/// ```
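///
/// A sketch of typical usage, where the referenced `Timespec` and signal mask outlive the
/// `SubmitArgs`:
///
/// ```
/// use io_uring::types::{SubmitArgs, Timespec};
///
/// let ts = Timespec::new().sec(1);
/// let sigmask: libc::sigset_t = unsafe { std::mem::zeroed() };
/// let args = SubmitArgs::new().timespec(&ts).sigmask(&sigmask);
/// ```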
#[derive(Default, Debug, Clone, Copy)]
pub struct SubmitArgs<'prev: 'now, 'now> {
    pub(crate) args: sys::io_uring_getevents_arg,
    prev: PhantomData<&'prev ()>,
    now: PhantomData<&'now ()>,
}

impl<'prev, 'now> SubmitArgs<'prev, 'now> {
    #[inline]
    pub const fn new() -> SubmitArgs<'static, 'static> {
        let args = sys::io_uring_getevents_arg {
            sigmask: 0,
            sigmask_sz: 0,
            pad: 0,
            ts: 0,
        };

        SubmitArgs {
            args,
            prev: PhantomData,
            now: PhantomData,
        }
    }

    #[inline]
    pub fn sigmask<'new>(mut self, sigmask: &'new libc::sigset_t) -> SubmitArgs<'now, 'new> {
        self.args.sigmask = cast_ptr(sigmask) as _;
        self.args.sigmask_sz = std::mem::size_of::<libc::sigset_t>() as _;

        SubmitArgs {
            args: self.args,
            prev: self.now,
            now: PhantomData,
        }
    }

    #[inline]
    pub fn timespec<'new>(mut self, timespec: &'new Timespec) -> SubmitArgs<'now, 'new> {
        self.args.ts = cast_ptr(timespec) as _;

        SubmitArgs {
            args: self.args,
            prev: self.now,
            now: PhantomData,
        }
    }
}

/// An entry in a buf_ring that allows setting the address, length and buffer id.
#[repr(transparent)]
pub struct BufRingEntry(sys::io_uring_buf);

#[allow(clippy::len_without_is_empty)]
impl BufRingEntry {
    /// Sets the entry addr.
    pub fn set_addr(&mut self, addr: u64) {
        self.0.addr = addr;
    }

    /// Returns the entry addr.
    pub fn addr(&self) -> u64 {
        self.0.addr
    }

    /// Sets the entry len.
    pub fn set_len(&mut self, len: u32) {
        self.0.len = len;
    }

    /// Returns the entry len.
    pub fn len(&self) -> u32 {
        self.0.len
    }

    /// Sets the entry bid.
    pub fn set_bid(&mut self, bid: u16) {
        self.0.bid = bid;
    }

    /// Returns the entry bid.
    pub fn bid(&self) -> u16 {
        self.0.bid
    }

    /// Returns a pointer to the ring's tail field, given the ring's base address.
    ///
    /// The caller should ensure the ring's base address is aligned with the system's page size,
    /// per the uring interface requirements.
    ///
    /// # Safety
    ///
    /// The pointer will be dereferenced in order to determine the address of the `resv` field,
    /// so the caller is responsible for passing in a valid pointer. Moreover, the argument must
    /// point to the first entry of the buf_ring for the `resv` field to actually be the ring's
    /// tail field. The entry must also be properly initialized.
    pub unsafe fn tail(ring_base: *const BufRingEntry) -> *const u16 {
        std::ptr::addr_of!((*ring_base).0.resv)
    }
}

/// A destination slot for sending fixed resources
/// (e.g. [`opcode::MsgRingSendFd`](crate::opcode::MsgRingSendFd)).
#[derive(Debug, Clone, Copy)]
pub struct DestinationSlot {
    /// Fixed slot as indexed by the kernel (target+1).
    dest: NonZeroU32,
}

impl DestinationSlot {
    // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always > 0.
    const AUTO_ALLOC: NonZeroU32 =
        unwrap_nonzero(NonZeroU32::new(sys::IORING_FILE_INDEX_ALLOC as u32));

    /// Use an automatically allocated target slot.
    pub const fn auto_target() -> Self {
        Self {
            dest: DestinationSlot::AUTO_ALLOC,
        }
    }

    /// Try to use a given target slot.
    ///
    /// Valid slots are in the range from `0` to `u32::MAX - 2` inclusive.
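    ///
    /// A small sketch based on the documented range:
    ///
    /// ```
    /// use io_uring::types::DestinationSlot;
    ///
    /// assert!(DestinationSlot::try_from_slot_target(0).is_ok());
    /// assert!(DestinationSlot::try_from_slot_target(u32::MAX - 2).is_ok());
    /// assert!(DestinationSlot::try_from_slot_target(u32::MAX - 1).is_err());
    /// ```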
    pub fn try_from_slot_target(target: u32) -> Result<Self, u32> {
        // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always >= 2.
        const MAX_INDEX: u32 = unwrap_u32(DestinationSlot::AUTO_ALLOC.get().checked_sub(2));

        if target > MAX_INDEX {
            return Err(target);
        }

        let kernel_index = target.saturating_add(1);
        // SAFETY: by construction, always clamped between 1 and IORING_FILE_INDEX_ALLOC-1.
        debug_assert!(0 < kernel_index && kernel_index < DestinationSlot::AUTO_ALLOC.get());
        let dest = NonZeroU32::new(kernel_index).unwrap();

        Ok(Self { dest })
    }

    pub(crate) fn kernel_index_arg(&self) -> u32 {
        self.dest.get()
    }
}

/// Helper structure for parsing the result of a multishot [`opcode::RecvMsg`](crate::opcode::RecvMsg).
#[derive(Debug)]
pub struct RecvMsgOut<'buf> {
    header: sys::io_uring_recvmsg_out,
    /// The fixed length of the name field, in bytes.
    ///
    /// If the incoming name data is larger than this, it gets truncated to this.
    /// If it is smaller, it gets 0-padded to fill the whole field. In either case,
    /// this fixed amount of space is reserved in the result buffer.
    msghdr_name_len: usize,

    name_data: &'buf [u8],
    control_data: &'buf [u8],
    payload_data: &'buf [u8],
}

impl<'buf> RecvMsgOut<'buf> {
    const DATA_START: usize = std::mem::size_of::<sys::io_uring_recvmsg_out>();

    /// Parse the data buffered upon completion of a `RecvMsg` multishot operation.
    ///
    /// `buffer` is the whole buffer previously provided to the ring, while `msghdr`
    /// is the same content provided as input to the corresponding SQE
    /// (only `msg_namelen` and `msg_controllen` fields are relevant).
    #[allow(clippy::result_unit_err)]
    #[allow(clippy::useless_conversion)]
    pub fn parse(buffer: &'buf [u8], msghdr: &libc::msghdr) -> Result<Self, ()> {
        let msghdr_name_len = usize::try_from(msghdr.msg_namelen).unwrap();
        let msghdr_control_len = usize::try_from(msghdr.msg_controllen).unwrap();

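        // The kernel lays the result buffer out as:
        //   [io_uring_recvmsg_out header][name field (msg_namelen bytes)]
        //   [control field (msg_controllen bytes)][payload]
        // so first check that the fixed-size header, name and control regions all fit.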
        if Self::DATA_START
            .checked_add(msghdr_name_len)
            .and_then(|acc| acc.checked_add(msghdr_control_len))
            .map(|header_len| buffer.len() < header_len)
            .unwrap_or(true)
        {
            return Err(());
        }
        // SAFETY: buffer (minimum) length is checked here above.
        let header = unsafe {
            buffer
                .as_ptr()
                .cast::<sys::io_uring_recvmsg_out>()
                .read_unaligned()
        };

        // min is used because the header may indicate the true size of the data
        // while what we received was truncated.
        let (name_data, control_start) = {
            let name_start = Self::DATA_START;
            let name_data_end =
                name_start + usize::min(usize::try_from(header.namelen).unwrap(), msghdr_name_len);
            let name_field_end = name_start + msghdr_name_len;
            (&buffer[name_start..name_data_end], name_field_end)
        };
        let (control_data, payload_start) = {
            let control_data_end = control_start
                + usize::min(
                    usize::try_from(header.controllen).unwrap(),
                    msghdr_control_len,
                );
            let control_field_end = control_start + msghdr_control_len;
            (&buffer[control_start..control_data_end], control_field_end)
        };
        let payload_data = {
            let payload_data_end = payload_start
                + usize::min(
                    usize::try_from(header.payloadlen).unwrap(),
                    buffer.len() - payload_start,
                );
            &buffer[payload_start..payload_data_end]
        };

        Ok(Self {
            header,
            msghdr_name_len,
            name_data,
            control_data,
            payload_data,
        })
    }

    /// Return the length of the incoming `name` data.
    ///
    /// This may be larger than the size of the content returned by
    /// `name_data()`, if the kernel could not fit all the incoming
    /// data in the provided buffer size. In that case, name data in
    /// the result buffer gets truncated.
    pub fn incoming_name_len(&self) -> u32 {
        self.header.namelen
    }

    /// Return whether the incoming name data was larger than the provided limit/buffer.
    ///
    /// When `true`, data returned by `name_data()` is truncated and
    /// incomplete.
    pub fn is_name_data_truncated(&self) -> bool {
        self.header.namelen as usize > self.msghdr_name_len
    }

    /// Message name data, with the same semantics as `msghdr.msg_name`.
    pub fn name_data(&self) -> &[u8] {
        self.name_data
    }

    /// Return the length of the incoming `control` data.
    ///
    /// This may be larger than the size of the content returned by
    /// `control_data()`, if the kernel could not fit all the incoming
    /// data in the provided buffer size. In that case, control data in
    /// the result buffer gets truncated.
    pub fn incoming_control_len(&self) -> u32 {
        self.header.controllen
    }

    /// Return whether the incoming control data was larger than the provided limit/buffer.
    ///
    /// When `true`, data returned by `control_data()` is truncated and
    /// incomplete.
    pub fn is_control_data_truncated(&self) -> bool {
        (self.header.flags & u32::try_from(libc::MSG_CTRUNC).unwrap()) != 0
    }

    /// Message control data, with the same semantics as `msghdr.msg_control`.
    pub fn control_data(&self) -> &[u8] {
        self.control_data
    }

    /// Return whether the incoming payload was larger than the provided limit/buffer.
    ///
    /// When `true`, data returned by `payload_data()` is truncated and
    /// incomplete.
    pub fn is_payload_truncated(&self) -> bool {
        (self.header.flags & u32::try_from(libc::MSG_TRUNC).unwrap()) != 0
    }

    /// Message payload, as buffered by the kernel.
    pub fn payload_data(&self) -> &[u8] {
        self.payload_data
    }

    /// Return the length of the incoming `payload` data.
    ///
    /// This may be larger than the size of the content returned by
    /// `payload_data()`, if the kernel could not fit all the incoming
    /// data in the provided buffer size. In that case, payload data in
    /// the result buffer gets truncated.
    pub fn incoming_payload_len(&self) -> u32 {
        self.header.payloadlen
    }

    /// Message flags, with the same semantics as `msghdr.msg_flags`.
    pub fn flags(&self) -> u32 {
        self.header.flags
    }
}

/// [CancelBuilder] constructs match criteria for request cancellation.
///
/// The [CancelBuilder] can be used to selectively cancel one or more requests
/// by user_data, fd, fixed fd, or unconditionally.
///
/// ### Examples
///
/// ```
/// use io_uring::types::{CancelBuilder, Fd, Fixed};
///
/// // Match all in-flight requests.
/// CancelBuilder::any();
///
/// // Match a single request with user_data = 42.
/// CancelBuilder::user_data(42);
///
/// // Match a single request with fd = 42.
/// CancelBuilder::fd(Fd(42));
///
/// // Match a single request with fixed fd = 42.
/// CancelBuilder::fd(Fixed(42));
///
/// // Match all in-flight requests with user_data = 42.
/// CancelBuilder::user_data(42).all();
/// ```
#[derive(Debug)]
pub struct CancelBuilder {
    pub(crate) flags: AsyncCancelFlags,
    pub(crate) user_data: Option<u64>,
    pub(crate) fd: Option<sealed::Target>,
}

impl CancelBuilder {
    /// Create a new [CancelBuilder] which will match any in-flight request.
    ///
    /// This will cancel every in-flight request in the ring.
    ///
    /// Async cancellation matching any requests is only available since 5.19.
    pub const fn any() -> Self {
        Self {
            flags: AsyncCancelFlags::ANY,
            user_data: None,
            fd: None,
        }
    }

    /// Create a new [CancelBuilder] which will match in-flight requests
    /// with the given `user_data` value.
    ///
    /// The first request with the given `user_data` value will be canceled.
    /// [CancelBuilder::all](#method.all) can be called to instead match every
    /// request with the provided `user_data` value.
    pub const fn user_data(user_data: u64) -> Self {
        Self {
            flags: AsyncCancelFlags::empty(),
            user_data: Some(user_data),
            fd: None,
        }
    }

    /// Create a new [CancelBuilder] which will match in-flight requests with
    /// the given `fd` value.
    ///
    /// The first request with the given `fd` value will be canceled. [CancelBuilder::all](#method.all)
    /// can be called to instead match every request with the provided `fd` value.
    ///
    /// FD async cancellation is only available since 5.19.
    pub fn fd(fd: impl sealed::UseFixed) -> Self {
        let mut flags = AsyncCancelFlags::FD;
        let target = fd.into();
        if matches!(target, sealed::Target::Fixed(_)) {
            flags.insert(AsyncCancelFlags::FD_FIXED);
        }
        Self {
            flags,
            user_data: None,
            fd: Some(target),
        }
    }

    /// Modify the [CancelBuilder] match criteria to match all in-flight requests
    /// rather than just the first one.
    ///
    /// This has no effect when combined with [CancelBuilder::any](#method.any).
    ///
    /// Async cancellation matching all requests is only available since 5.19.
    pub fn all(mut self) -> Self {
        self.flags.insert(AsyncCancelFlags::ALL);
        self
    }

    pub(crate) fn to_fd(&self) -> i32 {
        self.fd
            .as_ref()
            .map(|target| match *target {
                sealed::Target::Fd(fd) => fd,
                sealed::Target::Fixed(idx) => idx as i32,
            })
            .unwrap_or(-1)
    }
}

/// Wrapper around `futex_waitv` as used in [the `futex_waitv` system
/// call](https://www.kernel.org/doc/html/latest/userspace-api/futex2.html).
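///
/// A minimal sketch of describing one futex to wait on; the flags value (`FUTEX2_SIZE_U32`,
/// assumed here to be `0x2`) comes from the kernel ABI and is only illustrative:
///
/// ```
/// use io_uring::types::FutexWaitV;
///
/// let futex_word: u32 = 0;
/// let waiter = FutexWaitV::new()
///     .uaddr(&futex_word as *const u32 as u64)
///     .val(0)
///     .flags(0x2);
/// ```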
#[derive(Default, Debug, Clone, Copy)]
#[repr(transparent)]
pub struct FutexWaitV(sys::futex_waitv);

impl FutexWaitV {
    pub const fn new() -> Self {
        Self(sys::futex_waitv {
            val: 0,
            uaddr: 0,
            flags: 0,
            __reserved: 0,
        })
    }

    pub const fn val(mut self, val: u64) -> Self {
        self.0.val = val;
        self
    }

    pub const fn uaddr(mut self, uaddr: u64) -> Self {
        self.0.uaddr = uaddr;
        self
    }

    pub const fn flags(mut self, flags: u32) -> Self {
        self.0.flags = flags;
        self
    }
}

664
665#[cfg(test)]
666mod tests {
667    use std::time::Duration;
668
669    use crate::types::sealed::Target;
670
671    use super::*;
672
673    #[test]
674    fn timespec_from_duration_converts_correctly() {
675        let duration = Duration::new(2, 500);
676        let timespec = Timespec::from(duration);
677
678        assert_eq!(timespec.0.tv_sec as u64, duration.as_secs());
679        assert_eq!(timespec.0.tv_nsec as u32, duration.subsec_nanos());
680    }
681
682    #[test]
683    fn test_cancel_builder_flags() {
684        let cb = CancelBuilder::any();
685        assert_eq!(cb.flags, AsyncCancelFlags::ANY);
686
687        let mut cb = CancelBuilder::user_data(42);
688        assert_eq!(cb.flags, AsyncCancelFlags::empty());
689        assert_eq!(cb.user_data, Some(42));
690        assert!(cb.fd.is_none());
691        cb = cb.all();
692        assert_eq!(cb.flags, AsyncCancelFlags::ALL);
693
694        let mut cb = CancelBuilder::fd(Fd(42));
695        assert_eq!(cb.flags, AsyncCancelFlags::FD);
696        assert!(matches!(cb.fd, Some(Target::Fd(42))));
697        assert!(cb.user_data.is_none());
698        cb = cb.all();
699        assert_eq!(cb.flags, AsyncCancelFlags::FD | AsyncCancelFlags::ALL);
700
701        let mut cb = CancelBuilder::fd(Fixed(42));
702        assert_eq!(cb.flags, AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED);
703        assert!(matches!(cb.fd, Some(Target::Fixed(42))));
704        assert!(cb.user_data.is_none());
705        cb = cb.all();
706        assert_eq!(
707            cb.flags,
708            AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED | AsyncCancelFlags::ALL
709        );
710    }
}