symbolic_common/
byteview.rs

1//! A wrapper type providing direct memory access to binary data.
2//!
3//! See the [`ByteView`] struct for more documentation.
4//!
5//! [`ByteView`]: struct.ByteView.html
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io;
10use std::ops::Deref;
11use std::path::Path;
12use std::sync::Arc;
13
14use memmap2::Mmap;
15
16use crate::cell::StableDeref;
17
/// The owner of data behind a ByteView.
///
/// This can either be an mmapped file, an owned buffer or a borrowed binary slice.
/// Whatever the variant, the bytes are reached through this type's `Deref<Target = [u8]>`
/// implementation.
#[derive(Debug)]
enum ByteViewBacking<'a> {
    /// In-memory bytes: either a slice borrowed for `'a` or a buffer owned by the backing.
    Buf(Cow<'a, [u8]>),
    /// A memory-mapped file.
    Mmap(Mmap),
}
26
27impl Deref for ByteViewBacking<'_> {
28    type Target = [u8];
29
30    fn deref(&self) -> &Self::Target {
31        match *self {
32            ByteViewBacking::Buf(ref buf) => buf,
33            ByteViewBacking::Mmap(ref mmap) => mmap,
34        }
35    }
36}
37
38/// A smart pointer for byte data.
39///
40/// This type can be used to uniformly access bytes that were created either from mmapping in a
41/// path, a vector or a borrowed slice. A `ByteView` dereferences into a `&[u8]` and guarantees
42/// random access to the underlying buffer or file.
43///
44/// A `ByteView` can be constructed from borrowed slices, vectors or memory mapped from the file
45/// system directly.
46///
47/// # Example
48///
49/// The most common way to use `ByteView` is to construct it from a file handle. This will own the
50/// underlying file handle until the `ByteView` is dropped:
51///
52/// ```
53/// use std::io::Write;
54/// use symbolic_common::ByteView;
55///
56/// fn main() -> Result<(), std::io::Error> {
57///     let mut file = tempfile::tempfile()?;
58///     file.write_all(b"1234");
59///
60///     let view = ByteView::map_file(file)?;
61///     assert_eq!(view.as_slice(), b"1234");
62///     Ok(())
63/// }
64/// ```
65#[derive(Clone, Debug)]
66pub struct ByteView<'a> {
67    backing: Arc<ByteViewBacking<'a>>,
68}
69
70impl<'a> ByteView<'a> {
71    fn with_backing(backing: ByteViewBacking<'a>) -> Self {
72        ByteView {
73            backing: Arc::new(backing),
74        }
75    }
76
77    /// Constructs a `ByteView` from a `Cow`.
78    ///
79    /// # Example
80    ///
81    /// ```
82    /// use std::borrow::Cow;
83    /// use symbolic_common::ByteView;
84    ///
85    /// let cow = Cow::Borrowed(&b"1234"[..]);
86    /// let view = ByteView::from_cow(cow);
87    /// ```
88    pub fn from_cow(cow: Cow<'a, [u8]>) -> Self {
89        ByteView::with_backing(ByteViewBacking::Buf(cow))
90    }
91
92    /// Constructs a `ByteView` from a byte slice.
93    ///
94    /// # Example
95    ///
96    /// ```
97    /// use symbolic_common::ByteView;
98    ///
99    /// let view = ByteView::from_slice(b"1234");
100    /// ```
101    pub fn from_slice(buffer: &'a [u8]) -> Self {
102        ByteView::from_cow(Cow::Borrowed(buffer))
103    }
104
105    /// Constructs a `ByteView` from a vector of bytes.
106    ///
107    /// # Example
108    ///
109    /// ```
110    /// use symbolic_common::ByteView;
111    ///
112    /// let vec = b"1234".to_vec();
113    /// let view = ByteView::from_vec(vec);
114    /// ```
115    pub fn from_vec(buffer: Vec<u8>) -> Self {
116        ByteView::from_cow(Cow::Owned(buffer))
117    }
118
119    /// Constructs a `ByteView` from an open file handle by memory mapping the file.
120    ///
121    /// See [`ByteView::map_file_ref`] for a non-consuming version of this constructor.
122    ///
123    /// # Example
124    ///
125    /// ```
126    /// use std::io::Write;
127    /// use symbolic_common::ByteView;
128    ///
129    /// fn main() -> Result<(), std::io::Error> {
130    ///     let mut file = tempfile::tempfile()?;
131    ///     let view = ByteView::map_file(file)?;
132    ///     Ok(())
133    /// }
134    /// ```
135    pub fn map_file(file: File) -> Result<Self, io::Error> {
136        Self::map_file_ref(&file)
137    }
138
139    /// Constructs a `ByteView` from an open file handle by memory mapping the file.
140    ///
141    /// The main difference with [`ByteView::map_file`] is that this takes the [`File`] by
142    /// reference rather than consuming it.
143    ///
144    /// # Example
145    ///
146    /// ```
147    /// use std::io::Write;
148    /// use symbolic_common::ByteView;
149    ///
150    /// fn main() -> Result<(), std::io::Error> {
151    ///     let mut file = tempfile::tempfile()?;
152    ///     let view = ByteView::map_file_ref(&file)?;
153    ///     Ok(())
154    /// }
155    /// ```
156    pub fn map_file_ref(file: &File) -> Result<Self, io::Error> {
157        let backing = match unsafe { Mmap::map(file) } {
158            Ok(mmap) => ByteViewBacking::Mmap(mmap),
159            Err(err) => {
160                // this is raised on empty mmaps which we want to ignore. The 1006 Windows error
161                // looks like "The volume for a file has been externally altered so that the opened
162                // file is no longer valid."
163                if err.kind() == io::ErrorKind::InvalidInput
164                    || (cfg!(windows) && err.raw_os_error() == Some(1006))
165                {
166                    ByteViewBacking::Buf(Cow::Borrowed(b""))
167                } else {
168                    return Err(err);
169                }
170            }
171        };
172
173        Ok(ByteView::with_backing(backing))
174    }
175
176    /// Constructs a `ByteView` from any `std::io::Reader`.
177    ///
178    /// **Note**: This currently consumes the entire reader and stores its data in an internal
179    /// buffer. Prefer [`open`] when reading from the file system or [`from_slice`] / [`from_vec`]
180    /// for in-memory operations. This behavior might change in the future.
181    ///
182    /// # Example
183    ///
184    /// ```
185    /// use std::io::Cursor;
186    /// use symbolic_common::ByteView;
187    ///
188    /// fn main() -> Result<(), std::io::Error> {
189    ///     let reader = Cursor::new(b"1234");
190    ///     let view = ByteView::read(reader)?;
191    ///     Ok(())
192    /// }
193    /// ```
194    ///
195    /// [`open`]: struct.ByteView.html#method.open
196    /// [`from_slice`]: struct.ByteView.html#method.from_slice
197    /// [`from_vec`]: struct.ByteView.html#method.from_vec
198    pub fn read<R: io::Read>(mut reader: R) -> Result<Self, io::Error> {
199        let mut buffer = vec![];
200        reader.read_to_end(&mut buffer)?;
201        Ok(ByteView::from_vec(buffer))
202    }
203
204    /// Constructs a `ByteView` from a file path by memory mapping the file.
205    ///
206    /// # Example
207    ///
208    /// ```no_run
209    /// use symbolic_common::ByteView;
210    ///
211    /// fn main() -> Result<(), std::io::Error> {
212    ///     let view = ByteView::open("test.txt")?;
213    ///     Ok(())
214    /// }
215    /// ```
216    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
217        let file = File::open(path)?;
218        Self::map_file(file)
219    }
220
221    /// Returns a slice of the underlying data.
222    ///
223    ///
224    /// # Example
225    ///
226    /// ```
227    /// use symbolic_common::ByteView;
228    ///
229    /// let view = ByteView::from_slice(b"1234");
230    /// let data = view.as_slice();
231    /// ```
232    #[inline(always)]
233    pub fn as_slice(&self) -> &[u8] {
234        self.backing.deref()
235    }
236
237    /// Applies a [`AccessPattern`] hint to the backing storage.
238    ///
239    /// A hint can be applied when the predominantly access pattern
240    /// for this byte view is known.
241    ///
242    /// Applying the wrong hint may have significant effects on performance.
243    ///
244    /// Hints are applied on best effort basis, not all platforms
245    /// support the same hints, not all backing storages support
246    /// hints.
247    ///
248    /// # Example
249    ///
250    /// ```
251    /// use std::io::Write;
252    /// use symbolic_common::{ByteView, AccessPattern};
253    ///
254    /// fn main() -> Result<(), std::io::Error> {
255    ///     let mut file = tempfile::tempfile()?;
256    ///     let view = ByteView::map_file_ref(&file)?;
257    ///     let _ = view.hint(AccessPattern::Random);
258    ///     Ok(())
259    /// }
260    /// ```
261    pub fn hint(&self, hint: AccessPattern) -> Result<(), io::Error> {
262        let _hint = hint; // silence unused lint
263        match self.backing.deref() {
264            ByteViewBacking::Buf(_) => Ok(()),
265            #[cfg(unix)]
266            ByteViewBacking::Mmap(mmap) => mmap.advise(_hint.to_madvise()),
267            #[cfg(not(unix))]
268            ByteViewBacking::Mmap(_) => Ok(()),
269        }
270    }
271}
272
273impl AsRef<[u8]> for ByteView<'_> {
274    #[inline(always)]
275    fn as_ref(&self) -> &[u8] {
276        self.as_slice()
277    }
278}
279
280impl Deref for ByteView<'_> {
281    type Target = [u8];
282
283    #[inline(always)]
284    fn deref(&self) -> &Self::Target {
285        self.as_slice()
286    }
287}
288
// NOTE(review): `StableDeref` is declared in `crate::cell` and its safety contract is not
// visible from this file. Presumably it requires that the bytes returned by `deref` keep their
// address when the `ByteView` value itself is moved. The backing storage sits behind an `Arc`,
// which would satisfy that — confirm against the trait's documentation.
unsafe impl StableDeref for ByteView<'_> {}
290
/// Values supported by [`ByteView::hint`].
///
/// This is largely an abstraction over [`madvise(2)`] and [`fadvise(2)`].
///
/// On Unix, each variant is translated to the `memmap2::Advice` variant of the same name before
/// being applied to a memory-mapped view. On other platforms, and for views that are not memory
/// mapped, [`ByteView::hint`] accepts the value and does nothing.
///
/// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html
/// [`fadvise(2)`]: https://man7.org/linux/man-pages/man2/posix_fadvise.2.html
#[derive(Debug, Default, Clone, Copy)]
pub enum AccessPattern {
    /// No special treatment.
    ///
    /// The operating system is in full control of the buffer,
    /// a generally good default.
    ///
    /// This is the default.
    #[default]
    Normal,
    /// Expect access to be random.
    ///
    /// Read ahead might be less useful than normally.
    Random,
    /// Expect access to be in sequential order.
    ///
    /// Read ahead might be very useful. After reading data there is a high chance it will not
    /// be accessed again and can be aggressively freed.
    Sequential,
}
316
317impl AccessPattern {
318    #[cfg(unix)]
319    fn to_madvise(self) -> memmap2::Advice {
320        match self {
321            AccessPattern::Normal => memmap2::Advice::Normal,
322            AccessPattern::Random => memmap2::Advice::Random,
323            AccessPattern::Sequential => memmap2::Advice::Sequential,
324        }
325    }
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{Read, Seek, Write};

    use similar_asserts::assert_eq;
    use tempfile::NamedTempFile;

    /// Opening an empty file must yield an empty view instead of an error.
    #[test]
    fn test_open_empty_file() -> Result<(), std::io::Error> {
        let empty = NamedTempFile::new()?;

        let view = ByteView::open(empty.path())?;
        assert_eq!(view.as_slice(), b"");

        Ok(())
    }

    /// Opening a file by path exposes exactly the bytes written to it.
    #[test]
    fn test_open_file() -> Result<(), std::io::Error> {
        let mut named = NamedTempFile::new()?;
        named.write_all(b"1234")?;

        let view = ByteView::open(named.path())?;
        assert_eq!(view.as_slice(), b"1234");

        Ok(())
    }

    /// Mapping by reference leaves the file handle usable, and the view outlives the handle.
    #[test]
    fn test_mmap_fd_reuse() -> Result<(), std::io::Error> {
        let mut named = NamedTempFile::new()?;
        named.write_all(b"1234")?;

        let view = ByteView::map_file_ref(named.as_file())?;

        // Converting into a plain `File` deletes the file on disk.
        let _path = named.path().to_path_buf();
        let mut handle = named.into_file();
        #[cfg(not(windows))]
        {
            assert!(!_path.exists());
        }

        // Ensure we can still read from the file after mmapping and deleting it on disk.
        let mut contents = Vec::new();
        handle.rewind()?;
        handle.read_to_end(&mut contents)?;
        assert_eq!(contents, b"1234");
        drop(handle);

        // Ensure the byteview can still read the file as well.
        assert_eq!(view.as_slice(), b"1234");

        Ok(())
    }
}