symbolic_common/byteview.rs
1//! A wrapper type providing direct memory access to binary data.
2//!
3//! See the [`ByteView`] struct for more documentation.
4//!
5//! [`ByteView`]: struct.ByteView.html
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io;
10use std::ops::Deref;
11use std::path::Path;
12use std::sync::Arc;
13
14use memmap2::Mmap;
15
16use crate::cell::StableDeref;
17
/// The owner of data behind a ByteView.
///
/// This can either be an mmapped file, an owned buffer or a borrowed binary slice.
#[derive(Debug)]
enum ByteViewBacking<'a> {
    /// In-memory bytes: either a slice borrowed for `'a` or an owned buffer.
    Buf(Cow<'a, [u8]>),
    /// A memory-mapped file.
    Mmap(Mmap),
}
26
27impl Deref for ByteViewBacking<'_> {
28 type Target = [u8];
29
30 fn deref(&self) -> &Self::Target {
31 match *self {
32 ByteViewBacking::Buf(ref buf) => buf,
33 ByteViewBacking::Mmap(ref mmap) => mmap,
34 }
35 }
36}
37
/// A smart pointer for byte data.
///
/// This type can be used to uniformly access bytes that were created either from mmapping in a
/// path, a vector or a borrowed slice. A `ByteView` dereferences into a `&[u8]` and guarantees
/// random access to the underlying buffer or file.
///
/// A `ByteView` can be constructed from borrowed slices, vectors or memory mapped from the file
/// system directly.
///
/// Clones of a `ByteView` share the same backing storage through an [`Arc`].
///
/// # Example
///
/// The most common way to use `ByteView` is to construct it from a file handle. `map_file` takes
/// the file handle by value and memory maps its contents:
///
/// ```
/// use std::io::Write;
/// use symbolic_common::ByteView;
///
/// fn main() -> Result<(), std::io::Error> {
///     let mut file = tempfile::tempfile()?;
///     file.write_all(b"1234")?;
///
///     let view = ByteView::map_file(file)?;
///     assert_eq!(view.as_slice(), b"1234");
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct ByteView<'a> {
    /// Shared storage holding (or mapping) the bytes this view dereferences to.
    backing: Arc<ByteViewBacking<'a>>,
}
69
70impl<'a> ByteView<'a> {
71 fn with_backing(backing: ByteViewBacking<'a>) -> Self {
72 ByteView {
73 backing: Arc::new(backing),
74 }
75 }
76
77 /// Constructs a `ByteView` from a `Cow`.
78 ///
79 /// # Example
80 ///
81 /// ```
82 /// use std::borrow::Cow;
83 /// use symbolic_common::ByteView;
84 ///
85 /// let cow = Cow::Borrowed(&b"1234"[..]);
86 /// let view = ByteView::from_cow(cow);
87 /// ```
88 pub fn from_cow(cow: Cow<'a, [u8]>) -> Self {
89 ByteView::with_backing(ByteViewBacking::Buf(cow))
90 }
91
92 /// Constructs a `ByteView` from a byte slice.
93 ///
94 /// # Example
95 ///
96 /// ```
97 /// use symbolic_common::ByteView;
98 ///
99 /// let view = ByteView::from_slice(b"1234");
100 /// ```
101 pub fn from_slice(buffer: &'a [u8]) -> Self {
102 ByteView::from_cow(Cow::Borrowed(buffer))
103 }
104
105 /// Constructs a `ByteView` from a vector of bytes.
106 ///
107 /// # Example
108 ///
109 /// ```
110 /// use symbolic_common::ByteView;
111 ///
112 /// let vec = b"1234".to_vec();
113 /// let view = ByteView::from_vec(vec);
114 /// ```
115 pub fn from_vec(buffer: Vec<u8>) -> Self {
116 ByteView::from_cow(Cow::Owned(buffer))
117 }
118
119 /// Constructs a `ByteView` from an open file handle by memory mapping the file.
120 ///
121 /// See [`ByteView::map_file_ref`] for a non-consuming version of this constructor.
122 ///
123 /// # Example
124 ///
125 /// ```
126 /// use std::io::Write;
127 /// use symbolic_common::ByteView;
128 ///
129 /// fn main() -> Result<(), std::io::Error> {
130 /// let mut file = tempfile::tempfile()?;
131 /// let view = ByteView::map_file(file)?;
132 /// Ok(())
133 /// }
134 /// ```
135 pub fn map_file(file: File) -> Result<Self, io::Error> {
136 Self::map_file_ref(&file)
137 }
138
139 /// Constructs a `ByteView` from an open file handle by memory mapping the file.
140 ///
141 /// The main difference with [`ByteView::map_file`] is that this takes the [`File`] by
142 /// reference rather than consuming it.
143 ///
144 /// # Example
145 ///
146 /// ```
147 /// use std::io::Write;
148 /// use symbolic_common::ByteView;
149 ///
150 /// fn main() -> Result<(), std::io::Error> {
151 /// let mut file = tempfile::tempfile()?;
152 /// let view = ByteView::map_file_ref(&file)?;
153 /// Ok(())
154 /// }
155 /// ```
156 pub fn map_file_ref(file: &File) -> Result<Self, io::Error> {
157 let backing = match unsafe { Mmap::map(file) } {
158 Ok(mmap) => ByteViewBacking::Mmap(mmap),
159 Err(err) => {
160 // this is raised on empty mmaps which we want to ignore. The 1006 Windows error
161 // looks like "The volume for a file has been externally altered so that the opened
162 // file is no longer valid."
163 if err.kind() == io::ErrorKind::InvalidInput
164 || (cfg!(windows) && err.raw_os_error() == Some(1006))
165 {
166 ByteViewBacking::Buf(Cow::Borrowed(b""))
167 } else {
168 return Err(err);
169 }
170 }
171 };
172
173 Ok(ByteView::with_backing(backing))
174 }
175
176 /// Constructs a `ByteView` from any `std::io::Reader`.
177 ///
178 /// **Note**: This currently consumes the entire reader and stores its data in an internal
179 /// buffer. Prefer [`open`] when reading from the file system or [`from_slice`] / [`from_vec`]
180 /// for in-memory operations. This behavior might change in the future.
181 ///
182 /// # Example
183 ///
184 /// ```
185 /// use std::io::Cursor;
186 /// use symbolic_common::ByteView;
187 ///
188 /// fn main() -> Result<(), std::io::Error> {
189 /// let reader = Cursor::new(b"1234");
190 /// let view = ByteView::read(reader)?;
191 /// Ok(())
192 /// }
193 /// ```
194 ///
195 /// [`open`]: struct.ByteView.html#method.open
196 /// [`from_slice`]: struct.ByteView.html#method.from_slice
197 /// [`from_vec`]: struct.ByteView.html#method.from_vec
198 pub fn read<R: io::Read>(mut reader: R) -> Result<Self, io::Error> {
199 let mut buffer = vec![];
200 reader.read_to_end(&mut buffer)?;
201 Ok(ByteView::from_vec(buffer))
202 }
203
204 /// Constructs a `ByteView` from a file path by memory mapping the file.
205 ///
206 /// # Example
207 ///
208 /// ```no_run
209 /// use symbolic_common::ByteView;
210 ///
211 /// fn main() -> Result<(), std::io::Error> {
212 /// let view = ByteView::open("test.txt")?;
213 /// Ok(())
214 /// }
215 /// ```
216 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
217 let file = File::open(path)?;
218 Self::map_file(file)
219 }
220
221 /// Returns a slice of the underlying data.
222 ///
223 ///
224 /// # Example
225 ///
226 /// ```
227 /// use symbolic_common::ByteView;
228 ///
229 /// let view = ByteView::from_slice(b"1234");
230 /// let data = view.as_slice();
231 /// ```
232 #[inline(always)]
233 pub fn as_slice(&self) -> &[u8] {
234 self.backing.deref()
235 }
236
237 /// Applies a [`AccessPattern`] hint to the backing storage.
238 ///
239 /// A hint can be applied when the predominantly access pattern
240 /// for this byte view is known.
241 ///
242 /// Applying the wrong hint may have significant effects on performance.
243 ///
244 /// Hints are applied on best effort basis, not all platforms
245 /// support the same hints, not all backing storages support
246 /// hints.
247 ///
248 /// # Example
249 ///
250 /// ```
251 /// use std::io::Write;
252 /// use symbolic_common::{ByteView, AccessPattern};
253 ///
254 /// fn main() -> Result<(), std::io::Error> {
255 /// let mut file = tempfile::tempfile()?;
256 /// let view = ByteView::map_file_ref(&file)?;
257 /// let _ = view.hint(AccessPattern::Random);
258 /// Ok(())
259 /// }
260 /// ```
261 pub fn hint(&self, hint: AccessPattern) -> Result<(), io::Error> {
262 let _hint = hint; // silence unused lint
263 match self.backing.deref() {
264 ByteViewBacking::Buf(_) => Ok(()),
265 #[cfg(unix)]
266 ByteViewBacking::Mmap(mmap) => mmap.advise(_hint.to_madvise()),
267 #[cfg(not(unix))]
268 ByteViewBacking::Mmap(_) => Ok(()),
269 }
270 }
271}
272
273impl AsRef<[u8]> for ByteView<'_> {
274 #[inline(always)]
275 fn as_ref(&self) -> &[u8] {
276 self.as_slice()
277 }
278}
279
280impl Deref for ByteView<'_> {
281 type Target = [u8];
282
283 #[inline(always)]
284 fn deref(&self) -> &Self::Target {
285 self.as_slice()
286 }
287}
288
// SAFETY: NOTE(review): this assumes `StableDeref` requires the dereferenced slice to keep its
// address when the `ByteView` value itself is moved — confirm against the trait's contract in
// `crate::cell`. The view only holds an `Arc` to its backing storage, so moving the view moves
// the pointer, not the bytes it points to.
unsafe impl StableDeref for ByteView<'_> {}
290
/// Values supported by [`ByteView::hint`].
///
/// This is largely an abstraction over [`madvise(2)`] and [`fadvise(2)`].
///
/// The type is a plain value: it can be copied, compared for equality and hashed.
///
/// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html
/// [`fadvise(2)`]: https://man7.org/linux/man-pages/man2/posix_fadvise.2.html
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AccessPattern {
    /// No special treatment.
    ///
    /// The operating system is in full control of the buffer,
    /// a generally good default.
    ///
    /// This is the default.
    #[default]
    Normal,
    /// Expect access to be random.
    ///
    /// Read ahead might be less useful than normally.
    Random,
    /// Expect access to be in sequential order, read ahead might be very useful.
    ///
    /// After reading data there is a high chance it will not be accessed again
    /// and can be aggressively freed.
    Sequential,
}
316
317impl AccessPattern {
318 #[cfg(unix)]
319 fn to_madvise(self) -> memmap2::Advice {
320 match self {
321 AccessPattern::Normal => memmap2::Advice::Normal,
322 AccessPattern::Random => memmap2::Advice::Random,
323 AccessPattern::Sequential => memmap2::Advice::Sequential,
324 }
325 }
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{Read, Seek, Write};

    use similar_asserts::assert_eq;
    use tempfile::NamedTempFile;

    /// Opening an empty file must produce an empty view rather than an error.
    #[test]
    fn test_open_empty_file() -> Result<(), std::io::Error> {
        let tmp = NamedTempFile::new()?;

        let view = ByteView::open(tmp.path())?;
        assert_eq!(&*view, b"");

        Ok(())
    }

    /// Opening a non-empty file exposes exactly the bytes written to it.
    #[test]
    fn test_open_file() -> Result<(), std::io::Error> {
        let mut tmp = NamedTempFile::new()?;

        tmp.write_all(b"1234")?;

        let view = ByteView::open(tmp.path())?;
        assert_eq!(&*view, b"1234");

        Ok(())
    }

    /// A file mapped by reference stays readable, both through the original handle and through
    /// the view, after it has been deleted on disk and the handle has been dropped.
    #[test]
    fn test_mmap_fd_reuse() -> Result<(), std::io::Error> {
        let mut tmp = NamedTempFile::new()?;
        tmp.write_all(b"1234")?;

        let view = ByteView::map_file_ref(tmp.as_file())?;

        // Remember the path, then turn the temp file into a plain `File`. This deletes the file
        // on disk.
        let _path = tmp.path().to_path_buf();
        let mut file = tmp.into_file();
        #[cfg(not(windows))]
        {
            assert!(!_path.exists());
        }

        // Ensure we can still read from the file after mmapping and deleting it on disk.
        let mut buf = Vec::new();
        file.rewind()?;
        file.read_to_end(&mut buf)?;
        assert_eq!(buf, b"1234");
        drop(file);

        // Ensure the byteview can still read the file as well.
        assert_eq!(&*view, b"1234");

        Ok(())
    }
}