buffered_reader/
file_unix.rs

//! An mmapping `BufferedReader` implementation for files.
//!
//! On my (Justus) system, this implementation improves the
//! performance of the statistics example by ~10% over the
//! `Generic` reader.
6
7use libc::{mmap, munmap, PROT_READ, MAP_PRIVATE};
8use std::fmt;
9use std::fs;
10use std::io;
11use std::os::unix::io::AsRawFd;
12use std::slice;
13use std::path::{Path, PathBuf};
14use std::ptr;
15
16use super::*;
17use crate::file_error::FileError;
18
// For small files, the overhead of manipulating the page table is not
// worth the gain.  This threshold has been chosen so that on my
// (Justus) system, mmapping is faster than sequentially reading.
//
// The value is in bytes (16 * 4096 = 65536); it is compared against
// the file length before a mapping is attempted, and shorter files
// are read through the generic reader instead.
const MMAP_THRESHOLD: u64 = 16 * 4096;
23
24/// Wraps files using `mmap`().
25///
26/// This implementation tries to mmap the file, falling back to
27/// just using a generic reader.
28pub struct File<'a, C: fmt::Debug + Sync + Send>(Imp<'a, C>, PathBuf);
29
30assert_send_and_sync!(File<'_, C>
31                      where C: fmt::Debug);
32
33impl<'a, C: fmt::Debug + Sync + Send> fmt::Display for File<'a, C> {
34    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35        write!(f, "{} {:?}", self.0, self.1.display())
36    }
37}
38
39impl<'a, C: fmt::Debug + Sync + Send> fmt::Debug for File<'a, C> {
40    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
41        f.debug_tuple("File")
42            .field(&self.0)
43            .field(&self.1)
44            .finish()
45    }
46}
47
48/// The implementation.
49enum Imp<'a, C: fmt::Debug + Sync + Send> {
50    Generic(Generic<fs::File, C>),
51    Mmap {
52        reader: Memory<'a, C>,
53    }
54}
55
56impl<'a, C: fmt::Debug + Sync + Send> Drop for Imp<'a, C> {
57    fn drop(&mut self) {
58        match self {
59            Imp::Generic(_) => (),
60            Imp::Mmap { reader, } => {
61                let buf = reader.source_buffer();
62                unsafe {
63                    munmap(buf.as_ptr() as *mut _, buf.len());
64                }
65            },
66        }
67    }
68}
69
70impl<'a, C: fmt::Debug + Sync + Send> fmt::Display for Imp<'a, C> {
71    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
72        write!(f, "File(")?;
73        match self {
74            Imp::Generic(_) => write!(f, "Generic")?,
75            Imp::Mmap { .. } => write!(f, "Memory")?,
76        };
77        write!(f, ")")
78    }
79}
80
81impl<'a, C: fmt::Debug + Sync + Send> fmt::Debug for Imp<'a, C> {
82    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
83        match self {
84            Imp::Generic(ref g) =>
85                f.debug_tuple("Generic")
86                .field(&g)
87                .finish(),
88            Imp::Mmap { reader, } =>
89                f.debug_struct("Mmap")
90                .field("addr", &reader.source_buffer().as_ptr())
91                .field("length", &reader.source_buffer().len())
92                .field("reader", reader)
93                .finish(),
94        }
95    }
96}
97
98impl<'a> File<'a, ()> {
99    /// Wraps a [`fs::File`].
100    ///
101    /// The given `path` should be the path that has been used to
102    /// obtain `file` from.  It is used in error messages to provide
103    /// context to the user.
104    ///
105    /// While this is slightly less convenient than [`Self::open`], it
106    /// allows one to inspect or manipulate the [`fs::File`] object
107    /// before handing it off.  For example, one can inspect the
108    /// metadata.
109    pub fn new<P: AsRef<Path>>(file: fs::File, path: P) -> io::Result<Self> {
110        Self::new_with_cookie(file, path.as_ref(), ())
111    }
112
113    /// Opens the given file.
114    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
115        Self::with_cookie(path, ())
116    }
117}
118
119impl<'a, C: fmt::Debug + Sync + Send> File<'a, C> {
120    /// Like [`Self::new`], but sets a cookie.
121    ///
122    /// The given `path` should be the path that has been used to
123    /// obtain `file` from.  It is used in error messages to provide
124    /// context to the user.
125    ///
126    /// While this is slightly less convenient than
127    /// [`Self::with_cookie`], it allows one to inspect or manipulate
128    /// the [`fs::File`] object before handing it off.  For example,
129    /// one can inspect the metadata.
130    pub fn new_with_cookie<P: AsRef<Path>>(file: fs::File, path: P, cookie: C)
131                                           -> io::Result<Self> {
132        let path = path.as_ref();
133
134        // As fallback, we use a generic reader.
135        let generic = |file, cookie| {
136            Ok(File(
137                Imp::Generic(
138                    Generic::with_cookie(file, None, cookie)),
139                path.into()))
140        };
141
142        // For testing and benchmarking purposes, we use the variable
143        // SEQUOIA_DONT_MMAP to turn off mmapping.
144        if ::std::env::var_os("SEQUOIA_DONT_MMAP").is_some() {
145            return generic(file, cookie);
146        }
147
148        let length =
149            file.metadata().map_err(|e| FileError::new(path, e))?.len();
150
151        // For small files, the overhead of manipulating the page
152        // table is not worth the gain.
153        if length < MMAP_THRESHOLD {
154            return generic(file, cookie);
155        }
156
157        // Be nice to 32 bit systems.
158        let length: usize = match length.try_into() {
159            Ok(v) => v,
160            Err(_) => return generic(file, cookie),
161        };
162
163        let fd = file.as_raw_fd();
164        let addr = unsafe {
165            mmap(ptr::null_mut(), length, PROT_READ, MAP_PRIVATE,
166                 fd, 0)
167        };
168        if addr == libc::MAP_FAILED {
169            return generic(file, cookie);
170        }
171
172        let slice = unsafe {
173            slice::from_raw_parts(addr as *const u8, length)
174        };
175
176        Ok(File(
177            Imp::Mmap {
178                reader: Memory::with_cookie(slice, cookie),
179            },
180            path.into(),
181        ))
182    }
183
184    /// Like [`Self::open`], but sets a cookie.
185    pub fn with_cookie<P: AsRef<Path>>(path: P, cookie: C) -> io::Result<Self> {
186        let path = path.as_ref();
187        let file = fs::File::open(path).map_err(|e| FileError::new(path, e))?;
188        Self::new_with_cookie(file, path, cookie)
189    }
190}
191
192impl<'a, C: fmt::Debug + Sync + Send> io::Read for File<'a, C> {
193    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
194        match self.0 {
195            Imp::Generic(ref mut reader) => reader.read(buf),
196            Imp::Mmap { ref mut reader, .. } => reader.read(buf),
197        }.map_err(|e| FileError::new(&self.1, e))
198    }
199}
200
201impl<'a, C: fmt::Debug + Sync + Send> BufferedReader<C> for File<'a, C> {
202    fn buffer(&self) -> &[u8] {
203        match self.0 {
204            Imp::Generic(ref reader) => reader.buffer(),
205            Imp::Mmap { ref reader, .. } => reader.buffer(),
206        }
207    }
208
209    fn data(&mut self, amount: usize) -> io::Result<&[u8]> {
210        let path = &self.1;
211        match self.0 {
212            Imp::Generic(ref mut reader) => reader.data(amount),
213            Imp::Mmap { ref mut reader, .. } => reader.data(amount),
214        }.map_err(|e| FileError::new(path, e))
215    }
216
217    fn data_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
218        let path = &self.1;
219        match self.0 {
220            Imp::Generic(ref mut reader) => reader.data_hard(amount),
221            Imp::Mmap { ref mut reader, .. } => reader.data_hard(amount),
222        }.map_err(|e| FileError::new(path, e))
223    }
224
225    fn consume(&mut self, amount: usize) -> &[u8] {
226        match self.0 {
227            Imp::Generic(ref mut reader) => reader.consume(amount),
228            Imp::Mmap { ref mut reader, .. } => reader.consume(amount),
229        }
230    }
231
232    fn data_consume(&mut self, amount: usize) -> io::Result<&[u8]> {
233        let path = &self.1;
234        match self.0 {
235            Imp::Generic(ref mut reader) => reader.data_consume(amount),
236            Imp::Mmap { ref mut reader, .. } => reader.data_consume(amount),
237        }.map_err(|e| FileError::new(path, e))
238    }
239
240    fn data_consume_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
241        let path = &self.1;
242        match self.0 {
243            Imp::Generic(ref mut reader) => reader.data_consume_hard(amount),
244            Imp::Mmap { ref mut reader, .. } => reader.data_consume_hard(amount),
245        }.map_err(|e| FileError::new(path, e))
246    }
247
248    fn get_mut(&mut self) -> Option<&mut dyn BufferedReader<C>> {
249        None
250    }
251
252    fn get_ref(&self) -> Option<&dyn BufferedReader<C>> {
253        None
254    }
255
256    fn into_inner<'b>(self: Box<Self>) -> Option<Box<dyn BufferedReader<C> + 'b>>
257        where Self: 'b {
258        None
259    }
260
261    fn cookie_set(&mut self, cookie: C) -> C {
262        match self.0 {
263            Imp::Generic(ref mut reader) => reader.cookie_set(cookie),
264            Imp::Mmap { ref mut reader, .. } => reader.cookie_set(cookie),
265        }
266    }
267
268    fn cookie_ref(&self) -> &C {
269        match self.0 {
270            Imp::Generic(ref reader) => reader.cookie_ref(),
271            Imp::Mmap { ref reader, .. } => reader.cookie_ref(),
272        }
273    }
274
275    fn cookie_mut(&mut self) -> &mut C {
276        match self.0 {
277            Imp::Generic(ref mut reader) => reader.cookie_mut(),
278            Imp::Mmap { ref mut reader, .. } => reader.cookie_mut(),
279        }
280    }
281}
282
#[cfg(test)]
mod test {
    use super::*;

    /// Failing to open a file must produce an error whose message
    /// mentions the path.
    #[test]
    fn error_contains_path() {
        let missing = "/i/do/not/exist";
        let err = File::open(missing).unwrap_err();
        let message = err.to_string();
        assert!(message.contains(missing));
    }
}