1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
use crate::Mmap;
use anyhow::{Context, Result};
use std::fs::File;
use std::ops::{Deref, DerefMut, Range};
use std::path::Path;
use std::sync::Arc;
/// A type akin to `Vec<u8>`, but backed by `mmap` and able to be split.
///
/// This type is a non-growable owned list of bytes. It can be segmented into
/// disjoint separately owned views akin to the `split_at` method on slices in
/// Rust. An `MmapVec` is backed by an OS-level memory allocation and is not
/// suitable for lots of small allocations (since it works at the page
/// granularity).
///
/// An `MmapVec` is an owned value which means that owners have the ability to
/// get exclusive access to the underlying bytes, enabling mutation.
pub struct MmapVec {
/// The underlying mapping. It sits behind an `Arc` because views produced
/// by `split_off` keep referring to the same mapping.
mmap: Arc<Mmap>,
/// The bytes of `mmap` this value exposes, expressed as offsets from the
/// start of the mapping (not relative to any other view).
range: Range<usize>,
}
impl MmapVec {
/// Consumes an existing `mmap` and wraps it up into an `MmapVec`.
///
/// The returned `MmapVec` will have the `size` specified, which can be
/// smaller than the region mapped by the `Mmap`. The returned `MmapVec`
/// will only have at most `size` bytes accessible.
pub fn new(mmap: Mmap, size: usize) -> MmapVec {
assert!(size <= mmap.len());
MmapVec {
mmap: Arc::new(mmap),
range: 0..size,
}
}
/// Creates a new zero-initialized `MmapVec` with the given `size`.
///
/// This commit will return a new `MmapVec` suitably sized to hold `size`
/// bytes. All bytes will be initialized to zero since this is a fresh OS
/// page allocation.
pub fn with_capacity(size: usize) -> Result<MmapVec> {
Ok(MmapVec::new(Mmap::with_at_least(size)?, size))
}
/// Creates a new `MmapVec` from the contents of an existing `slice`.
///
/// A new `MmapVec` is allocated to hold the contents of `slice` and then
/// `slice` is copied into the new mmap. It's recommended to avoid this
/// method if possible to avoid the need to copy data around.
pub fn from_slice(slice: &[u8]) -> Result<MmapVec> {
let mut result = MmapVec::with_capacity(slice.len())?;
result.copy_from_slice(slice);
Ok(result)
}
/// Creates a new `MmapVec` which is the `path` specified mmap'd into
/// memory.
///
/// This function will attempt to open the file located at `path` and will
/// then use that file to learn about its size and map the full contents
/// into memory. This will return an error if the file doesn't exist or if
/// it's too large to be fully mapped into memory.
pub fn from_file(path: &Path) -> Result<MmapVec> {
let mmap = Mmap::from_file(path)
.with_context(|| format!("failed to create mmap for file: {}", path.display()))?;
let len = mmap.len();
Ok(MmapVec::new(mmap, len))
}
/// Splits the collection into two at the given index.
///
/// Returns a separate `MmapVec` which shares the underlying mapping, but
/// only has access to elements in the range `[at, len)`. After the call,
/// the original `MmapVec` will be left with access to the elements in the
/// range `[0, at)`.
///
/// This is an `O(1)` operation which does not involve copies.
pub fn split_off(&mut self, at: usize) -> MmapVec {
assert!(at <= self.range.len());
// Create a new `MmapVec` which refers to the same underlying mmap, but
// has a disjoint range from ours. Our own range is adjusted to be
// disjoint just after `ret` is created.
let ret = MmapVec {
mmap: self.mmap.clone(),
range: at..self.range.end,
};
self.range.end = self.range.start + at;
return ret;
}
/// Makes the specified `range` within this `mmap` to be read/execute.
pub unsafe fn make_executable(
&self,
range: Range<usize>,
enable_branch_protection: bool,
) -> Result<()> {
assert!(range.start <= range.end);
assert!(range.end <= self.range.len());
self.mmap.make_executable(
range.start + self.range.start..range.end + self.range.start,
enable_branch_protection,
)
}
/// Makes the specified `range` within this `mmap` to be read-only.
pub unsafe fn make_readonly(&self, range: Range<usize>) -> Result<()> {
assert!(range.start <= range.end);
assert!(range.end <= self.range.len());
self.mmap
.make_readonly(range.start + self.range.start..range.end + self.range.start)
}
/// Returns the underlying file that this mmap is mapping, if present.
pub fn original_file(&self) -> Option<&Arc<File>> {
self.mmap.original_file()
}
/// Returns the offset within the original mmap that this `MmapVec` is
/// created from.
pub fn original_offset(&self) -> usize {
self.range.start
}
}
impl Deref for MmapVec {
    type Target = [u8];

    /// Borrows the bytes of the mapping that fall inside this value's range.
    #[inline]
    fn deref(&self) -> &[u8] {
        let owned = self.range.clone();
        // SAFETY: this `MmapVec` owns `self.range` of the underlying mmap, so
        // those bytes should be fine to read through a shared borrow.
        unsafe { self.mmap.slice(owned) }
    }
}
impl DerefMut for MmapVec {
    /// Mutably borrows the bytes of the mapping that fall inside this value's
    /// range.
    ///
    /// # Panics
    ///
    /// Panics if `self.range` is inverted or extends past the end of the
    /// underlying mapping.
    fn deref_mut(&mut self) -> &mut [u8] {
        // The slice below is built from a raw pointer, so nothing checks
        // bounds for us; assert them explicitly before touching memory.
        assert!(self.range.start <= self.range.end);
        assert!(self.range.end <= self.mmap.len());

        // SAFETY: The underlying mmap is protected behind an `Arc` which means
        // that there can be many references to it. We are guaranteed, though,
        // that each reference to the underlying `mmap` has a disjoint `range`
        // listed that it can access. This means that despite having shared
        // access to the mmap itself we have exclusive ownership of the bytes
        // specified in `self.range`.
        //
        // The mutable slice is created over exactly `self.range` and nothing
        // more: an exclusive reference spanning the whole mapping would also
        // cover bytes owned by other views, which is precisely what the
        // argument above rules out. The asserts above keep the pointer offset
        // and the length within the mapping.
        unsafe {
            let first = self.mmap.as_ptr().cast_mut().add(self.range.start);
            std::slice::from_raw_parts_mut(first, self.range.len())
        }
    }
}
#[cfg(test)]
mod tests {
    use super::MmapVec;

    /// A fresh allocation has the requested length, reads as zeros, and can
    /// be written to and read back through indexing.
    #[test]
    fn smoke() {
        let mut bytes = MmapVec::with_capacity(10).unwrap();
        assert_eq!(bytes.len(), 10);
        assert_eq!(&bytes[..], &[0; 10]);

        bytes[0] = 1;
        bytes[2] = 3;
        assert!(bytes.get(10).is_none());
        assert_eq!(bytes[0], 1);
        assert_eq!(bytes[2], 3);
    }

    /// `MmapVec::split_off` mirrors `Vec::split_off` when applied repeatedly
    /// to the retained front part.
    #[test]
    fn split_off() {
        let mut expected = Vec::from([1, 2, 3, 4]);
        let mut actual = MmapVec::from_slice(&expected).unwrap();
        assert_eq!(&actual[..], &expected[..]);

        // Split points, applied in order to whatever is left in front:
        //   4 -> removes nothing; 4 elements remain
        //   3 -> removes 1 element; 3 remain
        //   1 -> removes 2 elements; 1 remains
        //   0 -> removes the last element; nothing remains
        //   0 -> nothing left to remove, but that's okay
        for at in [4, 3, 1, 0, 0] {
            assert_eq!(&actual.split_off(at)[..], &expected.split_off(at)[..]);
            assert_eq!(&actual[..], &expected[..]);
        }
    }
}