gix_index/lib.rs
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
#![deny(unsafe_code, missing_docs, rust_2018_idioms)]
use std::{ops::Range, path::PathBuf};

use bstr::{BStr, ByteSlice};
use filetime::FileTime;
/// `gix_hash` is made available as it's part of the public API in various places.
pub use gix_hash as hash;
/// A re-export to allow calling [`State::from_tree()`].
pub use gix_validate as validate;
///
pub mod file;
///
pub mod extension;
///
pub mod entry;
mod access;
///
pub mod init;
///
pub mod decode;
///
pub mod verify;
///
pub mod write;
pub mod fs;
/// All known versions of a git index file.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Version {
    /// Supports entries and various extensions.
    V2 = 2,
    /// Adds support for additional flags for each entry, called extended entries.
    V3 = 3,
    /// Supports deltified entry paths.
    V4 = 4,
}
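// Note that the discriminants above match the on-disk version numbers, e.g. `Version::V3 as u8 == 3`.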
/// An entry in the index, identifying a non-tree item on disk.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Entry {
    /// The filesystem stat information for the file on disk.
    pub stat: entry::Stat,
    /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
    pub id: gix_hash::ObjectId,
    /// Additional flags for use in algorithms and for efficiently storing stage information.
    pub flags: entry::Flags,
    /// The kind of item this entry represents - it's not all blobs in the index anymore.
    pub mode: entry::Mode,
    /// The range to look up in the path backing to obtain the entry path relative to the repository.
    /// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
    path: Range<usize>,
}
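// The private `path` range is resolved against `State::path_backing` via `Entry::path(&self, state)`
// (used in the `Debug` impl below), so cloning an `Entry` copies only the range, not the path bytes.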
/// An index file whose state was read from a file on disk.
#[derive(Clone)]
pub struct File {
    /// The state containing the actual index data.
    pub(crate) state: State,
    /// The path from which the index was read or to which it is supposed to be written.
    pub(crate) path: PathBuf,
    /// The checksum of all bytes prior to the checksum itself.
    pub(crate) checksum: Option<gix_hash::ObjectId>,
}
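// A `File` is a `State` plus the file-system metadata needed to read and write it; the accessors
// for these fields (and, presumably, a `Deref` to `State`) live in the `file` module.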
/// The type to use and store paths to all entries.
pub type PathStorage = Vec<u8>;
/// The type to use and store paths to all entries, as a reference.
pub type PathStorageRef = [u8];
struct DirEntry<'a> {
    /// The first entry in the directory
    entry: &'a Entry,
    /// One past the last byte of the directory in the path-backing
    dir_end: usize,
}
impl DirEntry<'_> {
    fn path<'a>(&self, state: &'a State) -> &'a BStr {
        let range = self.entry.path.start..self.dir_end;
        state.path_backing[range].as_bstr()
    }
}
/// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
pub struct AccelerateLookup<'a> {
    /// The entries themselves, hashed by their full icase path.
    /// Icase-clashes are handled in order of occurrence and are all available for iteration.
    icase_entries: hashbrown::HashTable<&'a Entry>,
    /// Each hash in this table corresponds to a directory containing one or more entries.
    icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
}
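// `hashbrown::HashTable` is the raw table API: callers supply the hash and an equality closure on
// lookup, which is what allows hashing paths case-insensitively here while still comparing
// candidates precisely enough to tell icase clashes apart.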
/// An in-memory cache of a fully parsed git index file.
///
/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
/// We treat the index and its state as synonymous.
///
/// # A note on safety
///
/// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths as they are entirely controlled
/// by the caller, without applying any level of validation.
///
/// This means that before using these paths to recreate files on disk, *they must be validated*.
///
/// It's notable that it's possible to manufacture tree objects which contain entry names like `.git/hooks/pre-commit`,
/// which will then appear as the path `.git/hooks/pre-commit` in the index. The index doesn't care that the name came
/// from a single tree entry instead of from trees named `.git` and `hooks` and a blob named `pre-commit` - the effect is
/// still the same: an invalid path is presented in the index, and its consumer must validate each path component before use.
///
/// It's recommended to do this using `gix_worktree::Stack`, which has such validation built in when created with `for_checkout()`.
/// Alternatively, one can validate component names with `gix_validate::path::component()`.
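///
/// For illustration only, here is a sketch of such per-component validation using a hypothetical
/// `validate_entry_path` helper; the exact signature and options of `gix_validate::path::component()`
/// should be checked against that crate's documentation:
///
/// ```ignore
/// use bstr::ByteSlice;
///
/// fn validate_entry_path(path: &bstr::BStr) -> bool {
///     path.split_str("/").all(|component| {
///         gix_validate::path::component(component.as_bstr(), None, Default::default()).is_ok()
///     })
/// }
/// ```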
#[derive(Clone)]
pub struct State {
    /// The kind of object hash used when storing the underlying file.
    ///
    /// Empty states, for example, won't contain a single object id, so deducing the hash used isn't always possible.
    object_hash: gix_hash::Kind,
    /// The time at which the state was created, indicating its freshness compared to other files on disk.
    ///
    /// Note that on platforms which only have a precision of one second for this time, we will treat all entries with the
    /// same timestamp as this one as potentially changed, checking more thoroughly if a change actually happened.
    timestamp: FileTime,
    version: Version,
    entries: Vec<Entry>,
    /// A memory area keeping all index paths, in full length, independently of the index version.
    ///
    /// Ranges into this storage are referred to by parts of `entries`.
    path_backing: PathStorage,
    /// True if at least one entry in the index has a special marker mode.
    is_sparse: bool,
    // Extensions
    end_of_index_at_decode_time: bool,
    offset_table_at_decode_time: bool,
    tree: Option<extension::Tree>,
    link: Option<extension::Link>,
    resolve_undo: Option<extension::resolve_undo::Paths>,
    untracked: Option<extension::UntrackedCache>,
    fs_monitor: Option<extension::FsMonitor>,
}
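// A `State` is typically obtained by decoding an index file (see the `file` and `decode` modules)
// or by converting a tree via `State::from_tree()`, as referenced at the top of this file.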
mod impls {
    use std::fmt::{Debug, Formatter};

    use crate::{entry::Stage, State};

    impl Debug for State {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            for entry in &self.entries {
                writeln!(
                    f,
                    "{} {}{:?} {} {}",
                    match entry.flags.stage() {
                        Stage::Unconflicted => " ",
                        Stage::Base => "BASE ",
                        Stage::Ours => "OURS ",
                        Stage::Theirs => "THEIRS ",
                    },
                    if entry.flags.is_empty() {
                        "".to_string()
                    } else {
                        format!("{:?} ", entry.flags)
                    },
                    entry.mode,
                    entry.id,
                    entry.path(self)
                )?;
            }
            Ok(())
        }
    }
}
pub(crate) mod util {
    /// Decode a variable-length integer from the beginning of `data`, returning the value and the remaining bytes,
    /// or `None` on failure.
    #[inline]
    pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
        let (num, consumed) = gix_features::decode::leb64_from_read(data).ok()?;
        let data = &data[consumed..];
        (num, data).into()
    }

    /// Read a big-endian `u32` from the beginning of `data`, returning it along with the remaining bytes.
    #[inline]
    pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
        split_at_pos(data, 4).map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data))
    }

    /// Read a big-endian `u64` from the beginning of `data`, returning it along with the remaining bytes.
    #[inline]
    pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
        split_at_pos(data, 8).map(|(num, data)| (u64::from_be_bytes(num.try_into().unwrap()), data))
    }

    /// Interpret `b`, which must be exactly 4 bytes long, as a big-endian `u32`.
    #[inline]
    pub fn from_be_u32(b: &[u8]) -> u32 {
        u32::from_be_bytes(b.try_into().unwrap())
    }

    /// Split `data` at the first occurrence of `byte`, with `byte` itself excluded from both halves,
    /// or return `None` if no such split is possible.
    #[inline]
    pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
        if data.len() < 2 {
            return None;
        }
        data.iter().enumerate().find_map(|(idx, b)| {
            (*b == byte).then(|| {
                if idx == 0 {
                    (&[] as &[u8], &data[1..])
                } else {
                    let (a, b) = data.split_at(idx);
                    (a, &b[1..])
                }
            })
        })
    }

    /// Split `data` at `pos`, or return `None` if there aren't enough bytes.
    #[inline]
    pub fn split_at_pos(data: &[u8], pos: usize) -> Option<(&[u8], &[u8])> {
        if data.len() < pos {
            return None;
        }
        data.split_at(pos).into()
    }
}
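// A quick sanity check of the byte-splitting helpers above, using hand-written input
// (illustrative values only).
#[test]
fn util_byte_splitting() {
    assert_eq!(util::read_u32(&[0, 0, 0, 5, 0xff]), Some((5, &[0xff][..])));
    assert_eq!(
        util::split_at_byte_exclusive(b"path\0rest", 0),
        Some((&b"path"[..], &b"rest"[..]))
    );
    assert_eq!(util::split_at_pos(b"abcd", 2), Some((&b"ab"[..], &b"cd"[..])));
    assert_eq!(util::split_at_pos(b"ab", 3), None);
}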
#[test]
fn size_of_entry() {
    assert_eq!(std::mem::size_of::<crate::Entry>(), 80);

    // The reason we have our own `Time` type is that it's half the size of `filetime::FileTime`.
    assert_eq!(std::mem::size_of::<crate::entry::stat::Time>(), 8);
    assert_eq!(std::mem::size_of::<filetime::FileTime>(), 16);
}