gix_index/access/mod.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
use std::{cmp::Ordering, ops::Range};
use bstr::{BStr, ByteSlice, ByteVec};
use filetime::FileTime;
use crate::entry::{Stage, StageRaw};
use crate::{entry, extension, AccelerateLookup, Entry, PathStorage, PathStorageRef, State, Version};
// TODO: integrate this somehow, somewhere, depending on later usage.
#[allow(dead_code)]
mod sparse;
/// General information and entries
impl State {
/// Return the version used to store this state's information on disk.
pub fn version(&self) -> Version {
self.version
}
/// Returns time at which the state was created, indicating its freshness compared to other files on disk.
pub fn timestamp(&self) -> FileTime {
self.timestamp
}
/// Updates the timestamp of this state, indicating its freshness compared to other files on disk.
///
/// Be careful about using this as setting a timestamp without correctly updating the index
/// **will cause (file system) race conditions** see racy-git.txt in the git documentation
/// for more details.
pub fn set_timestamp(&mut self, timestamp: FileTime) {
self.timestamp = timestamp;
}
/// Return the kind of hashes used in this instance.
pub fn object_hash(&self) -> gix_hash::Kind {
self.object_hash
}
/// Return our entries
pub fn entries(&self) -> &[Entry] {
&self.entries
}
/// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them.
pub fn path_backing(&self) -> &PathStorageRef {
&self.path_backing
}
/// Runs `filter_map` on all entries, returning an iterator over all paths along with the result of `filter_map`.
pub fn entries_with_paths_by_filter_map<'a, T>(
&'a self,
mut filter_map: impl FnMut(&'a BStr, &Entry) -> Option<T> + 'a,
) -> impl Iterator<Item = (&'a BStr, T)> + 'a {
self.entries.iter().filter_map(move |e| {
let p = e.path(self);
filter_map(p, e).map(|t| (p, t))
})
}
/// Return mutable entries along with their path, as obtained from `backing`.
pub fn entries_mut_with_paths_in<'state, 'backing>(
&'state mut self,
backing: &'backing PathStorageRef,
) -> impl Iterator<Item = (&'state mut Entry, &'backing BStr)> {
self.entries.iter_mut().map(move |e| {
let path = backing[e.path.clone()].as_bstr();
(e, path)
})
}
/// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative
/// `path` and `stage`, or `None`.
///
/// Use the index for accessing multiple stages if they exists, but at least the single matching entry.
pub fn entry_index_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<usize> {
let mut stage_cmp = Ordering::Equal;
let idx = self
.entries
.binary_search_by(|e| {
let res = e.path(self).cmp(path);
if res.is_eq() {
stage_cmp = e.stage().cmp(&stage);
}
res
})
.ok()?;
self.entry_index_by_idx_and_stage(path, idx, stage as StageRaw, stage_cmp)
}
/// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages, and [`Ordering::Less`]
/// towards lower stages, and return the lowest or highest seen stage.
/// Return `None` if there is no greater or smaller stage.
fn walk_entry_stages(&self, path: &BStr, base: usize, direction: Ordering) -> Option<usize> {
match direction {
Ordering::Greater => self
.entries
.get(base + 1..)?
.iter()
.enumerate()
.take_while(|(_, e)| e.path(self) == path)
.last()
.map(|(idx, _)| base + 1 + idx),
Ordering::Equal => Some(base),
Ordering::Less => self.entries[..base]
.iter()
.enumerate()
.rev()
.take_while(|(_, e)| e.path(self) == path)
.last()
.map(|(idx, _)| idx),
}
}
fn entry_index_by_idx_and_stage(
&self,
path: &BStr,
idx: usize,
wanted_stage: entry::StageRaw,
stage_cmp: Ordering,
) -> Option<usize> {
match stage_cmp {
Ordering::Greater => self.entries[..idx]
.iter()
.enumerate()
.rev()
.take_while(|(_, e)| e.path(self) == path)
.find_map(|(idx, e)| (e.stage_raw() == wanted_stage).then_some(idx)),
Ordering::Equal => Some(idx),
Ordering::Less => self
.entries
.get(idx + 1..)?
.iter()
.enumerate()
.take_while(|(_, e)| e.path(self) == path)
.find_map(|(ofs, e)| (e.stage_raw() == wanted_stage).then_some(idx + ofs + 1)),
}
}
/// Return a data structure to help with case-insensitive lookups.
///
/// It's required perform any case-insensitive lookup.
/// TODO: needs multi-threaded insertion, raw-table to have multiple locks depending on bucket.
pub fn prepare_icase_backing(&self) -> AccelerateLookup<'_> {
let _span = gix_features::trace::detail!("prepare_icase_backing", entries = self.entries.len());
let mut out = AccelerateLookup::with_capacity(self.entries.len());
for entry in &self.entries {
let entry_path = entry.path(self);
let hash = AccelerateLookup::icase_hash(entry_path);
out.icase_entries
.insert_unique(hash, entry, |e| AccelerateLookup::icase_hash(e.path(self)));
let mut last_pos = entry_path.len();
while let Some(slash_idx) = entry_path[..last_pos].rfind_byte(b'/') {
let dir = entry_path[..slash_idx].as_bstr();
last_pos = slash_idx;
let dir_range = entry.path.start..(entry.path.start + dir.len());
let hash = AccelerateLookup::icase_hash(dir);
if out
.icase_dirs
.find(hash, |dir| {
dir.path(self) == self.path_backing[dir_range.clone()].as_bstr()
})
.is_none()
{
out.icase_dirs.insert_unique(
hash,
crate::DirEntry {
entry,
dir_end: dir_range.end,
},
|dir| AccelerateLookup::icase_hash(dir.path(self)),
);
} else {
break;
}
}
}
gix_features::trace::debug!(directories = out.icase_dirs.len(), "stored directories");
out
}
/// Return the entry at `path` that is at the lowest available stage, using `lookup` for acceleration.
/// It must have been created from this instance, and was ideally kept up-to-date with it.
///
/// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
pub fn entry_by_path_icase<'a>(
&'a self,
path: &BStr,
ignore_case: bool,
lookup: &AccelerateLookup<'a>,
) -> Option<&'a Entry> {
lookup
.icase_entries
.find(AccelerateLookup::icase_hash(path), |e| {
let entry_path = e.path(self);
if entry_path == path {
return true;
};
if !ignore_case {
return false;
}
entry_path.eq_ignore_ascii_case(path)
})
.copied()
}
/// Return the entry (at any stage) that is inside of `directory`, or `None`,
/// using `lookup` for acceleration.
/// Note that submodules are not detected as directories and the user should
/// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
/// possibility. Doing so might also reveal a sparse directory.
///
/// If `ignore_case` is set
pub fn entry_closest_to_directory_icase<'a>(
&'a self,
directory: &BStr,
ignore_case: bool,
lookup: &AccelerateLookup<'a>,
) -> Option<&'a Entry> {
lookup
.icase_dirs
.find(AccelerateLookup::icase_hash(directory), |dir| {
let dir_path = dir.path(self);
if dir_path == directory {
return true;
};
if !ignore_case {
return false;
}
dir_path.eq_ignore_ascii_case(directory)
})
.map(|dir| dir.entry)
}
/// Return the entry (at any stage) that is inside of `directory`, or `None`.
/// Note that submodules are not detected as directories and the user should
/// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
/// possibility. Doing so might also reveal a sparse directory.
///
/// Note that this is a case-sensitive search.
pub fn entry_closest_to_directory(&self, directory: &BStr) -> Option<&Entry> {
let idx = self.entry_index_by_path(directory).err()?;
for entry in &self.entries[idx..] {
let path = entry.path(self);
if path.get(..directory.len())? != directory {
break;
}
let dir_char = path.get(directory.len())?;
if *dir_char > b'/' {
break;
}
if *dir_char < b'/' {
continue;
}
return Some(entry);
}
None
}
/// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative
/// `path` and `stage`, or `None`.
///
/// Use the index for accessing multiple stages if they exists, but at least the single matching entry.
///
/// # Panics
///
/// If `upper_bound` is out of bounds of our entries array.
pub fn entry_index_by_path_and_stage_bounded(
&self,
path: &BStr,
stage: entry::Stage,
upper_bound: usize,
) -> Option<usize> {
self.entries[..upper_bound]
.binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage)))
.ok()
}
/// Like [`entry_index_by_path_and_stage()`](State::entry_index_by_path_and_stage()),
/// but returns the entry instead of the index.
pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> {
self.entry_index_by_path_and_stage(path, stage)
.map(|idx| &self.entries[idx])
}
/// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict.
///
/// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2.
pub fn entry_by_path(&self, path: &BStr) -> Option<&Entry> {
let mut stage_at_index = 0;
let idx = self
.entries
.binary_search_by(|e| {
let res = e.path(self).cmp(path);
if res.is_eq() {
stage_at_index = e.stage_raw();
}
res
})
.ok()?;
let idx = if stage_at_index == 0 || stage_at_index == 2 {
idx
} else {
self.entry_index_by_idx_and_stage(path, idx, Stage::Ours as StageRaw, stage_at_index.cmp(&2))?
};
Some(&self.entries[idx])
}
/// Return the index at `Ok(index)` where the entry matching `path` (in any stage) can be found, or return
/// `Err(index)` to indicate the insertion position at which an entry with `path` would fit in.
pub fn entry_index_by_path(&self, path: &BStr) -> Result<usize, usize> {
self.entries.binary_search_by(|e| e.path(self).cmp(path))
}
/// Return the slice of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
///
/// If `prefix` is empty, all entries are returned.
pub fn prefixed_entries(&self, prefix: &BStr) -> Option<&[Entry]> {
self.prefixed_entries_range(prefix).map(|range| &self.entries[range])
}
/// Return the range of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
///
/// If `prefix` is empty, the range will include all entries.
pub fn prefixed_entries_range(&self, prefix: &BStr) -> Option<Range<usize>> {
if prefix.is_empty() {
return Some(0..self.entries.len());
}
let prefix_len = prefix.len();
let mut low = self.entries.partition_point(|e| {
e.path(self)
.get(..prefix_len)
.map_or_else(|| e.path(self) <= &prefix[..e.path.len()], |p| p < prefix)
});
let mut high = low
+ self.entries[low..].partition_point(|e| e.path(self).get(..prefix_len).map_or(false, |p| p <= prefix));
let low_entry = &self.entries.get(low)?;
if low_entry.stage_raw() != 0 {
low = self
.walk_entry_stages(low_entry.path(self), low, Ordering::Less)
.unwrap_or(low);
}
if let Some(high_entry) = self.entries.get(high) {
if high_entry.stage_raw() != 0 {
high = self
.walk_entry_stages(high_entry.path(self), high, Ordering::Less)
.unwrap_or(high);
}
}
(low != high).then_some(low..high)
}
/// Return the entry at `idx` or _panic_ if the index is out of bounds.
///
/// The `idx` is typically returned by [`entry_by_path_and_stage()`][State::entry_by_path_and_stage()].
pub fn entry(&self, idx: usize) -> &Entry {
&self.entries[idx]
}
/// Returns a boolean value indicating whether the index is sparse or not.
///
/// An index is sparse if it contains at least one [`Mode::DIR`][entry::Mode::DIR] entry.
pub fn is_sparse(&self) -> bool {
self.is_sparse
}
/// Return the range of entries that exactly match the given `path`, in all available stages, or `None` if no entry with such
/// path exists.
///
/// The range can be used to access the respective entries via [`entries()`](Self::entries()) or [`entries_mut()](Self::entries_mut()).
pub fn entry_range(&self, path: &BStr) -> Option<Range<usize>> {
let mut stage_at_index = 0;
let idx = self
.entries
.binary_search_by(|e| {
let res = e.path(self).cmp(path);
if res.is_eq() {
stage_at_index = e.stage_raw();
}
res
})
.ok()?;
let (start, end) = (
self.walk_entry_stages(path, idx, Ordering::Less).unwrap_or(idx),
self.walk_entry_stages(path, idx, Ordering::Greater).unwrap_or(idx) + 1,
);
Some(start..end)
}
}
impl AccelerateLookup<'_> {
fn with_capacity(cap: usize) -> Self {
let ratio_of_entries_to_dirs_in_webkit = 20; // 400k entries and 20k dirs
Self {
icase_entries: hashbrown::HashTable::with_capacity(cap),
icase_dirs: hashbrown::HashTable::with_capacity(cap / ratio_of_entries_to_dirs_in_webkit),
}
}
fn icase_hash(data: &BStr) -> u64 {
use std::hash::Hasher;
let mut hasher = fnv::FnvHasher::default();
for b in data.as_bytes() {
hasher.write_u8(b.to_ascii_lowercase());
}
hasher.finish()
}
}
/// Mutation
impl State {
/// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here.
/// Note that it must not be empty.
pub fn return_path_backing(&mut self, backing: PathStorage) {
debug_assert!(
self.path_backing.is_empty(),
"BUG: return path backing only after taking it, once"
);
self.path_backing = backing;
}
/// Return mutable entries in a slice.
pub fn entries_mut(&mut self) -> &mut [Entry] {
&mut self.entries
}
/// Return a writable slice to entries and read-access to their path storage at the same time.
pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) {
(&mut self.entries, &self.path_backing)
}
/// Return mutable entries along with their paths in an iterator.
pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> {
let paths = &self.path_backing;
self.entries.iter_mut().map(move |e| {
let path = paths[e.path.clone()].as_bstr();
(e, path)
})
}
/// Return all parts that relate to entries, which includes path storage.
///
/// This can be useful for obtaining a standalone, boxable iterator
pub fn into_entries(self) -> (Vec<Entry>, PathStorage) {
(self.entries, self.path_backing)
}
/// Sometimes it's needed to remove the path backing to allow certain mutation to happen in the state while supporting reading the entry's
/// path.
pub fn take_path_backing(&mut self) -> PathStorage {
assert_eq!(
self.entries.is_empty(),
self.path_backing.is_empty(),
"BUG: cannot take out backing multiple times"
);
std::mem::take(&mut self.path_backing)
}
/// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()],
/// but returns the mutable entry instead of the index.
pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> {
self.entry_index_by_path_and_stage(path, stage)
.map(|idx| &mut self.entries[idx])
}
/// Push a new entry containing `stat`, `id`, `flags` and `mode` and `path` to the end of our storage, without performing
/// any sanity checks. This means it's possible to push a new entry to the same path on the same stage and even after sorting
/// the entries lookups may still return the wrong one of them unless the correct binary search criteria is chosen.
///
/// Note that this *is likely* to break invariants that will prevent further lookups by path unless
/// [`entry_index_by_path_and_stage_bounded()`][State::entry_index_by_path_and_stage_bounded()] is used with
/// the `upper_bound` being the amount of entries before the first call to this method.
///
/// Alternatively, make sure to call [`sort_entries()`][State::sort_entries()] before entry lookup by path to restore
/// the invariant.
pub fn dangerously_push_entry(
&mut self,
stat: entry::Stat,
id: gix_hash::ObjectId,
flags: entry::Flags,
mode: entry::Mode,
path: &BStr,
) {
let path = {
let path_start = self.path_backing.len();
self.path_backing.push_str(path);
path_start..self.path_backing.len()
};
self.entries.push(Entry {
stat,
id,
flags,
mode,
path,
});
}
/// Unconditionally sort entries as needed to perform lookups quickly.
pub fn sort_entries(&mut self) {
let path_backing = &self.path_backing;
self.entries.sort_by(|a, b| {
Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
.then_with(|| a.stage().cmp(&b.stage()))
});
}
/// Similar to [`sort_entries()`][State::sort_entries()], but applies `compare` after comparing
/// by path and stage as a third criteria.
pub fn sort_entries_by(&mut self, mut compare: impl FnMut(&Entry, &Entry) -> Ordering) {
let path_backing = &self.path_backing;
self.entries.sort_by(|a, b| {
Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
.then_with(|| a.stage().cmp(&b.stage()))
.then_with(|| compare(a, b))
});
}
/// Physically remove all entries for which `should_remove(idx, path, entry)` returns `true`, traversing them from first to last.
///
/// Note that the memory used for the removed entries paths is not freed, as it's append-only.
///
/// ### Performance
///
/// To implement this operation typically, one would rather add [entry::Flags::REMOVE] to each entry to remove
/// them when [writing the index](Self::write_to()).
pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) {
let mut index = 0;
let paths = &self.path_backing;
self.entries.retain_mut(|e| {
let path = e.path_in(paths);
let res = !should_remove(index, path, e);
index += 1;
res
});
}
}
/// Extensions
impl State {
/// Access the `tree` extension.
pub fn tree(&self) -> Option<&extension::Tree> {
self.tree.as_ref()
}
/// Access the `link` extension.
pub fn link(&self) -> Option<&extension::Link> {
self.link.as_ref()
}
/// Obtain the resolve-undo extension.
pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> {
self.resolve_undo.as_ref()
}
/// Obtain the untracked extension.
pub fn untracked(&self) -> Option<&extension::UntrackedCache> {
self.untracked.as_ref()
}
/// Obtain the fsmonitor extension.
pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> {
self.fs_monitor.as_ref()
}
/// Return `true` if the end-of-index extension was present when decoding this index.
pub fn had_end_of_index_marker(&self) -> bool {
self.end_of_index_at_decode_time
}
/// Return `true` if the offset-table extension was present when decoding this index.
pub fn had_offset_table(&self) -> bool {
self.offset_table_at_decode_time
}
}
#[cfg(test)]
mod tests {
use std::path::{Path, PathBuf};
#[test]
fn entry_by_path_with_conflicting_file() {
let file = PathBuf::from("tests")
.join("fixtures")
.join(Path::new("loose_index").join("conflicting-file.git-index"));
let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file");
assert_eq!(
file.entries().len(),
3,
"we have a set of conflict entries for a single file"
);
for idx in 0..3 {
for wanted_stage in 1..=3 {
let actual_idx = file
.entry_index_by_idx_and_stage(
"file".into(),
idx,
wanted_stage,
(idx + 1).cmp(&(wanted_stage as usize)),
)
.expect("found");
assert_eq!(
actual_idx + 1,
wanted_stage as usize,
"the index and stage have a relation, and that is upheld if we search correctly"
);
}
}
}
}