gix_index/access/mod.rs

use std::{cmp::Ordering, ops::Range};

use bstr::{BStr, ByteSlice, ByteVec};
use filetime::FileTime;

use crate::entry::{Stage, StageRaw};
use crate::{entry, extension, AccelerateLookup, Entry, PathStorage, PathStorageRef, State, Version};

// TODO: integrate this somehow, somewhere, depending on later usage.
#[allow(dead_code)]
mod sparse;

/// General information and entries
impl State {
    /// Return the version used to store this state's information on disk.
    pub fn version(&self) -> Version {
        self.version
    }

    /// Returns the time at which the state was created, indicating its freshness compared to other files on disk.
    pub fn timestamp(&self) -> FileTime {
        self.timestamp
    }

    /// Updates the timestamp of this state, indicating its freshness compared to other files on disk.
    ///
    /// Be careful about using this, as setting a timestamp without correctly updating the index
    /// **will cause (file system) race conditions**; see `racy-git.txt` in the git documentation
    /// for more details.
    pub fn set_timestamp(&mut self, timestamp: FileTime) {
        self.timestamp = timestamp;
    }

    /// Return the kind of hashes used in this instance.
    pub fn object_hash(&self) -> gix_hash::Kind {
        self.object_hash
    }

    /// Return our entries.
    pub fn entries(&self) -> &[Entry] {
        &self.entries
    }
    /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them.
    pub fn path_backing(&self) -> &PathStorageRef {
        &self.path_backing
    }

    /// Runs `filter_map` on all entries, returning an iterator over all paths along with the result of `filter_map`.
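    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to be a populated [`State`]:
    ///
    /// ```ignore
    /// // Collect `(path, id)` pairs for all entries that carry a conflict stage.
    /// let conflicting: Vec<_> = state
    ///     .entries_with_paths_by_filter_map(|_path, e| (e.stage_raw() != 0).then_some(e.id))
    ///     .collect();
    /// ```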
    pub fn entries_with_paths_by_filter_map<'a, T>(
        &'a self,
        mut filter_map: impl FnMut(&'a BStr, &Entry) -> Option<T> + 'a,
    ) -> impl Iterator<Item = (&'a BStr, T)> + 'a {
        self.entries.iter().filter_map(move |e| {
            let p = e.path(self);
            filter_map(p, e).map(|t| (p, t))
        })
    }
    /// Return mutable entries along with their path, as obtained from `backing`.
    pub fn entries_mut_with_paths_in<'state, 'backing>(
        &'state mut self,
        backing: &'backing PathStorageRef,
    ) -> impl Iterator<Item = (&'state mut Entry, &'backing BStr)> {
        self.entries.iter_mut().map(move |e| {
            let path = backing[e.path.clone()].as_bstr();
            (e, path)
        })
    }

    /// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative
    /// `path` and `stage`, or `None`.
    ///
    /// Use the index for accessing multiple stages if they exist, but at least the single matching entry.
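    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain `src/lib.rs` without
    /// conflicts, and `Stage::Unconflicted` is assumed to name the stage-0 variant:
    ///
    /// ```ignore
    /// use gix_index::entry::Stage;
    ///
    /// let idx = state
    ///     .entry_index_by_path_and_stage("src/lib.rs".into(), Stage::Unconflicted)
    ///     .expect("entry exists");
    /// let entry = state.entry(idx);
    /// ```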
    pub fn entry_index_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<usize> {
        let mut stage_cmp = Ordering::Equal;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_cmp = e.stage().cmp(&stage);
                }
                res
            })
            .ok()?;
        self.entry_index_by_idx_and_stage(path, idx, stage as StageRaw, stage_cmp)
    }

    /// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages and [`Ordering::Less`]
    /// towards lower stages, and return the index of the entry at the lowest or highest stage that was seen.
    /// Return `None` if there is no greater or smaller stage.
    fn walk_entry_stages(&self, path: &BStr, base: usize, direction: Ordering) -> Option<usize> {
        match direction {
            Ordering::Greater => self
                .entries
                .get(base + 1..)?
                .iter()
                .enumerate()
                .take_while(|(_, e)| e.path(self) == path)
                .last()
                .map(|(idx, _)| base + 1 + idx),
            Ordering::Equal => Some(base),
            Ordering::Less => self.entries[..base]
                .iter()
                .enumerate()
                .rev()
                .take_while(|(_, e)| e.path(self) == path)
                .last()
                .map(|(idx, _)| idx),
        }
    }

    fn entry_index_by_idx_and_stage(
        &self,
        path: &BStr,
        idx: usize,
        wanted_stage: entry::StageRaw,
        stage_cmp: Ordering,
    ) -> Option<usize> {
        match stage_cmp {
            Ordering::Greater => self.entries[..idx]
                .iter()
                .enumerate()
                .rev()
                .take_while(|(_, e)| e.path(self) == path)
                .find_map(|(idx, e)| (e.stage_raw() == wanted_stage).then_some(idx)),
            Ordering::Equal => Some(idx),
            Ordering::Less => self
                .entries
                .get(idx + 1..)?
                .iter()
                .enumerate()
                .take_while(|(_, e)| e.path(self) == path)
                .find_map(|(ofs, e)| (e.stage_raw() == wanted_stage).then_some(idx + ofs + 1)),
        }
    }

    /// Return a data structure to help with case-insensitive lookups.
    ///
    /// It is required to perform any case-insensitive lookup.
    /// TODO: needs multi-threaded insertion, raw-table to have multiple locks depending on bucket.
    pub fn prepare_icase_backing(&self) -> AccelerateLookup<'_> {
        let _span = gix_features::trace::detail!("prepare_icase_backing", entries = self.entries.len());
        let mut out = AccelerateLookup::with_capacity(self.entries.len());
        for entry in &self.entries {
            let entry_path = entry.path(self);
            let hash = AccelerateLookup::icase_hash(entry_path);
            out.icase_entries
                .insert_unique(hash, entry, |e| AccelerateLookup::icase_hash(e.path(self)));

            let mut last_pos = entry_path.len();
            while let Some(slash_idx) = entry_path[..last_pos].rfind_byte(b'/') {
                let dir = entry_path[..slash_idx].as_bstr();
                last_pos = slash_idx;
                let dir_range = entry.path.start..(entry.path.start + dir.len());

                let hash = AccelerateLookup::icase_hash(dir);
                if out
                    .icase_dirs
                    .find(hash, |dir| {
                        dir.path(self) == self.path_backing[dir_range.clone()].as_bstr()
                    })
                    .is_none()
                {
                    out.icase_dirs.insert_unique(
                        hash,
                        crate::DirEntry {
                            entry,
                            dir_end: dir_range.end,
                        },
                        |dir| AccelerateLookup::icase_hash(dir.path(self)),
                    );
                } else {
                    break;
                }
            }
        }
        gix_features::trace::debug!(directories = out.icase_dirs.len(), "stored directories");
        out
    }

    /// Return the entry at `path` that is at the lowest available stage, using `lookup` for acceleration.
    /// `lookup` must have been created from this instance, and ideally is kept up-to-date with it.
    ///
    /// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
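    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain `README.md`:
    ///
    /// ```ignore
    /// let lookup = state.prepare_icase_backing();
    /// // A case-insensitive search finds the entry despite the differing case.
    /// let entry = state.entry_by_path_icase("readme.MD".into(), true, &lookup);
    /// assert!(entry.is_some());
    /// ```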
    pub fn entry_by_path_icase<'a>(
        &'a self,
        path: &BStr,
        ignore_case: bool,
        lookup: &AccelerateLookup<'a>,
    ) -> Option<&'a Entry> {
        lookup
            .icase_entries
            .find(AccelerateLookup::icase_hash(path), |e| {
                let entry_path = e.path(self);
                if entry_path == path {
                    return true;
                };
                if !ignore_case {
                    return false;
                }
                entry_path.eq_ignore_ascii_case(path)
            })
            .copied()
    }

    /// Return the entry (at any stage) that is inside of `directory`, or `None`,
    /// using `lookup` for acceleration.
    /// Note that submodules are not detected as directories and the user should
    /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
    /// possibility. Doing so might also reveal a sparse directory.
    ///
    /// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
    pub fn entry_closest_to_directory_icase<'a>(
        &'a self,
        directory: &BStr,
        ignore_case: bool,
        lookup: &AccelerateLookup<'a>,
    ) -> Option<&'a Entry> {
        lookup
            .icase_dirs
            .find(AccelerateLookup::icase_hash(directory), |dir| {
                let dir_path = dir.path(self);
                if dir_path == directory {
                    return true;
                };
                if !ignore_case {
                    return false;
                }
                dir_path.eq_ignore_ascii_case(directory)
            })
            .map(|dir| dir.entry)
    }

    /// Return the entry (at any stage) that is inside of `directory`, or `None`.
    /// Note that submodules are not detected as directories and the user should
    /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
    /// possibility. Doing so might also reveal a sparse directory.
    ///
    /// Note that this is a case-sensitive search.
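    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain `src/main.rs`:
    ///
    /// ```ignore
    /// // Any entry underneath `src/` proves that the directory is tracked.
    /// assert!(state.entry_closest_to_directory("src".into()).is_some());
    /// ```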
    pub fn entry_closest_to_directory(&self, directory: &BStr) -> Option<&Entry> {
        let idx = self.entry_index_by_path(directory).err()?;
        for entry in &self.entries[idx..] {
            let path = entry.path(self);
            if path.get(..directory.len())? != directory {
                break;
            }
            let dir_char = path.get(directory.len())?;
            if *dir_char > b'/' {
                break;
            }
            if *dir_char < b'/' {
                continue;
            }
            return Some(entry);
        }
        None
    }

    /// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative
    /// `path` and `stage`, or `None`.
    ///
    /// Use the index for accessing multiple stages if they exist, but at least the single matching entry.
    ///
    /// # Panics
    ///
    /// If `upper_bound` is out of bounds of our entries array.
    pub fn entry_index_by_path_and_stage_bounded(
        &self,
        path: &BStr,
        stage: entry::Stage,
        upper_bound: usize,
    ) -> Option<usize> {
        self.entries[..upper_bound]
            .binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage)))
            .ok()
    }

    /// Like [`entry_index_by_path_and_stage()`](State::entry_index_by_path_and_stage()),
    /// but returns the entry instead of the index.
    pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> {
        self.entry_index_by_path_and_stage(path, stage)
            .map(|idx| &self.entries[idx])
    }

    /// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict.
    ///
    /// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2.
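    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain `Cargo.toml`:
    ///
    /// ```ignore
    /// // Yields the stage-0 entry for unconflicted paths,
    /// // or the stage-2 ("ours") entry in case of a conflict.
    /// if let Some(entry) = state.entry_by_path("Cargo.toml".into()) {
    ///     assert!(matches!(entry.stage_raw(), 0 | 2));
    /// }
    /// ```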
    pub fn entry_by_path(&self, path: &BStr) -> Option<&Entry> {
        let mut stage_at_index = 0;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_at_index = e.stage_raw();
                }
                res
            })
            .ok()?;
        let idx = if stage_at_index == 0 || stage_at_index == 2 {
            idx
        } else {
            self.entry_index_by_idx_and_stage(path, idx, Stage::Ours as StageRaw, stage_at_index.cmp(&2))?
        };
        Some(&self.entries[idx])
    }

    /// Return the index at `Ok(index)` where the entry matching `path` (in any stage) can be found, or return
    /// `Err(index)` to indicate the insertion position at which an entry with `path` would fit in.
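    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to be a populated [`State`]:
    ///
    /// ```ignore
    /// match state.entry_index_by_path("some/path".into()) {
    ///     Ok(idx) => println!("entry present at index {idx}"),
    ///     Err(idx) => println!("an entry would be inserted at index {idx}"),
    /// }
    /// ```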
    pub fn entry_index_by_path(&self, path: &BStr) -> Result<usize, usize> {
        self.entries.binary_search_by(|e| e.path(self).cmp(path))
    }

    /// Return the slice of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
    ///
    /// If `prefix` is empty, all entries are returned.
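    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain entries under `docs/`:
    ///
    /// ```ignore
    /// // All entries whose path starts with `docs/`.
    /// let entries = state.prefixed_entries("docs/".into()).expect("at least one match");
    /// assert!(entries.iter().all(|e| e.path(&state).starts_with(b"docs/")));
    /// ```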
    pub fn prefixed_entries(&self, prefix: &BStr) -> Option<&[Entry]> {
        self.prefixed_entries_range(prefix).map(|range| &self.entries[range])
    }

    /// Return the range of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
    ///
    /// If `prefix` is empty, the range will include all entries.
    pub fn prefixed_entries_range(&self, prefix: &BStr) -> Option<Range<usize>> {
        if prefix.is_empty() {
            return Some(0..self.entries.len());
        }
        let prefix_len = prefix.len();
        let mut low = self.entries.partition_point(|e| {
            e.path(self)
                .get(..prefix_len)
                .map_or_else(|| e.path(self) <= &prefix[..e.path.len()], |p| p < prefix)
        });
        let mut high =
            low + self.entries[low..].partition_point(|e| e.path(self).get(..prefix_len).is_some_and(|p| p <= prefix));

        let low_entry = &self.entries.get(low)?;
        if low_entry.stage_raw() != 0 {
            low = self
                .walk_entry_stages(low_entry.path(self), low, Ordering::Less)
                .unwrap_or(low);
        }
        if let Some(high_entry) = self.entries.get(high) {
            if high_entry.stage_raw() != 0 {
                high = self
                    .walk_entry_stages(high_entry.path(self), high, Ordering::Less)
                    .unwrap_or(high);
            }
        }
        (low != high).then_some(low..high)
    }

    /// Return the entry at `idx` or _panic_ if the index is out of bounds.
    ///
    /// The `idx` is typically returned by [`entry_by_path_and_stage()`][State::entry_by_path_and_stage()].
    pub fn entry(&self, idx: usize) -> &Entry {
        &self.entries[idx]
    }

    /// Returns a boolean value indicating whether the index is sparse or not.
    ///
    /// An index is sparse if it contains at least one [`Mode::DIR`][entry::Mode::DIR] entry.
    pub fn is_sparse(&self) -> bool {
        self.is_sparse
    }

    /// Return the range of entries that exactly match the given `path`, in all available stages, or `None` if no entry with such
    /// path exists.
    ///
    /// The range can be used to access the respective entries via [`entries()`](Self::entries()) or [`entries_mut()`](Self::entries_mut()).
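    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to contain a three-way conflict at `file`:
    ///
    /// ```ignore
    /// // A three-way conflict is stored as the entries at stages 1, 2 and 3.
    /// let range = state.entry_range("file".into()).expect("entry exists");
    /// assert_eq!(state.entries()[range].len(), 3);
    /// ```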
    pub fn entry_range(&self, path: &BStr) -> Option<Range<usize>> {
        let mut stage_at_index = 0;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_at_index = e.stage_raw();
                }
                res
            })
            .ok()?;

        let (start, end) = (
            self.walk_entry_stages(path, idx, Ordering::Less).unwrap_or(idx),
            self.walk_entry_stages(path, idx, Ordering::Greater).unwrap_or(idx) + 1,
        );
        Some(start..end)
    }
}

impl AccelerateLookup<'_> {
    fn with_capacity(cap: usize) -> Self {
        let ratio_of_entries_to_dirs_in_webkit = 20; // 400k entries and 20k dirs
        Self {
            icase_entries: hashbrown::HashTable::with_capacity(cap),
            icase_dirs: hashbrown::HashTable::with_capacity(cap / ratio_of_entries_to_dirs_in_webkit),
        }
    }
    fn icase_hash(data: &BStr) -> u64 {
        use std::hash::Hasher;
        let mut hasher = fnv::FnvHasher::default();
        for b in data.as_bytes() {
            hasher.write_u8(b.to_ascii_lowercase());
        }
        hasher.finish()
    }
}

/// Mutation
impl State {
    /// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here.
    /// Note that it must not be empty.
    pub fn return_path_backing(&mut self, backing: PathStorage) {
        debug_assert!(
            self.path_backing.is_empty(),
            "BUG: return path backing only after taking it, once"
        );
        self.path_backing = backing;
    }

    /// Return mutable entries in a slice.
    pub fn entries_mut(&mut self) -> &mut [Entry] {
        &mut self.entries
    }

    /// Return a writable slice to entries and read-access to their path storage at the same time.
    pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) {
        (&mut self.entries, &self.path_backing)
    }

    /// Return mutable entries along with their paths in an iterator.
    pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> {
        let paths = &self.path_backing;
        self.entries.iter_mut().map(move |e| {
            let path = paths[e.path.clone()].as_bstr();
            (e, path)
        })
    }

    /// Return all parts that relate to entries, which includes path storage.
    ///
    /// This can be useful for obtaining a standalone, boxable iterator.
    pub fn into_entries(self) -> (Vec<Entry>, PathStorage) {
        (self.entries, self.path_backing)
    }

    /// Sometimes it's needed to remove the path backing to allow certain mutation to happen in the state while supporting reading the entry's
    /// path.
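    ///
    /// ### Example
    ///
    /// A minimal sketch of the take/return dance, not compiled as a doctest; `state` is assumed to be a populated [`State`]:
    ///
    /// ```ignore
    /// let backing = state.take_path_backing();
    /// for (entry, path) in state.entries_mut_with_paths_in(&backing) {
    ///     // mutate `entry` while still being able to read `path`
    /// }
    /// state.return_path_backing(backing);
    /// ```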
    pub fn take_path_backing(&mut self) -> PathStorage {
        assert_eq!(
            self.entries.is_empty(),
            self.path_backing.is_empty(),
            "BUG: cannot take out backing multiple times"
        );
        std::mem::take(&mut self.path_backing)
    }

    /// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()],
    /// but returns the mutable entry instead of the index.
    pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> {
        self.entry_index_by_path_and_stage(path, stage)
            .map(|idx| &mut self.entries[idx])
    }

    /// Push a new entry containing `stat`, `id`, `flags`, `mode` and `path` to the end of our storage, without performing
    /// any sanity checks. This means it's possible to push a new entry to the same path on the same stage and even after sorting
    /// the entries lookups may still return the wrong one of them unless the correct binary search criteria is chosen.
    ///
    /// Note that this *is likely* to break invariants that will prevent further lookups by path unless
    /// [`entry_index_by_path_and_stage_bounded()`][State::entry_index_by_path_and_stage_bounded()] is used with
    /// the `upper_bound` being the amount of entries before the first call to this method.
    ///
    /// Alternatively, make sure to call [`sort_entries()`][State::sort_entries()] before entry lookup by path to restore
    /// the invariant.
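    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; the values are placeholders and assume
    /// `Stat` implements `Default` and that a null object id is acceptable here:
    ///
    /// ```ignore
    /// state.dangerously_push_entry(
    ///     gix_index::entry::Stat::default(),
    ///     gix_hash::Kind::Sha1.null(),
    ///     gix_index::entry::Flags::empty(),
    ///     gix_index::entry::Mode::FILE,
    ///     "z-new-file".into(),
    /// );
    /// // Restore the sorting invariant before any lookup by path.
    /// state.sort_entries();
    /// ```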
    pub fn dangerously_push_entry(
        &mut self,
        stat: entry::Stat,
        id: gix_hash::ObjectId,
        flags: entry::Flags,
        mode: entry::Mode,
        path: &BStr,
    ) {
        let path = {
            let path_start = self.path_backing.len();
            self.path_backing.push_str(path);
            path_start..self.path_backing.len()
        };

        self.entries.push(Entry {
            stat,
            id,
            flags,
            mode,
            path,
        });
    }

    /// Unconditionally sort entries as needed to perform lookups quickly.
    pub fn sort_entries(&mut self) {
        let path_backing = &self.path_backing;
        self.entries.sort_by(|a, b| {
            Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
                .then_with(|| a.stage().cmp(&b.stage()))
        });
    }

    /// Similar to [`sort_entries()`][State::sort_entries()], but applies `compare` after comparing
    /// by path and stage as a third criterion.
    pub fn sort_entries_by(&mut self, mut compare: impl FnMut(&Entry, &Entry) -> Ordering) {
        let path_backing = &self.path_backing;
        self.entries.sort_by(|a, b| {
            Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
                .then_with(|| a.stage().cmp(&b.stage()))
                .then_with(|| compare(a, b))
        });
    }

    /// Physically remove all entries for which `should_remove(idx, path, entry)` returns `true`, traversing them from first to last.
    ///
    /// Note that the memory used for the removed entries paths is not freed, as it's append-only.
    ///
    /// ### Performance
    ///
    /// Typically one would rather add [entry::Flags::REMOVE] to each entry, so they are removed
    /// when [writing the index](Self::write_to()).
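    ///
    /// ### Example
    ///
    /// A minimal sketch, not compiled as a doctest; `state` is assumed to be a populated [`State`]:
    ///
    /// ```ignore
    /// // Physically drop everything underneath `target/`.
    /// state.remove_entries(|_idx, path, _entry| path.starts_with(b"target/"));
    /// ```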
    pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) {
        let mut index = 0;
        let paths = &self.path_backing;
        self.entries.retain_mut(|e| {
            let path = e.path_in(paths);
            let res = !should_remove(index, path, e);
            index += 1;
            res
        });
    }
}

/// Extensions
impl State {
    /// Access the `tree` extension.
    pub fn tree(&self) -> Option<&extension::Tree> {
        self.tree.as_ref()
    }
    /// Access the `link` extension.
    pub fn link(&self) -> Option<&extension::Link> {
        self.link.as_ref()
    }
    /// Obtain the resolve-undo extension.
    pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> {
        self.resolve_undo.as_ref()
    }
    /// Obtain the untracked extension.
    pub fn untracked(&self) -> Option<&extension::UntrackedCache> {
        self.untracked.as_ref()
    }
    /// Obtain the fsmonitor extension.
    pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> {
        self.fs_monitor.as_ref()
    }
    /// Return `true` if the end-of-index extension was present when decoding this index.
    pub fn had_end_of_index_marker(&self) -> bool {
        self.end_of_index_at_decode_time
    }
    /// Return `true` if the offset-table extension was present when decoding this index.
    pub fn had_offset_table(&self) -> bool {
        self.offset_table_at_decode_time
    }
}

#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    #[test]
    fn entry_by_path_with_conflicting_file() {
        let file = PathBuf::from("tests")
            .join("fixtures")
            .join(Path::new("loose_index").join("conflicting-file.git-index"));
        let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file");
        assert_eq!(
            file.entries().len(),
            3,
            "we have a set of conflict entries for a single file"
        );
        for idx in 0..3 {
            for wanted_stage in 1..=3 {
                let actual_idx = file
                    .entry_index_by_idx_and_stage(
                        "file".into(),
                        idx,
                        wanted_stage,
                        (idx + 1).cmp(&(wanted_stage as usize)),
                    )
                    .expect("found");
                assert_eq!(
                    actual_idx + 1,
                    wanted_stage as usize,
                    "the index and stage have a relation, and that is upheld if we search correctly"
                );
            }
        }
    }
}