gix_dir/walk/
mod.rs

1use crate::{entry, EntryRef};
2use bstr::{BStr, BString};
3use std::collections::BTreeSet;
4use std::path::PathBuf;
5use std::sync::atomic::AtomicBool;
6
/// The value handed back by [`Delegate::emit()`] to tell [`walk()`](function::walk()) how to proceed.
#[must_use]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Action {
    /// Keep traversing as usual.
    Continue,
    /// Stop the traversal and exit it.
    Cancel,
}
16
17/// Ready-made delegate implementations.
18pub mod delegate {
19    use crate::walk::Action;
20    use crate::{entry, walk, Entry, EntryRef};
21
22    type Entries = Vec<(Entry, Option<entry::Status>)>;
23
24    /// A [`Delegate`](walk::Delegate) implementation that collects all `entries` along with their directory status, if present.
25    ///
26    /// Note that this allocates for each entry.
27    #[derive(Default)]
28    pub struct Collect {
29        /// All collected entries, in any order.
30        pub unorded_entries: Entries,
31    }
32
33    impl Collect {
34        /// Return the list of entries that were emitted, sorted ascending by their repository-relative tree path.
35        pub fn into_entries_by_path(mut self) -> Entries {
36            self.unorded_entries.sort_by(|a, b| a.0.rela_path.cmp(&b.0.rela_path));
37            self.unorded_entries
38        }
39    }
40
41    impl walk::Delegate for Collect {
42        fn emit(&mut self, entry: EntryRef<'_>, dir_status: Option<entry::Status>) -> Action {
43            self.unorded_entries.push((entry.to_owned(), dir_status));
44            walk::Action::Continue
45        }
46    }
47}
48
49/// A way for the caller to control the traversal based on provided data.
50pub trait Delegate {
51    /// Called for each observed `entry` *inside* a directory, or the directory itself if the traversal is configured
52    /// to simplify the result (i.e. if every file in a directory is ignored, emit the containing directory instead
53    /// of each file), or if the root of the traversal passes through a directory that can't be traversed.
54    ///
55    /// It will also be called if the `root` in [`walk()`](crate::walk()) itself is matching a particular status,
56    /// even if it is a file.
57    ///
58    /// Note that tracked entries will only be emitted if [`Options::emit_tracked`] is `true`.
59    /// Further, not all pruned entries will be observable as they might be pruned so early that the kind of
60    /// item isn't yet known. Pruned entries are also only emitted if [`Options::emit_pruned`] is `true`.
61    ///
62    /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
63    /// `dir_status` that wasn't the same as the one of `entry` and if [Options::emit_collapsed] was
64    /// [CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option
65    /// was [CollapsedEntriesEmissionMode::All].
66    fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<entry::Status>) -> Action;
67
68    /// Return `true` if the given entry can be recursed into. Will only be called if the entry is a physical directory.
69    /// The base implementation will act like Git does by default in `git status` or `git clean`.
70    ///
71    /// Use `for_deletion` to specify if the seen entries should ultimately be deleted, which may affect the decision
72    /// of whether to resource or not.
73    ///
74    /// If `worktree_root_is_repository` is `true`, then this status is part of the root of an iteration, and the corresponding
75    /// worktree root is a repository itself. This typically happens for submodules. In this case, recursion rules are relaxed
76    /// to allow traversing submodule worktrees.
77    ///
78    /// Note that this method will see all directories, even though not all of them may end up being [emitted](Self::emit()).
79    /// If this method returns `false`, the `entry` will always be emitted.
80    fn can_recurse(
81        &mut self,
82        entry: EntryRef<'_>,
83        for_deletion: Option<ForDeletionMode>,
84        worktree_root_is_repository: bool,
85    ) -> bool {
86        entry.status.can_recurse(
87            entry.disk_kind,
88            entry.pathspec_match,
89            for_deletion,
90            worktree_root_is_repository,
91        )
92    }
93}
94
/// Determines how entries are emitted using the [Delegate].
///
/// Depending on the choice, entries are emitted immediately or have to be held back.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum EmissionMode {
    /// Emit every entry exactly as it matches, without any kind of simplification.
    ///
    /// In this mode, emissions happen as entries occur, without buffering or ordering them.
    #[default]
    Matching,
    /// Emit just the containing directory if all of its entries are of the same type.
    ///
    /// Note that this is more expensive, as all entries in the directory structure have to be tracked
    /// until it's clear what to finally emit.
    CollapseDirectory,
}
111
/// Determines how entries contained in collapsed directories are emitted using the [Delegate].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CollapsedEntriesEmissionMode {
    /// Emit an entry only if its status differs from the status of the parent directory that is
    /// about to be collapsed.
    ///
    /// E.g. if a directory is determined to be untracked while the entries in question are ignored,
    /// these entries will be emitted.
    ///
    /// Entries sharing the status of the directory are essentially 'merged' into the collapsing directory
    /// and are not observable anymore.
    #[default]
    OnStatusMismatch,
    /// Emit every entry inside of a collapsed directory so that all of them are observable.
    All,
}
128
/// For walks that are meant for deletion: assures that directories with precious files in them are not
/// collapsed, and that no entries are observable that shouldn't be deleted.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ForDeletionMode {
    /// Stop traversing into ignored directories. This may save a lot of time, but these directories may also
    /// include nested repositories which might end up being deleted.
    #[default]
    IgnoredDirectoriesCanHideNestedRepositories,
    /// Rather than skipping over ignored directories entirely, dive in and find ignored non-bare repositories
    /// so that these are emitted separately and prevent collapsing. A directory with `.git` inside is assumed
    /// to be such a repository.
    /// Only relevant when ignored entries are emitted.
    FindNonBareRepositoriesInIgnoredDirectories,
    /// A more expensive form of the variant above, as it finds all repositories, bare or non-bare.
    FindRepositoriesInIgnoredDirectories,
}
144
145/// Options for use in [`walk()`](function::walk()) function.
146#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
147pub struct Options<'a> {
148    /// If `true`, the filesystem will store paths as decomposed unicode, i.e. `รค` becomes `"a\u{308}"`, which means that
149    /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
150    /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and
151    /// perform conversions accordingly.
152    /// If `false`, no conversions will be performed.
153    pub precompose_unicode: bool,
154    /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`.
155    /// This is also called case-folding.
156    /// Note that [pathspecs](Context::pathspec) must also be using the same defaults, which makes them match case-insensitive
157    /// automatically.
158    pub ignore_case: bool,
159    /// If `true`, we will stop figuring out if any directory that is a candidate for recursion is also a nested repository,
160    /// which saves time but leads to recurse into it. If `false`, nested repositories will not be traversed.
161    pub recurse_repositories: bool,
162    /// If `true`, entries that are pruned and whose [Kind](crate::entry::Kind) is known will be emitted.
163    pub emit_pruned: bool,
164    /// If `Some(mode)`, entries that are ignored will be emitted according to the given `mode`.
165    /// If `None`, ignored entries will not be emitted at all.
166    pub emit_ignored: Option<EmissionMode>,
167    /// When the walk is for deletion, this must be `Some(_)` to assure we don't collapse directories that have precious files in
168    /// them, and otherwise assure that no entries are observable that shouldn't be deleted.
169    /// If `None`, precious files are treated like expendable files, which is usually what you want when displaying them
170    /// for addition to the repository, and the collapse of folders can be more generous in relation to ignored files.
171    pub for_deletion: Option<ForDeletionMode>,
172    /// If `true`, we will not only find non-bare repositories in untracked directories, but also bare ones.
173    ///
174    /// Note that this is very costly, but without it, bare repositories will appear like untracked directories when collapsed,
175    /// and they will be recursed into.
176    pub classify_untracked_bare_repositories: bool,
177    /// If `true`, we will also emit entries for tracked items. Otherwise these will remain 'hidden', even if a pathspec directly
178    /// refers to it.
179    pub emit_tracked: bool,
180    /// Controls the way untracked files are emitted. By default, this is happening immediately and without any simplification.
181    pub emit_untracked: EmissionMode,
182    /// If `true`, emit empty directories as well. Note that a directory also counts as empty if it has any amount or depth of nested
183    /// subdirectories, as long as none of them includes a file.
184    /// Thus, this makes leaf-level empty directories visible, as those don't have any content.
185    pub emit_empty_directories: bool,
186    /// If `None`, no entries inside of collapsed directories are emitted. Otherwise, act as specified by `Some(mode)`.
187    pub emit_collapsed: Option<CollapsedEntriesEmissionMode>,
188    /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory
189    /// when checking for exclusion.
190    ///
191    /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded,
192    /// if `true` it will be excluded as the symlink is considered a directory.
193    ///
194    /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
195    pub symlinks_to_directories_are_ignored_like_directories: bool,
196    /// A set of all git worktree checkouts that are located within the main worktree directory.
197    ///
198    /// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
199    /// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
200    pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
201}
202
/// All information that is required to perform a dirwalk, and classify paths properly.
///
/// Every field is either a plain reference or an optional reference bound to the lifetime `'a`,
/// so the context itself owns none of the data it refers to.
pub struct Context<'a> {
    /// If not `None`, it will be checked before entering any directory to trigger early interruption.
    ///
    /// If this flag is `true` at any point in the iteration, it will abort with an error.
    pub should_interrupt: Option<&'a AtomicBool>,
    /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
    ///
    /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
    /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
    pub git_dir_realpath: &'a std::path::Path,
    /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
    /// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
    ///
    /// It is also used to assure that the current working dir will not be collapsed when the walk is for deletion.
    pub current_dir: &'a std::path::Path,
    /// The index to quickly understand if a file or directory is tracked or not.
    ///
    /// ### Important
    ///
    /// The index must have been validated so that each entry that is considered up-to-date will have the [gix_index::entry::Flags::UPTODATE] flag
    /// set. Otherwise the index entry is not considered and a disk-access may occur which is costly.
    pub index: &'a gix_index::State,
    /// A utility to lookup index entries faster, and deal with ignore-case handling.
    ///
    /// Must be set if `ignore_case` is `true`, or else some entries won't be found if their case is different.
    ///
    /// ### Deviation
    ///
    /// Git uses a name-based hash (for looking up entries, not directories) even when operating
    /// in case-sensitive mode. It does, however, skip the directory hash creation (for looking
    /// up directories) unless `core.ignoreCase` is enabled.
    ///
    /// We only use the hashmap when available and when [`ignore_case`](Options::ignore_case) is enabled in the options.
    pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
    /// A pathspec to use as filter - we only traverse into directories if it matches.
    /// Note that the `ignore_case` setting it uses should match our [Options::ignore_case].
    /// If no such filtering is desired, pass an empty `pathspec` which will match everything.
    pub pathspec: &'a mut gix_pathspec::Search,
    /// The `attributes` callback for use in [gix_pathspec::Search::pattern_matching_relative_path()], which happens when
    /// pathspecs use attributes for filtering.
    /// If `pathspec` isn't empty, this function may be called if pathspecs perform attribute lookups.
    pub pathspec_attributes: &'a mut dyn FnMut(
        &BStr,
        gix_pathspec::attributes::glob::pattern::Case,
        bool,
        &mut gix_pathspec::attributes::search::Outcome,
    ) -> bool,
    /// A way to query the `.gitignore` files to see if a directory or file is ignored.
    /// Set to `None` to skip all checks for ignored entries, which turns previously ignored files into untracked ones, a useful
    /// operation when trying to add ignored files to a repository.
    pub excludes: Option<&'a mut gix_worktree::Stack>,
    /// Access to the object database for use with `excludes` - it's possible to access `.gitignore` files in the index if configured.
    pub objects: &'a dyn gix_object::Find,
    /// If not `None`, override the traversal root that is computed and use this one instead.
    ///
    /// This can be useful if the traversal root may be a file, in which case the traversal will
    /// still be returning possibly matching root entries.
    ///
    /// ### Panics
    ///
    /// If the `traversal_root` is not in the `worktree_root` passed to [walk()](crate::walk()).
    pub explicit_traversal_root: Option<&'a std::path::Path>,
}
267
/// Additional information gathered as the outcome of [`walk()`](function::walk()).
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Outcome {
    /// How many calls were made to read the contents of a directory.
    pub read_dir_calls: u32,
    /// How many entries were returned by providing them to the callback. This number can be lower than
    /// `seen_entries`.
    pub returned_entries: usize,
    /// How many entries there were before pathspecs filtered them out or they were excluded otherwise.
    pub seen_entries: u32,
}
278
279/// The error returned by [`walk()`](function::walk()).
280#[derive(Debug, thiserror::Error)]
281#[allow(missing_docs)]
282pub enum Error {
283    #[error("Interrupted")]
284    Interrupted,
285    #[error("Worktree root at '{}' is not a directory", root.display())]
286    WorktreeRootIsFile { root: PathBuf },
287    #[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())]
288    NormalizeRoot { root: PathBuf },
289    #[error("A symlink was found at component {component_index} of traversal root '{}' as seen from worktree root '{}'", root.display(), worktree_root.display())]
290    SymlinkInRoot {
291        root: PathBuf,
292        worktree_root: PathBuf,
293        /// This index starts at 0, with 0 being the first component.
294        component_index: usize,
295    },
296    #[error("Failed to update the excludes stack to see if a path is excluded")]
297    ExcludesAccess(std::io::Error),
298    #[error("Failed to read the directory at '{}'", path.display())]
299    ReadDir { path: PathBuf, source: std::io::Error },
300    #[error("Could not obtain directory entry in root of '{}'", parent_directory.display())]
301    DirEntry {
302        parent_directory: PathBuf,
303        source: std::io::Error,
304    },
305    #[error("Could not obtain filetype of directory entry '{}'", path.display())]
306    DirEntryFileType { path: PathBuf, source: std::io::Error },
307    #[error("Could not obtain symlink metadata on '{}'", path.display())]
308    SymlinkMetadata { path: PathBuf, source: std::io::Error },
309}
310
// Private submodule, declared without any visibility modifier.
mod classify;
// Crate-visible; the documentation in this file links to `walk()` as `function::walk()`.
pub(crate) mod function;
// Private submodule, declared without any visibility modifier.
mod readdir;