gix_dir/walk/mod.rs
1use crate::{entry, EntryRef};
2use bstr::{BStr, BString};
3use std::collections::BTreeSet;
4use std::path::PathBuf;
5use std::sync::atomic::AtomicBool;
6
/// A type returned by the [`Delegate::emit()`] as passed to [`walk()`](function::walk()).
///
/// It decides whether the traversal keeps going after an entry was emitted.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[must_use]
pub enum Action {
    /// Continue the traversal as normal.
    Continue,
    /// Do not continue the traversal, but exit it.
    Cancel,
}
16
17/// Ready-made delegate implementations.
18pub mod delegate {
19 use crate::walk::Action;
20 use crate::{entry, walk, Entry, EntryRef};
21
22 type Entries = Vec<(Entry, Option<entry::Status>)>;
23
24 /// A [`Delegate`](walk::Delegate) implementation that collects all `entries` along with their directory status, if present.
25 ///
26 /// Note that this allocates for each entry.
27 #[derive(Default)]
28 pub struct Collect {
29 /// All collected entries, in any order.
30 pub unorded_entries: Entries,
31 }
32
33 impl Collect {
34 /// Return the list of entries that were emitted, sorted ascending by their repository-relative tree path.
35 pub fn into_entries_by_path(mut self) -> Entries {
36 self.unorded_entries.sort_by(|a, b| a.0.rela_path.cmp(&b.0.rela_path));
37 self.unorded_entries
38 }
39 }
40
41 impl walk::Delegate for Collect {
42 fn emit(&mut self, entry: EntryRef<'_>, dir_status: Option<entry::Status>) -> Action {
43 self.unorded_entries.push((entry.to_owned(), dir_status));
44 walk::Action::Continue
45 }
46 }
47}
48
49/// A way for the caller to control the traversal based on provided data.
50pub trait Delegate {
51 /// Called for each observed `entry` *inside* a directory, or the directory itself if the traversal is configured
52 /// to simplify the result (i.e. if every file in a directory is ignored, emit the containing directory instead
53 /// of each file), or if the root of the traversal passes through a directory that can't be traversed.
54 ///
55 /// It will also be called if the `root` in [`walk()`](crate::walk()) itself is matching a particular status,
56 /// even if it is a file.
57 ///
58 /// Note that tracked entries will only be emitted if [`Options::emit_tracked`] is `true`.
59 /// Further, not all pruned entries will be observable as they might be pruned so early that the kind of
60 /// item isn't yet known. Pruned entries are also only emitted if [`Options::emit_pruned`] is `true`.
61 ///
62 /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
63 /// `dir_status` that wasn't the same as the one of `entry` and if [Options::emit_collapsed] was
64 /// [CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option
65 /// was [CollapsedEntriesEmissionMode::All].
66 fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<entry::Status>) -> Action;
67
68 /// Return `true` if the given entry can be recursed into. Will only be called if the entry is a physical directory.
69 /// The base implementation will act like Git does by default in `git status` or `git clean`.
70 ///
71 /// Use `for_deletion` to specify if the seen entries should ultimately be deleted, which may affect the decision
72 /// of whether to resource or not.
73 ///
74 /// If `worktree_root_is_repository` is `true`, then this status is part of the root of an iteration, and the corresponding
75 /// worktree root is a repository itself. This typically happens for submodules. In this case, recursion rules are relaxed
76 /// to allow traversing submodule worktrees.
77 ///
78 /// Note that this method will see all directories, even though not all of them may end up being [emitted](Self::emit()).
79 /// If this method returns `false`, the `entry` will always be emitted.
80 fn can_recurse(
81 &mut self,
82 entry: EntryRef<'_>,
83 for_deletion: Option<ForDeletionMode>,
84 worktree_root_is_repository: bool,
85 ) -> bool {
86 entry.status.can_recurse(
87 entry.disk_kind,
88 entry.pathspec_match,
89 for_deletion,
90 worktree_root_is_repository,
91 )
92 }
93}
94
/// The way entries are emitted using the [Delegate].
///
/// The choice here controls if entries are emitted immediately, or have to be held back.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum EmissionMode {
    /// Emit each entry as it matches exactly, without doing any kind of simplification.
    ///
    /// Emissions in this mode are happening as they occur, without any buffering or ordering.
    #[default]
    Matching,
    /// Emit only a containing directory if all of its entries are of the same type.
    ///
    /// Note that doing so is more expensive as it requires us to keep track of all entries in the directory structure
    /// until it's clear what to finally emit.
    CollapseDirectory,
}
111
/// The way entries that are contained in collapsed directories are emitted using the [Delegate].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum CollapsedEntriesEmissionMode {
    /// Emit only entries if their status does not match the one of the parent directory that is
    /// going to be collapsed.
    ///
    /// E.g. if a directory is determined to be untracked, and the entries in question are ignored,
    /// they will be emitted.
    ///
    /// Entries that have the same status will essentially be 'merged' into the collapsing directory
    /// and won't be observable anymore.
    #[default]
    OnStatusMismatch,
    /// Emit all entries inside of a collapsed directory to make them observable.
    All,
}
128
/// When the walk is for deletion, assure that we don't collapse directories that have precious files in
/// them, and otherwise assure that no entries are observable that shouldn't be deleted.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum ForDeletionMode {
    /// We will stop traversing into ignored directories which may save a lot of time, but also may include nested repositories
    /// which might end up being deleted.
    #[default]
    IgnoredDirectoriesCanHideNestedRepositories,
    /// Instead of skipping over ignored directories entirely, we will dive in and find ignored non-bare repositories
    /// so these are emitted separately and prevent collapsing. These are assumed to be a directory with `.git` inside.
    /// Only relevant when ignored entries are emitted.
    FindNonBareRepositoriesInIgnoredDirectories,
    /// This is a more expensive form of the above variant as it finds all repositories, bare or non-bare.
    FindRepositoriesInIgnoredDirectories,
}
144
145/// Options for use in [`walk()`](function::walk()) function.
146#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
147pub struct Options<'a> {
148 /// If `true`, the filesystem will store paths as decomposed unicode, i.e. `รค` becomes `"a\u{308}"`, which means that
149 /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
150 /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and
151 /// perform conversions accordingly.
152 /// If `false`, no conversions will be performed.
153 pub precompose_unicode: bool,
154 /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`.
155 /// This is also called case-folding.
156 /// Note that [pathspecs](Context::pathspec) must also be using the same defaults, which makes them match case-insensitive
157 /// automatically.
158 pub ignore_case: bool,
159 /// If `true`, we will stop figuring out if any directory that is a candidate for recursion is also a nested repository,
160 /// which saves time but leads to recurse into it. If `false`, nested repositories will not be traversed.
161 pub recurse_repositories: bool,
162 /// If `true`, entries that are pruned and whose [Kind](crate::entry::Kind) is known will be emitted.
163 pub emit_pruned: bool,
164 /// If `Some(mode)`, entries that are ignored will be emitted according to the given `mode`.
165 /// If `None`, ignored entries will not be emitted at all.
166 pub emit_ignored: Option<EmissionMode>,
167 /// When the walk is for deletion, this must be `Some(_)` to assure we don't collapse directories that have precious files in
168 /// them, and otherwise assure that no entries are observable that shouldn't be deleted.
169 /// If `None`, precious files are treated like expendable files, which is usually what you want when displaying them
170 /// for addition to the repository, and the collapse of folders can be more generous in relation to ignored files.
171 pub for_deletion: Option<ForDeletionMode>,
172 /// If `true`, we will not only find non-bare repositories in untracked directories, but also bare ones.
173 ///
174 /// Note that this is very costly, but without it, bare repositories will appear like untracked directories when collapsed,
175 /// and they will be recursed into.
176 pub classify_untracked_bare_repositories: bool,
177 /// If `true`, we will also emit entries for tracked items. Otherwise these will remain 'hidden', even if a pathspec directly
178 /// refers to it.
179 pub emit_tracked: bool,
180 /// Controls the way untracked files are emitted. By default, this is happening immediately and without any simplification.
181 pub emit_untracked: EmissionMode,
182 /// If `true`, emit empty directories as well. Note that a directory also counts as empty if it has any amount or depth of nested
183 /// subdirectories, as long as none of them includes a file.
184 /// Thus, this makes leaf-level empty directories visible, as those don't have any content.
185 pub emit_empty_directories: bool,
186 /// If `None`, no entries inside of collapsed directories are emitted. Otherwise, act as specified by `Some(mode)`.
187 pub emit_collapsed: Option<CollapsedEntriesEmissionMode>,
188 /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory
189 /// when checking for exclusion.
190 ///
191 /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded,
192 /// if `true` it will be excluded as the symlink is considered a directory.
193 ///
194 /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
195 pub symlinks_to_directories_are_ignored_like_directories: bool,
196 /// A set of all git worktree checkouts that are located within the main worktree directory.
197 ///
198 /// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
199 /// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
200 pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
201}
202
203/// All information that is required to perform a dirwalk, and classify paths properly.
204pub struct Context<'a> {
205 /// If not `None`, it will be checked before entering any directory to trigger early interruption.
206 ///
207 /// If this flag is `true` at any point in the iteration, it will abort with an error.
208 pub should_interrupt: Option<&'a AtomicBool>,
209 /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
210 ///
211 /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
212 /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
213 pub git_dir_realpath: &'a std::path::Path,
214 /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
215 /// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
216 ///
217 /// It is also used to assure that when the walk is for deletion, that the current working dir will not be collapsed.
218 pub current_dir: &'a std::path::Path,
219 /// The index to quickly understand if a file or directory is tracked or not.
220 ///
221 /// ### Important
222 ///
223 /// The index must have been validated so that each entry that is considered up-to-date will have the [gix_index::entry::Flags::UPTODATE] flag
224 /// set. Otherwise the index entry is not considered and a disk-access may occur which is costly.
225 pub index: &'a gix_index::State,
226 /// A utility to lookup index entries faster, and deal with ignore-case handling.
227 ///
228 /// Must be set if `ignore_case` is `true`, or else some entries won't be found if their case is different.
229 ///
230 /// ### Deviation
231 ///
232 /// Git uses a name-based hash (for looking up entries, not directories) even when operating
233 /// in case-sensitive mode. It does, however, skip the directory hash creation (for looking
234 /// up directories) unless `core.ignoreCase` is enabled.
235 ///
236 /// We only use the hashmap when available and when [`ignore_case`](Options::ignore_case) is enabled in the options.
237 pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
238 /// A pathspec to use as filter - we only traverse into directories if it matches.
239 /// Note that the `ignore_case` setting it uses should match our [Options::ignore_case].
240 /// If no such filtering is desired, pass an empty `pathspec` which will match everything.
241 pub pathspec: &'a mut gix_pathspec::Search,
242 /// The `attributes` callback for use in [gix_pathspec::Search::pattern_matching_relative_path()], which happens when
243 /// pathspecs use attributes for filtering.
244 /// If `pathspec` isn't empty, this function may be called if pathspecs perform attribute lookups.
245 pub pathspec_attributes: &'a mut dyn FnMut(
246 &BStr,
247 gix_pathspec::attributes::glob::pattern::Case,
248 bool,
249 &mut gix_pathspec::attributes::search::Outcome,
250 ) -> bool,
251 /// A way to query the `.gitignore` files to see if a directory or file is ignored.
252 /// Set to `None` to not perform any work on checking for ignored, which turns previously ignored files into untracked ones, a useful
253 /// operation when trying to add ignored files to a repository.
254 pub excludes: Option<&'a mut gix_worktree::Stack>,
255 /// Access to the object database for use with `excludes` - it's possible to access `.gitignore` files in the index if configured.
256 pub objects: &'a dyn gix_object::Find,
257 /// If not `None`, override the traversal root that is computed and use this one instead.
258 ///
259 /// This can be useful if the traversal root may be a file, in which case the traversal will
260 /// still be returning possibly matching root entries.
261 ///
262 /// ### Panics
263 ///
264 /// If the `traversal_root` is not in the `worktree_root` passed to [walk()](crate::walk()).
265 pub explicit_traversal_root: Option<&'a std::path::Path>,
266}
267
/// Additional information collected as outcome of [`walk()`](function::walk()).
#[derive(Default, Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct Outcome {
    /// The amount of calls to read the directory contents.
    pub read_dir_calls: u32,
    /// The amount of returned entries provided to the callback. This number can be lower than `seen_entries`.
    pub returned_entries: usize,
    /// The amount of entries, prior to pathspecs filtering them out or otherwise excluding them.
    pub seen_entries: u32,
}
278
279/// The error returned by [`walk()`](function::walk()).
280#[derive(Debug, thiserror::Error)]
281#[allow(missing_docs)]
282pub enum Error {
283 #[error("Interrupted")]
284 Interrupted,
285 #[error("Worktree root at '{}' is not a directory", root.display())]
286 WorktreeRootIsFile { root: PathBuf },
287 #[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())]
288 NormalizeRoot { root: PathBuf },
289 #[error("A symlink was found at component {component_index} of traversal root '{}' as seen from worktree root '{}'", root.display(), worktree_root.display())]
290 SymlinkInRoot {
291 root: PathBuf,
292 worktree_root: PathBuf,
293 /// This index starts at 0, with 0 being the first component.
294 component_index: usize,
295 },
296 #[error("Failed to update the excludes stack to see if a path is excluded")]
297 ExcludesAccess(std::io::Error),
298 #[error("Failed to read the directory at '{}'", path.display())]
299 ReadDir { path: PathBuf, source: std::io::Error },
300 #[error("Could not obtain directory entry in root of '{}'", parent_directory.display())]
301 DirEntry {
302 parent_directory: PathBuf,
303 source: std::io::Error,
304 },
305 #[error("Could not obtain filetype of directory entry '{}'", path.display())]
306 DirEntryFileType { path: PathBuf, source: std::io::Error },
307 #[error("Could not obtain symlink metadata on '{}'", path.display())]
308 SymlinkMetadata { path: PathBuf, source: std::io::Error },
309}
310
311mod classify;
312pub(crate) mod function;
313mod readdir;