// gix_dir/walk/mod.rs
use crate::{entry, EntryRef};
use bstr::{BStr, BString};
use std::collections::BTreeSet;
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
/// The value a [`Delegate::emit()`] call hands back to [`walk()`](function::walk()) to steer the traversal.
#[must_use]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Action {
    /// Keep traversing as usual.
    Continue,
    /// Stop the traversal and exit it.
    Cancel,
}
/// Delegate implementations that can be used as-is.
pub mod delegate {
    use crate::walk::Action;
    use crate::{entry, walk, Entry, EntryRef};

    type Entries = Vec<(Entry, Option<entry::Status>)>;

    /// A [`Delegate`](walk::Delegate) that gathers every emitted entry together with the status of its
    /// directory, if one was provided.
    ///
    /// Note that each entry is turned into an owned value, which allocates.
    #[derive(Default)]
    pub struct Collect {
        /// Every entry collected so far, in no particular order.
        pub unorded_entries: Entries,
    }

    impl Collect {
        /// Consume the collector and return the emitted entries in ascending order of their
        /// repository-relative tree path.
        pub fn into_entries_by_path(self) -> Entries {
            let mut entries = self.unorded_entries;
            // Only the entry's path takes part in the comparison, the directory status is ignored.
            entries.sort_by(|(lhs, _), (rhs, _)| lhs.rela_path.cmp(&rhs.rela_path));
            entries
        }
    }

    impl walk::Delegate for Collect {
        fn emit(&mut self, entry: EntryRef<'_>, dir_status: Option<entry::Status>) -> Action {
            let owned = entry.to_owned();
            self.unorded_entries.push((owned, dir_status));
            Action::Continue
        }
    }
}
/// The caller's means of steering the traversal based on the data it is handed.
pub trait Delegate {
    /// Invoked for every observed `entry` *inside* a directory. The directory itself is passed instead if the
    /// traversal is configured to simplify its result (i.e. if every file in a directory is ignored, the
    /// containing directory is emitted in place of each file), or if the root of the traversal passes through
    /// a directory that can't be traversed.
    ///
    /// It is also invoked if the `root` in [`walk()`](crate::walk()) itself matches a particular status,
    /// even if it is a file.
    ///
    /// Tracked entries are only emitted if [`Options::emit_tracked`] is `true`.
    /// Not every pruned entry is observable either, as it may be pruned so early that the kind of item isn't
    /// known yet. Pruned entries are furthermore only emitted if [`Options::emit_pruned`] is `true`.
    ///
    /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
    /// `dir_status` that differed from the status of `entry` while [`Options::emit_collapsed`] was
    /// [`CollapsedEntriesEmissionMode::OnStatusMismatch`]. It is `Some(dir_status)` as well if that option
    /// was [`CollapsedEntriesEmissionMode::All`].
    fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<entry::Status>) -> Action;

    /// Return `true` if `entry` may be recursed into. Only called if the entry is a physical directory.
    /// The default implementation forwards the decision to the status of `entry`, acting like Git does by
    /// default in `git status` or `git clean`.
    ///
    /// `for_deletion` tells whether the seen entries are ultimately going to be deleted, which may affect the
    /// decision of whether to recurse or not.
    ///
    /// If `worktree_root_is_repository` is `true`, this status is part of the root of an iteration, and the
    /// corresponding worktree root is a repository itself. This typically happens for submodules. Recursion
    /// rules are relaxed in this case to allow traversing submodule worktrees.
    ///
    /// Note that this method sees all directories, even though not all of them may end up being
    /// [emitted](Self::emit()). If this method returns `false`, the `entry` will always be emitted.
    fn can_recurse(
        &mut self,
        entry: EntryRef<'_>,
        for_deletion: Option<ForDeletionMode>,
        worktree_root_is_repository: bool,
    ) -> bool {
        let (disk_kind, pathspec_match) = (entry.disk_kind, entry.pathspec_match);
        entry
            .status
            .can_recurse(disk_kind, pathspec_match, for_deletion, worktree_root_is_repository)
    }
}
/// Determines how entries are handed to the [Delegate].
///
/// Depending on the variant, entries are either emitted right away or have to be held back.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum EmissionMode {
    /// Emit every entry exactly as it matches, without simplifying anything.
    ///
    /// In this mode emissions happen as they occur, without any buffering or ordering.
    #[default]
    Matching,
    /// Emit only the containing directory if all of its entries are of the same type.
    ///
    /// This is more expensive, as all entries in the directory structure have to be kept track of
    /// until it's clear what to finally emit.
    CollapseDirectory,
}
/// Determines how entries contained in collapsed directories are handed to the [Delegate].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CollapsedEntriesEmissionMode {
    /// Emit an entry only if its status differs from the status of the parent directory that is
    /// about to be collapsed.
    ///
    /// E.g. if a directory is determined to be untracked and the entries in question are ignored,
    /// these entries will be emitted.
    ///
    /// Entries with the same status are essentially 'merged' into the collapsing directory
    /// and are not observable anymore.
    #[default]
    OnStatusMismatch,
    /// Emit every entry inside of a collapsed directory so that all of them are observable.
    All,
}
/// For walks whose purpose is deletion: assure that directories containing precious files are not
/// collapsed, and that no entry is observable that shouldn't be deleted.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ForDeletionMode {
    /// Do not traverse into ignored directories. This may save a lot of time, but such directories may
    /// include nested repositories which might end up being deleted.
    #[default]
    IgnoredDirectoriesCanHideNestedRepositories,
    /// Rather than skipping ignored directories entirely, dive in and find ignored non-bare repositories,
    /// so these are emitted separately and prevent collapsing. A non-bare repository is assumed to be
    /// a directory with `.git` inside.
    /// Only relevant when ignored entries are emitted.
    FindNonBareRepositoriesInIgnoredDirectories,
    /// A more expensive form of the variant above, as it finds all repositories, bare or non-bare.
    FindRepositoriesInIgnoredDirectories,
}
/// Configuration for the [`walk()`](function::walk()) function.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Options<'a> {
    /// If `true`, the filesystem stores paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`. Such paths
    /// have to be turned back from decomposed to precomposed unicode before they are stored in the index or
    /// used in general. The same applies to input received from the command-line, so callers may have to be
    /// aware of this and perform conversions accordingly.
    /// If `false`, no conversion is performed.
    pub precompose_unicode: bool,
    /// If `true`, the filesystem ignores the case of input, which makes `A` the same file as `a`.
    /// This is also known as case-folding.
    /// Note that [pathspecs](Context::pathspec) must use the same defaults, which makes them match
    /// case-insensitively automatically.
    pub ignore_case: bool,
    /// If `true`, directories that are candidates for recursion are no longer checked for being a nested
    /// repository, which saves time but means they are recursed into.
    /// If `false`, nested repositories will not be traversed.
    pub recurse_repositories: bool,
    /// If `true`, pruned entries whose [Kind](crate::entry::Kind) is known are emitted.
    pub emit_pruned: bool,
    /// If `Some(mode)`, ignored entries are emitted as specified by `mode`.
    /// If `None`, ignored entries are not emitted at all.
    pub emit_ignored: Option<EmissionMode>,
    /// Must be `Some(_)` when the walk is for deletion, to assure that directories with precious files in them
    /// are not collapsed, and that no entries are observable that shouldn't be deleted.
    /// If `None`, precious files are treated like expendable files, which is usually desirable when displaying
    /// them for addition to the repository, and folders can be collapsed more generously in relation to
    /// ignored files.
    pub for_deletion: Option<ForDeletionMode>,
    /// If `true`, not only non-bare repositories are found in untracked directories, but bare ones as well.
    ///
    /// Note that this is very costly. Without it, however, bare repositories appear like untracked directories
    /// when collapsed, and they will be recursed into.
    pub classify_untracked_bare_repositories: bool,
    /// If `true`, entries for tracked items are emitted as well. Otherwise they remain 'hidden', even if
    /// a pathspec directly refers to them.
    pub emit_tracked: bool,
    /// Controls how untracked files are emitted. By default this happens immediately and without any
    /// simplification.
    pub emit_untracked: EmissionMode,
    /// If `true`, empty directories are emitted as well. Note that a directory also counts as empty if it has
    /// any amount or depth of nested subdirectories, as long as none of them includes a file.
    /// Thus, this makes leaf-level empty directories visible, as those don't have any content.
    pub emit_empty_directories: bool,
    /// If `None`, no entries inside of collapsed directories are emitted. Otherwise the given `Some(mode)`
    /// decides which of them are.
    pub emit_collapsed: Option<CollapsedEntriesEmissionMode>,
    /// A `libgit2` compatibility flag. If enabled, symlinks that point to directories are considered
    /// directories when checking for exclusion.
    ///
    /// This matters if `src2` points to `src` and is excluded with `src2/`. If `false`, `src2` will not be
    /// excluded; if `true`, it will be excluded as the symlink is considered a directory.
    ///
    /// In other words, this flag should be `false`, the default, for Git compatibility, and `true` for
    /// `git2` compatibility.
    pub symlinks_to_directories_are_ignored_like_directories: bool,
    /// The set of all git worktree checkouts that are located within the main worktree directory.
    ///
    /// They are automatically detected as 'tracked', but without index information being provided, as there
    /// is no actual index entry.
    /// Note that the unicode composition must match the `precompose_unicode` field so that paths match verbatim.
    pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
}
/// All information that is required to perform a dirwalk, and to classify paths properly.
///
/// Every field borrows from the caller for the lifetime `'a`; the mutable borrows (`pathspec`,
/// `pathspec_attributes`, `excludes`) are held exclusively for as long as the context exists.
pub struct Context<'a> {
    /// If not `None`, it will be checked before entering any directory to trigger early interruption.
    ///
    /// If this flag is `true` at any point in the iteration, it will abort with an error.
    pub should_interrupt: Option<&'a AtomicBool>,
    /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
    ///
    /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
    /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
    pub git_dir_realpath: &'a std::path::Path,
    /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
    /// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
    ///
    /// It is also used to assure that the current working dir will not be collapsed when the walk is for deletion.
    pub current_dir: &'a std::path::Path,
    /// The index to quickly understand if a file or directory is tracked or not.
    ///
    /// ### Important
    ///
    /// The index must have been validated so that each entry that is considered up-to-date will have the
    /// [gix_index::entry::Flags::UPTODATE] flag set. Otherwise the index entry is not considered and
    /// a disk-access may occur, which is costly.
    pub index: &'a gix_index::State,
    /// A utility to look up index entries faster, and to deal with ignore-case handling.
    ///
    /// Must be set if `ignore_case` is `true`, or else some entries won't be found if their case is different.
    ///
    /// ### Deviation
    ///
    /// Git uses a name-based hash (for looking up entries, not directories) even when operating
    /// in case-sensitive mode. It does, however, skip the directory hash creation (for looking
    /// up directories) unless `core.ignoreCase` is enabled.
    ///
    /// We only use the hashmap when available and when [`ignore_case`](Options::ignore_case) is enabled in the options.
    pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
    /// A pathspec to use as filter - we only traverse into directories if it matches.
    /// Note that the `ignore_case` setting it uses should match our [Options::ignore_case].
    /// If no such filtering is desired, pass an empty `pathspec` which will match everything.
    pub pathspec: &'a mut gix_pathspec::Search,
    /// The `attributes` callback for use in [gix_pathspec::Search::pattern_matching_relative_path()], which happens when
    /// pathspecs use attributes for filtering.
    /// If `pathspec` isn't empty, this function may be called if pathspecs perform attribute lookups.
    pub pathspec_attributes: &'a mut dyn FnMut(
        &BStr,
        gix_pathspec::attributes::glob::pattern::Case,
        bool,
        &mut gix_pathspec::attributes::search::Outcome,
    ) -> bool,
    /// A way to query the `.gitignore` files to see if a directory or file is ignored.
    /// Set to `None` to not perform any work on checking for ignored, which turns previously ignored files into
    /// untracked ones, a useful operation when trying to add ignored files to a repository.
    pub excludes: Option<&'a mut gix_worktree::Stack>,
    /// Access to the object database for use with `excludes` - it's possible to access `.gitignore` files in the index if configured.
    pub objects: &'a dyn gix_object::Find,
    /// If not `None`, override the traversal root that is computed and use this one instead.
    ///
    /// This can be useful if the traversal root may be a file, in which case the traversal will
    /// still be returning possibly matching root entries.
    ///
    /// ### Panics
    ///
    /// If the `traversal_root` is not in the `worktree_root` passed to [walk()](crate::walk()).
    pub explicit_traversal_root: Option<&'a std::path::Path>,
}
/// Statistics gathered as additional outcome of [`walk()`](function::walk()).
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Outcome {
    /// How many times directory contents were read.
    pub read_dir_calls: u32,
    /// How many entries were returned by passing them to the callback. This number can be lower
    /// than `seen_entries`.
    pub returned_entries: usize,
    /// How many entries were seen, before pathspecs filtered them out or they were excluded otherwise.
    pub seen_entries: u32,
}
/// The error returned by [`walk()`](function::walk()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Interrupted")]
Interrupted,
#[error("Worktree root at '{}' is not a directory", root.display())]
WorktreeRootIsFile { root: PathBuf },
#[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())]
NormalizeRoot { root: PathBuf },
#[error("A symlink was found at component {component_index} of traversal root '{}' as seen from worktree root '{}'", root.display(), worktree_root.display())]
SymlinkInRoot {
root: PathBuf,
worktree_root: PathBuf,
/// This index starts at 0, with 0 being the first component.
component_index: usize,
},
#[error("Failed to update the excludes stack to see if a path is excluded")]
ExcludesAccess(std::io::Error),
#[error("Failed to read the directory at '{}'", path.display())]
ReadDir { path: PathBuf, source: std::io::Error },
#[error("Could not obtain directory entry in root of '{}'", parent_directory.display())]
DirEntry {
parent_directory: PathBuf,
source: std::io::Error,
},
#[error("Could not obtain filetype of directory entry '{}'", path.display())]
DirEntryFileType { path: PathBuf, source: std::io::Error },
#[error("Could not obtain symlink metadata on '{}'", path.display())]
SymlinkMetadata { path: PathBuf, source: std::io::Error },
}
mod classify;
pub(crate) mod function;
mod readdir;