gix_dir/walk/
function.rs

1use std::borrow::Cow;
2use std::path::{Path, PathBuf};
3
4use bstr::{BStr, BString, ByteSlice};
5
6use crate::walk::{classify, readdir, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome};
7use crate::{entry, EntryRef};
8
9/// A function to perform a git-style, unsorted, directory walk.
10///
11/// * `worktree_root` - the top-most root of the worktree, which must be a prefix to `root`.
12///     - If [`Options::precompose_unicode`] is enabled, this path must be precomposed.
13///     - The starting point of the traversal (traversal root) is calculated from by doing `worktree_root + pathspec.common_prefix()`.
14///     - Note that if the traversal root leading to this directory or it itself is excluded, it will be provided to [`Delegate::emit()`]
15///       without further traversal.
16///     - If [`Options::precompose_unicode`] is enabled, all involved paths must be precomposed.
17///     - Must be contained in `worktree_root`.
18/// * `ctx` - everything needed to classify the paths seen during the traversal.
19/// * `delegate` - an implementation of [`Delegate`] to control details of the traversal and receive its results.
20///
21/// Returns `(outcome, traversal_root)`, with the `traversal_root` actually being used for the traversal,
22/// useful to transform the paths returned for the user. It's always within the `worktree_root`, or the same,
23/// but is hard to guess due to additional logic affecting it.
24///
25/// ### Performance Notes
26///
27/// In theory, parallel directory traversal can be significantly faster, and what's possible for our current
28/// `gix_features::fs::WalkDir` implementation is to abstract a `filter_entry()` method so it works both for
29/// the iterator from the `walkdir` crate as well as from `jwalk`. However, doing so as initial version
30/// has the risk of not being significantly harder if not impossible to implement as flow-control is very
31/// limited.
32///
33/// Thus the decision was made to start out with something akin to the Git implementation, get all tests and
34/// baseline comparison to pass, and see if an iterator with just `filter_entry` would be capable of dealing with
35/// it. Note that `filter_entry` is the only just-in-time traversal control that `walkdir` offers, even though
36/// one could consider switching to `jwalk` and just use its single-threaded implementation if a unified interface
37/// is necessary to make this work - `jwalk` has a more powerful API for this to work.
38///
39/// If that was the case, we are talking about 0.5s for single-threaded traversal (without doing any extra work)
40/// or 0.25s for optimal multi-threaded performance, all in the WebKit directory with 388k items to traverse.
41/// Thus, the speedup could easily be 2x or more and thus worth investigating in due time.
42pub fn walk(
43    worktree_root: &Path,
44    mut ctx: Context<'_>,
45    options: Options<'_>,
46    delegate: &mut dyn Delegate,
47) -> Result<(Outcome, PathBuf), Error> {
48    let root = match ctx.explicit_traversal_root {
49        Some(root) => root.to_owned(),
50        None => ctx
51            .pathspec
52            .longest_common_directory()
53            .and_then(|candidate| {
54                let candidate = worktree_root.join(candidate);
55                candidate.is_dir().then_some(candidate)
56            })
57            .unwrap_or_else(|| worktree_root.join(ctx.pathspec.prefix_directory())),
58    };
59    let _span = gix_trace::coarse!("walk", root = ?root, worktree_root = ?worktree_root, options = ?options);
60    let (mut current, worktree_root_relative) = assure_no_symlink_in_root(worktree_root, &root)?;
61    let mut out = Outcome::default();
62    let mut buf = BString::default();
63    let (root_info, worktree_root_is_repository) = classify::root(
64        worktree_root,
65        &mut buf,
66        worktree_root_relative.as_ref(),
67        options,
68        &mut ctx,
69    )?;
70
71    let can_recurse = can_recurse(
72        buf.as_bstr(),
73        if root == worktree_root && root_info.disk_kind == Some(entry::Kind::Symlink) && current.is_dir() {
74            classify::Outcome {
75                disk_kind: Some(entry::Kind::Directory),
76                ..root_info
77            }
78        } else {
79            root_info
80        },
81        options.for_deletion,
82        worktree_root_is_repository,
83        delegate,
84    );
85    if !can_recurse {
86        if buf.is_empty() && !root_info.disk_kind.is_some_and(|kind| kind.is_dir()) {
87            return Err(Error::WorktreeRootIsFile { root: root.to_owned() });
88        }
89        if options.precompose_unicode {
90            buf = gix_utils::str::precompose_bstr(buf.into()).into_owned();
91        }
92        let _ = emit_entry(
93            Cow::Borrowed(buf.as_bstr()),
94            root_info,
95            None,
96            options,
97            &mut out,
98            delegate,
99        );
100        return Ok((out, root.to_owned()));
101    }
102
103    let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some());
104    let may_collapse = root != worktree_root && state.may_collapse(&current);
105    let (action, _) = readdir::recursive(
106        may_collapse,
107        &mut current,
108        &mut buf,
109        root_info,
110        &mut ctx,
111        options,
112        delegate,
113        &mut out,
114        &mut state,
115    )?;
116    if action != Action::Cancel {
117        state.emit_remaining(may_collapse, options, &mut out, delegate);
118        assert_eq!(state.on_hold.len(), 0, "BUG: after emission, on hold must be empty");
119    }
120    gix_trace::debug!(statistics = ?out);
121    Ok((out, root.to_owned()))
122}
123
124/// Note that we only check symlinks on the way from `worktree_root` to `root`,
125/// so `worktree_root` may go through a symlink.
126/// Returns `(worktree_root, normalized_worktree_relative_root)`.
127fn assure_no_symlink_in_root<'root>(
128    worktree_root: &Path,
129    root: &'root Path,
130) -> Result<(PathBuf, Cow<'root, Path>), Error> {
131    let mut current = worktree_root.to_owned();
132    let worktree_relative = root
133        .strip_prefix(worktree_root)
134        .expect("BUG: root was created from worktree_root + prefix");
135    let worktree_relative = gix_path::normalize(worktree_relative.into(), Path::new(""))
136        .ok_or(Error::NormalizeRoot { root: root.to_owned() })?;
137
138    for (idx, component) in worktree_relative.components().enumerate() {
139        current.push(component);
140        let meta = current.symlink_metadata().map_err(|err| Error::SymlinkMetadata {
141            source: err,
142            path: current.to_owned(),
143        })?;
144        if meta.is_symlink() {
145            return Err(Error::SymlinkInRoot {
146                root: root.to_owned(),
147                worktree_root: worktree_root.to_owned(),
148                component_index: idx,
149            });
150        }
151    }
152    Ok((current, worktree_relative))
153}
154
155pub(super) fn can_recurse(
156    rela_path: &BStr,
157    info: classify::Outcome,
158    for_deletion: Option<ForDeletionMode>,
159    worktree_root_is_repository: bool,
160    delegate: &mut dyn Delegate,
161) -> bool {
162    let is_dir = info.disk_kind.is_some_and(|k| k.is_dir());
163    if !is_dir {
164        return false;
165    }
166    delegate.can_recurse(
167        EntryRef::from_outcome(Cow::Borrowed(rela_path), info),
168        for_deletion,
169        worktree_root_is_repository,
170    )
171}
172
173/// Possibly emit an entry to `for_each` in case the provided information makes that possible.
174#[allow(clippy::too_many_arguments)]
175pub(super) fn emit_entry(
176    rela_path: Cow<'_, BStr>,
177    info: classify::Outcome,
178    dir_status: Option<entry::Status>,
179    Options {
180        emit_pruned,
181        emit_tracked,
182        emit_ignored,
183        emit_empty_directories,
184        ..
185    }: Options<'_>,
186    out: &mut Outcome,
187    delegate: &mut dyn Delegate,
188) -> Action {
189    out.seen_entries += 1;
190
191    if (!emit_empty_directories && info.property == Some(entry::Property::EmptyDirectory)
192        || !emit_tracked && info.status == entry::Status::Tracked)
193        || emit_ignored.is_none() && matches!(info.status, entry::Status::Ignored(_))
194        || !emit_pruned
195            && (info.status.is_pruned()
196                || info
197                    .pathspec_match
198                    .map_or(true, |m| m == entry::PathspecMatch::Excluded))
199    {
200        return Action::Continue;
201    }
202
203    out.returned_entries += 1;
204    delegate.emit(EntryRef::from_outcome(rela_path, info), dir_status)
205}