use std::{
    io,
    path::Path,
    slice::Chunks,
    sync::atomic::{AtomicUsize, Ordering},
};

use bstr::BStr;
use filetime::FileTime;
use gix_features::parallel::{in_parallel_if, Reduce};
use gix_filter::pipeline::convert::ToGitOutcome;
use gix_object::FindExt;

use crate::index_as_worktree::Context;
use crate::{
    index_as_worktree::{
        traits,
        traits::{read_data::Stream, CompareBlobs, SubmoduleStatus},
        types::{Error, Options},
        Change, Conflict, EntryStatus, Outcome, VisitEntry,
    },
    is_dir_to_mode, AtomicU64, SymlinkCheck,
};

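/// Compare `index` against the working tree at `worktree` and emit the status of each tracked entry to `collector`.
///
/// Entries are filtered by `pathspec`, blob contents are compared with `compare`, and submodules are examined by
/// `submodule`, while `objects` gives access to the object database. Work is split into chunks and processed in
/// parallel; `progress` and the `should_interrupt` flag from `Context` allow observing and cancelling the traversal.
/// The returned [`Outcome`] aggregates the counters gathered across all threads.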
#[allow(clippy::too_many_arguments)]
pub fn index_as_worktree<'index, T, U, Find, E>(
    index: &'index gix_index::State,
    worktree: &Path,
    collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>,
    compare: impl CompareBlobs<Output = T> + Send + Clone,
    submodule: impl SubmoduleStatus<Output = U, Error = E> + Send + Clone,
    objects: Find,
    progress: &mut dyn gix_features::progress::Progress,
    Context {
        pathspec,
        stack,
        filter,
        should_interrupt,
    }: Context<'_>,
    options: Options,
) -> Result<Outcome, Error>
where
    T: Send,
    U: Send,
    E: std::error::Error + Send + Sync + 'static,
    Find: gix_object::Find + Send + Clone,
{
    let timestamp = index.timestamp();
    let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit(
        500,
        index.entries().len().into(),
        options.thread_limit,
        None,
    );

    let range = index
        .prefixed_entries_range(pathspec.common_prefix())
        .unwrap_or(0..index.entries().len());

    let (entries, path_backing) = (index.entries(), index.path_backing());
    let mut num_entries = entries.len();
    let entry_index_offset = range.start;
    let entries = &entries[range];

    let _span = gix_features::trace::detail!("gix_status::index_as_worktree",
        num_entries = entries.len(),
        chunk_size = chunk_size,
        thread_limit = ?thread_limit);

    let entries_skipped_by_common_prefix = num_entries - entries.len();
    let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update) = Default::default();
    let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default();

    num_entries = entries.len();
    progress.init(entries.len().into(), gix_features::progress::count("files"));
    let count = progress.counter();

    let new_state = {
        let options = &options;
        let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags);
        let (symlink_metadata_calls, entries_to_update) = (&symlink_metadata_calls, &entries_to_update);
        let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes);
        let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads);
        move |_| {
            (
                State {
                    buf: Vec::new(),
                    buf2: Vec::new(),
                    attr_stack: stack,
                    path_stack: SymlinkCheck::new(worktree.into()),
                    timestamp,
                    path_backing,
                    filter,
                    options,

                    skipped_by_pathspec,
                    skipped_by_entry_flags,
                    symlink_metadata_calls,
                    entries_to_update,
                    racy_clean,
                    worktree_reads,
                    worktree_bytes,
                    odb_reads,
                    odb_bytes,
                },
                compare,
                submodule,
                objects,
                pathspec,
            )
        }
    };
    in_parallel_if(
        || true,
        gix_features::interrupt::Iter::new(
            OffsetIter {
                inner: entries.chunks(chunk_size),
                offset: entry_index_offset,
            },
            should_interrupt,
        ),
        thread_limit,
        new_state,
        |(entry_offset, chunk_entries), (state, blobdiff, submodule, objects, pathspec)| {
            let all_entries = index.entries();
            let mut out = Vec::new();
            let mut idx = 0;
            while let Some(entry) = chunk_entries.get(idx) {
                let absolute_entry_index = entry_offset + idx;
                if idx == 0 && entry.stage_raw() != 0 {
                    let offset = entry_offset.checked_sub(1).and_then(|prev_idx| {
                        let prev_entry = &all_entries[prev_idx];
                        let entry_path = entry.path_in(state.path_backing);
                        if prev_entry.stage_raw() == 0 || prev_entry.path_in(state.path_backing) != entry_path {
                            return None;
                        }
                        Conflict::try_from_entry(all_entries, state.path_backing, absolute_entry_index, entry_path)
                            .map(|(_conflict, offset)| offset)
                    });
                    if let Some(entries_to_skip_as_conflict_originates_in_previous_chunk) = offset {
                        idx += entries_to_skip_as_conflict_originates_in_previous_chunk + 1;
                        continue;
                    }
                }
                let res = state.process(
                    all_entries,
                    entry,
                    absolute_entry_index,
                    pathspec,
                    blobdiff,
                    submodule,
                    objects,
                    &mut idx,
                );
                idx += 1;
                count.fetch_add(1, Ordering::Relaxed);
                if let Some(res) = res {
                    out.push(res);
                }
            }
            out
        },
        ReduceChange {
            collector,
            entries: index.entries(),
        },
    )?;

    Ok(Outcome {
        entries_to_process: num_entries,
        entries_processed: count.load(Ordering::Relaxed),
        entries_skipped_by_common_prefix,
        entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed),
        entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed),
        entries_to_update: entries_to_update.load(Ordering::Relaxed),
        symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed),
        racy_clean: racy_clean.load(Ordering::Relaxed),
        worktree_files_read: worktree_reads.load(Ordering::Relaxed),
        worktree_bytes: worktree_bytes.load(Ordering::Relaxed),
        odb_objects_read: odb_reads.load(Ordering::Relaxed),
        odb_bytes: odb_bytes.load(Ordering::Relaxed),
    })
}

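/// Per-thread working state, combining scratch buffers and per-thread stacks with references to the
/// atomic counters that are shared across all worker threads.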
struct State<'a, 'b> {
    buf: Vec<u8>,
    buf2: Vec<u8>,
    timestamp: FileTime,
    path_stack: SymlinkCheck,
    attr_stack: gix_worktree::Stack,
    filter: gix_filter::Pipeline,
    path_backing: &'b gix_index::PathStorageRef,
    options: &'a Options,

    skipped_by_pathspec: &'a AtomicUsize,
    skipped_by_entry_flags: &'a AtomicUsize,
    symlink_metadata_calls: &'a AtomicUsize,
    entries_to_update: &'a AtomicUsize,
    racy_clean: &'a AtomicUsize,
    worktree_bytes: &'a AtomicU64,
    worktree_reads: &'a AtomicUsize,
    odb_bytes: &'a AtomicU64,
    odb_reads: &'a AtomicUsize,
}

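/// The per-entry result: the entry itself, its position in the index's entry list, its repository-relative path and its status.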
type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, usize, &'index BStr, EntryStatus<T, U>), Error>;

impl<'index> State<'_, 'index> {
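    /// Process a single `entry` located at `entry_index`, returning `None` if it is skipped due to its flags
    /// or because the `pathspec` excludes it. Conflicting entries are collapsed into one [`Conflict`] status,
    /// and `outer_entry_index` is advanced past the additional stages that belong to the same path.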
    #[allow(clippy::too_many_arguments)]
    fn process<T, U, Find, E>(
        &mut self,
        entries: &'index [gix_index::Entry],
        entry: &'index gix_index::Entry,
        entry_index: usize,
        pathspec: &mut gix_pathspec::Search,
        diff: &mut impl CompareBlobs<Output = T>,
        submodule: &mut impl SubmoduleStatus<Output = U, Error = E>,
        objects: &Find,
        outer_entry_index: &mut usize,
    ) -> Option<StatusResult<'index, T, U>>
    where
        E: std::error::Error + Send + Sync + 'static,
        Find: gix_object::Find,
    {
        if entry.flags.intersects(
            gix_index::entry::Flags::UPTODATE
                | gix_index::entry::Flags::SKIP_WORKTREE
                | gix_index::entry::Flags::ASSUME_VALID
                | gix_index::entry::Flags::FSMONITOR_VALID,
        ) {
            self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed);
            return None;
        }
        let path = entry.path_in(self.path_backing);
        let is_excluded = pathspec
            .pattern_matching_relative_path(
                path,
                Some(entry.mode.is_submodule()),
                &mut |relative_path, case, is_dir, out| {
                    self.attr_stack
                        .set_case(case)
                        .at_entry(relative_path, Some(is_dir_to_mode(is_dir)), objects)
                        .is_ok_and(|platform| platform.matching_attributes(out))
                },
            )
            .map_or(true, |m| m.is_excluded());

        if is_excluded {
            self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed);
            return None;
        }
        let status = if entry.stage_raw() != 0 {
            Ok(
                Conflict::try_from_entry(entries, self.path_backing, entry_index, path).map(|(conflict, offset)| {
                    *outer_entry_index += offset;
                    EntryStatus::Conflict(conflict)
                }),
            )
        } else {
            self.compute_status(entry, path, diff, submodule, objects)
        };
        match status {
            Ok(None) => None,
            Ok(Some(status)) => Some(Ok((entry, entry_index, path, status))),
            Err(err) => Some(Err(err)),
        }
    }

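    /// Determine how `entry` compares to its counterpart on disk at `rela_path`, or `None` if it is unchanged.
    /// This performs a single lstat and handles removals, type changes, submodules and intent-to-add entries;
    /// file or object contents are only read when the cached stat information cannot prove the entry unchanged.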
    fn compute_status<T, U, Find, E>(
        &mut self,
        entry: &gix_index::Entry,
        rela_path: &BStr,
        diff: &mut impl CompareBlobs<Output = T>,
        submodule: &mut impl SubmoduleStatus<Output = U, Error = E>,
        objects: &Find,
    ) -> Result<Option<EntryStatus<T, U>>, Error>
    where
        E: std::error::Error + Send + Sync + 'static,
        Find: gix_object::Find,
    {
        let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) {
            Ok(path) => path,
            Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => {
                return Ok(Some(Change::Removed.into()))
            }
            Err(err) => return Err(Error::Io(err.into())),
        };
        self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed);
        let metadata = match gix_index::fs::Metadata::from_path_no_follow(worktree_path) {
            Ok(metadata) if metadata.is_dir() => {
                if entry.mode.is_submodule() {
                    let status = submodule
                        .status(entry, rela_path)
                        .map_err(|err| Error::SubmoduleStatus {
                            rela_path: rela_path.into(),
                            source: Box::new(err),
                        })?;
                    return Ok(status.map(|status| Change::SubmoduleModification(status).into()));
                } else {
                    return Ok(Some(Change::Removed.into()));
                }
            }
            Ok(metadata) => metadata,
            Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => {
                return Ok(Some(Change::Removed.into()))
            }
            Err(err) => {
                return Err(Error::Io(err.into()));
            }
        };
        if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) {
            return Ok(Some(EntryStatus::IntentToAdd));
        }
        let new_stat = gix_index::entry::Stat::from_fs(&metadata)?;
        let executable_bit_changed =
            match entry
                .mode
                .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit)
            {
                Some(gix_index::entry::mode::Change::Type { new_mode }) => {
                    return Ok(Some(
                        Change::Type {
                            worktree_mode: new_mode,
                        }
                        .into(),
                    ))
                }
                Some(gix_index::entry::mode::Change::ExecutableBit) => true,
                None => false,
            };

        // If the stat data matches the index, the entry is unchanged unless it is "racily clean", i.e. it was
        // written within the same timestamp granularity as the index itself and must still be checked by content.
        let mut racy_clean = false;
        if !executable_bit_changed
            && new_stat.matches(&entry.stat, self.options.stat)
            && (!entry.id.is_empty_blob() || entry.stat.size == 0)
        {
            racy_clean = new_stat.is_racy(self.timestamp, self.options.stat);
            if !racy_clean {
                return Ok(None);
            } else {
                self.racy_clean.fetch_add(1, Ordering::Relaxed);
            }
        }

        self.buf.clear();
        self.buf2.clear();
        // On Windows, symlinks are sized by the value recorded in the index rather than the metadata length.
        let file_size_bytes = if cfg!(windows) && metadata.is_symlink() {
            u64::from(entry.stat.size)
        } else {
            metadata.len()
        };
        let fetch_data = ReadDataImpl {
            buf: &mut self.buf,
            path: worktree_path,
            rela_path,
            entry,
            file_len: file_size_bytes,
            filter: &mut self.filter,
            attr_stack: &mut self.attr_stack,
            options: self.options,
            id: &entry.id,
            objects,
            worktree_reads: self.worktree_reads,
            worktree_bytes: self.worktree_bytes,
            odb_reads: self.odb_reads,
            odb_bytes: self.odb_bytes,
        };
        let content_change = diff.compare_blobs(entry, file_size_bytes, fetch_data, &mut self.buf2)?;
        if content_change.is_some() || executable_bit_changed {
            let set_entry_stat_size_zero = content_change.is_some() && racy_clean;
            Ok(Some(
                Change::Modification {
                    executable_bit_changed,
                    content_change,
                    set_entry_stat_size_zero,
                }
                .into(),
            ))
        } else {
            self.entries_to_update.fetch_add(1, Ordering::Relaxed);
            Ok(Some(EntryStatus::NeedsUpdate(new_stat)))
        }
    }
}

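/// Feed the per-chunk results produced by the worker threads into the caller-provided `collector`,
/// stopping at the first error.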
struct ReduceChange<'a, 'index, T: VisitEntry<'index>> {
    collector: &'a mut T,
    entries: &'index [gix_index::Entry],
}

impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce
    for ReduceChange<'_, 'index, C>
{
    type Input = Vec<StatusResult<'index, T, U>>;

    type FeedProduce = ();

    type Output = ();

    type Error = Error;

    fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
        for item in items {
            let (entry, entry_index, path, status) = item?;
            self.collector
                .visit_entry(self.entries, entry, entry_index, path, status);
        }
        Ok(())
    }

    fn finalize(self) -> Result<Self::Output, Self::Error> {
        Ok(())
    }
}

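/// Lazily provides blob or worktree content to a [`CompareBlobs`] implementation while updating the shared I/O counters.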
struct ReadDataImpl<'a, Find>
where
    Find: gix_object::Find,
{
    buf: &'a mut Vec<u8>,
    path: &'a Path,
    rela_path: &'a BStr,
    file_len: u64,
    entry: &'a gix_index::Entry,
    filter: &'a mut gix_filter::Pipeline,
    attr_stack: &'a mut gix_worktree::Stack,
    options: &'a Options,
    id: &'a gix_hash::oid,
    objects: Find,
    worktree_bytes: &'a AtomicU64,
    worktree_reads: &'a AtomicUsize,
    odb_bytes: &'a AtomicU64,
    odb_reads: &'a AtomicUsize,
}

impl<'a, Find> traits::ReadData<'a> for ReadDataImpl<'a, Find>
where
    Find: gix_object::Find,
{
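    /// Read the blob referenced by the entry's id from the object database into the shared buffer.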
    fn read_blob(self) -> Result<&'a [u8], Error> {
        Ok(self.objects.find_blob(self.id, self.buf).map(|b| {
            self.odb_reads.fetch_add(1, Ordering::Relaxed);
            self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed);
            b.data
        })?)
    }

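    /// Open the worktree file (or read the symlink target) at `path` and convert it to its git representation,
    /// returning a stream whose length is only known if no filter had to be applied.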
    fn stream_worktree_file(self) -> Result<Stream<'a>, Error> {
        self.buf.clear();
        let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK;
        let out = if is_symlink && self.options.fs.symlink {
            // Propagate read_link errors instead of panicking, mirroring the other I/O conversions below.
            let symlink_path = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(
                std::fs::read_link(self.path).map_err(gix_hash::io::Error::from)?,
            ));
            self.buf.extend_from_slice(&symlink_path);
            self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
            Stream {
                inner: ToGitOutcome::Buffer(self.buf),
                bytes: None,
                len: None,
            }
        } else {
            self.buf.clear();
            let platform = self
                .attr_stack
                .at_entry(self.rela_path, Some(self.entry.mode), &self.objects)
                .map_err(gix_hash::io::Error::from)?;
            let file = std::fs::File::open(self.path).map_err(gix_hash::io::Error::from)?;
            let out = self
                .filter
                .convert_to_git(
                    file,
                    self.path,
                    &mut |_path, attrs| {
                        platform.matching_attributes(attrs);
                    },
                    &mut |buf| Ok(self.objects.find_blob(self.id, buf).map(|_| Some(()))?),
                )
                .map_err(|err| Error::Io(io::Error::new(io::ErrorKind::Other, err).into()))?;
            let len = match out {
                ToGitOutcome::Unchanged(_) => Some(self.file_len),
                ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None,
            };
            Stream {
                inner: out,
                bytes: Some(self.worktree_bytes),
                len,
            }
        };

        self.worktree_reads.fetch_add(1, Ordering::Relaxed);
        Ok(out)
    }
}

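/// An iterator over the chunks of a slice that also yields each chunk's offset within the original slice.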
struct OffsetIter<'a, T> {
    inner: Chunks<'a, T>,
    offset: usize,
}

impl<'a, T> Iterator for OffsetIter<'a, T> {
    type Item = (usize, &'a [T]);

    fn next(&mut self) -> Option<Self::Item> {
        let block = self.inner.next()?;
        let offset = self.offset;
        self.offset += block.len();
        Some((offset, block))
    }
}

impl Conflict {
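    /// Collect the conflict stages starting at `start_index` that share `entry_path` into a [`Conflict`],
    /// along with the number of additional entries that belong to it and can be skipped.
    /// Returns `None` if no conflicting stage for `entry_path` is found in that range.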
    pub fn try_from_entry(
        entries: &[gix_index::Entry],
        path_backing: &gix_index::PathStorageRef,
        start_index: usize,
        entry_path: &BStr,
    ) -> Option<(Self, usize)> {
        use Conflict::*;
        let mut mask = None::<u8>;

        let mut count = 0_usize;
        for stage in (start_index..(start_index + 3).min(entries.len())).filter_map(|idx| {
            let entry = &entries[idx];
            let stage = entry.stage_raw();
            (stage > 0 && entry.path_in(path_backing) == entry_path).then_some(stage)
        }) {
            *mask.get_or_insert(0) |= match stage {
                1 => 0b001,
                2 => 0b010,
                3 => 0b100,
                _ => 0,
            };
            count += 1;
        }

        mask.map(|mask| {
            (
                match mask {
                    0b001 => BothDeleted,
                    0b010 => AddedByUs,
                    0b011 => DeletedByThem,
                    0b100 => AddedByThem,
                    0b101 => DeletedByUs,
                    0b110 => BothAdded,
                    0b111 => BothModified,
                    _ => unreachable!("BUG: bitshifts and typical entry layout doesn't allow for more"),
                },
                count - 1,
            )
        })
    }
}