gix_diff/rewrites/mod.rs
1use crate::tree::visit::ChangeId;
2use crate::Rewrites;
3use std::collections::BTreeSet;
4
5/// Types related to the rename tracker for renames, rewrites and copies.
6pub mod tracker;
7
8/// A type to retain state related to an ongoing tracking operation to retain sets of interesting changes
9/// of which some are retained to at a later stage compute the ones that seem to be renames or copies.
10pub struct Tracker<T> {
11 /// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
12 items: Vec<tracker::Item<T>>,
13 /// A place to store all paths in to reduce amount of allocations.
14 path_backing: Vec<u8>,
15 /// How to track copies and/or rewrites.
16 rewrites: Rewrites,
17 /// Previously emitted relation ids of rewrite pairs, with `(deleted source, added destination)`.
18 child_renames: BTreeSet<(ChangeId, ChangeId)>,
19}
20
/// Determine in which set of files to search for copies.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
pub enum CopySource {
    /// Find copies from the set of modified files only.
    ///
    /// This is the default.
    #[default]
    FromSetOfModifiedFiles,
    /// Find copies from the set of modified files, as well as all files known to the source
    /// (i.e. the previous state of the tree).
    ///
    /// This can be an expensive operation, as every file known to the source becomes a candidate
    /// and the amount of work grows with the total amount of files in the set.
    FromSetOfModifiedFilesAndAllSources,
}
32
33/// Under which circumstances we consider a file to be a copy.
34#[derive(Debug, Copy, Clone, PartialEq)]
35pub struct Copies {
36 /// The set of files to search when finding the source of copies.
37 pub source: CopySource,
38 /// Equivalent to [`Rewrites::percentage`], but used for copy tracking.
39 ///
40 /// Useful to have similarity-based rename tracking and cheaper copy tracking.
41 pub percentage: Option<f32>,
42}
43
44impl Default for Copies {
45 fn default() -> Self {
46 Copies {
47 source: CopySource::default(),
48 percentage: Some(0.5),
49 }
50 }
51}
52
53/// Information collected while handling rewrites of files which may be tracked.
54#[derive(Default, Clone, Copy, Debug, PartialEq)]
55pub struct Outcome {
56 /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration.
57 pub options: Rewrites,
58 /// The amount of similarity checks that have been conducted to find renamed files and potentially copies.
59 pub num_similarity_checks: usize,
60 /// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it.
61 pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
62 /// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it.
63 pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
64}
65
66/// The default settings for rewrites according to the git configuration defaults.
67impl Default for Rewrites {
68 fn default() -> Self {
69 Rewrites {
70 copies: None,
71 percentage: Some(0.5),
72 limit: 1000,
73 track_empty: false,
74 }
75 }
76}