gix_diff/rewrites/
mod.rs

1use crate::tree::visit::ChangeId;
2use crate::Rewrites;
3use std::collections::BTreeSet;
4
5/// Types related to the rename tracker for renames, rewrites and copies.
6pub mod tracker;
7
8/// A type to retain state related to an ongoing tracking operation to retain sets of interesting changes
9/// of which some are retained to at a later stage compute the ones that seem to be renames or copies.
10pub struct Tracker<T> {
11    /// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
12    items: Vec<tracker::Item<T>>,
13    /// A place to store all paths in to reduce amount of allocations.
14    path_backing: Vec<u8>,
15    /// How to track copies and/or rewrites.
16    rewrites: Rewrites,
17    /// Previously emitted relation ids of rewrite pairs, with `(deleted source, added destination)`.
18    child_renames: BTreeSet<(ChangeId, ChangeId)>,
19}
20
/// Determine in which set of files to search for copies.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
pub enum CopySource {
    /// Find copies from the set of modified files only.
    ///
    /// This is the default.
    #[default]
    FromSetOfModifiedFiles,
    /// Find copies from the set of modified files, as well as all files known to the source
    /// (i.e. previous state of the tree).
    ///
    /// This can be an expensive operation as it scales exponentially with the total amount of files in the set.
    FromSetOfModifiedFilesAndAllSources,
}
32
33/// Under which circumstances we consider a file to be a copy.
34#[derive(Debug, Copy, Clone, PartialEq)]
35pub struct Copies {
36    /// The set of files to search when finding the source of copies.
37    pub source: CopySource,
38    /// Equivalent to [`Rewrites::percentage`], but used for copy tracking.
39    ///
40    /// Useful to have similarity-based rename tracking and cheaper copy tracking.
41    pub percentage: Option<f32>,
42}
43
44impl Default for Copies {
45    fn default() -> Self {
46        Copies {
47            source: CopySource::default(),
48            percentage: Some(0.5),
49        }
50    }
51}
52
53/// Information collected while handling rewrites of files which may be tracked.
54#[derive(Default, Clone, Copy, Debug, PartialEq)]
55pub struct Outcome {
56    /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration.
57    pub options: Rewrites,
58    /// The amount of similarity checks that have been conducted to find renamed files and potentially copies.
59    pub num_similarity_checks: usize,
60    /// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it.
61    pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
62    /// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it.
63    pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
64}
65
66/// The default settings for rewrites according to the git configuration defaults.
67impl Default for Rewrites {
68    fn default() -> Self {
69        Rewrites {
70            copies: None,
71            percentage: Some(0.5),
72            limit: 1000,
73            track_empty: false,
74        }
75    }
76}