gix_diff/rewrites/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
use crate::tree::visit::ChangeId;
use crate::Rewrites;
use std::collections::BTreeSet;

/// Types related to the rename tracker for renames, rewrites and copies.
pub mod tracker;

/// State for an ongoing rewrite-tracking operation.
///
/// Interesting changes are collected as items while tracking is in progress, so that at a later stage
/// the ones that seem to be renames or copies can be computed from them.
pub struct Tracker<T> {
    /// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
    items: Vec<tracker::Item<T>>,
    /// A single buffer to store all paths in, to reduce the number of allocations.
    path_backing: Vec<u8>,
    /// How to track copies and/or rewrites.
    rewrites: Rewrites,
    /// Previously emitted relation ids of rewrite pairs, with `(deleted source, added destination)`.
    child_renames: BTreeSet<(ChangeId, ChangeId)>,
}

/// Determine in which set of files to search for copies.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
pub enum CopySource {
    /// Find copies from the set of modified files only.
    ///
    /// This is the default.
    #[default]
    FromSetOfModifiedFiles,
    /// Find copies from the set of modified files, as well as all files known to the source (i.e. the previous state of the tree).
    ///
    /// This can be an expensive operation as it scales exponentially with the total number of files in the set.
    FromSetOfModifiedFilesAndAllSources,
}

/// Under which circumstances we consider a file to be a copy.
// `Eq` is not derived as `percentage` holds an `f32`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Copies {
    /// The set of files to search when finding the source of copies.
    pub source: CopySource,
    /// Equivalent to [`Rewrites::percentage`], but used for copy tracking.
    ///
    /// Keeping it separate makes it possible to combine similarity-based rename tracking with cheaper copy tracking.
    pub percentage: Option<f32>,
}

impl Default for Copies {
    /// Search the default [`CopySource`] with a `percentage` of `Some(0.5)`.
    fn default() -> Self {
        let source = CopySource::default();
        let percentage = Some(0.5);
        Self { source, percentage }
    }
}

/// Information collected while handling rewrites of files which may be tracked.
#[derive(Default, Clone, Copy, Debug, PartialEq)]
pub struct Outcome {
    /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration.
    pub options: Rewrites,
    /// The number of similarity checks that have been conducted to find renamed files and potentially copies.
    pub num_similarity_checks: usize,
    /// The number of worst-case rename permutations that were not searched because the limit didn't allow it.
    pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
    /// The number of worst-case copy permutations that were not searched because the limit didn't allow it.
    pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
}

/// The default settings for rewrites according to the git configuration defaults.
impl Default for Rewrites {
    fn default() -> Self {
        Rewrites {
            copies: None,
            percentage: Some(0.5),
            limit: 1000,
        }
    }
}