gix_features/
hash.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
//! in case it is available. Otherwise, the `rustsha1` feature should be set, which selects a minimal yet performant
//! implementation for a decent trade-off between compile times and run-time performance.
//! If both features are enabled, `fast-sha1` takes precedence.
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Digest;

    /// A Sha1 hasher wrapping `sha1_smol::Sha1`; producing the digest consumes it.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes);
        }
        /// Consume the hasher and return the resulting [`Digest`].
        pub fn digest(self) -> Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}

/// A hash-digest produced by a [`Hasher`] hash implementation.
///
/// It is a fixed-size array of 20 bytes and the return type of the `digest()` method of the `Sha1`
/// implementations in this module.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Digest = [u8; 20];

#[cfg(feature = "fast-sha1")]
mod _impl {
    // The trait providing `update()` and `finalize()` for the wrapped hasher.
    use sha1::Digest;

    /// A Sha1 hasher wrapping `sha1::Sha1`; producing the digest consumes it.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            Digest::update(&mut self.0, bytes);
        }
        /// Consume the hasher and return the resulting [`super::Digest`].
        pub fn digest(self) -> super::Digest {
            let finalized = Digest::finalize(self.0);
            finalized.into()
        }
    }
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use _impl::Sha1 as Hasher;

/// Continue a CRC32 computation over `bytes`, starting from `previous_value`, and return the updated CRC32.
///
/// Pass `0` as `previous_value` for the first chunk. For every following chunk, pass the value
/// returned by the previous call so that the result covers all chunks in sequence.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}

/// Return the CRC32 of `bytes`, treating them as the complete input.
///
/// If the input arrives in multiple chunks, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new();
    state.update(bytes);
    state.finalize()
}

/// Create a fresh [`Hasher`] for the hash `kind`.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Hasher {
    use gix_hash::Kind;

    // Deliberately exhaustive: a new hash kind must be handled here explicitly.
    match kind {
        Kind::Sha1 => <Hasher as Default>::default(),
    }
}

/// Open the file at `path` and compute the hash of `kind` over its first `num_bytes_from_start` bytes,
/// reporting to `progress` along the way.
///
/// `num_bytes_from_start` is the amount of bytes to hash, counted from the beginning of the file. It allows
/// to leave out trailing hashes, which are never part of the hash itself.
///
/// # Errors
///
/// Fails if the file cannot be opened, or with whatever error [`bytes()`] returns.
///
/// # Note
///
/// * Only compiled with the `progress` feature along with either the `rustsha1` or the `fast-sha1` feature.
/// * [Interrupts][crate::interrupt] are supported: `should_interrupt` is passed on to [`bytes()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let mut file = std::fs::File::open(path)?;
    bytes(&mut file, num_bytes_from_start, kind, progress, should_interrupt)
}

/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of the stream `read`
/// instead of opening a file.
///
/// The hasher for `kind` is created with [`hasher()`] and the work is done by [`bytes_with_hasher()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let state = hasher(kind);
    bytes_with_hasher(read, num_bytes_from_start, state, progress, should_interrupt)
}

/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
///
/// Exactly `num_bytes_from_start` bytes are read from `read` in chunks of at most `u16::MAX` bytes. Each chunk
/// is fed into `hasher` and added to `progress`. `progress` is initialized before the first read, and the
/// throughput is shown once all bytes were hashed.
///
/// # Errors
///
/// * Any error of `read.read_exact()`, which includes the stream ending before `num_bytes_from_start` bytes
///   could be read.
/// * An error of kind [`std::io::ErrorKind::Other`] with the message `Interrupted` if `should_interrupt` is set.
///   The flag is checked after each chunk was read and hashed.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Hasher,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // init progress before the possibility for failure, as convenience in case people want to recover
    progress.init(
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        // Clamp while still in `u64` and narrow afterwards: the result is at most `BUF_SIZE` and always fits.
        // Casting `bytes_left` to `usize` first would truncate on targets where `usize` is narrower than
        // `u64`, which could yield an empty chunk and thus a loop that never makes progress.
        let chunk_len = bytes_left.min(BUF_SIZE as u64) as usize;
        let out = &mut buf[..chunk_len];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Hasher;

    /// A writer that forwards to an inner writer and hashes every byte the inner writer accepted.
    pub struct Write<T> {
        /// The hasher which is updated with all bytes written so far.
        pub hash: Hasher,
        /// The writer that receives the data.
        pub inner: T,
    }

    impl<T> std::io::Write for Write<T>
    where
        T: std::io::Write,
    {
        /// Write `buf` to the inner writer and hash only the part of `buf` it reports as written.
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.inner.write(buf).map(|accepted| {
                self.hash.update(&buf[..accepted]);
                accepted
            })
        }

        /// Flush the inner writer; the hash state is not affected.
        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T> Write<T>
    where
        T: std::io::Write,
    {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            // Deliberately exhaustive: a new hash kind must be handled here explicitly.
            let hash = match object_hash {
                gix_hash::Kind::Sha1 => Hasher::default(),
            };
            Write { hash, inner }
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use write::Write;