gix_features/hash.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type uses a more elaborate implementation that utilizes hardware support
//! when it is available. Otherwise, the `rustsha1` feature should be set, which selects a minimal yet performant
//! implementation for a decent trade-off between compile times and run-time performance.
//! If both features are enabled, `fast-sha1` takes precedence.
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Digest;

    /// An implementation of the Sha1 hash backed by `sha1_smol`, which can be used once
    /// as producing the digest consumes the hasher.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes);
        }

        /// Consume the hasher and return the digest of everything fed into it so far.
        pub fn digest(self) -> Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}
/// A hash-digest produced by a [`Hasher`] hash implementation.
///
/// It is a plain array of 20 bytes.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Digest = [u8; 20];
#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    /// An implementation of the Sha1 hash backed by the `sha1` crate, which can be used once
    /// as producing the digest consumes the hasher.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            Digest::update(&mut self.0, bytes);
        }

        /// Consume the hasher and return the digest of everything fed into it so far.
        pub fn digest(self) -> super::Digest {
            let output = Digest::finalize(self.0);
            output.into()
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use _impl::Sha1 as Hasher;
/// Continue a CRC32 computation with the given `bytes` and return the updated CRC32 value.
///
/// Pass `0` as `previous_value` for the first chunk. For every following chunk, pass the value
/// returned by the preceding call, which yields the hash of all chunks of `bytes` in sequence.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}
/// Return the CRC32 value of `bytes`, computed in one go.
///
/// If the input arrives as multiple chunks of `bytes`, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    // One-shot helper: a fresh hasher, a single update, then finalize.
    crc32fast::hash(bytes)
}
/// Create a fresh [`Hasher`] that computes hashes of the given `kind`.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Hasher {
    // Kept as an exhaustive `match` so that a new hash kind fails to compile here.
    match kind {
        gix_hash::Kind::Sha1 => Default::default(),
    }
}
/// Open the file at `path` and compute the hash of `kind` over its first `num_bytes_from_start` bytes,
/// initializing and advancing `progress` along the way.
///
/// Limiting the hashed range with `num_bytes_from_start` makes it possible to skip a trailing hash,
/// which is never part of the hashed content itself.
///
/// # Note
///
/// * Only available with the `progress` feature and one of the `rustsha1` or `fast-sha1` features enabled.
/// * The [`gix_hash::Kind`] enum selects the hash and a [`gix_hash::ObjectId`] is returned.
/// * [Interrupts][crate::interrupt] are supported by means of `should_interrupt`.
///
/// # Errors
///
/// Fails if the file cannot be opened, or if hashing its content with [`bytes()`] fails.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let mut file = std::fs::File::open(path)?;
    bytes(&mut file, num_bytes_from_start, kind, progress, should_interrupt)
}
/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of the stream `read`
/// instead of opening a file.
///
/// A new hasher for `kind` is created and handed to [`bytes_with_hasher()`], which does the actual work.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let fresh_hasher = hasher(kind);
    bytes_with_hasher(
        read,
        num_bytes_from_start,
        fresh_hasher,
        progress,
        should_interrupt,
    )
}
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
///
/// Exactly `num_bytes_from_start` bytes are read from `read` in chunks of at most `u16::MAX` bytes, and each
/// chunk is fed into `hasher`. `progress` is initialized with the total byte count before the first read,
/// advanced after every chunk, and asked to show the throughput once all bytes were hashed.
///
/// # Errors
///
/// * Any error produced by `read`, including the one `read_exact()` yields if the stream ends before
///   `num_bytes_from_start` bytes were read.
/// * An error of kind `Other` with the message `Interrupted` if `should_interrupt` is found to be set
///   after a chunk was hashed. The flag is checked after every chunk, including the last one.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Hasher,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // init progress before the possibility for failure, as convenience in case people want to recover
    progress.init(
        // NOTE(review): if `Step` is narrower than `u64` this cast truncates; it only feeds the progress display.
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        // Clamp in the `u64` domain *before* narrowing to `usize`. Casting `bytes_left` first would truncate it
        // where `usize` is narrower than 64 bits, and a truncated value of 0 yields an empty chunk that never
        // reduces `bytes_left`, looping forever. After clamping, the value is at most `BUF_SIZE` and always fits.
        let chunk_len = bytes_left.min(BUF_SIZE as u64) as usize;
        let out = &mut buf[..chunk_len];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Hasher;

    /// A writer adapter that hashes every byte that was successfully written to the wrapped writer.
    pub struct Write<T> {
        /// The hasher accumulating all bytes accepted by `inner`.
        pub hash: Hasher,
        /// The wrapped writer receiving the actual data.
        pub inner: T,
    }

    impl<T: std::io::Write> std::io::Write for Write<T> {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let num_written = self.inner.write(buf)?;
            // Only hash what the inner writer actually accepted, as it may take less than all of `buf`.
            let accepted = &buf[..num_written];
            self.hash.update(accepted);
            Ok(num_written)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            // The hasher holds no buffered output, only the inner writer needs flushing.
            self.inner.flush()
        }
    }

    impl<T: std::io::Write> Write<T> {
        /// Wrap `inner` in a new hash writer, which hashes all bytes written to it using a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            let hash = match object_hash {
                gix_hash::Kind::Sha1 => Hasher::default(),
            };
            Write { hash, inner }
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use write::Write;