1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
use std::{io::Read, sync::atomic::AtomicBool};
use bstr::BStr;
use gix_hash::ObjectId;
use gix_index as index;
use index::Entry;
use crate::index_as_worktree::Error;
/// Compares the content of two blobs in some way.
///
/// An implementation decides how thorough the comparison is and what it reports as
/// [`Output`][CompareBlobs::Output] when a difference was found.
pub trait CompareBlobs {
/// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()].
type Output;
/// Compare the blob referred to by the index `entry` with a file in the worktree of size `worktree_blob_size`.
///
/// * `entry` is the underlying index entry.
/// * `worktree_blob_size` is the size of the file in the worktree.
/// * `data` provides lazy access to the data to compare: either the data of the blob referred to by `entry`,
///   or a stream of the worktree file's bytes, allowing comparisons of the data itself.
/// * `buf` can be used to store additional data, and it can be assumed to be a cleared buffer.
///
/// If this function returns `Ok(None)`, the `entry` and the worktree blob are assumed to be identical.
fn compare_blobs<'a, 'b>(
&mut self,
entry: &gix_index::Entry,
worktree_blob_size: u64,
data: impl ReadData<'a>,
buf: &mut Vec<u8>,
) -> Result<Option<Self::Output>, Error>;
}
/// Determine the status of a submodule, which always indicates that it changed if present.
///
/// "Present" refers to the `Some(_)` value returned by [`status()`][SubmoduleStatus::status()].
pub trait SubmoduleStatus {
/// The status result, describing in which way the submodule changed.
type Output;
/// A custom error that may occur while computing the submodule status.
type Error: std::error::Error + Send + Sync + 'static;
/// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected.
///
/// * `entry` is the index entry of the submodule.
/// * `rela_path` is the path of the submodule, presumably relative to the worktree root (confirm against callers).
///
/// Failures are reported using the implementation-defined [`Error`][SubmoduleStatus::Error] type.
fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result<Option<Self::Output>, Self::Error>;
}
/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files.
///
/// Both methods take `self` by value, so a single instance can serve exactly one of the two requests.
pub trait ReadData<'a> {
/// Returns the contents of this blob.
///
/// This potentially performs IO and other expensive operations
/// and should only be called when necessary.
fn read_blob(self) -> Result<&'a [u8], Error>;
/// Stream a worktree file in such a manner that its content matches what would be put into git.
///
/// The returned [`read_data::Stream`] either exposes a byte buffer directly or has to be read
/// through its `std::io::Read` implementation.
fn stream_worktree_file(self) -> Result<read_data::Stream<'a>, Error>;
}
/// Types related to reading worktree data, see [`Stream`].
pub mod read_data {
    use std::sync::atomic::{AtomicU64, Ordering};
    use gix_filter::pipeline::convert::ToGitOutcome;

    /// A stream with worktree file data.
    ///
    /// The data is either available as a byte buffer via [`as_bytes()`][Stream::as_bytes()],
    /// or has to be pulled through the `std::io::Read` implementation.
    pub struct Stream<'a> {
        /// The source of the data, which is either buffered or streamed.
        pub(crate) inner: ToGitOutcome<'a, std::fs::File>,
        /// If set, the counter to which the amount of bytes handed out is added.
        pub(crate) bytes: Option<&'a AtomicU64>,
        /// The size of the stream in bytes, if it is known in advance.
        pub(crate) len: Option<u64>,
    }

    impl<'a> Stream<'a> {
        /// Add `count` to the byte counter, if one was configured.
        fn add_bytes_read(&self, count: usize) {
            if let Some(counter) = self.bytes {
                counter.fetch_add(count as u64, Ordering::Relaxed);
            }
        }

        /// Return the underlying byte-buffer if there is one.
        ///
        /// If `None`, read from this instance like a stream.
        /// Every call that yields a buffer adds the buffer's length to the byte counter,
        /// so this method should only be called once to assure proper accounting of the amount of bytes read.
        pub fn as_bytes(&self) -> Option<&'a [u8]> {
            let buffer = self.inner.as_bytes()?;
            self.add_bytes_read(buffer.len());
            Some(buffer)
        }

        /// Return the size of the stream in bytes if it is known in advance.
        pub fn size(&self) -> Option<u64> {
            self.len
        }
    }

    impl<'a> std::io::Read for Stream<'a> {
        /// Read from the underlying data source and account for the bytes that were actually read.
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            let count = self.inner.read(buf)?;
            self.add_bytes_read(count);
            Ok(count)
        }
    }
}
/// Compares two blobs by comparing their size and oid, and only looks at the file if
/// the size matches, therefore it's very fast.
///
/// If the sizes do not rule out equality, the comparison is delegated to [`HashEq`].
#[derive(Clone)]
pub struct FastEq;

impl CompareBlobs for FastEq {
    type Output = ();

    // TODO: make all streaming IOPs interruptible.
    /// Return `Ok(Some(()))` if the worktree file differs from `entry`, or `Ok(None)` if they are considered equal.
    fn compare_blobs<'a, 'b>(
        &mut self,
        entry: &Entry,
        worktree_file_size: u64,
        data: impl ReadData<'a>,
        buf: &mut Vec<u8>,
    ) -> Result<Option<Self::Output>, Error> {
        // Racily smudged entries must be accounted for, or else they would keep showing up as
        // modified even after their contents changed again, possibly back to an unmodified state.
        // Hence a recorded size of 0 is only trusted if the entry actually refers to the empty blob.
        let size_differs = entry.stat.size as u64 != worktree_file_size;
        let recorded_size_is_trustworthy = entry.stat.size != 0 || entry.id.is_empty_blob();
        if size_differs && recorded_size_is_trustworthy {
            return Ok(Some(()));
        }

        // Sizes cannot tell the blobs apart, so let the hash decide and discard the computed id.
        let changed = HashEq.compare_blobs(entry, worktree_file_size, data, buf)?;
        Ok(changed.map(|_id| ()))
    }
}
/// Compares files to blobs by *always* comparing their hashes.
///
/// Same as [`FastEq`] but does not contain a fast path for files with mismatched sizes and
/// therefore always returns an OID that can be reused later.
#[derive(Clone)]
pub struct HashEq;

impl CompareBlobs for HashEq {
    type Output = ObjectId;

    /// Hash the worktree file as blob and return `Ok(Some(hash))` if it differs from the id of `entry`,
    /// or `Ok(None)` if both are equal.
    ///
    /// `buf` is only used if the worktree data has to be streamed and its size is not known in advance.
    fn compare_blobs<'a, 'b>(
        &mut self,
        entry: &Entry,
        _worktree_blob_size: u64,
        data: impl ReadData<'a>,
        buf: &mut Vec<u8>,
    ) -> Result<Option<Self::Output>, Error> {
        let mut stream = data.stream_worktree_file()?;
        let hash_kind = entry.id.kind();

        let file_hash = if let Some(buffer) = stream.as_bytes() {
            // The data is already in memory, hash it directly.
            gix_object::compute_hash(hash_kind, gix_object::Kind::Blob, buffer)
        } else if let Some(len) = stream.size() {
            // The size is known, so the stream can be hashed without buffering all of it.
            gix_object::compute_stream_hash(
                hash_kind,
                gix_object::Kind::Blob,
                &mut stream,
                len,
                &mut gix_features::progress::Discard,
                &AtomicBool::default(),
            )?
        } else {
            // Without a known size, everything has to be read into `buf` before it can be hashed.
            stream.read_to_end(buf)?;
            gix_object::compute_hash(hash_kind, gix_object::Kind::Blob, buf)
        };

        Ok((entry.id != file_hash).then_some(file_hash))
    }
}