1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
use std::iter::Peekable;
use gix_features::hash;
use crate::data::input;
/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
/// `next()` is called.
///
/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
/// is depleted and compute the hash in one go by re-reading the whole file.
pub struct EntriesToBytesIter<I: Iterator, W> {
/// An iterator for input [`input::Entry`] instances
pub input: Peekable<I>,
/// A way of writing encoded bytes.
output: W,
/// Our trailing hash when done writing all input entries
trailer: Option<gix_hash::ObjectId>,
/// The amount of objects in the iteration and the version of the packfile to be written.
/// Will be `None` to signal the header was written already.
data_version: crate::data::Version,
/// The amount of entries seen so far
num_entries: u32,
/// If we are done, no additional writes will occur
is_done: bool,
/// The kind of hash to use for the digest
object_hash: gix_hash::Kind,
}
impl<I, W> EntriesToBytesIter<I, W>
where
I: Iterator<Item = Result<input::Entry, input::Error>>,
W: std::io::Read + std::io::Write + std::io::Seek,
{
/// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to
/// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and
/// the pack is completed once the last entry was written.
/// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
///
/// # Panics
///
/// Not all combinations of `object_hash` and `version` are supported currently triggering assertion errors.
pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
assert!(
matches!(version, crate::data::Version::V2),
"currently only pack version 2 can be written",
);
assert!(
matches!(object_hash, gix_hash::Kind::Sha1),
"currently only Sha1 is supported, right now we don't know how other hashes are encoded",
);
EntriesToBytesIter {
input: input.peekable(),
output,
object_hash,
num_entries: 0,
trailer: None,
data_version: version,
is_done: false,
}
}
/// Returns the trailing hash over all ~ entries once done.
/// It's `None` if we are not yet done writing.
pub fn digest(&self) -> Option<gix_hash::ObjectId> {
self.trailer
}
fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
if self.num_entries == 0 {
let header_bytes = crate::data::header::encode(self.data_version, 0);
self.output.write_all(&header_bytes[..])?;
}
self.num_entries += 1;
entry.header.write_to(entry.decompressed_size, &mut self.output)?;
self.output.write_all(
entry
.compressed
.as_deref()
.expect("caller must configure generator to keep compressed bytes"),
)?;
Ok(entry)
}
fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
let num_bytes_written = if last_entry.is_some() {
self.output.stream_position()?
} else {
header_bytes.len() as u64
};
self.output.rewind()?;
self.output.write_all(&header_bytes[..])?;
self.output.flush()?;
self.output.rewind()?;
let interrupt_never = std::sync::atomic::AtomicBool::new(false);
let digest = hash::bytes(
&mut self.output,
num_bytes_written,
self.object_hash,
&mut gix_features::progress::Discard,
&interrupt_never,
)?;
self.output.write_all(digest.as_slice())?;
self.output.flush()?;
self.is_done = true;
if let Some(last_entry) = last_entry {
last_entry.trailer = Some(digest);
}
self.trailer = Some(digest);
Ok(())
}
}
impl<I, W> Iterator for EntriesToBytesIter<I, W>
where
I: Iterator<Item = Result<input::Entry, input::Error>>,
W: std::io::Read + std::io::Write + std::io::Seek,
{
/// The amount of bytes written to `out` if `Ok` or the error `E` received from the input.
type Item = Result<input::Entry, input::Error>;
fn next(&mut self) -> Option<Self::Item> {
if self.is_done {
return None;
}
match self.input.next() {
Some(res) => Some(match res {
Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
if self.input.peek().is_none() {
self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
} else {
Ok(entry)
}
}),
Err(err) => {
self.is_done = true;
Err(err)
}
}),
None => match self.write_header_and_digest(None) {
Ok(_) => None,
Err(err) => Some(Err(err)),
},
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.input.size_hint()
}
}