gix_pack/data/input/
entries_to_bytes.rs

1use std::iter::Peekable;
2
3use gix_features::hash;
4
5use crate::data::input;
6
/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
/// `next()` is called.
///
/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
/// is depleted and compute the hash in one go by re-reading the whole file.
pub struct EntriesToBytesIter<I: Iterator, W> {
    /// An iterator for input [`input::Entry`] instances
    pub input: Peekable<I>,
    /// A way of writing encoded bytes.
    output: W,
    /// Our trailing hash once all input entries are written, `None` while writing is still in progress.
    trailer: Option<gix_hash::ObjectId>,
    /// The version of the packfile to be written.
    // NOTE(review): the old comment here ("amount of objects … Will be `None` to signal the header was
    // written already") described an earlier `Option` representation and no longer matches this type;
    // header-written state is now tracked implicitly via `num_entries == 0`.
    data_version: crate::data::Version,
    /// The amount of entries seen so far
    num_entries: u32,
    /// If we are done, no additional writes will occur
    is_done: bool,
    /// The kind of hash to use for the digest
    object_hash: gix_hash::Kind,
}
29
30impl<I, W> EntriesToBytesIter<I, W>
31where
32    I: Iterator<Item = Result<input::Entry, input::Error>>,
33    W: std::io::Read + std::io::Write + std::io::Seek,
34{
35    /// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to
36    /// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and
37    /// the pack is completed once the last entry was written.
38    /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
39    ///
40    /// # Panics
41    ///
42    /// Not all combinations of `object_hash` and `version` are supported currently triggering assertion errors.
43    pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
44        assert!(
45            matches!(version, crate::data::Version::V2),
46            "currently only pack version 2 can be written",
47        );
48        assert!(
49            matches!(object_hash, gix_hash::Kind::Sha1),
50            "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
51        );
52        EntriesToBytesIter {
53            input: input.peekable(),
54            output,
55            object_hash,
56            num_entries: 0,
57            trailer: None,
58            data_version: version,
59            is_done: false,
60        }
61    }
62
63    /// Returns the trailing hash over all ~ entries once done.
64    /// It's `None` if we are not yet done writing.
65    pub fn digest(&self) -> Option<gix_hash::ObjectId> {
66        self.trailer
67    }
68
69    fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
70        if self.num_entries == 0 {
71            let header_bytes = crate::data::header::encode(self.data_version, 0);
72            self.output.write_all(&header_bytes[..])?;
73        }
74        self.num_entries += 1;
75        entry.header.write_to(entry.decompressed_size, &mut self.output)?;
76        self.output.write_all(
77            entry
78                .compressed
79                .as_deref()
80                .expect("caller must configure generator to keep compressed bytes"),
81        )?;
82        Ok(entry)
83    }
84
85    fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
86        let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
87        let num_bytes_written = if last_entry.is_some() {
88            self.output.stream_position()?
89        } else {
90            header_bytes.len() as u64
91        };
92        self.output.rewind()?;
93        self.output.write_all(&header_bytes[..])?;
94        self.output.flush()?;
95
96        self.output.rewind()?;
97        let interrupt_never = std::sync::atomic::AtomicBool::new(false);
98        let digest = hash::bytes(
99            &mut self.output,
100            num_bytes_written,
101            self.object_hash,
102            &mut gix_features::progress::Discard,
103            &interrupt_never,
104        )?;
105        self.output.write_all(digest.as_slice())?;
106        self.output.flush()?;
107
108        self.is_done = true;
109        if let Some(last_entry) = last_entry {
110            last_entry.trailer = Some(digest);
111        }
112        self.trailer = Some(digest);
113        Ok(())
114    }
115}
116
117impl<I, W> Iterator for EntriesToBytesIter<I, W>
118where
119    I: Iterator<Item = Result<input::Entry, input::Error>>,
120    W: std::io::Read + std::io::Write + std::io::Seek,
121{
122    /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input.
123    type Item = Result<input::Entry, input::Error>;
124
125    fn next(&mut self) -> Option<Self::Item> {
126        if self.is_done {
127            return None;
128        }
129
130        match self.input.next() {
131            Some(res) => Some(match res {
132                Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
133                    if self.input.peek().is_none() {
134                        self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
135                    } else {
136                        Ok(entry)
137                    }
138                }),
139                Err(err) => {
140                    self.is_done = true;
141                    Err(err)
142                }
143            }),
144            None => match self.write_header_and_digest(None) {
145                Ok(_) => None,
146                Err(err) => Some(Err(err)),
147            },
148        }
149    }
150
151    fn size_hint(&self) -> (usize, Option<usize>) {
152        self.input.size_hint()
153    }
154}