gix_pack/index/write/
mod.rs

1use std::{io, sync::atomic::AtomicBool};
2
3pub use error::Error;
4use gix_features::progress::{self, prodash::DynNestedProgress, Count, Progress};
5
6use crate::cache::delta::{traverse, Tree};
7
8mod error;
9
/// Per-object data tracked in the delta-tree while building the index.
pub(crate) struct TreeEntry {
    /// The object's id. Starts out as the null hash of the chosen hash kind and is
    /// filled in by `modify_base()` once the object was decoded during tree traversal.
    pub id: gix_hash::ObjectId,
    /// The CRC32 of the entry's bytes in the pack, as computed by the input entry iterator.
    pub crc32: u32,
}
14
/// Information gathered while executing [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The version of the newly written index
    pub index_version: crate::index::Version,
    /// The checksum of the newly written index file
    pub index_hash: gix_hash::ObjectId,

    /// The hash of the '.pack' file, also found in its trailing bytes
    pub data_hash: gix_hash::ObjectId,
    /// The amount of objects written to the index, always the amount of objects in the pack.
    pub num_objects: u32,
}
29
/// The progress ids used in [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream()].
///
/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
    /// Counts the amount of objects that were indexed thus far.
    IndexObjects,
    /// The amount of bytes that were decompressed while decoding pack entries.
    ///
    /// This is done to determine entry boundaries.
    DecompressedBytes,
    /// The amount of objects whose hashes were computed.
    ///
    /// This is done by decoding them, which typically involves decoding delta objects.
    ResolveObjects,
    /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all resolved objects.
    DecodedBytes,
    /// The amount of bytes written to the index file.
    IndexBytesWritten,
}
50
51impl From<ProgressId> for gix_features::progress::Id {
52    fn from(v: ProgressId) -> Self {
53        match v {
54            ProgressId::IndexObjects => *b"IWIO",
55            ProgressId::DecompressedBytes => *b"IWDB",
56            ProgressId::ResolveObjects => *b"IWRO",
57            ProgressId::DecodedBytes => *b"IWDB",
58            ProgressId::IndexBytesWritten => *b"IWBW",
59        }
60    }
61}
62
/// Various ways of writing an index file from pack entries
impl crate::index::File {
    /// Write information about `entries` as obtained from a pack data file into a pack index file via the `out` stream.
    /// The resolver produced by `make_resolver` must resolve pack entries from the same pack data file that produced the
    /// `entries` iterator.
    ///
    /// * `version` is the version of pack index to produce, use [`crate::index::Version::default()`] if in doubt.
    /// * `thread_limit` is used for a parallel tree traversal for obtaining object hashes with optimal performance.
    /// * `root_progress` is the top-level progress to stay informed about the progress of this potentially long-running
    ///    computation.
    /// * `object_hash` defines what kind of object hash we write into the index file.
    /// * `pack_version` is the version of the underlying pack for which `entries` are read. It's used in case none of these objects are provided
    ///    to compute a pack-hash.
    ///
    /// # Remarks
    ///
    /// * neither in-pack nor out-of-pack Ref Deltas are supported here, these must have been resolved beforehand.
    /// * `make_resolver()` will only be called after the iterator stopped returning elements and produces a function that
    ///   provides all bytes belonging to a pack entry writing them to the given mutable output `Vec`.
    ///   It should return `None` if the entry cannot be resolved from the pack that produced the `entries` iterator, causing
    ///   the write operation to fail.
    #[allow(clippy::too_many_arguments)]
    pub fn write_data_iter_to_stream<F, F2, R>(
        version: crate::index::Version,
        make_resolver: F,
        entries: &mut dyn Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
        thread_limit: Option<usize>,
        root_progress: &mut dyn DynNestedProgress,
        out: &mut dyn io::Write,
        should_interrupt: &AtomicBool,
        object_hash: gix_hash::Kind,
        pack_version: crate::data::Version,
    ) -> Result<Outcome, Error>
    where
        F: FnOnce() -> io::Result<(F2, R)>,
        R: Send + Sync,
        F2: for<'r> Fn(crate::data::EntryRange, &'r R) -> Option<&'r [u8]> + Send + Clone,
    {
        // Only the default index version is supported by this implementation.
        if version != crate::index::Version::default() {
            return Err(Error::Unsupported(version));
        }
        let mut num_objects: usize = 0;
        let mut last_seen_trailer = None;
        let (anticipated_num_objects, upper_bound) = entries.size_hint();
        let worst_case_num_objects_after_thin_pack_resolution = upper_bound.unwrap_or(anticipated_num_objects);
        let mut tree = Tree::with_capacity(worst_case_num_objects_after_thin_pack_resolution)?;
        let indexing_start = std::time::Instant::now();

        // Four top-level steps: indexing, resolving, sorting, writing (see `root_progress.inc()` calls below).
        root_progress.init(Some(4), progress::steps());
        let mut objects_progress = root_progress.add_child_with_id("indexing".into(), ProgressId::IndexObjects.into());
        objects_progress.init(Some(anticipated_num_objects), progress::count("objects"));
        let mut decompressed_progress =
            root_progress.add_child_with_id("decompressing".into(), ProgressId::DecompressedBytes.into());
        decompressed_progress.init(None, progress::bytes());
        let mut pack_entries_end: u64 = 0;

        // Phase 1: drain the entry iterator, recording each entry's pack offset and crc32 in a
        // delta-tree. Object ids are placeholders (null hashes) to be filled in during traversal.
        for entry in entries {
            let crate::data::input::Entry {
                header,
                pack_offset,
                crc32,
                header_size,
                compressed: _,
                compressed_size,
                decompressed_size,
                trailer,
            } = entry?;

            decompressed_progress.inc_by(decompressed_size as usize);

            let entry_len = u64::from(header_size) + compressed_size;
            // Track where the last entry ends - the traversal needs it to delimit the final entry.
            pack_entries_end = pack_offset + entry_len;

            let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");

            use crate::data::entry::Header::*;
            match header {
                // Base objects become roots of the delta-tree.
                Tree | Blob | Commit | Tag => {
                    tree.add_root(
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
                // Ref-deltas must have been resolved by the caller beforehand (see Remarks above).
                RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
                OfsDelta { base_distance } => {
                    // The base lives `base_distance` bytes before this entry; reject impossible offsets.
                    let base_pack_offset =
                        crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
                            Error::IteratorInvariantBaseOffset {
                                pack_offset,
                                distance: base_distance,
                            },
                        )?;
                    tree.add_child(
                        base_pack_offset,
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
            };
            last_seen_trailer = trailer;
            num_objects += 1;
            objects_progress.inc();
        }
        // The pack index format stores the object count in 32 bits.
        let num_objects: u32 = num_objects
            .try_into()
            .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;

        objects_progress.show_throughput(indexing_start);
        decompressed_progress.show_throughput(indexing_start);
        drop(objects_progress);
        drop(decompressed_progress);

        root_progress.inc();

        // Phase 2: traverse the delta-tree (possibly in parallel) to decode every object and
        // compute its hash, then sort all entries by object id as the index format requires.
        let (resolver, pack) = make_resolver()?;
        let sorted_pack_offsets_by_oid = {
            let traverse::Outcome { roots, children } = tree.traverse(
                resolver,
                &pack,
                pack_entries_end,
                |data,
                 _progress,
                 traverse::Context {
                     entry,
                     decompressed: bytes,
                     ..
                 }| {
                    // Replace the placeholder null id with the hash of the decoded object.
                    modify_base(data, entry, bytes, version.hash());
                    Ok::<_, Error>(())
                },
                traverse::Options {
                    object_progress: Box::new(
                        root_progress.add_child_with_id("Resolving".into(), ProgressId::ResolveObjects.into()),
                    ),
                    size_progress: &mut root_progress
                        .add_child_with_id("Decoding".into(), ProgressId::DecodedBytes.into()),
                    thread_limit,
                    should_interrupt,
                    object_hash,
                },
            )?;
            root_progress.inc();

            let mut items = roots;
            items.extend(children);
            {
                let _progress =
                    root_progress.add_child_with_id("sorting by id".into(), gix_features::progress::UNKNOWN);
                items.sort_by_key(|e| e.data.id);
            }

            root_progress.inc();
            items
        };

        // The pack checksum is taken from the last entry's trailer. An empty pack has no
        // trailer, so its hash is computed over the encoded pack header alone; a non-empty
        // pack without a trailer violates the iterator's contract.
        let pack_hash = match last_seen_trailer {
            Some(ph) => ph,
            None if num_objects == 0 => {
                let header = crate::data::header::encode(pack_version, 0);
                let mut hasher = gix_features::hash::hasher(object_hash);
                hasher.update(&header);
                gix_hash::ObjectId::from(hasher.digest())
            }
            None => return Err(Error::IteratorInvariantTrailer),
        };
        // Phase 3: write the index itself, obtaining its checksum for the outcome.
        let index_hash = crate::index::encode::write_to(
            out,
            sorted_pack_offsets_by_oid,
            &pack_hash,
            version,
            &mut root_progress.add_child_with_id("writing index file".into(), ProgressId::IndexBytesWritten.into()),
        )?;
        root_progress.show_throughput_with(
            indexing_start,
            num_objects as usize,
            progress::count("objects").expect("unit always set"),
            progress::MessageLevel::Success,
        );
        Ok(Outcome {
            index_version: version,
            index_hash,
            data_hash: pack_hash,
            num_objects,
        })
    }
}
255
256fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) {
257    let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration");
258    let id = gix_object::compute_hash(hash, object_kind, decompressed);
259    entry.id = id;
260}