1use std::{io, sync::atomic::AtomicBool};
2
3pub use error::Error;
4use gix_features::progress::{self, prodash::DynNestedProgress, Count, Progress};
5
6use crate::cache::delta::{traverse, Tree};
7
8mod error;
9
10pub(crate) struct TreeEntry {
11 pub id: gix_hash::ObjectId,
12 pub crc32: u32,
13}
14
15#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
17#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
18pub struct Outcome {
19 pub index_version: crate::index::Version,
21 pub index_hash: gix_hash::ObjectId,
23
24 pub data_hash: gix_hash::ObjectId,
26 pub num_objects: u32,
28}
29
/// Identifies the child progress items created by `crate::index::File::write_data_iter_to_stream()`.
#[derive(Clone, Copy, Debug)]
pub enum ProgressId {
    /// The "indexing" progress, incremented once per pack entry read from the input iterator.
    IndexObjects,
    /// The "decompressing" progress, a byte counter advanced by the decompressed size of each entry.
    DecompressedBytes,
    /// The "Resolving" progress, handed to the delta-tree traversal as its object progress.
    ResolveObjects,
    /// The "Decoding" progress, handed to the delta-tree traversal as its size progress.
    DecodedBytes,
    /// The "writing index file" progress, handed to the index encoder.
    IndexBytesWritten,
}
50
51impl From<ProgressId> for gix_features::progress::Id {
52 fn from(v: ProgressId) -> Self {
53 match v {
54 ProgressId::IndexObjects => *b"IWIO",
55 ProgressId::DecompressedBytes => *b"IWDB",
56 ProgressId::ResolveObjects => *b"IWRO",
57 ProgressId::DecodedBytes => *b"IWDB",
58 ProgressId::IndexBytesWritten => *b"IWBW",
59 }
60 }
61}
62
63impl crate::index::File {
65 #[allow(clippy::too_many_arguments)]
85 pub fn write_data_iter_to_stream<F, F2, R>(
86 version: crate::index::Version,
87 make_resolver: F,
88 entries: &mut dyn Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
89 thread_limit: Option<usize>,
90 root_progress: &mut dyn DynNestedProgress,
91 out: &mut dyn io::Write,
92 should_interrupt: &AtomicBool,
93 object_hash: gix_hash::Kind,
94 pack_version: crate::data::Version,
95 ) -> Result<Outcome, Error>
96 where
97 F: FnOnce() -> io::Result<(F2, R)>,
98 R: Send + Sync,
99 F2: for<'r> Fn(crate::data::EntryRange, &'r R) -> Option<&'r [u8]> + Send + Clone,
100 {
101 if version != crate::index::Version::default() {
102 return Err(Error::Unsupported(version));
103 }
104 let mut num_objects: usize = 0;
105 let mut last_seen_trailer = None;
106 let (anticipated_num_objects, upper_bound) = entries.size_hint();
107 let worst_case_num_objects_after_thin_pack_resolution = upper_bound.unwrap_or(anticipated_num_objects);
108 let mut tree = Tree::with_capacity(worst_case_num_objects_after_thin_pack_resolution)?;
109 let indexing_start = std::time::Instant::now();
110
111 root_progress.init(Some(4), progress::steps());
112 let mut objects_progress = root_progress.add_child_with_id("indexing".into(), ProgressId::IndexObjects.into());
113 objects_progress.init(Some(anticipated_num_objects), progress::count("objects"));
114 let mut decompressed_progress =
115 root_progress.add_child_with_id("decompressing".into(), ProgressId::DecompressedBytes.into());
116 decompressed_progress.init(None, progress::bytes());
117 let mut pack_entries_end: u64 = 0;
118
119 for entry in entries {
120 let crate::data::input::Entry {
121 header,
122 pack_offset,
123 crc32,
124 header_size,
125 compressed: _,
126 compressed_size,
127 decompressed_size,
128 trailer,
129 } = entry?;
130
131 decompressed_progress.inc_by(decompressed_size as usize);
132
133 let entry_len = u64::from(header_size) + compressed_size;
134 pack_entries_end = pack_offset + entry_len;
135
136 let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");
137
138 use crate::data::entry::Header::*;
139 match header {
140 Tree | Blob | Commit | Tag => {
141 tree.add_root(
142 pack_offset,
143 TreeEntry {
144 id: object_hash.null(),
145 crc32,
146 },
147 )?;
148 }
149 RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
150 OfsDelta { base_distance } => {
151 let base_pack_offset =
152 crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
153 Error::IteratorInvariantBaseOffset {
154 pack_offset,
155 distance: base_distance,
156 },
157 )?;
158 tree.add_child(
159 base_pack_offset,
160 pack_offset,
161 TreeEntry {
162 id: object_hash.null(),
163 crc32,
164 },
165 )?;
166 }
167 };
168 last_seen_trailer = trailer;
169 num_objects += 1;
170 objects_progress.inc();
171 }
172 let num_objects: u32 = num_objects
173 .try_into()
174 .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;
175
176 objects_progress.show_throughput(indexing_start);
177 decompressed_progress.show_throughput(indexing_start);
178 drop(objects_progress);
179 drop(decompressed_progress);
180
181 root_progress.inc();
182
183 let (resolver, pack) = make_resolver()?;
184 let sorted_pack_offsets_by_oid = {
185 let traverse::Outcome { roots, children } = tree.traverse(
186 resolver,
187 &pack,
188 pack_entries_end,
189 |data,
190 _progress,
191 traverse::Context {
192 entry,
193 decompressed: bytes,
194 ..
195 }| {
196 modify_base(data, entry, bytes, version.hash());
197 Ok::<_, Error>(())
198 },
199 traverse::Options {
200 object_progress: Box::new(
201 root_progress.add_child_with_id("Resolving".into(), ProgressId::ResolveObjects.into()),
202 ),
203 size_progress: &mut root_progress
204 .add_child_with_id("Decoding".into(), ProgressId::DecodedBytes.into()),
205 thread_limit,
206 should_interrupt,
207 object_hash,
208 },
209 )?;
210 root_progress.inc();
211
212 let mut items = roots;
213 items.extend(children);
214 {
215 let _progress =
216 root_progress.add_child_with_id("sorting by id".into(), gix_features::progress::UNKNOWN);
217 items.sort_by_key(|e| e.data.id);
218 }
219
220 root_progress.inc();
221 items
222 };
223
224 let pack_hash = match last_seen_trailer {
225 Some(ph) => ph,
226 None if num_objects == 0 => {
227 let header = crate::data::header::encode(pack_version, 0);
228 let mut hasher = gix_features::hash::hasher(object_hash);
229 hasher.update(&header);
230 gix_hash::ObjectId::from(hasher.digest())
231 }
232 None => return Err(Error::IteratorInvariantTrailer),
233 };
234 let index_hash = crate::index::encode::write_to(
235 out,
236 sorted_pack_offsets_by_oid,
237 &pack_hash,
238 version,
239 &mut root_progress.add_child_with_id("writing index file".into(), ProgressId::IndexBytesWritten.into()),
240 )?;
241 root_progress.show_throughput_with(
242 indexing_start,
243 num_objects as usize,
244 progress::count("objects").expect("unit always set"),
245 progress::MessageLevel::Success,
246 );
247 Ok(Outcome {
248 index_version: version,
249 index_hash,
250 data_hash: pack_hash,
251 num_objects,
252 })
253 }
254}
255
256fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) {
257 let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration");
258 let id = gix_object::compute_hash(hash, object_kind, decompressed);
259 entry.id = id;
260}