1use std::{cell::RefCell, sync::atomic::AtomicBool};
2
3use gix_features::parallel;
4use gix_hash::ObjectId;
5
6use crate::data::output;
7
8pub(in crate::data::output::count::objects_impl) mod reduce;
9mod util;
10
11mod types;
12pub use types::{Error, ObjectExpansion, Options, Outcome};
13
14mod tree;
15
16pub fn objects<Find>(
33 db: Find,
34 objects_ids: Box<dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>> + Send>,
35 objects: &dyn gix_features::progress::Count,
36 should_interrupt: &AtomicBool,
37 Options {
38 thread_limit,
39 input_object_expansion,
40 chunk_size,
41 }: Options,
42) -> Result<(Vec<output::Count>, Outcome), Error>
43where
44 Find: crate::Find + Send + Clone,
45{
46 let lower_bound = objects_ids.size_hint().0;
47 let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
48 chunk_size,
49 if lower_bound == 0 { None } else { Some(lower_bound) },
50 thread_limit,
51 None,
52 );
53 let chunks = gix_features::iter::Chunks {
54 inner: objects_ids,
55 size: chunk_size,
56 };
57 let seen_objs = gix_hashtable::sync::ObjectIdMap::default();
58 let objects = objects.counter();
59
60 parallel::in_parallel(
61 chunks,
62 thread_limit,
63 {
64 move |_| {
65 (
66 Vec::new(), Vec::new(), objects.clone(),
69 )
70 }
71 },
72 {
73 let seen_objs = &seen_objs;
74 move |oids: Vec<_>, (buf1, buf2, objects)| {
75 expand::this(
76 &db,
77 input_object_expansion,
78 seen_objs,
79 &mut oids.into_iter(),
80 buf1,
81 buf2,
82 objects,
83 should_interrupt,
84 true, )
86 }
87 },
88 reduce::Statistics::new(),
89 )
90}
91
92pub fn objects_unthreaded(
94 db: &dyn crate::Find,
95 object_ids: &mut dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>>,
96 objects: &dyn gix_features::progress::Count,
97 should_interrupt: &AtomicBool,
98 input_object_expansion: ObjectExpansion,
99) -> Result<(Vec<output::Count>, Outcome), Error> {
100 let seen_objs = RefCell::new(gix_hashtable::HashSet::default());
101
102 let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
103 expand::this(
104 db,
105 input_object_expansion,
106 &seen_objs,
107 object_ids,
108 &mut buf1,
109 &mut buf2,
110 &objects.counter(),
111 should_interrupt,
112 false, )
114}
115
116mod expand {
117 use std::{
118 cell::RefCell,
119 sync::atomic::{AtomicBool, Ordering},
120 };
121
122 use gix_hash::{oid, ObjectId};
123 use gix_object::{CommitRefIter, Data, TagRefIter};
124
125 use super::{
126 tree,
127 types::{Error, ObjectExpansion, Outcome},
128 util,
129 };
130 use crate::{
131 data::{output, output::count::PackLocation},
132 FindExt,
133 };
134
135 #[allow(clippy::too_many_arguments)]
136 pub fn this(
137 db: &dyn crate::Find,
138 input_object_expansion: ObjectExpansion,
139 seen_objs: &impl util::InsertImmutable,
140 oids: &mut dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>>,
141 buf1: &mut Vec<u8>,
142 #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
143 objects: &gix_features::progress::AtomicStep,
144 should_interrupt: &AtomicBool,
145 allow_pack_lookups: bool,
146 ) -> Result<(Vec<output::Count>, Outcome), Error> {
147 use ObjectExpansion::*;
148
149 let mut out = Vec::new();
150 let mut tree_traversal_state = gix_traverse::tree::breadthfirst::State::default();
151 let mut tree_diff_state = gix_diff::tree::State::default();
152 let mut parent_commit_ids = Vec::new();
153 let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
154 let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
155 let mut outcome = Outcome::default();
156
157 let stats = &mut outcome;
158 for id in oids {
159 if should_interrupt.load(Ordering::Relaxed) {
160 return Err(Error::Interrupted);
161 }
162
163 let id = id.map_err(Error::InputIteration)?;
164 let (obj, location) = db.find(&id, buf1)?;
165 stats.input_objects += 1;
166 match input_object_expansion {
167 TreeAdditionsComparedToAncestor => {
168 use gix_object::Kind::*;
169 let mut obj = obj;
170 let mut location = location;
171 let mut id = id.to_owned();
172
173 loop {
174 push_obj_count_unique(&mut out, seen_objs, &id, location, objects, stats, false);
175 match obj.kind {
176 Tree | Blob => break,
177 Tag => {
178 id = TagRefIter::from_bytes(obj.data)
179 .target_id()
180 .expect("every tag has a target");
181 let tmp = db.find(&id, buf1)?;
182
183 obj = tmp.0;
184 location = tmp.1;
185
186 stats.expanded_objects += 1;
187 continue;
188 }
189 Commit => {
190 let current_tree_iter = {
191 let mut commit_iter = CommitRefIter::from_bytes(obj.data);
192 let tree_id = commit_iter.tree_id().expect("every commit has a tree");
193 parent_commit_ids.clear();
194 for token in commit_iter {
195 match token {
196 Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
197 parent_commit_ids.push(id);
198 }
199 Ok(_) => break,
200 Err(err) => return Err(Error::CommitDecode(err)),
201 }
202 }
203 let (obj, location) = db.find(&tree_id, buf1)?;
204 push_obj_count_unique(
205 &mut out, seen_objs, &tree_id, location, objects, stats, true,
206 );
207 gix_object::TreeRefIter::from_bytes(obj.data)
208 };
209
210 let objects_ref = if parent_commit_ids.is_empty() {
211 traverse_delegate.clear();
212 let objects = ExpandedCountingObjects::new(db, out, objects);
213 gix_traverse::tree::breadthfirst(
214 current_tree_iter,
215 &mut tree_traversal_state,
216 &objects,
217 &mut traverse_delegate,
218 )
219 .map_err(Error::TreeTraverse)?;
220 out = objects.dissolve(stats);
221 &traverse_delegate.non_trees
222 } else {
223 for commit_id in &parent_commit_ids {
224 let parent_tree_id = {
225 let (parent_commit_obj, location) = db.find(commit_id, buf2)?;
226
227 push_obj_count_unique(
228 &mut out, seen_objs, commit_id, location, objects, stats, true,
229 );
230 CommitRefIter::from_bytes(parent_commit_obj.data)
231 .tree_id()
232 .expect("every commit has a tree")
233 };
234 let parent_tree = {
235 let (parent_tree_obj, location) = db.find(&parent_tree_id, buf2)?;
236 push_obj_count_unique(
237 &mut out,
238 seen_objs,
239 &parent_tree_id,
240 location,
241 objects,
242 stats,
243 true,
244 );
245 gix_object::TreeRefIter::from_bytes(parent_tree_obj.data)
246 };
247
248 changes_delegate.clear();
249 let objects = CountingObjects::new(db);
250 gix_diff::tree(
251 parent_tree,
252 current_tree_iter,
253 &mut tree_diff_state,
254 &objects,
255 &mut changes_delegate,
256 )
257 .map_err(Error::TreeChanges)?;
258 stats.decoded_objects += objects.into_count();
259 }
260 &changes_delegate.objects
261 };
262 for id in objects_ref.iter() {
263 out.push(id_to_count(db, buf2, id, objects, stats, allow_pack_lookups));
264 }
265 break;
266 }
267 }
268 }
269 }
270 TreeContents => {
271 use gix_object::Kind::*;
272 let mut id = id;
273 let mut obj = (obj, location);
274 loop {
275 push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), objects, stats, false);
276 match obj.0.kind {
277 Tree => {
278 traverse_delegate.clear();
279 {
280 let objects = ExpandedCountingObjects::new(db, out, objects);
281 gix_traverse::tree::breadthfirst(
282 gix_object::TreeRefIter::from_bytes(obj.0.data),
283 &mut tree_traversal_state,
284 &objects,
285 &mut traverse_delegate,
286 )
287 .map_err(Error::TreeTraverse)?;
288 out = objects.dissolve(stats);
289 }
290 for id in &traverse_delegate.non_trees {
291 out.push(id_to_count(db, buf1, id, objects, stats, allow_pack_lookups));
292 }
293 break;
294 }
295 Commit => {
296 id = CommitRefIter::from_bytes(obj.0.data)
297 .tree_id()
298 .expect("every commit has a tree");
299 stats.expanded_objects += 1;
300 obj = db.find(&id, buf1)?;
301 continue;
302 }
303 Blob => break,
304 Tag => {
305 id = TagRefIter::from_bytes(obj.0.data)
306 .target_id()
307 .expect("every tag has a target");
308 stats.expanded_objects += 1;
309 obj = db.find(&id, buf1)?;
310 continue;
311 }
312 }
313 }
314 }
315 AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, objects, stats, false),
316 }
317 }
318 outcome.total_objects = out.len();
319 Ok((out, outcome))
320 }
321
322 #[inline]
323 fn push_obj_count_unique(
324 out: &mut Vec<output::Count>,
325 all_seen: &impl util::InsertImmutable,
326 id: &oid,
327 location: Option<crate::data::entry::Location>,
328 objects: &gix_features::progress::AtomicStep,
329 statistics: &mut Outcome,
330 count_expanded: bool,
331 ) {
332 let inserted = all_seen.insert(id.to_owned());
333 if inserted {
334 objects.fetch_add(1, Ordering::Relaxed);
335 statistics.decoded_objects += 1;
336 if count_expanded {
337 statistics.expanded_objects += 1;
338 }
339 out.push(output::Count::from_data(id, location));
340 }
341 }
342
343 #[inline]
344 fn id_to_count(
345 db: &dyn crate::Find,
346 buf: &mut Vec<u8>,
347 id: &oid,
348 objects: &gix_features::progress::AtomicStep,
349 statistics: &mut Outcome,
350 allow_pack_lookups: bool,
351 ) -> output::Count {
352 objects.fetch_add(1, Ordering::Relaxed);
353 statistics.expanded_objects += 1;
354 output::Count {
355 id: id.to_owned(),
356 entry_pack_location: if allow_pack_lookups {
357 PackLocation::LookedUp(db.location_by_oid(id, buf))
358 } else {
359 PackLocation::NotLookedUp
360 },
361 }
362 }
363
364 struct CountingObjects<'a> {
365 decoded_objects: std::cell::RefCell<usize>,
366 objects: &'a dyn crate::Find,
367 }
368
369 impl<'a> CountingObjects<'a> {
370 fn new(objects: &'a dyn crate::Find) -> Self {
371 Self {
372 decoded_objects: Default::default(),
373 objects,
374 }
375 }
376
377 fn into_count(self) -> usize {
378 self.decoded_objects.into_inner()
379 }
380 }
381
382 impl gix_object::Find for CountingObjects<'_> {
383 fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, gix_object::find::Error> {
384 let res = Ok(self.objects.try_find(id, buffer)?.map(|t| t.0));
385 *self.decoded_objects.borrow_mut() += 1;
386 res
387 }
388 }
389
390 struct ExpandedCountingObjects<'a> {
391 decoded_objects: std::cell::RefCell<usize>,
392 expanded_objects: std::cell::RefCell<usize>,
393 out: std::cell::RefCell<Vec<output::Count>>,
394 objects_count: &'a gix_features::progress::AtomicStep,
395 objects: &'a dyn crate::Find,
396 }
397
398 impl<'a> ExpandedCountingObjects<'a> {
399 fn new(
400 objects: &'a dyn crate::Find,
401 out: Vec<output::Count>,
402 objects_count: &'a gix_features::progress::AtomicStep,
403 ) -> Self {
404 Self {
405 decoded_objects: Default::default(),
406 expanded_objects: Default::default(),
407 out: RefCell::new(out),
408 objects_count,
409 objects,
410 }
411 }
412
413 fn dissolve(self, stats: &mut Outcome) -> Vec<output::Count> {
414 stats.decoded_objects += self.decoded_objects.into_inner();
415 stats.expanded_objects += self.expanded_objects.into_inner();
416 self.out.into_inner()
417 }
418 }
419
420 impl gix_object::Find for ExpandedCountingObjects<'_> {
421 fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, gix_object::find::Error> {
422 let maybe_obj = self.objects.try_find(id, buffer)?;
423 *self.decoded_objects.borrow_mut() += 1;
424 match maybe_obj {
425 None => Ok(None),
426 Some((obj, location)) => {
427 self.objects_count.fetch_add(1, Ordering::Relaxed);
428 *self.expanded_objects.borrow_mut() += 1;
429 self.out.borrow_mut().push(output::Count::from_data(id, location));
430 Ok(Some(obj))
431 }
432 }
433 }
434 }
435}