gix_pack/data/output/count/objects/
mod.rs

1use std::{cell::RefCell, sync::atomic::AtomicBool};
2
3use gix_features::parallel;
4use gix_hash::ObjectId;
5
6use crate::data::output;
7
8pub(in crate::data::output::count::objects_impl) mod reduce;
9mod util;
10
11mod types;
12pub use types::{Error, ObjectExpansion, Options, Outcome};
13
14mod tree;
15
16/// Generate [`Count`][output::Count]s from input `objects` with object expansion based on [`options`][Options]
17/// to learn which objects would would constitute a pack. This step is required to know exactly how many objects would
18/// be in a pack while keeping data around to avoid minimize object database access.
19///
20/// A [`Count`][output::Count] object maintains enough state to greatly accelerate future access of packed objects.
21///
22/// * `db` - the object store to use for accessing objects.
23/// * `objects_ids`
24///   * A list of objects ids to add to the pack. Duplication checks are performed so no object is ever added to a pack twice.
25///   * Objects may be expanded based on the provided [`options`][Options]
26/// * `objects`
27///   * count the amount of objects we encounter
28/// * `should_interrupt`
29///  * A flag that is set to true if the operation should stop
30/// * `options`
31///   * more configuration
32pub fn objects<Find>(
33    db: Find,
34    objects_ids: Box<dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>> + Send>,
35    objects: &dyn gix_features::progress::Count,
36    should_interrupt: &AtomicBool,
37    Options {
38        thread_limit,
39        input_object_expansion,
40        chunk_size,
41    }: Options,
42) -> Result<(Vec<output::Count>, Outcome), Error>
43where
44    Find: crate::Find + Send + Clone,
45{
46    let lower_bound = objects_ids.size_hint().0;
47    let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
48        chunk_size,
49        if lower_bound == 0 { None } else { Some(lower_bound) },
50        thread_limit,
51        None,
52    );
53    let chunks = gix_features::iter::Chunks {
54        inner: objects_ids,
55        size: chunk_size,
56    };
57    let seen_objs = gix_hashtable::sync::ObjectIdMap::default();
58    let objects = objects.counter();
59
60    parallel::in_parallel(
61        chunks,
62        thread_limit,
63        {
64            move |_| {
65                (
66                    Vec::new(), // object data buffer
67                    Vec::new(), // object data buffer 2 to hold two objects at a time
68                    objects.clone(),
69                )
70            }
71        },
72        {
73            let seen_objs = &seen_objs;
74            move |oids: Vec<_>, (buf1, buf2, objects)| {
75                expand::this(
76                    &db,
77                    input_object_expansion,
78                    seen_objs,
79                    &mut oids.into_iter(),
80                    buf1,
81                    buf2,
82                    objects,
83                    should_interrupt,
84                    true, /*allow pack lookups*/
85                )
86            }
87        },
88        reduce::Statistics::new(),
89    )
90}
91
92/// Like [`objects()`] but using a single thread only to mostly save on the otherwise required overhead.
93pub fn objects_unthreaded(
94    db: &dyn crate::Find,
95    object_ids: &mut dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>>,
96    objects: &dyn gix_features::progress::Count,
97    should_interrupt: &AtomicBool,
98    input_object_expansion: ObjectExpansion,
99) -> Result<(Vec<output::Count>, Outcome), Error> {
100    let seen_objs = RefCell::new(gix_hashtable::HashSet::default());
101
102    let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
103    expand::this(
104        db,
105        input_object_expansion,
106        &seen_objs,
107        object_ids,
108        &mut buf1,
109        &mut buf2,
110        &objects.counter(),
111        should_interrupt,
112        false, /*allow pack lookups*/
113    )
114}
115
116mod expand {
117    use std::{
118        cell::RefCell,
119        sync::atomic::{AtomicBool, Ordering},
120    };
121
122    use gix_hash::{oid, ObjectId};
123    use gix_object::{CommitRefIter, Data, TagRefIter};
124
125    use super::{
126        tree,
127        types::{Error, ObjectExpansion, Outcome},
128        util,
129    };
130    use crate::{
131        data::{output, output::count::PackLocation},
132        FindExt,
133    };
134
135    #[allow(clippy::too_many_arguments)]
136    pub fn this(
137        db: &dyn crate::Find,
138        input_object_expansion: ObjectExpansion,
139        seen_objs: &impl util::InsertImmutable,
140        oids: &mut dyn Iterator<Item = Result<ObjectId, Box<dyn std::error::Error + Send + Sync + 'static>>>,
141        buf1: &mut Vec<u8>,
142        #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
143        objects: &gix_features::progress::AtomicStep,
144        should_interrupt: &AtomicBool,
145        allow_pack_lookups: bool,
146    ) -> Result<(Vec<output::Count>, Outcome), Error> {
147        use ObjectExpansion::*;
148
149        let mut out = Vec::new();
150        let mut tree_traversal_state = gix_traverse::tree::breadthfirst::State::default();
151        let mut tree_diff_state = gix_diff::tree::State::default();
152        let mut parent_commit_ids = Vec::new();
153        let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
154        let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
155        let mut outcome = Outcome::default();
156
157        let stats = &mut outcome;
158        for id in oids {
159            if should_interrupt.load(Ordering::Relaxed) {
160                return Err(Error::Interrupted);
161            }
162
163            let id = id.map_err(Error::InputIteration)?;
164            let (obj, location) = db.find(&id, buf1)?;
165            stats.input_objects += 1;
166            match input_object_expansion {
167                TreeAdditionsComparedToAncestor => {
168                    use gix_object::Kind::*;
169                    let mut obj = obj;
170                    let mut location = location;
171                    let mut id = id.to_owned();
172
173                    loop {
174                        push_obj_count_unique(&mut out, seen_objs, &id, location, objects, stats, false);
175                        match obj.kind {
176                            Tree | Blob => break,
177                            Tag => {
178                                id = TagRefIter::from_bytes(obj.data)
179                                    .target_id()
180                                    .expect("every tag has a target");
181                                let tmp = db.find(&id, buf1)?;
182
183                                obj = tmp.0;
184                                location = tmp.1;
185
186                                stats.expanded_objects += 1;
187                                continue;
188                            }
189                            Commit => {
190                                let current_tree_iter = {
191                                    let mut commit_iter = CommitRefIter::from_bytes(obj.data);
192                                    let tree_id = commit_iter.tree_id().expect("every commit has a tree");
193                                    parent_commit_ids.clear();
194                                    for token in commit_iter {
195                                        match token {
196                                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
197                                                parent_commit_ids.push(id);
198                                            }
199                                            Ok(_) => break,
200                                            Err(err) => return Err(Error::CommitDecode(err)),
201                                        }
202                                    }
203                                    let (obj, location) = db.find(&tree_id, buf1)?;
204                                    push_obj_count_unique(
205                                        &mut out, seen_objs, &tree_id, location, objects, stats, true,
206                                    );
207                                    gix_object::TreeRefIter::from_bytes(obj.data)
208                                };
209
210                                let objects_ref = if parent_commit_ids.is_empty() {
211                                    traverse_delegate.clear();
212                                    let objects = ExpandedCountingObjects::new(db, out, objects);
213                                    gix_traverse::tree::breadthfirst(
214                                        current_tree_iter,
215                                        &mut tree_traversal_state,
216                                        &objects,
217                                        &mut traverse_delegate,
218                                    )
219                                    .map_err(Error::TreeTraverse)?;
220                                    out = objects.dissolve(stats);
221                                    &traverse_delegate.non_trees
222                                } else {
223                                    for commit_id in &parent_commit_ids {
224                                        let parent_tree_id = {
225                                            let (parent_commit_obj, location) = db.find(commit_id, buf2)?;
226
227                                            push_obj_count_unique(
228                                                &mut out, seen_objs, commit_id, location, objects, stats, true,
229                                            );
230                                            CommitRefIter::from_bytes(parent_commit_obj.data)
231                                                .tree_id()
232                                                .expect("every commit has a tree")
233                                        };
234                                        let parent_tree = {
235                                            let (parent_tree_obj, location) = db.find(&parent_tree_id, buf2)?;
236                                            push_obj_count_unique(
237                                                &mut out,
238                                                seen_objs,
239                                                &parent_tree_id,
240                                                location,
241                                                objects,
242                                                stats,
243                                                true,
244                                            );
245                                            gix_object::TreeRefIter::from_bytes(parent_tree_obj.data)
246                                        };
247
248                                        changes_delegate.clear();
249                                        let objects = CountingObjects::new(db);
250                                        gix_diff::tree(
251                                            parent_tree,
252                                            current_tree_iter,
253                                            &mut tree_diff_state,
254                                            &objects,
255                                            &mut changes_delegate,
256                                        )
257                                        .map_err(Error::TreeChanges)?;
258                                        stats.decoded_objects += objects.into_count();
259                                    }
260                                    &changes_delegate.objects
261                                };
262                                for id in objects_ref.iter() {
263                                    out.push(id_to_count(db, buf2, id, objects, stats, allow_pack_lookups));
264                                }
265                                break;
266                            }
267                        }
268                    }
269                }
270                TreeContents => {
271                    use gix_object::Kind::*;
272                    let mut id = id;
273                    let mut obj = (obj, location);
274                    loop {
275                        push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), objects, stats, false);
276                        match obj.0.kind {
277                            Tree => {
278                                traverse_delegate.clear();
279                                {
280                                    let objects = ExpandedCountingObjects::new(db, out, objects);
281                                    gix_traverse::tree::breadthfirst(
282                                        gix_object::TreeRefIter::from_bytes(obj.0.data),
283                                        &mut tree_traversal_state,
284                                        &objects,
285                                        &mut traverse_delegate,
286                                    )
287                                    .map_err(Error::TreeTraverse)?;
288                                    out = objects.dissolve(stats);
289                                }
290                                for id in &traverse_delegate.non_trees {
291                                    out.push(id_to_count(db, buf1, id, objects, stats, allow_pack_lookups));
292                                }
293                                break;
294                            }
295                            Commit => {
296                                id = CommitRefIter::from_bytes(obj.0.data)
297                                    .tree_id()
298                                    .expect("every commit has a tree");
299                                stats.expanded_objects += 1;
300                                obj = db.find(&id, buf1)?;
301                                continue;
302                            }
303                            Blob => break,
304                            Tag => {
305                                id = TagRefIter::from_bytes(obj.0.data)
306                                    .target_id()
307                                    .expect("every tag has a target");
308                                stats.expanded_objects += 1;
309                                obj = db.find(&id, buf1)?;
310                                continue;
311                            }
312                        }
313                    }
314                }
315                AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, objects, stats, false),
316            }
317        }
318        outcome.total_objects = out.len();
319        Ok((out, outcome))
320    }
321
322    #[inline]
323    fn push_obj_count_unique(
324        out: &mut Vec<output::Count>,
325        all_seen: &impl util::InsertImmutable,
326        id: &oid,
327        location: Option<crate::data::entry::Location>,
328        objects: &gix_features::progress::AtomicStep,
329        statistics: &mut Outcome,
330        count_expanded: bool,
331    ) {
332        let inserted = all_seen.insert(id.to_owned());
333        if inserted {
334            objects.fetch_add(1, Ordering::Relaxed);
335            statistics.decoded_objects += 1;
336            if count_expanded {
337                statistics.expanded_objects += 1;
338            }
339            out.push(output::Count::from_data(id, location));
340        }
341    }
342
343    #[inline]
344    fn id_to_count(
345        db: &dyn crate::Find,
346        buf: &mut Vec<u8>,
347        id: &oid,
348        objects: &gix_features::progress::AtomicStep,
349        statistics: &mut Outcome,
350        allow_pack_lookups: bool,
351    ) -> output::Count {
352        objects.fetch_add(1, Ordering::Relaxed);
353        statistics.expanded_objects += 1;
354        output::Count {
355            id: id.to_owned(),
356            entry_pack_location: if allow_pack_lookups {
357                PackLocation::LookedUp(db.location_by_oid(id, buf))
358            } else {
359                PackLocation::NotLookedUp
360            },
361        }
362    }
363
364    struct CountingObjects<'a> {
365        decoded_objects: std::cell::RefCell<usize>,
366        objects: &'a dyn crate::Find,
367    }
368
369    impl<'a> CountingObjects<'a> {
370        fn new(objects: &'a dyn crate::Find) -> Self {
371            Self {
372                decoded_objects: Default::default(),
373                objects,
374            }
375        }
376
377        fn into_count(self) -> usize {
378            self.decoded_objects.into_inner()
379        }
380    }
381
382    impl gix_object::Find for CountingObjects<'_> {
383        fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, gix_object::find::Error> {
384            let res = Ok(self.objects.try_find(id, buffer)?.map(|t| t.0));
385            *self.decoded_objects.borrow_mut() += 1;
386            res
387        }
388    }
389
390    struct ExpandedCountingObjects<'a> {
391        decoded_objects: std::cell::RefCell<usize>,
392        expanded_objects: std::cell::RefCell<usize>,
393        out: std::cell::RefCell<Vec<output::Count>>,
394        objects_count: &'a gix_features::progress::AtomicStep,
395        objects: &'a dyn crate::Find,
396    }
397
398    impl<'a> ExpandedCountingObjects<'a> {
399        fn new(
400            objects: &'a dyn crate::Find,
401            out: Vec<output::Count>,
402            objects_count: &'a gix_features::progress::AtomicStep,
403        ) -> Self {
404            Self {
405                decoded_objects: Default::default(),
406                expanded_objects: Default::default(),
407                out: RefCell::new(out),
408                objects_count,
409                objects,
410            }
411        }
412
413        fn dissolve(self, stats: &mut Outcome) -> Vec<output::Count> {
414            stats.decoded_objects += self.decoded_objects.into_inner();
415            stats.expanded_objects += self.expanded_objects.into_inner();
416            self.out.into_inner()
417        }
418    }
419
420    impl gix_object::Find for ExpandedCountingObjects<'_> {
421        fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, gix_object::find::Error> {
422            let maybe_obj = self.objects.try_find(id, buffer)?;
423            *self.decoded_objects.borrow_mut() += 1;
424            match maybe_obj {
425                None => Ok(None),
426                Some((obj, location)) => {
427                    self.objects_count.fetch_add(1, Ordering::Relaxed);
428                    *self.expanded_objects.borrow_mut() += 1;
429                    self.out.borrow_mut().push(output::Count::from_data(id, location));
430                    Ok(Some(obj))
431                }
432            }
433        }
434    }
435}