gix_traverse/commit/
simple.rs

1use gix_date::SecondsSinceUnixEpoch;
2use gix_hash::ObjectId;
3use gix_hashtable::HashSet;
4use smallvec::SmallVec;
5use std::cmp::Reverse;
6use std::collections::VecDeque;
7
8#[derive(Default, Debug, Copy, Clone)]
9/// The order with which to prioritize the search.
10pub enum CommitTimeOrder {
11    #[default]
12    /// Sort commits by newest first.
13    NewestFirst,
14    /// Sort commits by oldest first.
15    #[doc(alias = "Sort::REVERSE", alias = "git2")]
16    OldestFirst,
17}
18
19/// Specify how to sort commits during a [simple](super::Simple) traversal.
20///
21/// ### Sample History
22///
23/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
24/// (*their X-alignment doesn't matter*).
25///
26/// ```text
27/// ---1----2----4----7 <- second parent of 8
28///     \              \
29///      3----5----6----8---
30/// ```
31#[derive(Default, Debug, Copy, Clone)]
32pub enum Sorting {
33    /// Commits are sorted as they are mentioned in the commit graph.
34    ///
35    /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`.
36    ///
37    /// ### Note
38    ///
39    /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
40    /// as it avoids overlapping branches.
41    #[default]
42    BreadthFirst,
43    /// Commits are sorted by their commit time in the order specified, either newest or oldest first.
44    ///
45    /// The sorting applies to all currently queued commit ids and thus is full.
46    ///
47    /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for [`NewestFirst`](CommitTimeOrder::NewestFirst),
48    /// or `1, 2, 3, 4, 5, 6, 7, 8` for [`OldestFirst`](CommitTimeOrder::OldestFirst).
49    ///
50    /// # Performance
51    ///
52    /// This mode benefits greatly from having an object_cache in `find()`
53    /// to avoid having to lookup each commit twice.
54    ByCommitTime(CommitTimeOrder),
55    /// This sorting is similar to [`ByCommitTime`](Sorting::ByCommitTime), but adds a cutoff to not return commits older than
56    /// a given time, stopping the iteration once no younger commits is queued to be traversed.
57    ///
58    /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
59    ///
60    /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`.
61    ByCommitTimeCutoff {
62        /// The order in which to prioritize lookups.
63        order: CommitTimeOrder,
64        /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
65        seconds: gix_date::SecondsSinceUnixEpoch,
66    },
67}
68
69/// The error is part of the item returned by the [Ancestors](super::Simple) iterator.
70#[derive(Debug, thiserror::Error)]
71#[allow(missing_docs)]
72pub enum Error {
73    #[error(transparent)]
74    Find(#[from] gix_object::find::existing_iter::Error),
75    #[error(transparent)]
76    ObjectDecode(#[from] gix_object::decode::Error),
77}
78
79use Result as Either;
80type QueueKey<T> = Either<T, Reverse<T>>;
81
82/// The state used and potentially shared by multiple graph traversals.
83#[derive(Clone)]
84pub(super) struct State {
85    next: VecDeque<ObjectId>,
86    queue: gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, ObjectId>,
87    buf: Vec<u8>,
88    seen: HashSet<ObjectId>,
89    parents_buf: Vec<u8>,
90    parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>,
91}
92
93///
94mod init {
95    use gix_date::SecondsSinceUnixEpoch;
96    use gix_hash::{oid, ObjectId};
97    use gix_object::{CommitRefIter, FindExt};
98    use std::cmp::Reverse;
99    use Err as Oldest;
100    use Ok as Newest;
101
102    use super::{
103        super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
104        collect_parents, CommitTimeOrder, Error, State,
105    };
106
107    impl Default for State {
108        fn default() -> Self {
109            State {
110                next: Default::default(),
111                queue: gix_revwalk::PriorityQueue::new(),
112                buf: vec![],
113                seen: Default::default(),
114                parents_buf: vec![],
115                parent_ids: Default::default(),
116            }
117        }
118    }
119
120    impl State {
121        fn clear(&mut self) {
122            self.next.clear();
123            self.queue.clear();
124            self.buf.clear();
125            self.seen.clear();
126        }
127    }
128
129    fn to_queue_key(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
130        match order {
131            CommitTimeOrder::NewestFirst => Newest(i),
132            CommitTimeOrder::OldestFirst => Oldest(Reverse(i)),
133        }
134    }
135
136    /// Builder
137    impl<Find, Predicate> Simple<Find, Predicate>
138    where
139        Find: gix_object::Find,
140    {
141        /// Set the `sorting` method.
142        pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
143            self.sorting = sorting;
144            match self.sorting {
145                Sorting::BreadthFirst => {
146                    self.queue_to_vecdeque();
147                }
148                Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
149                    let cutoff_time = self.sorting.cutoff_time();
150                    let state = &mut self.state;
151                    for commit_id in state.next.drain(..) {
152                        let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?;
153                        let time = commit_iter.committer()?.time.seconds;
154                        let key = to_queue_key(time, order);
155                        match (cutoff_time, order) {
156                            (Some(cutoff_time), _) if time >= cutoff_time => {
157                                state.queue.insert(key, commit_id);
158                            }
159                            (Some(_), _) => {}
160                            (None, _) => {
161                                state.queue.insert(key, commit_id);
162                            }
163                        }
164                    }
165                }
166            }
167            Ok(self)
168        }
169
170        /// Change our commit parent handling mode to the given one.
171        pub fn parents(mut self, mode: Parents) -> Self {
172            self.parents = mode;
173            if matches!(self.parents, Parents::First) {
174                self.queue_to_vecdeque();
175            }
176            self
177        }
178
179        /// Set the commitgraph as `cache` to greatly accelerate any traversal.
180        ///
181        /// The cache will be used if possible, but we will fall-back without error to using the object
182        /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well.
183        pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self {
184            self.cache = cache;
185            self
186        }
187
188        fn queue_to_vecdeque(&mut self) {
189            let state = &mut self.state;
190            state.next.extend(
191                std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
192                    .into_iter_unordered()
193                    .map(|(_time, id)| id),
194            );
195        }
196    }
197
198    /// Lifecycle
199    impl<Find> Simple<Find, fn(&oid) -> bool>
200    where
201        Find: gix_object::Find,
202    {
203        /// Create a new instance.
204        ///
205        /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
206        ///    an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
207        ///    as needed.
208        /// * `tips`
209        ///   * the starting points of the iteration, usually commits
210        ///   * each commit they lead to will only be returned once, including the tip that started it
211        pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, find: Find) -> Self {
212            Self::filtered(tips, find, |_| true)
213        }
214    }
215
216    /// Lifecycle
217    impl<Find, Predicate> Simple<Find, Predicate>
218    where
219        Find: gix_object::Find,
220        Predicate: FnMut(&oid) -> bool,
221    {
222        /// Create a new instance with commit filtering enabled.
223        ///
224        /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
225        ///    an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
226        ///    as needed.
227        /// * `tips`
228        ///   * the starting points of the iteration, usually commits
229        ///   * each commit they lead to will only be returned once, including the tip that started it
230        /// * `predicate` - indicate whether a given commit should be included in the result as well
231        ///   as whether its parent commits should be traversed.
232        pub fn filtered(
233            tips: impl IntoIterator<Item = impl Into<ObjectId>>,
234            find: Find,
235            mut predicate: Predicate,
236        ) -> Self {
237            let tips = tips.into_iter();
238            let mut state = State::default();
239            {
240                state.clear();
241                state.next.reserve(tips.size_hint().0);
242                for tip in tips.map(Into::into) {
243                    let was_inserted = state.seen.insert(tip);
244                    if was_inserted && predicate(&tip) {
245                        state.next.push_back(tip);
246                    }
247                }
248            }
249            Self {
250                objects: find,
251                cache: None,
252                predicate,
253                state,
254                parents: Default::default(),
255                sorting: Default::default(),
256            }
257        }
258    }
259
260    /// Access
261    impl<Find, Predicate> Simple<Find, Predicate> {
262        /// Return an iterator for accessing data of the current commit, parsed lazily.
263        pub fn commit_iter(&self) -> CommitRefIter<'_> {
264            CommitRefIter::from_bytes(&self.state.buf)
265        }
266
267        /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`].
268        pub fn commit_data(&self) -> &[u8] {
269            &self.state.buf
270        }
271    }
272
273    impl<Find, Predicate> Iterator for Simple<Find, Predicate>
274    where
275        Find: gix_object::Find,
276        Predicate: FnMut(&oid) -> bool,
277    {
278        type Item = Result<Info, Error>;
279
280        fn next(&mut self) -> Option<Self::Item> {
281            if matches!(self.parents, Parents::First) {
282                self.next_by_topology()
283            } else {
284                match self.sorting {
285                    Sorting::BreadthFirst => self.next_by_topology(),
286                    Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
287                    Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
288                }
289            }
290        }
291    }
292
293    impl Sorting {
294        /// If not topo sort, provide the cutoff date if present.
295        fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
296            match self {
297                Sorting::ByCommitTimeCutoff { seconds, .. } => Some(*seconds),
298                _ => None,
299            }
300        }
301    }
302
303    /// Utilities
304    impl<Find, Predicate> Simple<Find, Predicate>
305    where
306        Find: gix_object::Find,
307        Predicate: FnMut(&oid) -> bool,
308    {
309        fn next_by_commit_date(
310            &mut self,
311            order: CommitTimeOrder,
312            cutoff: Option<SecondsSinceUnixEpoch>,
313        ) -> Option<Result<Info, Error>> {
314            let state = &mut self.state;
315
316            let (commit_time, oid) = match state.queue.pop()? {
317                (Newest(t) | Oldest(Reverse(t)), o) => (t, o),
318            };
319            let mut parents: ParentIds = Default::default();
320            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
321                Ok(Either::CachedCommit(commit)) => {
322                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
323                        // drop corrupt caches and try again with ODB
324                        self.cache = None;
325                        return self.next_by_commit_date(order, cutoff);
326                    }
327                    for (id, parent_commit_time) in state.parent_ids.drain(..) {
328                        parents.push(id);
329                        let was_inserted = state.seen.insert(id);
330                        if !(was_inserted && (self.predicate)(&id)) {
331                            continue;
332                        }
333
334                        let key = to_queue_key(parent_commit_time, order);
335                        match cutoff {
336                            Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
337                            Some(_) | None => state.queue.insert(key, id),
338                        }
339                    }
340                }
341                Ok(Either::CommitRefIter(commit_iter)) => {
342                    for token in commit_iter {
343                        match token {
344                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
345                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
346                                parents.push(id);
347                                let was_inserted = state.seen.insert(id);
348                                if !(was_inserted && (self.predicate)(&id)) {
349                                    continue;
350                                }
351
352                                let parent = self.objects.find_commit_iter(id.as_ref(), &mut state.parents_buf).ok();
353                                let parent_commit_time = parent
354                                    .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
355                                    .unwrap_or_default();
356
357                                let time = to_queue_key(parent_commit_time, order);
358                                match cutoff {
359                                    Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
360                                    Some(_) | None => state.queue.insert(time, id),
361                                }
362                            }
363                            Ok(_unused_token) => break,
364                            Err(err) => return Some(Err(err.into())),
365                        }
366                    }
367                }
368                Err(err) => return Some(Err(err.into())),
369            }
370            Some(Ok(Info {
371                id: oid,
372                parent_ids: parents,
373                commit_time: Some(commit_time),
374            }))
375        }
376    }
377
378    /// Utilities
379    impl<Find, Predicate> Simple<Find, Predicate>
380    where
381        Find: gix_object::Find,
382        Predicate: FnMut(&oid) -> bool,
383    {
384        fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
385            let state = &mut self.state;
386            let oid = state.next.pop_front()?;
387            let mut parents: ParentIds = Default::default();
388            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
389                Ok(Either::CachedCommit(commit)) => {
390                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
391                        // drop corrupt caches and try again with ODB
392                        self.cache = None;
393                        return self.next_by_topology();
394                    }
395
396                    for (id, _commit_time) in state.parent_ids.drain(..) {
397                        parents.push(id);
398                        let was_inserted = state.seen.insert(id);
399                        if was_inserted && (self.predicate)(&id) {
400                            state.next.push_back(id);
401                        }
402                        if matches!(self.parents, Parents::First) {
403                            break;
404                        }
405                    }
406                }
407                Ok(Either::CommitRefIter(commit_iter)) => {
408                    for token in commit_iter {
409                        match token {
410                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
411                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
412                                parents.push(id);
413                                let was_inserted = state.seen.insert(id);
414                                if was_inserted && (self.predicate)(&id) {
415                                    state.next.push_back(id);
416                                }
417                                if matches!(self.parents, Parents::First) {
418                                    break;
419                                }
420                            }
421                            Ok(_a_token_past_the_parents) => break,
422                            Err(err) => return Some(Err(err.into())),
423                        }
424                    }
425                }
426                Err(err) => return Some(Err(err.into())),
427            }
428            Some(Ok(Info {
429                id: oid,
430                parent_ids: parents,
431                commit_time: None,
432            }))
433        }
434    }
435}
436
437fn collect_parents(
438    dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>,
439    cache: Option<&gix_commitgraph::Graph>,
440    parents: gix_commitgraph::file::commit::Parents<'_>,
441) -> bool {
442    dest.clear();
443    let cache = cache.as_ref().expect("parents iter is available, backed by `cache`");
444    for parent_id in parents {
445        match parent_id {
446            Ok(pos) => dest.push({
447                let parent = cache.commit_at(pos);
448                (
449                    parent.id().to_owned(),
450                    parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill
451                )
452            }),
453            Err(_err) => return false,
454        }
455    }
456    true
457}