gix_traverse/commit/topo/
iter.rs

1use crate::commit::topo::{Error, Sorting, WalkFlags};
2use crate::commit::{find, Either, Info, Parents, Topo};
3use gix_hash::{oid, ObjectId};
4use gix_revwalk::PriorityQueue;
5use smallvec::SmallVec;
6
7pub(in crate::commit) type GenAndCommitTime = (u32, i64);
8
9// Git's priority queue works as a LIFO stack if no compare function is set,
10// which is the case for `--topo-order.` However, even in that case the initial
11// items of the queue are sorted according to the commit time before beginning
12// the walk.
13#[derive(Debug)]
14pub(in crate::commit) enum Queue {
15    Date(PriorityQueue<i64, Info>),
16    Topo(Vec<(i64, Info)>),
17}
18
19impl Queue {
20    pub(super) fn new(s: Sorting) -> Self {
21        match s {
22            Sorting::DateOrder => Self::Date(PriorityQueue::new()),
23            Sorting::TopoOrder => Self::Topo(vec![]),
24        }
25    }
26
27    pub(super) fn push(&mut self, commit_time: i64, info: Info) {
28        match self {
29            Self::Date(q) => q.insert(commit_time, info),
30            Self::Topo(q) => q.push((commit_time, info)),
31        }
32    }
33
34    fn pop(&mut self) -> Option<Info> {
35        match self {
36            Self::Date(q) => q.pop().map(|(_, info)| info),
37            Self::Topo(q) => q.pop().map(|(_, info)| info),
38        }
39    }
40
41    pub(super) fn initial_sort(&mut self) {
42        if let Self::Topo(ref mut inner_vec) = self {
43            inner_vec.sort_by(|a, b| a.0.cmp(&b.0));
44        }
45    }
46}
47
48impl<Find, Predicate> Topo<Find, Predicate>
49where
50    Find: gix_object::Find,
51{
52    pub(super) fn compute_indegrees_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
53        while let Some(((gen, _), _)) = self.indegree_queue.peek() {
54            if *gen >= gen_cutoff {
55                self.indegree_walk_step()?;
56            } else {
57                break;
58            }
59        }
60
61        Ok(())
62    }
63
64    fn indegree_walk_step(&mut self) -> Result<(), Error> {
65        if let Some(((gen, _), id)) = self.indegree_queue.pop() {
66            self.explore_to_depth(gen)?;
67
68            let parents = self.collect_parents(&id)?;
69            for (id, gen_time) in parents {
70                self.indegrees.entry(id).and_modify(|e| *e += 1).or_insert(2);
71
72                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
73                if !state.contains(WalkFlags::InDegree) {
74                    *state |= WalkFlags::InDegree;
75                    self.indegree_queue.insert(gen_time, id);
76                }
77            }
78        }
79        Ok(())
80    }
81
82    fn explore_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
83        while let Some(((gen, _), _)) = self.explore_queue.peek() {
84            if *gen >= gen_cutoff {
85                self.explore_walk_step()?;
86            } else {
87                break;
88            }
89        }
90        Ok(())
91    }
92
93    fn explore_walk_step(&mut self) -> Result<(), Error> {
94        if let Some((_, id)) = self.explore_queue.pop() {
95            let parents = self.collect_parents(&id)?;
96            self.process_parents(&id, &parents)?;
97
98            for (id, gen_time) in parents {
99                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
100
101                if !state.contains(WalkFlags::Explored) {
102                    *state |= WalkFlags::Explored;
103                    self.explore_queue.insert(gen_time, id);
104                }
105            }
106        }
107        Ok(())
108    }
109
110    fn expand_topo_walk(&mut self, id: &oid) -> Result<(), Error> {
111        let parents = self.collect_parents(id)?;
112        self.process_parents(id, &parents)?;
113
114        for (pid, (parent_gen, parent_commit_time)) in parents {
115            let parent_state = self.states.get(&pid).ok_or(Error::MissingStateUnexpected)?;
116            if parent_state.contains(WalkFlags::Uninteresting) {
117                continue;
118            }
119
120            if parent_gen < self.min_gen {
121                self.min_gen = parent_gen;
122                self.compute_indegrees_to_depth(self.min_gen)?;
123            }
124
125            let i = self.indegrees.get_mut(&pid).ok_or(Error::MissingIndegreeUnexpected)?;
126            *i -= 1;
127            if *i != 1 {
128                continue;
129            }
130
131            let parent_ids = self.collect_all_parents(&pid)?.into_iter().map(|e| e.0).collect();
132            self.topo_queue.push(
133                parent_commit_time,
134                Info {
135                    id: pid,
136                    parent_ids,
137                    commit_time: Some(parent_commit_time),
138                },
139            );
140        }
141
142        Ok(())
143    }
144
145    fn process_parents(&mut self, id: &oid, parents: &[(ObjectId, GenAndCommitTime)]) -> Result<(), Error> {
146        let state = self.states.get_mut(id).ok_or(Error::MissingStateUnexpected)?;
147        if state.contains(WalkFlags::Added) {
148            return Ok(());
149        }
150
151        *state |= WalkFlags::Added;
152
153        // If the current commit is uninteresting we pass that on to ALL
154        // parents, otherwise we set the Seen flag.
155        let (pass, insert) = if state.contains(WalkFlags::Uninteresting) {
156            let flags = WalkFlags::Uninteresting;
157            for (id, _) in parents {
158                let grand_parents = self.collect_all_parents(id)?;
159
160                for (id, _) in &grand_parents {
161                    self.states
162                        .entry(*id)
163                        .and_modify(|s| *s |= WalkFlags::Uninteresting)
164                        .or_insert(WalkFlags::Uninteresting | WalkFlags::Seen);
165                }
166            }
167            (flags, flags)
168        } else {
169            // NOTE: git sets SEEN like we do but keeps the SYMMETRIC_LEFT and
170            // ANCENSTRY_PATH if they are set, but they have no purpose here.
171            let flags = WalkFlags::empty();
172            (flags, WalkFlags::Seen)
173        };
174
175        for (id, _) in parents {
176            self.states.entry(*id).and_modify(|s| *s |= pass).or_insert(insert);
177        }
178        Ok(())
179    }
180
181    fn collect_parents(&mut self, id: &oid) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
182        collect_parents(
183            &mut self.commit_graph,
184            &self.find,
185            id,
186            matches!(self.parents, Parents::First),
187            &mut self.buf,
188        )
189    }
190
191    // Same as collect_parents but disregards the first_parent flag
192    pub(super) fn collect_all_parents(
193        &mut self,
194        id: &oid,
195    ) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
196        collect_parents(&mut self.commit_graph, &self.find, id, false, &mut self.buf)
197    }
198
199    fn pop_commit(&mut self) -> Option<Result<Info, Error>> {
200        let commit = self.topo_queue.pop()?;
201        let i = match self.indegrees.get_mut(&commit.id) {
202            Some(i) => i,
203            None => {
204                return Some(Err(Error::MissingIndegreeUnexpected));
205            }
206        };
207
208        *i = 0;
209        if let Err(e) = self.expand_topo_walk(&commit.id) {
210            return Some(Err(e));
211        };
212
213        Some(Ok(commit))
214    }
215}
216
217impl<Find, Predicate> Iterator for Topo<Find, Predicate>
218where
219    Find: gix_object::Find,
220    Predicate: FnMut(&oid) -> bool,
221{
222    type Item = Result<Info, Error>;
223
224    fn next(&mut self) -> Option<Self::Item> {
225        loop {
226            match self.pop_commit()? {
227                Ok(id) => {
228                    if (self.predicate)(&id.id) {
229                        return Some(Ok(id));
230                    }
231                }
232                Err(e) => return Some(Err(e)),
233            }
234        }
235    }
236}
237
238fn collect_parents<Find>(
239    cache: &mut Option<gix_commitgraph::Graph>,
240    f: Find,
241    id: &oid,
242    first_only: bool,
243    buf: &mut Vec<u8>,
244) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error>
245where
246    Find: gix_object::Find,
247{
248    let mut parents = SmallVec::<[(ObjectId, GenAndCommitTime); 1]>::new();
249    match find(cache.as_ref(), &f, id, buf)? {
250        Either::CommitRefIter(c) => {
251            for token in c {
252                use gix_object::commit::ref_iter::Token as T;
253                match token {
254                    Ok(T::Tree { .. }) => continue,
255                    Ok(T::Parent { id }) => {
256                        parents.push((id, (0, 0))); // Dummy numbers to be filled in
257                        if first_only {
258                            break;
259                        }
260                    }
261                    Ok(_past_parents) => break,
262                    Err(err) => return Err(err.into()),
263                }
264            }
265            // Need to check the cache again. That a commit is not in the cache
266            // doesn't mean a parent is not.
267            for (id, gen_time) in parents.iter_mut() {
268                let commit = find(cache.as_ref(), &f, id, buf)?;
269                *gen_time = gen_and_commit_time(commit)?;
270            }
271        }
272        Either::CachedCommit(c) => {
273            for pos in c.iter_parents() {
274                let Ok(pos) = pos else {
275                    // drop corrupt cache and use ODB from now on.
276                    *cache = None;
277                    return collect_parents(cache, f, id, first_only, buf);
278                };
279                let parent_commit = cache
280                    .as_ref()
281                    .expect("cache exists if CachedCommit was returned")
282                    .commit_at(pos);
283                parents.push((
284                    parent_commit.id().into(),
285                    (parent_commit.generation(), parent_commit.committer_timestamp() as i64),
286                ));
287                if first_only {
288                    break;
289                }
290            }
291        }
292    };
293    Ok(parents)
294}
295
296pub(super) fn gen_and_commit_time(c: Either<'_, '_>) -> Result<GenAndCommitTime, Error> {
297    match c {
298        Either::CommitRefIter(c) => {
299            let mut commit_time = 0;
300            for token in c {
301                use gix_object::commit::ref_iter::Token as T;
302                match token {
303                    Ok(T::Tree { .. }) => continue,
304                    Ok(T::Parent { .. }) => continue,
305                    Ok(T::Author { .. }) => continue,
306                    Ok(T::Committer { signature }) => {
307                        commit_time = signature.time.seconds;
308                        break;
309                    }
310                    Ok(_unused_token) => break,
311                    Err(err) => return Err(err.into()),
312                }
313            }
314            Ok((gix_commitgraph::GENERATION_NUMBER_INFINITY, commit_time))
315        }
316        Either::CachedCommit(c) => Ok((c.generation(), c.committer_timestamp() as i64)),
317    }
318}