gix_revwalk/graph/
mod.rs

1use std::{fmt::Formatter, ops::Index};
2
3use gix_hash::oid;
4use smallvec::SmallVec;
5
6use crate::Graph;
7
8/// A mapping between an object id and arbitrary data, and produced when calling [`Graph::detach()`].
9pub type IdMap<T> = gix_hashtable::HashMap<gix_hash::ObjectId, T>;
10
11///
12pub mod commit;
13
14mod errors {
15    ///
16    pub mod insert_parents {
17        use crate::graph::commit::iter_parents;
18
19        /// The error returned by [`insert_parents()`](crate::Graph::insert_parents()).
20        #[derive(Debug, thiserror::Error)]
21        #[allow(missing_docs)]
22        pub enum Error {
23            #[error(transparent)]
24            Lookup(#[from] gix_object::find::existing_iter::Error),
25            #[error("A commit could not be decoded during traversal")]
26            Decode(#[from] gix_object::decode::Error),
27            #[error(transparent)]
28            Parent(#[from] iter_parents::Error),
29        }
30    }
31
32    ///
33    pub mod get_or_insert_default {
34        use crate::graph::commit::to_owned;
35
36        /// The error returned by [`try_lookup_or_insert_default()`](crate::Graph::try_lookup_or_insert_default()).
37        #[derive(Debug, thiserror::Error)]
38        #[allow(missing_docs)]
39        pub enum Error {
40            #[error(transparent)]
41            Lookup(#[from] gix_object::find::existing_iter::Error),
42            #[error(transparent)]
43            ToOwned(#[from] to_owned::Error),
44        }
45    }
46}
47pub use errors::{get_or_insert_default, insert_parents};
48use gix_date::SecondsSinceUnixEpoch;
49
50/// The generation away from the HEAD of graph, useful to limit algorithms by topological depth as well.
51///
52/// 0 would mean the starting point of the hierarchy, and 1 their parents.
53/// This number is only available natively if there is a commit-graph.
54pub type Generation = u32;
55
56impl<T: std::fmt::Debug> std::fmt::Debug for Graph<'_, '_, T> {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        std::fmt::Debug::fmt(&self.map, f)
59    }
60}
61
62impl<'cache, T: Default> Graph<'_, 'cache, T> {
63    /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set.
64    /// If it wasn't, associate it with the default value. Assure `update_data(data)` gets run.
65    /// Return the commit when done.
66    /// Note that none of the data updates happen if there was no commit named `id`.
67    pub fn try_lookup_or_insert(
68        &mut self,
69        id: gix_hash::ObjectId,
70        update_data: impl FnOnce(&mut T),
71    ) -> Result<Option<LazyCommit<'_, 'cache>>, get_or_insert_default::Error> {
72        self.try_lookup_or_insert_default(id, T::default, update_data)
73    }
74}
75
76/// Access and mutation
77impl<'cache, T> Graph<'_, 'cache, T> {
78    /// Returns the amount of entries in the graph.
79    pub fn len(&self) -> usize {
80        self.map.len()
81    }
82
83    /// Returns `true` if there is no entry in the graph.
84    pub fn is_empty(&self) -> bool {
85        self.map.is_empty()
86    }
87
88    /// Returns true if `id` has data associated with it, meaning that we processed it already.
89    pub fn contains(&self, id: &gix_hash::oid) -> bool {
90        self.map.contains_key(id.as_ref())
91    }
92
93    /// Returns the data associated with `id` if available.
94    pub fn get(&self, id: &gix_hash::oid) -> Option<&T> {
95        self.map.get(id)
96    }
97
98    /// Returns the data associated with `id` if available as mutable reference.
99    pub fn get_mut(&mut self, id: &gix_hash::oid) -> Option<&mut T> {
100        self.map.get_mut(id)
101    }
102
103    /// Insert `id` into the graph and associate it with `value`, returning the previous value associated with `id` if it existed.
104    pub fn insert(&mut self, id: gix_hash::ObjectId, value: T) -> Option<T> {
105        self.map.insert(id, value)
106    }
107
108    /// Insert `id` into the graph and associate it with the value returned by `make_data`,
109    /// and returning the previous value associated with `id` if it existed.
110    /// Fail if `id` doesn't exist in the object database.
111    pub fn insert_data<E>(
112        &mut self,
113        id: gix_hash::ObjectId,
114        mut make_data: impl FnMut(LazyCommit<'_, 'cache>) -> Result<T, E>,
115    ) -> Result<Option<T>, E>
116    where
117        E: From<gix_object::find::existing_iter::Error>,
118    {
119        let value = make_data(self.lookup(&id).map_err(E::from)?)?;
120        Ok(self.map.insert(id, value))
121    }
122
123    /// Remove all data from the graph to start over.
124    pub fn clear(&mut self) {
125        self.map.clear();
126    }
127
128    /// Insert the parents of commit named `id` to the graph and associate new parents with data
129    /// by calling `new_parent_data(parent_id, committer_timestamp)`, or update existing parents
130    /// data with `update_existing(parent_id, &mut existing_data)`.
131    /// If `first_parent` is `true`, only the first parent of commits will be looked at.
132    pub fn insert_parents(
133        &mut self,
134        id: &gix_hash::oid,
135        new_parent_data: &mut dyn FnMut(gix_hash::ObjectId, SecondsSinceUnixEpoch) -> T,
136        update_existing: &mut dyn FnMut(gix_hash::ObjectId, &mut T),
137        first_parent: bool,
138    ) -> Result<(), insert_parents::Error> {
139        let commit = self.lookup(id)?;
140        let parents: SmallVec<[_; 2]> = commit.iter_parents().collect();
141        for parent_id in parents {
142            let parent_id = parent_id?;
143            match self.map.entry(parent_id) {
144                gix_hashtable::hash_map::Entry::Vacant(entry) => {
145                    let parent = match try_lookup(&parent_id, &*self.find, self.cache, &mut self.parent_buf)? {
146                        Some(p) => p,
147                        None => continue, // skip missing objects, this is due to shallow clones for instance.
148                    };
149
150                    let parent_commit_date = parent.committer_timestamp().unwrap_or_default();
151                    entry.insert(new_parent_data(parent_id, parent_commit_date));
152                }
153                gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
154                    update_existing(parent_id, entry.get_mut());
155                }
156            }
157            if first_parent {
158                break;
159            }
160        }
161        Ok(())
162    }
163
164    /// Insert the parents of commit named `id` to the graph and associate new parents with data
165    /// as produced by `parent_data(parent_id, parent_info, maybe-existing-data &mut T) -> T`, which is always
166    /// provided the full parent commit information.
167    /// It will be provided either existing data, along with complete information about the parent,
168    /// and produces new data even though it's only used in case the parent isn't stored in the graph yet.
169    #[allow(clippy::type_complexity)]
170    pub fn insert_parents_with_lookup<E>(
171        &mut self,
172        id: &gix_hash::oid,
173        parent_data: &mut dyn FnMut(gix_hash::ObjectId, LazyCommit<'_, 'cache>, Option<&mut T>) -> Result<T, E>,
174    ) -> Result<(), E>
175    where
176        E: From<gix_object::find::existing_iter::Error>
177            + From<gix_object::decode::Error>
178            + From<commit::iter_parents::Error>,
179    {
180        let commit = self.lookup(id).map_err(E::from)?;
181        let parents: SmallVec<[_; 2]> = commit.iter_parents().collect();
182        for parent_id in parents {
183            let parent_id = parent_id.map_err(E::from)?;
184            let parent = match try_lookup(&parent_id, &*self.find, self.cache, &mut self.parent_buf).map_err(E::from)? {
185                Some(p) => p,
186                None => continue, // skip missing objects, this is due to shallow clones for instance.
187            };
188
189            match self.map.entry(parent_id) {
190                gix_hashtable::hash_map::Entry::Vacant(entry) => {
191                    entry.insert(parent_data(parent_id, parent, None)?);
192                }
193                gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
194                    parent_data(parent_id, parent, Some(entry.get_mut()))?;
195                }
196            }
197        }
198        Ok(())
199    }
200
201    /// Turn ourselves into the underlying graph structure, which is a mere mapping between object ids and their data.
202    pub fn detach(self) -> IdMap<T> {
203        self.map
204    }
205}
206
207/// Initialization
208impl<'find, 'cache, T> Graph<'find, 'cache, T> {
209    /// Create a new instance with `objects` to retrieve commits and optionally `cache` to accelerate commit access.
210    ///
211    /// ### Performance
212    ///
213    /// `find` should be optimized to access the same object repeatedly, ideally with an object cache to keep the last couple of
214    /// most recently used commits.
215    /// Furthermore, **none-existing commits should not trigger the pack-db to be refreshed.** Otherwise, performance may be sub-optimal
216    /// in shallow repositories as running into non-existing commits will trigger a refresh of the `packs` directory.
217    pub fn new(objects: impl gix_object::Find + 'find, cache: Option<&'cache gix_commitgraph::Graph>) -> Self {
218        Graph {
219            find: Box::new(objects),
220            cache,
221            map: gix_hashtable::HashMap::default(),
222            buf: Vec::new(),
223            parent_buf: Vec::new(),
224        }
225    }
226}
227
228/// Commit based methods
229impl<T> Graph<'_, '_, Commit<T>> {
230    /// Lookup `id` in the graph, but insert it if it's not yet present by looking it up without failing if the commit doesn't exist.
231    /// Call `new_data()` to obtain data for a newly inserted commit.
232    /// `update_data(data)` gets run either on existing or on new data.
233    ///
234    /// Note that none of the data updates happen if `id` didn't exist.
235    pub fn get_or_insert_commit_default(
236        &mut self,
237        id: gix_hash::ObjectId,
238        new_data: impl FnOnce() -> T,
239        update_data: impl FnOnce(&mut T),
240    ) -> Result<Option<&mut Commit<T>>, get_or_insert_default::Error> {
241        match self.map.entry(id) {
242            gix_hashtable::hash_map::Entry::Vacant(entry) => {
243                let res = try_lookup(&id, &*self.find, self.cache, &mut self.buf)?;
244                let commit = match res {
245                    None => return Ok(None),
246                    Some(commit) => commit,
247                };
248                let mut commit = commit.to_owned(new_data)?;
249                update_data(&mut commit.data);
250                entry.insert(commit);
251            }
252            gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
253                update_data(&mut entry.get_mut().data);
254            }
255        };
256        Ok(self.map.get_mut(&id))
257    }
258
259    /// For each stored commit, call `clear` on its data.
260    pub fn clear_commit_data(&mut self, mut clear: impl FnMut(&mut T)) {
261        self.map.values_mut().for_each(|c| clear(&mut c.data));
262    }
263}
264
265/// Commit based methods
266impl<T: Default> Graph<'_, '_, Commit<T>> {
267    /// Lookup `id` in the graph, but insert it if it's not yet present by looking it up without failing if the commit doesn't exist.
268    /// Newly inserted commits are populated with default data.
269    /// `update_data(data)` gets run either on existing or on new data.
270    ///
271    /// Note that none of the data updates happen if `id` didn't exist.
272    ///
273    /// If only commit data is desired without the need for attaching custom data, use
274    /// [`try_lookup(id).to_owned()`][Graph::try_lookup()] instead.
275    pub fn get_or_insert_commit(
276        &mut self,
277        id: gix_hash::ObjectId,
278        update_data: impl FnOnce(&mut T),
279    ) -> Result<Option<&mut Commit<T>>, get_or_insert_default::Error> {
280        self.get_or_insert_commit_default(id, T::default, update_data)
281    }
282
283    /// Lookup `id` in the graph, but insert it if it's not yet present by looking it up without failing if the commit doesn't exist.
284    /// `update_commit(commit)` gets run either on existing or on new data.
285    ///
286    /// Note that none of the data updates happen if `id` didn't exist in the graph.
287    pub fn get_or_insert_full_commit(
288        &mut self,
289        id: gix_hash::ObjectId,
290        update_commit: impl FnOnce(&mut Commit<T>),
291    ) -> Result<Option<&mut Commit<T>>, get_or_insert_default::Error> {
292        match self.map.entry(id) {
293            gix_hashtable::hash_map::Entry::Vacant(entry) => {
294                let res = try_lookup(&id, &*self.find, self.cache, &mut self.buf)?;
295                let commit = match res {
296                    None => return Ok(None),
297                    Some(commit) => commit,
298                };
299                let mut commit = commit.to_owned(T::default)?;
300                update_commit(&mut commit);
301                entry.insert(commit);
302            }
303            gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
304                update_commit(entry.get_mut());
305            }
306        };
307        Ok(self.map.get_mut(&id))
308    }
309}
310
311/// Lazy commit access
312impl<'cache, T> Graph<'_, 'cache, T> {
313    /// Lookup `id` without failing if the commit doesn't exist or `id` isn't a commit,
314    /// and assure that `id` is inserted into our set
315    /// with a `default` value assigned to it.
316    /// `update_data(data)` gets run either on existing or no new data.
317    /// Return the commit when done.
318    ///
319    /// Note that none of the data updates happen if `id` didn't exist.
320    ///
321    /// If only commit data is desired without the need for attaching custom data, use
322    /// [`try_lookup(id)`][Graph::try_lookup()] instead.
323    pub fn try_lookup_or_insert_default(
324        &mut self,
325        id: gix_hash::ObjectId,
326        default: impl FnOnce() -> T,
327        update_data: impl FnOnce(&mut T),
328    ) -> Result<Option<LazyCommit<'_, 'cache>>, get_or_insert_default::Error> {
329        let res = try_lookup(&id, &*self.find, self.cache, &mut self.buf)?;
330        Ok(res.map(|commit| {
331            match self.map.entry(id) {
332                gix_hashtable::hash_map::Entry::Vacant(entry) => {
333                    let mut data = default();
334                    update_data(&mut data);
335                    entry.insert(data);
336                }
337                gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
338                    update_data(entry.get_mut());
339                }
340            };
341            commit
342        }))
343    }
344
345    /// Try to lookup `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist
346    /// or isn't a commit.
347    ///
348    /// It's possible that commits don't exist if the repository is shallow.
349    pub fn try_lookup(
350        &mut self,
351        id: &gix_hash::oid,
352    ) -> Result<Option<LazyCommit<'_, 'cache>>, gix_object::find::existing_iter::Error> {
353        try_lookup(id, &*self.find, self.cache, &mut self.buf)
354    }
355
356    /// Lookup `id` and return a handle to it, or fail if it doesn't exist or is no commit.
357    pub fn lookup(
358        &mut self,
359        id: &gix_hash::oid,
360    ) -> Result<LazyCommit<'_, 'cache>, gix_object::find::existing_iter::Error> {
361        self.try_lookup(id)?
362            .ok_or(gix_object::find::existing_iter::Error::NotFound { oid: id.to_owned() })
363    }
364}
365
366fn try_lookup<'graph, 'cache>(
367    id: &gix_hash::oid,
368    objects: &dyn gix_object::Find,
369    cache: Option<&'cache gix_commitgraph::Graph>,
370    buf: &'graph mut Vec<u8>,
371) -> Result<Option<LazyCommit<'graph, 'cache>>, gix_object::find::existing_iter::Error> {
372    if let Some(cache) = cache {
373        if let Some(pos) = cache.lookup(id) {
374            return Ok(Some(LazyCommit {
375                backing: Either::Right((cache, pos)),
376            }));
377        }
378    }
379    #[allow(clippy::manual_map)]
380    Ok(
381        match objects
382            .try_find(id, buf)
383            .map_err(gix_object::find::existing_iter::Error::Find)?
384        {
385            Some(data) => data.kind.is_commit().then_some(LazyCommit {
386                backing: Either::Left(buf),
387            }),
388            None => None,
389        },
390    )
391}
392
393impl<'a, T> Index<&'a gix_hash::oid> for Graph<'_, '_, T> {
394    type Output = T;
395
396    fn index(&self, index: &'a oid) -> &Self::Output {
397        &self.map[index]
398    }
399}
400
401/// A commit that contains all information we can obtain through the commit-graph, which is typically enough to fuel any graph iteration.
402pub struct Commit<T> {
403    /// The parents of the commit.
404    pub parents: SmallVec<[gix_hash::ObjectId; 1]>,
405    /// The time at which the commit was created.
406    pub commit_time: SecondsSinceUnixEpoch,
407    /// The generation of the commit, if available.
408    pub generation: Option<u32>,
409    /// Any kind of data to associate with this commit.
410    pub data: T,
411}
412
413impl<T> std::fmt::Debug for Commit<T>
414where
415    T: std::fmt::Debug,
416{
417    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
418        f.debug_struct("Commit")
419            .field("parents", &self.parents)
420            .field("commit_time", &self.commit_time)
421            .field("generation", &self.generation)
422            .field("data", &self.data)
423            .finish()
424    }
425}
426
427impl<T> Clone for Commit<T>
428where
429    T: Clone,
430{
431    fn clone(&self) -> Self {
432        Commit {
433            parents: self.parents.clone(),
434            commit_time: self.commit_time,
435            generation: self.generation,
436            data: self.data.clone(),
437        }
438    }
439}
440
441/// A commit that provides access to graph-related information, on demand.
442///
443/// The owned version of this type is called [`Commit`] and can be obtained by calling [`LazyCommit::to_owned()`].
444pub struct LazyCommit<'graph, 'cache> {
445    backing: Either<&'graph [u8], (&'cache gix_commitgraph::Graph, gix_commitgraph::Position)>,
446}
447
448enum Either<T, U> {
449    Left(T),
450    Right(U),
451}