gix_diff/tree/
function.rs

1use std::{borrow::BorrowMut, collections::VecDeque};
2
3use gix_object::{tree::EntryRef, FindExt, TreeRefIter};
4
5use crate::tree::{
6    visit::{Change, ChangeId, Relation},
7    Error, State, TreeInfoTuple, Visit,
8};
9
10/// Calculate the changes that would need to be applied to `lhs` to get `rhs` using `objects` to obtain objects as needed for traversal.
11/// `state` can be used between multiple calls to re-use memory.
12///
13/// * The `state` maybe owned or mutably borrowed to allow reuses allocated data structures through multiple runs.
14/// * `delegate` will receive the computed changes, see the [`Visit`] trait for more information on what to expect.
15///
16/// # Notes
17///
18/// * `lhs` can be an empty tree to simulate what would happen if the left-hand side didn't exist.
19/// * To obtain progress, implement it within the `delegate`.
20/// * Tree entries are expected to be ordered using [`tree-entry-comparison`][git_cmp_c] (the same [in Rust][git_cmp_rs])
21/// * it does a breadth first iteration as buffer space only fits two trees, the current one on the one we compare with.
22/// * does not do rename tracking but attempts to reduce allocations to zero (so performance is mostly determined
23///   by the delegate implementation which should be as specific as possible. Rename tracking can be computed on top of the changes
24///   received by the `delegate`.
25/// * cycle checking is not performed, but can be performed in the delegate which can return
26///   [`tree::visit::Action::Cancel`](crate::tree::visit::Action::Cancel) to stop the traversal.
27///
28/// [git_cmp_c]: https://github.com/git/git/blob/ef8ce8f3d4344fd3af049c17eeba5cd20d98b69f/tree-diff.c#L72-L88
29/// [git_cmp_rs]: https://github.com/GitoxideLabs/gitoxide/blob/795962b107d86f58b1f7c75006da256d19cc80ad/gix-object/src/tree/mod.rs#L263-L273
30#[doc(alias = "diff_tree_to_tree", alias = "git2")]
31pub fn diff<StateMut>(
32    lhs: TreeRefIter<'_>,
33    rhs: TreeRefIter<'_>,
34    mut state: StateMut,
35    objects: impl gix_object::Find,
36    delegate: &mut impl Visit,
37) -> Result<(), Error>
38where
39    StateMut: BorrowMut<State>,
40{
41    let state = state.borrow_mut();
42    state.clear();
43    let mut lhs_entries = peekable(lhs);
44    let mut rhs_entries = peekable(rhs);
45    let mut relation = None;
46    let mut pop_path = false;
47
48    loop {
49        if pop_path {
50            delegate.pop_path_component();
51        }
52        pop_path = true;
53
54        match (lhs_entries.next(), rhs_entries.next()) {
55            (None, None) => {
56                match state.trees.pop_front() {
57                    Some((None, Some(rhs), relation_to_propagate)) => {
58                        delegate.pop_front_tracked_path_and_set_current();
59                        relation = relation_to_propagate;
60                        rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
61                    }
62                    Some((Some(lhs), Some(rhs), relation_to_propagate)) => {
63                        delegate.pop_front_tracked_path_and_set_current();
64                        lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
65                        rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
66                        relation = relation_to_propagate;
67                    }
68                    Some((Some(lhs), None, relation_to_propagate)) => {
69                        delegate.pop_front_tracked_path_and_set_current();
70                        lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
71                        relation = relation_to_propagate;
72                    }
73                    Some((None, None, _)) => unreachable!("BUG: it makes no sense to fill the stack with empties"),
74                    None => return Ok(()),
75                }
76                pop_path = false;
77            }
78            (Some(lhs), Some(rhs)) => {
79                use std::cmp::Ordering::*;
80                let (lhs, rhs) = (lhs?, rhs?);
81                match compare(&lhs, &rhs) {
82                    Equal => handle_lhs_and_rhs_with_equal_filenames(
83                        lhs,
84                        rhs,
85                        &mut state.trees,
86                        &mut state.change_id,
87                        relation,
88                        delegate,
89                    )?,
90                    Less => catchup_lhs_with_rhs(
91                        &mut lhs_entries,
92                        lhs,
93                        rhs,
94                        &mut state.trees,
95                        &mut state.change_id,
96                        relation,
97                        delegate,
98                    )?,
99                    Greater => catchup_rhs_with_lhs(
100                        &mut rhs_entries,
101                        lhs,
102                        rhs,
103                        &mut state.trees,
104                        &mut state.change_id,
105                        relation,
106                        delegate,
107                    )?,
108                }
109            }
110            (Some(lhs), None) => {
111                let lhs = lhs?;
112                delete_entry_schedule_recursion(lhs, &mut state.trees, &mut state.change_id, relation, delegate)?;
113            }
114            (None, Some(rhs)) => {
115                let rhs = rhs?;
116                add_entry_schedule_recursion(rhs, &mut state.trees, &mut state.change_id, relation, delegate)?;
117            }
118        }
119    }
120}
121
122fn compare(a: &EntryRef<'_>, b: &EntryRef<'_>) -> std::cmp::Ordering {
123    let common = a.filename.len().min(b.filename.len());
124    a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
125        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
126        let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
127        a.cmp(&b)
128    })
129}
130
131fn delete_entry_schedule_recursion(
132    entry: EntryRef<'_>,
133    queue: &mut VecDeque<TreeInfoTuple>,
134    change_id: &mut ChangeId,
135    relation_to_propagate: Option<Relation>,
136    delegate: &mut impl Visit,
137) -> Result<(), Error> {
138    delegate.push_path_component(entry.filename);
139    let relation = relation_to_propagate.or_else(|| {
140        entry.mode.is_tree().then(|| {
141            *change_id += 1;
142            Relation::Parent(*change_id)
143        })
144    });
145    let is_cancelled = delegate
146        .visit(Change::Deletion {
147            entry_mode: entry.mode,
148            oid: entry.oid.to_owned(),
149            relation,
150        })
151        .cancelled();
152    if is_cancelled {
153        return Err(Error::Cancelled);
154    }
155    if entry.mode.is_tree() {
156        delegate.pop_path_component();
157        delegate.push_back_tracked_path_component(entry.filename);
158        queue.push_back((Some(entry.oid.to_owned()), None, to_child(relation)));
159    }
160    Ok(())
161}
162
163fn add_entry_schedule_recursion(
164    entry: EntryRef<'_>,
165    queue: &mut VecDeque<TreeInfoTuple>,
166    change_id: &mut ChangeId,
167    relation_to_propagate: Option<Relation>,
168    delegate: &mut impl Visit,
169) -> Result<(), Error> {
170    delegate.push_path_component(entry.filename);
171    let relation = relation_to_propagate.or_else(|| {
172        entry.mode.is_tree().then(|| {
173            *change_id += 1;
174            Relation::Parent(*change_id)
175        })
176    });
177    if delegate
178        .visit(Change::Addition {
179            entry_mode: entry.mode,
180            oid: entry.oid.to_owned(),
181            relation,
182        })
183        .cancelled()
184    {
185        return Err(Error::Cancelled);
186    }
187    if entry.mode.is_tree() {
188        delegate.pop_path_component();
189        delegate.push_back_tracked_path_component(entry.filename);
190        queue.push_back((None, Some(entry.oid.to_owned()), to_child(relation)));
191    }
192    Ok(())
193}
194
195fn catchup_rhs_with_lhs(
196    rhs_entries: &mut IteratorType<TreeRefIter<'_>>,
197    lhs: EntryRef<'_>,
198    rhs: EntryRef<'_>,
199    queue: &mut VecDeque<TreeInfoTuple>,
200    change_id: &mut ChangeId,
201    relation_to_propagate: Option<Relation>,
202    delegate: &mut impl Visit,
203) -> Result<(), Error> {
204    use std::cmp::Ordering::*;
205    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
206    loop {
207        match rhs_entries.peek() {
208            Some(Ok(rhs)) => match compare(&lhs, rhs) {
209                Equal => {
210                    let rhs = rhs_entries.next().transpose()?.expect("the peeked item to be present");
211                    delegate.pop_path_component();
212                    handle_lhs_and_rhs_with_equal_filenames(
213                        lhs,
214                        rhs,
215                        queue,
216                        change_id,
217                        relation_to_propagate,
218                        delegate,
219                    )?;
220                    break;
221                }
222                Greater => {
223                    let rhs = rhs_entries.next().transpose()?.expect("the peeked item to be present");
224                    delegate.pop_path_component();
225                    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
226                }
227                Less => {
228                    delegate.pop_path_component();
229                    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
230                    break;
231                }
232            },
233            Some(Err(err)) => return Err(Error::EntriesDecode(err.to_owned())),
234            None => {
235                delegate.pop_path_component();
236                delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
237                break;
238            }
239        }
240    }
241    Ok(())
242}
243
244fn catchup_lhs_with_rhs(
245    lhs_entries: &mut IteratorType<TreeRefIter<'_>>,
246    lhs: EntryRef<'_>,
247    rhs: EntryRef<'_>,
248    queue: &mut VecDeque<TreeInfoTuple>,
249    change_id: &mut ChangeId,
250    relation_to_propagate: Option<Relation>,
251    delegate: &mut impl Visit,
252) -> Result<(), Error> {
253    use std::cmp::Ordering::*;
254    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
255    loop {
256        match lhs_entries.peek() {
257            Some(Ok(lhs)) => match compare(lhs, &rhs) {
258                Equal => {
259                    let lhs = lhs_entries.next().expect("the peeked item to be present")?;
260                    delegate.pop_path_component();
261                    handle_lhs_and_rhs_with_equal_filenames(
262                        lhs,
263                        rhs,
264                        queue,
265                        change_id,
266                        relation_to_propagate,
267                        delegate,
268                    )?;
269                    break;
270                }
271                Less => {
272                    let lhs = lhs_entries.next().expect("the peeked item to be present")?;
273                    delegate.pop_path_component();
274                    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
275                }
276                Greater => {
277                    delegate.pop_path_component();
278                    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
279                    break;
280                }
281            },
282            Some(Err(err)) => return Err(Error::EntriesDecode(err.to_owned())),
283            None => {
284                delegate.pop_path_component();
285                add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
286                break;
287            }
288        }
289    }
290    Ok(())
291}
292
293fn handle_lhs_and_rhs_with_equal_filenames(
294    lhs: EntryRef<'_>,
295    rhs: EntryRef<'_>,
296    queue: &mut VecDeque<TreeInfoTuple>,
297    change_id: &mut ChangeId,
298    relation_to_propagate: Option<Relation>,
299    delegate: &mut impl Visit,
300) -> Result<(), Error> {
301    match (lhs.mode.is_tree(), rhs.mode.is_tree()) {
302        (true, true) => {
303            delegate.push_back_tracked_path_component(lhs.filename);
304            if lhs.oid != rhs.oid
305                && delegate
306                    .visit(Change::Modification {
307                        previous_entry_mode: lhs.mode,
308                        previous_oid: lhs.oid.to_owned(),
309                        entry_mode: rhs.mode,
310                        oid: rhs.oid.to_owned(),
311                    })
312                    .cancelled()
313            {
314                return Err(Error::Cancelled);
315            }
316            queue.push_back((
317                Some(lhs.oid.to_owned()),
318                Some(rhs.oid.to_owned()),
319                relation_to_propagate,
320            ));
321        }
322        (_, true) => {
323            delegate.push_back_tracked_path_component(lhs.filename);
324            if delegate
325                .visit(Change::Deletion {
326                    entry_mode: lhs.mode,
327                    oid: lhs.oid.to_owned(),
328                    relation: None,
329                })
330                .cancelled()
331            {
332                return Err(Error::Cancelled);
333            }
334
335            let relation = relation_to_propagate.or_else(|| {
336                *change_id += 1;
337                Some(Relation::Parent(*change_id))
338            });
339            if delegate
340                .visit(Change::Addition {
341                    entry_mode: rhs.mode,
342                    oid: rhs.oid.to_owned(),
343                    relation,
344                })
345                .cancelled()
346            {
347                return Err(Error::Cancelled);
348            }
349            queue.push_back((None, Some(rhs.oid.to_owned()), to_child(relation)));
350        }
351        (true, _) => {
352            delegate.push_back_tracked_path_component(lhs.filename);
353            let relation = relation_to_propagate.or_else(|| {
354                *change_id += 1;
355                Some(Relation::Parent(*change_id))
356            });
357            if delegate
358                .visit(Change::Deletion {
359                    entry_mode: lhs.mode,
360                    oid: lhs.oid.to_owned(),
361                    relation,
362                })
363                .cancelled()
364            {
365                return Err(Error::Cancelled);
366            }
367            if delegate
368                .visit(Change::Addition {
369                    entry_mode: rhs.mode,
370                    oid: rhs.oid.to_owned(),
371                    relation: None,
372                })
373                .cancelled()
374            {
375                return Err(Error::Cancelled);
376            }
377            queue.push_back((Some(lhs.oid.to_owned()), None, to_child(relation)));
378        }
379        (false, false) => {
380            delegate.push_path_component(lhs.filename);
381            debug_assert!(lhs.mode.is_no_tree() && lhs.mode.is_no_tree());
382            if (lhs.oid != rhs.oid || lhs.mode != rhs.mode)
383                && delegate
384                    .visit(Change::Modification {
385                        previous_entry_mode: lhs.mode,
386                        previous_oid: lhs.oid.to_owned(),
387                        entry_mode: rhs.mode,
388                        oid: rhs.oid.to_owned(),
389                    })
390                    .cancelled()
391            {
392                return Err(Error::Cancelled);
393            }
394        }
395    }
396    Ok(())
397}
398
399type IteratorType<I> = std::iter::Peekable<I>;
400
401fn to_child(r: Option<Relation>) -> Option<Relation> {
402    r.map(|r| match r {
403        Relation::Parent(id) => Relation::ChildOfParent(id),
404        Relation::ChildOfParent(id) => Relation::ChildOfParent(id),
405    })
406}
407
408fn peekable<I: Iterator>(iter: I) -> IteratorType<I> {
409    iter.peekable()
410}
411
412#[cfg(test)]
413mod tests {
414    use std::cmp::Ordering;
415
416    use gix_object::tree::EntryKind;
417
418    use super::*;
419
420    #[test]
421    fn compare_select_samples() {
422        let null = gix_hash::ObjectId::null(gix_hash::Kind::Sha1);
423        let actual = compare(
424            &EntryRef {
425                mode: EntryKind::Blob.into(),
426                filename: "plumbing-cli.rs".into(),
427                oid: &null,
428            },
429            &EntryRef {
430                mode: EntryKind::Tree.into(),
431                filename: "plumbing".into(),
432                oid: &null,
433            },
434        );
435        assert_eq!(actual, Ordering::Less);
436        let actual = compare(
437            &EntryRef {
438                mode: EntryKind::Tree.into(),
439                filename: "plumbing-cli.rs".into(),
440                oid: &null,
441            },
442            &EntryRef {
443                mode: EntryKind::Blob.into(),
444                filename: "plumbing".into(),
445                oid: &null,
446            },
447        );
448        assert_eq!(actual, Ordering::Greater);
449    }
450}