gix_protocol/fetch/
negotiate.rs

1//! A modules with primitives to perform negotiation as part of a fetch operation.
2//!
3//! The functions provided are called in a certain order:
4//!
5//! 1. [`mark_complete_and_common_ref()`] - initialize the [`negotiator`](gix_negotiate::Negotiator) with all state known on the remote.
6//! 2. [`add_wants()`] is called if the call at 1) returned [`Action::MustNegotiate`].
7//! 3. [`one_round()`] is called for each negotiation round, providing information if the negotiation is done.
8use std::borrow::Cow;
9
10use gix_date::SecondsSinceUnixEpoch;
11use gix_negotiate::Flags;
12use gix_ref::file::ReferenceExt;
13
14use crate::fetch::{refmap, RefMap, Shallow, Tags};
15
16type Queue = gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, gix_hash::ObjectId>;
17
18/// The error returned during [`one_round()`] or [`mark_complete_and_common_ref()`].
19#[derive(Debug, thiserror::Error)]
20#[allow(missing_docs)]
21pub enum Error {
22    #[error("We were unable to figure out what objects the server should send after {rounds} round(s)")]
23    NegotiationFailed { rounds: usize },
24    #[error(transparent)]
25    LookupCommitInGraph(#[from] gix_revwalk::graph::get_or_insert_default::Error),
26    #[error(transparent)]
27    OpenPackedRefsBuffer(#[from] gix_ref::packed::buffer::open::Error),
28    #[error(transparent)]
29    IO(#[from] std::io::Error),
30    #[error(transparent)]
31    InitRefIter(#[from] gix_ref::file::iter::loose_then_packed::Error),
32    #[error(transparent)]
33    PeelToId(#[from] gix_ref::peel::to_id::Error),
34    #[error(transparent)]
35    AlternateRefsAndObjects(Box<dyn std::error::Error + Send + Sync + 'static>),
36}
37
38/// Determines what should be done after [preparing the commit-graph for negotiation](mark_complete_and_common_ref).
39#[must_use]
40#[derive(Debug, Clone)]
41pub enum Action {
42    /// None of the remote refs moved compared to our last recorded state (via tracking refs), so there is nothing to do at all,
43    /// not even a ref update.
44    NoChange,
45    /// Don't negotiate, don't fetch the pack, skip right to updating the references.
46    ///
47    /// This happens if we already have all local objects even though the server seems to have changed.
48    SkipToRefUpdate,
49    /// We can't know for sure if fetching *is not* needed, so we go ahead and negotiate.
50    MustNegotiate {
51        /// Each `ref_map.mapping` has a slot here which is `true` if we have the object the remote ref points to, locally.
52        remote_ref_target_known: Vec<bool>,
53    },
54}
55
56/// Key information about each round in the pack-negotiation, as produced by [`one_round()`].
57#[derive(Debug, Clone, Copy)]
58pub struct Round {
59    /// The amount of `HAVE` lines sent this round.
60    ///
61    /// Each `HAVE` is an object that we tell the server about which would acknowledge each one it has as well.
62    pub haves_sent: usize,
63    /// A total counter, over all previous rounds, indicating how many `HAVE`s we sent without seeing a single acknowledgement,
64    /// i.e. the indication of a common object.
65    ///
66    /// This number maybe zero or be lower compared to the previous round if we have received at least one acknowledgement.
67    pub in_vain: usize,
68    /// The amount of haves we should send in this round.
69    ///
70    /// If the value is lower than `haves_sent` (the `HAVE` lines actually sent), the negotiation algorithm has run out of options
71    /// which typically indicates the end of the negotiation phase.
72    pub haves_to_send: usize,
73    /// If `true`, the server reported, as response to our previous `HAVE`s, that at least one of them is in common by acknowledging it.
74    ///
75    /// This may also lead to the server responding with a pack.
76    pub previous_response_had_at_least_one_in_common: bool,
77}
78
79/// This function is modeled after the similarly named one in the git codebase to mark known refs in a commit-graph.
80///
81/// It to do the following:
82///
83/// * figure out all advertised refs on the remote *that we already have* and keep track of the oldest one as cutoff date.
84/// * mark all of our own refs as tips for a traversal.
85/// * mark all their parents, recursively, up to (and including) the cutoff date up to which we have seen the servers commit that we have.
86/// * pass all known-to-be-common-with-remote commits to the negotiator as common commits.
87///
88/// This is done so that we already find the most recent common commits, even if we are ahead, which is *potentially* better than
89/// what we would get if we would rely on tracking refs alone, particularly if one wouldn't trust the tracking refs for some reason.
90///
91/// Note that git doesn't trust its own tracking refs as the server *might* have changed completely, for instance by force-pushing, so
92/// marking our local tracking refs as known is something that's actually not proven to be correct so it's not done.
93///
94/// Additionally, it does what's done in `transport.c` and we check if a fetch is actually needed as at least one advertised ref changed.
95///
96/// Finally, we also mark tips in the `negotiator` in one go to avoid traversing all refs twice, since we naturally encounter all tips during
97/// our own walk.
98///
99/// Return whether we should negotiate, along with a queue for later use.
100///
101/// # Parameters
102///
103/// * `objects`
104///     - Access to the object database. *Note* that the `exists()` calls must not trigger a refresh of the ODB packs as plenty of them might fail, i.e. find on object.
105/// * `refs`
106///     - Access to the git references database.
107/// * `alternates`
108///     - A function that returns an iterator over `(refs, objects)` for each alternate repository, to assure all known objects are added also according to their tips.
109/// * `negotiator`
110///     - The implementation that performs the negotiation later, i.e. prepare wants and haves.
111/// * `graph`
112///     - The commit-graph for use by the `negotiator` - we populate it with tips to initialize the graph traversal.
113/// * `ref_map`
114///     - The references known on the remote, as previously obtained with [`RefMap::new()`].
115/// * `shallow`
116///     - How to deal with shallow repositories. It does affect how negotiations are performed.
117/// * `mapping_is_ignored`
118///     - `f(mapping) -> bool` returns `true` if the given mapping should not participate in change tracking.
119///     - [`make_refmapping_ignore_predicate()`] is a typical implementation for this.
120#[allow(clippy::too_many_arguments)]
121pub fn mark_complete_and_common_ref<Out, F, E>(
122    objects: &(impl gix_object::Find + gix_object::FindHeader + gix_object::Exists),
123    refs: &gix_ref::file::Store,
124    alternates: impl FnOnce() -> Result<Out, E>,
125    negotiator: &mut dyn gix_negotiate::Negotiator,
126    graph: &mut gix_negotiate::Graph<'_, '_>,
127    ref_map: &RefMap,
128    shallow: &Shallow,
129    mapping_is_ignored: impl Fn(&refmap::Mapping) -> bool,
130) -> Result<Action, Error>
131where
132    E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
133    Out: Iterator<Item = (gix_ref::file::Store, F)>,
134    F: gix_object::Find,
135{
136    let _span = gix_trace::detail!("mark_complete_and_common_ref", mappings = ref_map.mappings.len());
137    if ref_map.mappings.is_empty() {
138        return Ok(Action::NoChange);
139    }
140    if let Shallow::Deepen(0) = shallow {
141        // Avoid deepening (relative) with zero as it seems to upset the server. Git also doesn't actually
142        // perform the negotiation for some reason (couldn't find it in code).
143        return Ok(Action::NoChange);
144    }
145    if let Some(refmap::Mapping {
146        remote: refmap::Source::Ref(crate::handshake::Ref::Unborn { .. }),
147        ..
148    }) = ref_map.mappings.last().filter(|_| ref_map.mappings.len() == 1)
149    {
150        // There is only an unborn branch, as the remote has an empty repository. This means there is nothing to do except for
151        // possibly reproducing the unborn branch locally.
152        return Ok(Action::SkipToRefUpdate);
153    }
154
155    // Compute the cut-off date by checking which of the refs advertised (and matched in refspecs) by the remote we have,
156    // and keep the oldest one.
157    let mut cutoff_date = None::<SecondsSinceUnixEpoch>;
158    let mut num_mappings_with_change = 0;
159    let mut remote_ref_target_known: Vec<bool> = std::iter::repeat(false).take(ref_map.mappings.len()).collect();
160    let mut remote_ref_included: Vec<bool> = std::iter::repeat(false).take(ref_map.mappings.len()).collect();
161
162    for (mapping_idx, mapping) in ref_map.mappings.iter().enumerate() {
163        let want_id = mapping.remote.as_id();
164        let have_id = mapping.local.as_ref().and_then(|name| {
165            // this is the only time git uses the peer-id.
166            let r = refs.find(name).ok()?;
167            r.target.try_id().map(ToOwned::to_owned)
168        });
169
170        // Even for ignored mappings we want to know if the `want` is already present locally, so skip nothing else.
171        if !mapping_is_ignored(mapping) {
172            remote_ref_included[mapping_idx] = true;
173            // Like git, we don't let known unchanged mappings participate in the tree traversal
174            if want_id.zip(have_id).map_or(true, |(want, have)| want != have) {
175                num_mappings_with_change += 1;
176            }
177        }
178
179        if let Some(commit) = want_id
180            .and_then(|id| graph.get_or_insert_commit(id.into(), |_| {}).transpose())
181            .transpose()?
182        {
183            remote_ref_target_known[mapping_idx] = true;
184            cutoff_date = cutoff_date.unwrap_or_default().max(commit.commit_time).into();
185        } else if want_id.is_some_and(|maybe_annotated_tag| objects.exists(maybe_annotated_tag)) {
186            remote_ref_target_known[mapping_idx] = true;
187        }
188    }
189
190    if matches!(shallow, Shallow::NoChange) {
191        if num_mappings_with_change == 0 {
192            return Ok(Action::NoChange);
193        } else if remote_ref_target_known
194            .iter()
195            .zip(remote_ref_included)
196            .filter_map(|(known, included)| included.then_some(known))
197            .all(|known| *known)
198        {
199            return Ok(Action::SkipToRefUpdate);
200        }
201    }
202
203    // color our commits as complete as identified by references, unconditionally
204    // (`git` is conditional here based on `deepen`, but it doesn't make sense and it's hard to extract from history when that happened).
205    let mut queue = Queue::new();
206    mark_all_refs_in_repo(refs, objects, graph, &mut queue, Flags::COMPLETE)?;
207    for (alt_refs, alt_objs) in alternates().map_err(|err| Error::AlternateRefsAndObjects(err.into()))? {
208        mark_all_refs_in_repo(&alt_refs, &alt_objs, graph, &mut queue, Flags::COMPLETE)?;
209    }
210    // Keep track of the tips, which happen to be on our queue right, before we traverse the graph with cutoff.
211    let tips = if let Some(cutoff) = cutoff_date {
212        let tips = Cow::Owned(queue.clone());
213        // color all their parents up to the cutoff date, the oldest commit we know the server has.
214        mark_recent_complete_commits(&mut queue, graph, cutoff)?;
215        tips
216    } else {
217        Cow::Borrowed(&queue)
218    };
219
220    gix_trace::detail!("mark known_common").into_scope(|| -> Result<_, Error> {
221        // mark all complete advertised refs as common refs.
222        for mapping in ref_map
223            .mappings
224            .iter()
225            .zip(remote_ref_target_known.iter().copied())
226            // We need this filter as the graph wouldn't contain annotated tags.
227            .filter_map(|(mapping, known)| (!known).then_some(mapping))
228        {
229            let want_id = mapping.remote.as_id();
230            if let Some(common_id) = want_id
231                .and_then(|id| graph.get(id).map(|c| (c, id)))
232                .filter(|(c, _)| c.data.flags.contains(Flags::COMPLETE))
233                .map(|(_, id)| id)
234            {
235                negotiator.known_common(common_id.into(), graph)?;
236            }
237        }
238        Ok(())
239    })?;
240
241    // As negotiators currently may rely on getting `known_common` calls first and tips after, we adhere to that which is the only
242    // reason we cached the set of tips.
243    gix_trace::detail!("mark tips", num_tips = tips.len()).into_scope(|| -> Result<_, Error> {
244        for tip in tips.iter_unordered() {
245            negotiator.add_tip(*tip, graph)?;
246        }
247        Ok(())
248    })?;
249
250    Ok(Action::MustNegotiate {
251        remote_ref_target_known,
252    })
253}
254
255/// Create a predicate that checks if a refspec mapping should be ignored.
256///
257/// We want to ignore mappings during negotiation if they would be handled implicitly by the server, which is the case
258/// when tags would be sent implicitly due to `Tags::Included`.
259pub fn make_refmapping_ignore_predicate(fetch_tags: Tags, ref_map: &RefMap) -> impl Fn(&refmap::Mapping) -> bool + '_ {
260    // With included tags, we have to keep mappings of tags to handle them later when updating refs, but we don't want to
261    // explicitly `want` them as the server will determine by itself which tags are pointing to a commit it wants to send.
262    // If we would not exclude implicit tag mappings like this, we would get too much of the graph.
263    let tag_refspec_to_ignore = matches!(fetch_tags, Tags::Included)
264        .then(|| fetch_tags.to_refspec())
265        .flatten();
266    move |mapping| {
267        tag_refspec_to_ignore.is_some_and(|tag_spec| {
268            mapping
269                .spec_index
270                .implicit_index()
271                .and_then(|idx| ref_map.extra_refspecs.get(idx))
272                .is_some_and(|spec| spec.to_ref() == tag_spec)
273        })
274    }
275}
276
277/// Add all 'wants' to `arguments` once it's known negotiation is necessary.
278///
279/// This is a call to be made when [`mark_complete_and_common_ref()`] returned [`Action::MustNegotiate`].
280/// That variant also contains the `remote_ref_target_known` field which is supposed to be passed here.
281///
282/// `objects` are used to see if remote ids are known here and are tags, in which case they are also added as 'haves' as
283/// [negotiators](gix_negotiate::Negotiator) don't see tags at all.
284///
285/// * `ref_map` is the state of refs as known on the remote.
286/// * `shallow` defines if the history should be shallow.
287/// * `mapping_is_ignored` is typically initialized with [`make_refmapping_ignore_predicate`].
288///
289/// Returns `true` if at least one [want](crate::fetch::Arguments::want()) was added, or `false` otherwise.
290/// Note that not adding a single want can make the remote hang, so it's avoided on the client side by ending the fetch operation.
291pub fn add_wants(
292    objects: &impl gix_object::FindHeader,
293    arguments: &mut crate::fetch::Arguments,
294    ref_map: &RefMap,
295    remote_ref_target_known: &[bool],
296    shallow: &Shallow,
297    mapping_is_ignored: impl Fn(&refmap::Mapping) -> bool,
298) -> bool {
299    // When using shallow, we can't exclude `wants` as the remote won't send anything then. Thus, we have to resend everything
300    // we have as want instead to get exactly the same graph, but possibly deepened.
301    let is_shallow = !matches!(shallow, Shallow::NoChange);
302    let mut has_want = false;
303    let wants = ref_map
304        .mappings
305        .iter()
306        .zip(remote_ref_target_known)
307        .filter_map(|(m, known)| (is_shallow || !*known).then_some(m))
308        .filter(|m| !mapping_is_ignored(m));
309    for want in wants {
310        let id_on_remote = want.remote.as_id();
311        if !arguments.can_use_ref_in_want() || matches!(want.remote, refmap::Source::ObjectId(_)) {
312            if let Some(id) = id_on_remote {
313                arguments.want(id);
314                has_want = true;
315            }
316        } else {
317            arguments.want_ref(
318                want.remote
319                    .as_name()
320                    .expect("name available if this isn't an object id"),
321            );
322            has_want = true;
323        }
324        let id_is_annotated_tag_we_have = id_on_remote
325            .and_then(|id| objects.try_header(id).ok().flatten().map(|h| (id, h)))
326            .filter(|(_, h)| h.kind == gix_object::Kind::Tag)
327            .map(|(id, _)| id);
328        if let Some(tag_on_remote) = id_is_annotated_tag_we_have {
329            // Annotated tags are not handled at all by negotiators in the commit-graph - they only see commits and thus won't
330            // ever add `have`s for tags. To correct for that, we add these haves here to avoid getting them sent again.
331            arguments.have(tag_on_remote);
332        }
333    }
334    has_want
335}
336
337/// Remove all commits that are more recent than the cut-off, which is the commit time of the oldest common commit we have with the server.
338fn mark_recent_complete_commits(
339    queue: &mut Queue,
340    graph: &mut gix_negotiate::Graph<'_, '_>,
341    cutoff: SecondsSinceUnixEpoch,
342) -> Result<(), Error> {
343    let _span = gix_trace::detail!("mark_recent_complete", queue_len = queue.len());
344    while let Some(id) = queue
345        .peek()
346        .and_then(|(commit_time, id)| (commit_time >= &cutoff).then_some(*id))
347    {
348        queue.pop_value();
349        let commit = graph.get(&id).expect("definitely set when adding tips or parents");
350        for parent_id in commit.parents.clone() {
351            let mut was_complete = false;
352            if let Some(parent) = graph
353                .get_or_insert_commit(parent_id, |md| {
354                    was_complete = md.flags.contains(Flags::COMPLETE);
355                    md.flags |= Flags::COMPLETE;
356                })?
357                .filter(|_| !was_complete)
358            {
359                queue.insert(parent.commit_time, parent_id);
360            }
361        }
362    }
363    Ok(())
364}
365
366fn mark_all_refs_in_repo(
367    store: &gix_ref::file::Store,
368    objects: &impl gix_object::Find,
369    graph: &mut gix_negotiate::Graph<'_, '_>,
370    queue: &mut Queue,
371    mark: Flags,
372) -> Result<(), Error> {
373    let _span = gix_trace::detail!("mark_all_refs");
374    for local_ref in store.iter()?.all()? {
375        let mut local_ref = local_ref?;
376        let id = local_ref.peel_to_id_in_place_packed(
377            store,
378            objects,
379            store.cached_packed_buffer()?.as_ref().map(|b| &***b),
380        )?;
381        let mut is_complete = false;
382        if let Some(commit) = graph
383            .get_or_insert_commit(id, |md| {
384                is_complete = md.flags.contains(Flags::COMPLETE);
385                md.flags |= mark;
386            })?
387            .filter(|_| !is_complete)
388        {
389            queue.insert(commit.commit_time, id);
390        };
391    }
392    Ok(())
393}
394
395///
396pub mod one_round {
397    /// State to keep between individual [rounds](super::one_round()).
398    #[derive(Clone, Debug)]
399    pub struct State {
400        /// The amount of haves to send the next round.
401        /// It's initialized with the standard window size for negotations.
402        pub haves_to_send: usize,
403        /// Is turned `true` if the remote as confirmed any common commit so far.
404        pub(super) seen_ack: bool,
405        /// The amount of haves we have sent that didn't have a match on the remote.
406        ///
407        /// The higher this number, the more time was wasted.
408        pub(super) in_vain: usize,
409        /// Commits we have in common.
410        ///
411        /// Only set when we are stateless as we have to resend known common commits each round.
412        pub(super) common_commits: Option<Vec<gix_hash::ObjectId>>,
413    }
414
415    impl State {
416        /// Create a new instance.
417        ///
418        /// setting `connection_is_stateless` accordingly which affects the amount of haves to send.
419        pub fn new(connection_is_stateless: bool) -> Self {
420            State {
421                haves_to_send: gix_negotiate::window_size(connection_is_stateless, None),
422                seen_ack: false,
423                in_vain: 0,
424                common_commits: connection_is_stateless.then(Vec::new),
425            }
426        }
427    }
428
429    impl State {
430        /// Return `true` if the transports connection is stateless.
431        fn connection_is_stateless(&self) -> bool {
432            self.common_commits.is_some()
433        }
434        pub(super) fn adjust_window_size(&mut self) {
435            self.haves_to_send = gix_negotiate::window_size(self.connection_is_stateless(), Some(self.haves_to_send));
436        }
437    }
438}
439
440/// Prepare to negotiate a single round in the process of letting the remote know what we have, and have in common.
441///
442/// Note that this function only configures `arguments`, no IO is performed.
443///
444/// The operation is performed with `negotiator` and `graph`, sending the amount of `haves_to_send` after possibly
445/// making the common commits (as sent by the remote) known to `negotiator` using `previous_response`, if this isn't the first round.
446/// All [commits we have](crate::fetch::Arguments::have()) are added to `arguments` accordingly.
447///
448/// Returns information about this round, and `true` if we are done and should stop negotiating *after* the `arguments` have
449/// been sent to the remote one last time.
450pub fn one_round(
451    negotiator: &mut dyn gix_negotiate::Negotiator,
452    graph: &mut gix_negotiate::Graph<'_, '_>,
453    state: &mut one_round::State,
454    arguments: &mut crate::fetch::Arguments,
455    previous_response: Option<&crate::fetch::Response>,
456) -> Result<(Round, bool), Error> {
457    let mut seen_ack = false;
458    if let Some(response) = previous_response {
459        use crate::fetch::response::Acknowledgement;
460        for ack in response.acknowledgements() {
461            match ack {
462                Acknowledgement::Common(id) => {
463                    seen_ack = true;
464                    negotiator.in_common_with_remote(*id, graph)?;
465                    if let Some(common) = &mut state.common_commits {
466                        common.push(*id);
467                    }
468                }
469                Acknowledgement::Ready => {
470                    // NOTE: In git, there is some logic dealing with whether to expect a DELIM or FLUSH package,
471                    //       but we handle this with peeking.
472                }
473                Acknowledgement::Nak => {}
474            }
475        }
476    }
477
478    // `common` is set only if this is a stateless transport, and we repeat previously confirmed common commits as HAVE, because
479    // we are not going to repeat them otherwise.
480    if let Some(common) = &mut state.common_commits {
481        for have_id in common {
482            arguments.have(have_id);
483        }
484    }
485
486    let mut haves_added = 0;
487    for have_id in (0..state.haves_to_send).map_while(|_| negotiator.next_have(graph)) {
488        arguments.have(have_id?);
489        haves_added += 1;
490    }
491    // Note that we are differing from the git implementation, which does an extra-round of with no new haves sent at all.
492    // For us, it seems better to just say we are done when we know we are done, as potentially additional acks won't affect the
493    // queue of our implementation at all (so the negotiator won't come up with more haves next time either).
494    if seen_ack {
495        state.in_vain = 0;
496    }
497    state.seen_ack |= seen_ack;
498    state.in_vain += haves_added;
499    let round = Round {
500        haves_sent: haves_added,
501        in_vain: state.in_vain,
502        haves_to_send: state.haves_to_send,
503        previous_response_had_at_least_one_in_common: seen_ack,
504    };
505    let is_done = haves_added != state.haves_to_send || (state.seen_ack && state.in_vain >= 256);
506    state.adjust_window_size();
507
508    Ok((round, is_done))
509}