tame_index/index/
git_remote.rs

1use super::{FileLock, GitIndex};
2use crate::{Error, IndexKrate, KrateName};
3use std::sync::atomic::AtomicBool;
4
/// Uses a "bare" git index that fetches files directly from the repo instead of
/// using a local checkout, the same as cargo itself.
///
/// Uses cargo's cache
pub struct RemoteGitIndex {
    /// The wrapped local index; supplies the remote URL (`url`), the on-disk
    /// repository location (`cache.path`) and the crate cache entries
    index: GitIndex,
    /// Handle to the repository that was opened or cloned at the index's
    /// cache path
    repo: gix::Repository,
    /// The commit whose tree is used to look up blobs, as last resolved by
    /// `set_head`
    head_commit: gix::ObjectId,
}
14
// Every remote operation in this file (URL lookup, refspec configuration and
// connecting) is done in the fetch direction.
const DIR: gix::remote::Direction = gix::remote::Direction::Fetch;
16
17impl RemoteGitIndex {
18    /// Creates a new [`Self`] that can access and write local cache entries,
19    /// and contact the remote index to retrieve the latest index information
20    ///
21    /// Note that if a repository does not exist at the local disk path of the
22    /// provided [`GitIndex`], a full clone will be performed.
23    #[inline]
24    pub fn new(index: GitIndex, lock: &FileLock) -> Result<Self, Error> {
25        Self::with_options(
26            index,
27            gix::progress::Discard,
28            &gix::interrupt::IS_INTERRUPTED,
29            lock,
30        )
31    }
32
33    /// Breaks [`Self`] into its component parts
34    ///
35    /// This method is useful if you need thread safe access to the repository
36    #[inline]
37    pub fn into_parts(self) -> (GitIndex, gix::Repository) {
38        (self.index, self.repo)
39    }
40
41    /// Creates a new [`Self`] that allows showing of progress of the the potential
42    /// fetch if the disk location is empty, as well as allowing interruption
43    /// of the fetch operation.
44    pub fn with_options<P>(
45        mut index: GitIndex,
46        progress: P,
47        should_interrupt: &AtomicBool,
48        _lock: &FileLock,
49    ) -> Result<Self, Error>
50    where
51        P: gix::NestedProgress,
52        P::SubProgress: 'static,
53    {
54        let open_or_clone_repo = || -> Result<_, GitError> {
55            let mut mapping = gix::sec::trust::Mapping::default();
56            let open_with_complete_config =
57                gix::open::Options::default().permissions(gix::open::Permissions {
58                    config: gix::open::permissions::Config {
59                        // Be sure to get all configuration, some of which is only known by the git binary.
60                        // That way we are sure to see all the systems credential helpers
61                        git_binary: true,
62                        ..Default::default()
63                    },
64                    ..Default::default()
65                });
66
67            mapping.reduced = open_with_complete_config.clone();
68            mapping.full = open_with_complete_config.clone();
69
70            // Attempt to open the repository, if it fails for any reason,
71            // attempt to perform a fresh clone instead
72            let repo = gix::ThreadSafeRepository::discover_opts(
73                &index.cache.path,
74                gix::discover::upwards::Options::default().apply_environment(),
75                mapping,
76            )
77            .ok()
78            .map(|repo| repo.to_thread_local())
79            .filter(|repo| {
80                // The `cargo` standard registry clone has no configured origin (when created with `git2`).
81                repo.find_remote("origin").map_or(true, |remote| {
82                    remote
83                        .url(DIR)
84                        .map_or(false, |remote_url| remote_url.to_bstring() == index.url)
85                })
86            })
87            .or_else(|| gix::open_opts(&index.cache.path, open_with_complete_config).ok());
88
89            let res = if let Some(repo) = repo {
90                (repo, None)
91            } else {
92                // We need to create the directory chain ourselves, gix will fail
93                // if any parent directory is missing
94                if !index.cache.path.exists() {
95                    std::fs::create_dir_all(&index.cache.path).map_err(|source| {
96                        GitError::ClonePrep(Box::new(gix::clone::Error::Init(
97                            gix::init::Error::Init(gix::create::Error::CreateDirectory {
98                                source,
99                                path: index.cache.path.clone().into(),
100                            }),
101                        )))
102                    })?;
103                }
104
105                let (repo, out) = gix::prepare_clone_bare(index.url.as_str(), &index.cache.path)
106                    .map_err(Box::new)?
107                    .with_remote_name("origin")
108                    .map_err(Box::new)?
109                    .configure_remote(|remote| {
110                        Ok(remote.with_refspecs(["+HEAD:refs/remotes/origin/HEAD"], DIR)?)
111                    })
112                    .fetch_only(progress, should_interrupt)
113                    .map_err(|err| GitError::from(Box::new(err)))?;
114
115                (repo, Some(out))
116            };
117
118            Ok(res)
119        };
120
121        let (mut repo, fetch_outcome) = open_or_clone_repo()?;
122
123        if let Some(fetch_outcome) = fetch_outcome {
124            crate::utils::git::write_fetch_head(
125                &repo,
126                &fetch_outcome,
127                &repo.find_remote("origin").unwrap(),
128            )?;
129        }
130
131        repo.object_cache_size_if_unset(4 * 1024 * 1024);
132
133        let head_commit = Self::set_head(&mut index, &repo)?;
134
135        Ok(Self {
136            repo,
137            index,
138            head_commit,
139        })
140    }
141
142    /// Gets the local index
143    #[inline]
144    pub fn local(&self) -> &GitIndex {
145        &self.index
146    }
147
148    /// Get the configuration of the index.
149    ///
150    /// See the [cargo docs](https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration)
151    pub fn index_config(&self) -> Result<super::IndexConfig, Error> {
152        let blob = self.read_blob("config.json")?.ok_or_else(|| {
153            Error::Io(std::io::Error::new(
154                std::io::ErrorKind::NotFound,
155                "unable to find config.json",
156            ))
157        })?;
158        Ok(serde_json::from_slice(&blob.data)?)
159    }
160
161    /// Sets the head commit in the wrapped index so that cache entries can be
162    /// properly filtered
163    #[inline]
164    fn set_head(index: &mut GitIndex, repo: &gix::Repository) -> Result<gix::ObjectId, Error> {
165        let find_remote_head = || -> Result<gix::ObjectId, GitError> {
166            const CANDIDATE_REFS: &[&str] = &[
167                "FETCH_HEAD",    /* the location with the most-recent updates, as written by git2 */
168                "origin/HEAD", /* typical refspecs update this symbolic ref to point to the actual remote ref with the fetched commit */
169                "origin/master", /* for good measure, resolve this branch by hand in case origin/HEAD is broken */
170                "HEAD",
171            ];
172            let mut candidates: Vec<_> = CANDIDATE_REFS
173                .iter()
174                .enumerate()
175                .filter_map(|(i, refname)| {
176                    let ref_id = repo
177                        .find_reference(*refname)
178                        .ok()?
179                        .into_fully_peeled_id()
180                        .ok()?;
181
182                    let commit = ref_id.object().ok()?.try_into_commit().ok()?;
183                    let commit_time = commit.time().ok()?.seconds;
184
185                    Some((i, commit.id, commit_time))
186                })
187                .collect();
188
189            // Sort from oldest to newest, the last one will be the best reference
190            // we could reasonably locate, and since we are on second resolution,
191            // prefer the ordering of candidates if times are equal.
192            //
193            // This allows FETCH_HEAD to be authoritative, unless one of the other
194            // references is more up to date, which can occur in (at least) 2 scenarios:
195            //
196            // 1. The repo is a fresh clone by cargo either via git or libgit2,
197            // neither of which write FETCH_HEAD during clone
198            // 2. A fetch was performed by an external crate/program to cargo or
199            // ourselves that didn't update FETCH_HEAD
200            candidates.sort_by(|a, b| match a.2.cmp(&b.2) {
201                std::cmp::Ordering::Equal => b.0.cmp(&a.0),
202                o => o,
203            });
204
205            // get the most recent commit, the one with most time passed since unix epoch.
206            Ok(candidates
207                .last()
208                .ok_or_else(|| GitError::UnableToFindRemoteHead)?
209                .1)
210        };
211
212        let gix::ObjectId::Sha1(sha1) = find_remote_head()?;
213        index.set_head_commit(Some(sha1));
214
215        Ok(gix::ObjectId::Sha1(sha1))
216    }
217
218    /// Attempts to read the specified crate's index metadata
219    ///
220    /// An attempt is first made to read the cache entry for the crate, and
221    /// falls back to reading the metadata from the git blob it is stored in
222    ///
223    /// This method does no network I/O
224    pub fn krate(
225        &self,
226        name: KrateName<'_>,
227        write_cache_entry: bool,
228        lock: &FileLock,
229    ) -> Result<Option<IndexKrate>, Error> {
230        if let Ok(Some(cached)) = self.cached_krate(name, lock) {
231            return Ok(Some(cached));
232        }
233
234        let Some(blob) = self.read_blob(&name.relative_path(None))? else {
235            return Ok(None);
236        };
237
238        let krate = IndexKrate::from_slice(&blob.data)?;
239        if write_cache_entry {
240            // It's unfortunate if fail to write to the cache, but we still were
241            // able to retrieve the contents from git
242            let mut hex_id = [0u8; 40];
243            let gix::ObjectId::Sha1(sha1) = blob.id;
244            let blob_id = crate::utils::encode_hex(&sha1, &mut hex_id);
245
246            let _ = self.index.write_to_cache(&krate, Some(blob_id), lock);
247        }
248
249        Ok(Some(krate))
250    }
251
252    fn read_blob(&self, path: &str) -> Result<Option<gix::ObjectDetached>, GitError> {
253        let tree = self
254            .repo
255            .find_object(self.head_commit)
256            .map_err(Box::new)?
257            .try_into_commit()?
258            .tree()?;
259
260        let Some(entry) = tree
261            .lookup_entry_by_path(path)
262            .map_err(|err| GitError::BlobLookup(Box::new(err)))?
263        else {
264            return Ok(None);
265        };
266        let blob = entry
267            .object()
268            .map_err(|err| GitError::BlobLookup(Box::new(err)))?;
269
270        // Sanity check this is a blob, it _shouldn't_ be possible to get anything
271        // else (like a subtree), but better safe than sorry
272        if blob.kind != gix::object::Kind::Blob {
273            return Ok(None);
274        }
275
276        Ok(Some(blob.detach()))
277    }
278
279    /// Attempts to read the locally cached crate information
280    ///
281    /// Note this method has improvements over using [`GitIndex::cached_krate`].
282    ///
283    /// In older versions of cargo, only the head commit hash is used as the version
284    /// for cached crates, which means a fetch invalidates _all_ cached crates,
285    /// even if they have not been modified in any commits since the previous
286    /// fetch.
287    ///
288    /// This method does the same thing as cargo, which is to allow _either_
289    /// the head commit oid _or_ the blob oid as a version, which is more
290    /// granular and means the cached crate can remain valid as long as it is
291    /// not updated in a subsequent fetch. [`GitIndex::cached_krate`] cannot take
292    /// advantage of that though as it does not have access to git and thus
293    /// cannot know the blob id.
294    #[inline]
295    pub fn cached_krate(
296        &self,
297        name: KrateName<'_>,
298        lock: &FileLock,
299    ) -> Result<Option<IndexKrate>, Error> {
300        let Some(cached) = self.index.cache.read_cache_file(name, lock)? else {
301            return Ok(None);
302        };
303        let valid = crate::index::cache::ValidCacheEntry::read(&cached)?;
304
305        if Some(valid.revision) != self.index.head_commit() {
306            let Some(blob) = self.read_blob(&name.relative_path(None))? else {
307                return Ok(None);
308            };
309
310            let mut hex_id = [0u8; 40];
311            let gix::ObjectId::Sha1(sha1) = blob.id;
312            let blob_id = crate::utils::encode_hex(&sha1, &mut hex_id);
313
314            if valid.revision != blob_id {
315                return Ok(None);
316            }
317        }
318
319        valid.to_krate(None)
320    }
321
322    /// Performs a fetch from the remote index repository.
323    ///
324    /// This method performs network I/O.
325    #[inline]
326    pub fn fetch(&mut self, lock: &FileLock) -> Result<(), Error> {
327        self.fetch_with_options(
328            gix::progress::Discard,
329            &gix::interrupt::IS_INTERRUPTED,
330            lock,
331        )
332    }
333
334    /// Same as [`Self::fetch`] but allows specifying a progress implementation
335    /// and allows interruption of the network operations
336    pub fn fetch_with_options<P>(
337        &mut self,
338        mut progress: P,
339        should_interrupt: &AtomicBool,
340        _lock: &FileLock,
341    ) -> Result<(), Error>
342    where
343        P: gix::NestedProgress,
344        P::SubProgress: 'static,
345    {
346        // We're updating the reflog which requires a committer be set, which might
347        // not be the case, particular in a CI environment, but also would default
348        // the the git config for the current directory/global, which on a normal
349        // user machine would show the user was the one who updated the database which
350        // is kind of misleading, so we just override the config for this operation
351
352        let mut config = self.repo.config_snapshot_mut();
353        config
354            .set_raw_value(&"committer.name", "tame-index")
355            .map_err(GitError::from)?;
356        // Note we _have_ to set the email as well, but luckily gix does not actually
357        // validate if it's a proper email or not :)
358        config
359            .set_raw_value(&"committer.email", "")
360            .map_err(GitError::from)?;
361
362        let repo = config
363            .commit_auto_rollback()
364            .map_err(|err| GitError::from(Box::new(err)))?;
365
366        let mut remote = repo.find_remote("origin").ok().unwrap_or_else(|| {
367            repo.remote_at(self.index.url.as_str())
368                .expect("owned URL is always valid")
369        });
370
371        remote
372            .replace_refspecs(Some("+HEAD:refs/remotes/origin/HEAD"), DIR)
373            .expect("valid statically known refspec");
374
375        // Perform the actual fetch
376        let outcome = remote
377            .connect(DIR)
378            .map_err(|err| GitError::from(Box::new(err)))?
379            .prepare_fetch(&mut progress, Default::default())
380            .map_err(|err| GitError::from(Box::new(err)))?
381            .receive(&mut progress, should_interrupt)
382            .map_err(|err| GitError::from(Box::new(err)))?;
383
384        crate::utils::git::write_fetch_head(&repo, &outcome, &remote)?;
385        self.head_commit = Self::set_head(&mut self.index, &repo)?;
386
387        Ok(())
388    }
389}
390
/// Errors that can occur during a git operation
///
/// Most variants transparently wrap the corresponding `gix` error, boxed in
/// many cases, and can be created from it via `From`.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum GitError {
    // Preparing a clone failed, this is also used when the directory chain
    // for the clone target could not be created
    #[error(transparent)]
    ClonePrep(#[from] Box<gix::clone::Error>),
    // The fetch performed as part of a clone failed
    #[error(transparent)]
    CloneFetch(#[from] Box<gix::clone::fetch::Error>),
    // Connecting to the remote failed
    #[error(transparent)]
    Connect(#[from] Box<gix::remote::connect::Error>),
    // Preparing a fetch on an established connection failed
    #[error(transparent)]
    FetchPrep(#[from] Box<gix::remote::fetch::prepare::Error>),
    // Receiving the fetch failed, see `is_spurious` and `is_locked` for
    // whether a retry could succeed
    #[error(transparent)]
    Fetch(#[from] Box<gix::remote::fetch::Error>),
    #[error(transparent)]
    Open(#[from] Box<gix::open::Error>),
    #[error(transparent)]
    Commit(#[from] gix::object::commit::Error),
    // An object was not of the expected kind, eg. the head object was not a commit
    #[error(transparent)]
    InvalidObject(#[from] gix::object::try_into::Error),
    #[error(transparent)]
    ReferenceLookup(#[from] Box<gix::reference::find::existing::Error>),
    // An object could not be found, used both for the head commit object and
    // for tree entries and their blobs
    #[error(transparent)]
    BlobLookup(#[from] Box<gix::object::find::existing::Error>),
    #[error(transparent)]
    RemoteLookup(#[from] Box<gix::remote::find::existing::Error>),
    // A lock could not be acquired, `is_locked` reports this as retryable
    // unless it is the `PermanentlyLocked` variant
    #[error(transparent)]
    Lock(#[from] gix::lock::acquire::Error),
    #[error(transparent)]
    RemoteName(#[from] Box<gix::remote::name::Error>),
    #[error(transparent)]
    Config(#[from] Box<gix::config::Error>),
    // Overriding a config value failed
    #[error(transparent)]
    ConfigValue(#[from] gix::config::file::set_raw_value::Error),
    // None of the candidate references could be resolved to a commit
    #[error("unable to locate remote HEAD")]
    UnableToFindRemoteHead,
    #[error("unable to update HEAD to remote HEAD")]
    UnableToUpdateHead,
}
430
431impl GitError {
432    /// Returns true if the error is a (potentially) spurious network error that
433    /// indicates a retry of the operation could succeed
434    #[inline]
435    pub fn is_spurious(&self) -> bool {
436        use gix::protocol::transport::IsSpuriousError;
437
438        match self {
439            Self::Fetch(fe) => return fe.is_spurious(),
440            Self::CloneFetch(cf) => {
441                if let gix::clone::fetch::Error::Fetch(fe) = &**cf {
442                    return fe.is_spurious();
443                }
444            }
445            _ => {}
446        }
447
448        false
449    }
450
451    /// Returns true if a fetch could not be completed successfully due to the
452    /// repo being locked, and could succeed if retried
453    #[inline]
454    pub fn is_locked(&self) -> bool {
455        let ure = match self {
456            Self::Fetch(fe) => {
457                if let gix::remote::fetch::Error::UpdateRefs(ure) = &**fe {
458                    ure
459                } else {
460                    return false;
461                }
462            }
463            Self::CloneFetch(cf) => {
464                if let gix::clone::fetch::Error::Fetch(gix::remote::fetch::Error::UpdateRefs(ure)) =
465                    &**cf
466                {
467                    ure
468                } else {
469                    return false;
470                }
471            }
472            Self::Lock(le) => {
473                return !matches!(le, gix::lock::acquire::Error::PermanentlyLocked { .. })
474            }
475            _ => return false,
476        };
477
478        if let gix::remote::fetch::refs::update::Error::EditReferences(ere) = ure {
479            match ere {
480                gix::reference::edit::Error::FileTransactionPrepare(ftpe) => {
481                    use gix::refs::file::transaction::prepare::Error as PrepError;
482                    if let PrepError::LockAcquire { source, .. }
483                    | PrepError::PackedTransactionAcquire(source) = ftpe
484                    {
485                        // currently this is either io or permanentlylocked, but just in case
486                        // more variants are added, we just assume it's possible to retry
487                        // in anything but the permanentlylocked variant
488                        !matches!(source, gix::lock::acquire::Error::PermanentlyLocked { .. })
489                    } else {
490                        false
491                    }
492                }
493                gix::reference::edit::Error::FileTransactionCommit(ftce) => {
494                    matches!(
495                        ftce,
496                        gix::refs::file::transaction::commit::Error::LockCommit { .. }
497                    )
498                }
499                _ => false,
500            }
501        } else {
502            false
503        }
504    }
505}