1use crate::dedupe::DedupeContext;
2use crate::dirs::{
3 crate_name_to_relative_path, local_path_and_canonical_url_with_hash_kind, HashKind, DEFAULT_HASHER_KIND,
4};
5use crate::error::GixError;
6use crate::git::{changes, config, URL};
7use crate::{path_max_byte_len, Crate, Error, GitIndex, IndexConfig};
8use gix::bstr::ByteSlice;
9use gix::config::tree::Key;
10use std::io;
11use std::path::{Path, PathBuf};
12use std::time::Duration;
13use std::time::SystemTime;
14
/// A single change to a crate, as recorded by one commit in the git index.
#[derive(Debug, Clone)]
pub struct Change {
    /// Name of the crate this change affects.
    pub(super) crate_name: Box<str>,
    /// Timestamp of the commit that introduced the change.
    pub(super) time: SystemTime,
    /// Id of the commit that introduced the change.
    pub(super) commit: gix::ObjectId,
}
24
25impl Change {
26 #[inline]
28 #[must_use]
29 pub fn crate_name(&self) -> &str {
30 &*self.crate_name
31 }
32
33 #[inline]
35 #[must_use]
36 pub fn time(&self) -> SystemTime {
37 self.time
38 }
39
40 #[must_use]
42 pub fn commit(&self) -> &[u8; 20] {
43 self.commit.as_bytes().try_into().unwrap()
44 }
45
46 #[must_use]
48 pub fn commit_hex(&self) -> String {
49 self.commit.to_string()
50 }
51}
52
impl GitIndex {
    #[doc(hidden)]
    #[deprecated(note = "use new_cargo_default()")]
    pub fn new<P: Into<PathBuf>>(path: P) -> Self {
        // NOTE(review): opened with `Mode::ReadOnly`, so nothing is actually
        // cloned here; the `expect` below panics when no repository exists at
        // `path` — confirm this is the intended behavior of the deprecated API.
        Self::from_path_and_url(path.into(), URL.into(), Mode::ReadOnly)
            .unwrap()
            .expect("repo present after possibly cloning index")
    }

    /// Opens the crates.io index at cargo's default location, honoring any
    /// crates.io source replacement found in cargo's configuration, and
    /// cloning the index if it is not present locally yet.
    pub fn new_cargo_default() -> Result<Self, Error> {
        let url = config::get_crates_io_replacement(None, None)?;
        Self::from_url(url.as_deref().unwrap_or(URL))
    }

    /// Like [`GitIndex::new_cargo_default()`], but returns `Ok(None)` instead
    /// of cloning when the index repository does not exist locally.
    pub fn try_new_cargo_default() -> Result<Option<Self>, Error> {
        let url = config::get_crates_io_replacement(None, None)?;
        Self::try_from_url(url.as_deref().unwrap_or(URL))
    }

    /// Opens (cloning if needed) the index at `url`, stored at the local path
    /// derived from the URL using the default hash kind.
    pub fn from_url(url: &str) -> Result<Self, Error> {
        Self::from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND)
    }

    /// Like [`GitIndex::from_url()`], with an explicit [`HashKind`] that
    /// controls how the local directory name is derived from the URL.
    pub fn from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result<Self, Error> {
        let (path, canonical_url) = local_path_and_canonical_url_with_hash_kind(url, None, hash_kind)?;
        Ok(
            Self::from_path_and_url(path, canonical_url, Mode::CloneUrlToPathIfRepoMissing)?
                .expect("repo present after possibly cloning it"),
        )
    }

    /// Read-only variant of [`GitIndex::from_url()`]: returns `Ok(None)` when
    /// the repository does not exist locally instead of cloning it.
    pub fn try_from_url(url: &str) -> Result<Option<Self>, Error> {
        Self::try_from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND)
    }

    /// Read-only variant of [`GitIndex::from_url_with_hash_kind()`].
    pub fn try_from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result<Option<Self>, Error> {
        let (path, canonical_url) = local_path_and_canonical_url_with_hash_kind(url, None, hash_kind)?;
        Self::from_path_and_url(path, canonical_url, Mode::ReadOnly)
    }

    /// Opens the index at an explicit `path`, cloning it from `url` if no
    /// repository is present there yet.
    pub fn with_path<P: Into<PathBuf>, S: Into<String>>(path: P, url: S) -> Result<Self, Error> {
        Ok(
            Self::from_path_and_url(path.into(), url.into(), Mode::CloneUrlToPathIfRepoMissing)?
                .expect("repo present after possibly cloning it"),
        )
    }

    /// Read-only variant of [`GitIndex::with_path()`]: returns `Ok(None)` when
    /// there is no usable repository at `path` instead of cloning.
    pub fn try_with_path<P: Into<PathBuf>, S: Into<String>>(path: P, url: S) -> Result<Option<Self>, Error> {
        Self::from_path_and_url(path.into(), url.into(), Mode::ReadOnly)
    }

    /// Local filesystem path of the checked-out index repository.
    #[inline]
    #[must_use]
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// URL this index was opened from (or cloned from).
    #[inline]
    #[must_use]
    pub fn url(&self) -> &str {
        &self.url
    }

    /// Commit time of the head commit this index currently reads from.
    /// Negative (pre-epoch) commit times are clamped to `UNIX_EPOCH`.
    #[inline]
    #[must_use]
    pub fn time(&self) -> Result<SystemTime, GixError> {
        Ok(SystemTime::UNIX_EPOCH
            + Duration::from_secs(
                self.repo
                    .find_object(self.head_commit)?
                    .peel_to_commit()?
                    .time()?
                    .seconds
                    .max(0) as _,
            ))
    }

    /// Raw bytes of the head commit id.
    ///
    /// Panics if the repository's object hash is not 20 bytes (non-SHA-1),
    /// since the slice-to-array conversion is unwrapped.
    #[must_use]
    pub fn commit(&self) -> &[u8; 20] {
        self.head_commit.as_bytes().try_into().unwrap()
    }

    /// Hexadecimal representation of the head commit id.
    #[must_use]
    pub fn commit_hex(&self) -> String {
        self.head_commit.to_string()
    }

    /// Resolves `rev` to a commit id, returning `None` on any failure
    /// (unknown rev, missing object, or not a commit).
    fn lookup_commit(&self, rev: &str) -> Option<gix::ObjectId> {
        self.repo
            .rev_parse_single(rev)
            .ok()?
            .object()
            .ok()?
            .try_into_commit()
            .ok()?
            .id
            .into()
    }

    /// Points this index at the commit that `rev` resolves to. On failure,
    /// returns [`Error::MissingHead`] listing all references available in the
    /// repository (with an empty `refs_tried`, since `rev` was caller-chosen).
    pub fn set_commit_from_refspec(&mut self, rev: &str) -> Result<(), Error> {
        self.head_commit = self.lookup_commit(rev).ok_or_else(|| Error::MissingHead {
            repo_path: self.path.to_owned(),
            refs_tried: &[],
            refs_available: self
                .repo
                .references()
                .ok()
                .and_then(|p| {
                    p.all()
                        .ok()?
                        .map(|r| r.ok().map(|r| r.name().as_bstr().to_string()))
                        .collect()
                })
                .unwrap_or_default(),
        })?;
        Ok(())
    }

    /// Returns an iterator over the changes recorded in this index's history.
    pub fn changes(&self) -> Result<changes::Changes<'_>, Error> {
        Ok(changes::Changes::new(self)?)
    }

    /// Opens (and optionally clones) the repository backing the index.
    ///
    /// Returns `Ok(None)` only in [`Mode::ReadOnly`] when no acceptable
    /// repository exists at `path`.
    fn from_path_and_url(path: PathBuf, url: String, mode: Mode) -> Result<Option<Self>, Error> {
        // Permit `git` binary access when loading configuration, so settings
        // from the installed git (e.g. system-level config) are picked up.
        let open_with_complete_config = gix::open::Options::default().permissions(gix::open::Permissions {
            config: gix::open::permissions::Config {
                git_binary: true,
                ..Default::default()
            },
            ..Default::default()
        });

        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        // Accept an existing repository only if its `origin` fetch URL starts
        // with the expected URL; a repo without an `origin` remote is also
        // accepted.
        let repo = gix::open_opts(&path, open_with_complete_config.clone())
            .ok()
            .filter(|repo| {
                repo.find_remote("origin").map_or(true, |remote| {
                    remote
                        .url(gix::remote::Direction::Fetch)
                        .map_or(false, |remote_url| remote_url.to_bstring().starts_with_str(&url))
                })
            });

        let repo = match mode {
            Mode::ReadOnly => repo,
            Mode::CloneUrlToPathIfRepoMissing => Some(match repo {
                Some(repo) => repo,
                // The URL filter above may have rejected an otherwise valid
                // repo; re-open without the filter before resorting to a clone.
                None => match gix::open_opts(&path, open_with_complete_config).ok() {
                    None => clone_url(&url, &path)?,
                    Some(repo) => repo,
                },
            }),
        };

        match repo {
            None => Ok(None),
            Some(repo) => {
                let head_commit = Self::find_repo_head(&repo, &path)?;
                Ok(Some(Self {
                    path,
                    url,
                    repo,
                    head_commit,
                }))
            }
        }
    }

    /// Root tree of the head commit this index reads from.
    fn tree(&self) -> Result<gix::Tree<'_>, GixError> {
        Ok(self.repo.find_object(self.head_commit)?.try_into_commit()?.tree()?)
    }

    #[doc(hidden)]
    #[deprecated(note = "use update()")]
    pub fn retrieve_or_update(&mut self) -> Result<(), Error> {
        self.update()
    }

    #[doc(hidden)]
    #[deprecated(note = "it's always retrieved. there's no need to call it any more")]
    pub fn retrieve(&self) -> Result<(), Error> {
        Ok(())
    }

    #[doc(hidden)]
    #[deprecated(note = "it's always retrieved, so it's assumed to always exist")]
    #[must_use]
    pub fn exists(&self) -> bool {
        true
    }

    /// Fetches the latest index state from the `origin` remote (or from this
    /// index's URL if `origin` is missing) and advances the in-memory head
    /// commit accordingly.
    pub fn update(&mut self) -> Result<(), Error> {
        let mut remote = self
            .repo
            .find_remote("origin")
            .ok()
            .unwrap_or_else(|| self.repo.remote_at(self.url.as_str()).expect("own URL is always valid"));
        fetch_remote(
            &mut remote,
            &["+HEAD:refs/remotes/origin/HEAD", "+master:refs/remotes/origin/master"],
        )?;

        let head_commit = Self::find_repo_head(&self.repo, &self.path)?;
        self.head_commit = head_commit;

        Ok(())
    }

    /// Looks up a single crate by name.
    ///
    /// Tries cargo's pre-parsed `.cache` file on disk first, then falls back
    /// to reading the crate file from the git tree. Returns `None` if the
    /// crate is unknown or unreadable either way.
    #[must_use]
    pub fn crate_(&self, name: &str) -> Option<Crate> {
        let rel_path = crate_name_to_relative_path(name, None)?;

        // Fast path: cargo's cache file under `<index>/.cache/<rel_path>`.
        {
            let mut cache_path = PathBuf::with_capacity(path_max_byte_len(&self.path) + 8 + rel_path.len());
            cache_path.push(&self.path);
            cache_path.push(".cache");
            cache_path.push(&rel_path);
            if let Ok(cache_bytes) = std::fs::read(&cache_path) {
                if let Ok(krate) = Crate::from_cache_slice(&cache_bytes, None) {
                    return Some(krate);
                }
            }
        }

        // Slow path: read and parse the blob out of the git tree.
        self.crate_from_rel_path(rel_path).ok()
    }

    /// Reads and parses the crate file at `rel_path` within the index tree.
    fn crate_from_rel_path(&self, rel_path: String) -> Result<Crate, Error> {
        let object = self.object_at_path(rel_path.into())?;
        Crate::from_slice(&object.data).map_err(Error::Io)
    }

    /// Iterator over all crates in the index.
    ///
    /// Panics if the head commit's tree can no longer be found.
    #[inline]
    #[must_use]
    pub fn crates(&self) -> Crates<'_> {
        Crates {
            blobs: self.crates_blobs().expect("HEAD commit disappeared"),
            dedupe: MaybeOwned::Owned(DedupeContext::new()),
        }
    }

    /// Like [`GitIndex::crates()`], but processes each top-level prefix
    /// directory of the index on rayon's thread pool.
    #[cfg(feature = "parallel")]
    #[must_use]
    pub fn crates_parallel(
        &self,
    ) -> impl rayon::iter::ParallelIterator<Item = Result<Crate, crate::error::CratesIterError>> + '_ {
        use rayon::iter::{IntoParallelIterator, ParallelIterator};
        let tree_oids = match self.crates_top_level_ids() {
            Ok(objs) => objs,
            // On failure, seed with a single null id so the error surfaces as
            // a `CratesIterError` item from the iterator rather than here.
            Err(_) => vec![self.repo.object_hash().null()], };

        tree_oids
            .into_par_iter()
            .map_init(
                {
                    let repo = self.repo.clone().into_sync();
                    move || {
                        (
                            {
                                // One thread-local repository per rayon worker,
                                // with the pack cache disabled.
                                let mut repo = repo.to_thread_local();
                                repo.objects.unset_pack_cache();
                                repo
                            },
                            DedupeContext::new(),
                        )
                    }
                },
                |(repo, ctx), oid| {
                    let mut stack = Vec::with_capacity(64);
                    match repo.find_object(oid) {
                        Ok(obj) => stack.push(obj.detach()),
                        Err(_) => return vec![Err(crate::error::CratesIterError)],
                    };
                    let blobs = CratesTreesToBlobs {
                        stack,
                        repo: repo.clone(),
                    };
                    Crates {
                        blobs,
                        dedupe: MaybeOwned::Borrowed(ctx),
                    }
                    .map(Ok)
                    .collect::<Vec<_>>()
                },
            )
            .flat_map_iter(|chunk| chunk.into_iter())
    }

    /// Builds the blob traversal seeded with all top-level prefix trees,
    /// using a repo clone with the delta-base cache configured.
    fn crates_blobs(&self) -> Result<CratesTreesToBlobs, GixError> {
        let repo = with_delta_cache(self.repo.clone());
        Ok(CratesTreesToBlobs {
            stack: self
                .crates_top_level_ids()?
                .into_iter()
                .map(|id| self.repo.find_object(id).map(|tree| tree.detach()))
                .collect::<Result<_, _>>()?,
            repo,
        })
    }

    /// Ids of all top-level prefix directories (trees with names of at most
    /// two bytes) in the index tree.
    fn crates_top_level_ids(&self) -> Result<Vec<gix::ObjectId>, GixError> {
        let mut stack = Vec::with_capacity(800);
        for entry in self.tree()?.iter() {
            let entry = entry?;
            if !is_top_level_dir(&entry) {
                // Skip files like `config.json` and any non-prefix entries.
                continue;
            };
            stack.push(entry.oid().to_owned());
        }
        Ok(stack)
    }

    /// Parses the index's `config.json` from the root of the tree.
    pub fn index_config(&self) -> Result<IndexConfig, Error> {
        let blob = self.object_at_path("config.json".into())?;
        serde_json::from_slice(&blob.data).map_err(Error::Json)
    }

    /// Resolves `path` relative to the root tree of the head commit,
    /// erroring with [`GixError::PathMissing`] when no entry exists there.
    fn object_at_path(&self, path: PathBuf) -> Result<gix::Object<'_>, GixError> {
        let entry = self
            .tree()?
            .peel_to_entry_by_path(&path)?
            .ok_or(GixError::PathMissing { path })?;
        Ok(entry.object()?)
    }

    /// Resolves the commit the index should read from by trying a fixed set
    /// of candidate references and picking the commit with the newest
    /// timestamp. Errors with [`Error::MissingHead`] (listing every reference
    /// available in the repo) when none of the candidates resolve.
    fn find_repo_head(repo: &gix::Repository, path: &Path) -> Result<gix::ObjectId, Error> {
        #[rustfmt::skip]
        const CANDIDATE_REFS: &[&str] = &[
            "FETCH_HEAD", "origin/HEAD", "origin/master", ];
        let mut candidates: Vec<_> = CANDIDATE_REFS
            .iter()
            .filter_map(|refname| repo.find_reference(*refname).ok()?.into_fully_peeled_id().ok())
            .filter_map(|r| {
                let c = r.object().ok()?.try_into_commit().ok()?;
                Some((c.id, c.time().ok()?.seconds))
            })
            .collect();

        // Ascending sort by commit time, so `.last()` is the newest commit.
        candidates.sort_by_key(|t| t.1);
        Ok(candidates
            .last()
            .ok_or_else(|| Error::MissingHead {
                repo_path: path.to_owned(),
                refs_tried: CANDIDATE_REFS,
                refs_available: repo
                    .references()
                    .ok()
                    .and_then(|p| {
                        p.all()
                            .ok()?
                            .map(|r| r.ok().map(|r| r.name().as_bstr().to_string()))
                            .collect()
                    })
                    .unwrap_or_default(),
            })?
            .0)
    }
}
515
516fn is_top_level_dir(entry: &gix::object::tree::EntryRef<'_, '_>) -> bool {
517 entry.mode().is_tree() && entry.filename().len() <= 2
518}
519
520fn with_delta_cache(mut repo: gix::Repository) -> gix::Repository {
521 if repo
522 .config_snapshot()
523 .integer(gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT.logical_name().as_str())
524 .is_none()
525 {
526 let mut config = repo.config_snapshot_mut();
527 config
529 .set_value(&gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT, "96m")
530 .expect("in memory always works");
531 }
532 repo
533}
534
535pub(super) fn fetch_remote(remote: &mut gix::Remote<'_>, refspecs: &[&str]) -> Result<(), GixError> {
536 remote.replace_refspecs(refspecs, gix::remote::Direction::Fetch)?;
537
538 remote
539 .connect(gix::remote::Direction::Fetch)?
540 .prepare_fetch(gix::progress::Discard, Default::default())?
541 .receive(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?;
542 Ok(())
543}
544
545fn clone_url(url: &str, destination: &Path) -> Result<gix::Repository, GixError> {
546 let (repo, _outcome) = gix::prepare_clone_bare(url, destination)?
548 .with_remote_name("origin")?
549 .configure_remote(|remote| {
550 Ok(remote.with_refspecs(
551 ["+HEAD:refs/remotes/origin/HEAD", "+master:refs/remotes/origin/master"],
552 gix::remote::Direction::Fetch,
553 )?)
554 })
555 .fetch_only(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?;
556 Ok(repo)
557}
558
/// Depth-first traversal over the index tree that yields each crate file's
/// raw blob contents.
struct CratesTreesToBlobs {
    // Objects still to visit; trees get expanded, blobs get yielded.
    stack: Vec<gix::ObjectDetached>,
    // Repository used to look up child objects during the traversal.
    repo: gix::Repository,
}
564
/// Raw bytes of a single crate's index file, not yet parsed into a [`Crate`].
struct CrateUnparsed(Vec<u8>);
567
impl CrateUnparsed {
    /// Parses the raw blob into a [`Crate`], sharing string allocations via
    /// the dedupe context `ctx`.
    #[inline]
    fn parse(&self, ctx: &mut DedupeContext) -> io::Result<Crate> {
        Crate::from_slice_with_context(self.0.as_slice(), ctx)
    }
}
574
575impl Iterator for CratesTreesToBlobs {
576 type Item = CrateUnparsed;
577
578 fn next(&mut self) -> Option<Self::Item> {
579 while let Some(obj) = self.stack.pop() {
580 if obj.kind.is_tree() {
581 let tree = gix::objs::TreeRef::from_bytes(&obj.data).unwrap();
582 for entry in tree.entries.into_iter().rev() {
583 self.stack.push(self.repo.find_object(entry.oid).unwrap().detach());
584 }
585 continue;
586 } else {
587 return Some(CrateUnparsed(obj.data));
588 }
589 }
590 None
591 }
592}
593
/// Either owns a `T` or mutably borrows one, letting [`Crates`] use its own
/// dedupe context or share a caller-provided one.
enum MaybeOwned<'a, T> {
    Owned(T),
    #[cfg_attr(not(feature = "parallel"), allow(dead_code))]
    Borrowed(&'a mut T),
}
599
/// Iterator over all crates of the index, parsed from blobs in the git tree.
pub struct Crates<'a> {
    // Source of raw, unparsed crate files.
    blobs: CratesTreesToBlobs,
    // String-dedupe context, owned or shared with the caller.
    dedupe: MaybeOwned<'a, DedupeContext>,
}
605
606impl<'a> Iterator for Crates<'a> {
607 type Item = Crate;
608
609 fn next(&mut self) -> Option<Self::Item> {
610 for next in self.blobs.by_ref() {
611 let dedupe = match &mut self.dedupe {
612 MaybeOwned::Owned(d) => d,
613 MaybeOwned::Borrowed(d) => d,
614 };
615 if let Ok(k) = CrateUnparsed::parse(&next, dedupe) {
616 return Some(k);
617 }
618 }
619 None
620 }
621}
622
/// Controls whether opening an index may clone it when it is missing.
enum Mode {
    /// Only open an existing repository; never touch the network.
    ReadOnly,
    /// Clone from the URL into the path if no repository is present there.
    CloneUrlToPathIfRepoMissing,
}
627
#[cfg(test)]
#[cfg(feature = "git-https")]
mod tests {
    use crate::dedupe::DedupeContext;
    use crate::{git, GitIndex};
    use gix::bstr::ByteSlice;

    /// Streams every blob of the shared index through a channel to a second
    /// thread that parses each one, asserting the well-known `gcc` crate is
    /// found and that no blob fails to parse.
    #[test]
    #[cfg_attr(debug_assertions, ignore = "too slow in debug mode")]
    fn parse_all_blobs() {
        std::thread::scope(|scope| {
            let (tx, rx) = std::sync::mpsc::channel();
            // Producer: read raw crate blobs out of the git tree.
            let blobs = scope.spawn(move || {
                let index = shared_index();
                for c in index.crates_blobs().unwrap() {
                    tx.send(c).unwrap();
                }
            });
            // Consumer: parse every blob as it arrives.
            let parse = scope.spawn(move || {
                let mut found_gcc_crate = false;
                let mut ctx = DedupeContext::new();
                for c in rx {
                    match c.parse(&mut ctx) {
                        Ok(c) => {
                            if c.name() == "gcc" {
                                found_gcc_crate = true;
                            }
                        }
                        Err(e) => panic!("can't parse :( {:?}: {e}", c.0.as_bstr()),
                    }
                }
                assert!(found_gcc_crate);
            });
            parse.join().unwrap();
            blobs.join().unwrap();
        });
    }

    /// Opens the test index, serialized by a lock so concurrent tests don't
    /// race on cloning. On CI the default index location is used; elsewhere a
    /// local fixture path is cloned on demand.
    fn shared_index() -> GitIndex {
        static LOCK: parking_lot::Mutex<()> = parking_lot::Mutex::new(());
        let _guard = LOCK.lock();

        let index_path = "tests/fixtures/git-registry";
        if is_ci::cached() {
            GitIndex::new_cargo_default().expect("CI has just cloned this index and its ours and valid")
        } else {
            GitIndex::with_path(index_path, git::URL).expect("clone works and there is no racing")
        }
    }
}
677}