crates_index/
lib.rs

1// Copyright 2015 Corey Farwell
2// Copyright 2015 Contributors of github.com/huonw/crates.io-graph
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//	http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Library for retrieving and interacting with the
17//! [crates.io index](https://github.com/rust-lang/crates.io-index).
18//!
19//! ## Examples
20//!
21//! ### Getting information about a single crate
22//!
23//! ```rust
24//! # #[cfg(all(not(debug_assertions), feature = "git"))]
25//! # {
26//! let index = crates_index::GitIndex::new_cargo_default()?;
27//! let serde_crate = index.crate_("serde").expect("you should handle errors here");
28//! println!("Serde is at v{}", serde_crate.highest_normal_version().unwrap().version());
29//! # }
30//! # Ok::<_, crates_index::Error>(())
31//! ```
32//!
33//! ### Iterating over *all* crates in the index
34//!
35//! ```rust
36//! # #[cfg(all(not(debug_assertions), feature = "parallel", feature = "git"))]
37//! # {
38//! let index = crates_index::GitIndex::new_cargo_default()?;
39//! for crate_ in index.crates() {
40//!    let latest = crate_.most_recent_version();
41//!    println!("crate name: {}", latest.name());
42//!    println!("most recently released version: {}", latest.version());
43//! }
44//!
45//! // or faster:
46//! use rayon::prelude::*;
47//! index.crates_parallel().for_each(|crate_| {
48//!     /* etc. */
49//! });
50//!
51//! # }
52//! # Ok::<_, crates_index::Error>(())
53//! ```
54//!
55//! ### Getting most recently published or yanked crates
56//!
57//! ```rust
58//! # #[cfg(feature = "git")]
59//! # {
60//! let index = crates_index::GitIndex::new_cargo_default()?;
61//!
62//! for c in index.changes()?.take(20) {
63//!     let c = c?;
64//!     println!("{} has changed in the index commit {}", c.crate_name(), c.commit_hex());
65//! }
66//!
67//! # }
68//! # Ok::<_, crates_index::Error>(())
69//! ```
70//!
71//! ## Auto-cloning and parallelism
72//!
//! When using any means of instantiating the [`GitIndex`] type, we will
//! clone the default crates index (or the given one) if no git
//! repository is present at the destination path.
76//!
77//! This operation is racy and opening the index concurrently can lead to errors
78//! as multiple threads may try to clone the index at the same time if it wasn't there yet.
79//!
80//! To prevent that, consider using synchronization primitives on application level that
81//! synchronize methods like [`GitIndex::new_cargo_default()`] and its siblings.
82//!
83//! ## Git Repository Performance
84//!
85//! By default, `gix` is compiled with `max-performance-safe`, which maximizes support for compilation environments but which
86//! may be slower as it uses a pure-Rust Zlib implementation.
//! To get the best possible performance, use the `git-index-performance` feature toggle.
88//!
89//! ## Using `rustls` instead of `openssl` when using the `git-https` feature in applications
90//!
91//! When using the `git-https` feature, a choice will be made for you that involves selecting the `curl` backend for making
92//! the `https` protocol available. As using a different backend isn't additive, as cargo features should be, one will have
93//! to resort to the following.
94//!
95//! * Change the `crates-index` dependency to `features = ["git-index", …(everything else *but* "git-https")]`
96//! * Add the `gix` dependency with `default-features = false` and `features = ["blocking-http-transport-reqwest-rust-tls"]`.
//!   Consider renaming the crate to `gix-for-configuration-only = { package = "gix", … }` to make the intent clear.
98//!
//! Please note that this should only be done in application manifests, which have the final say over the protocol and backend choices.
//!
//! ## Feature Flags
101#![cfg_attr(
102    feature = "document-features",
103    cfg_attr(doc, doc = ::document_features::document_features!())
104)]
105#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
106#![deny(unsafe_code, rust_2018_compatibility, missing_docs)]
107use std::path::{Path, PathBuf};
108
/// Handle for working with the crates.io-index git repository.
///
/// The index is kept as a "bare" git repository: files are fetched directly from the repo
/// rather than from a local checkout. Cargo's cache is used.
///
/// ### Instantiation
///
/// Creating an instance of this type clones the crates-index automatically if it is not
/// present yet. If a repository already exists at the location but its remote does not match
/// the desired index URL, a new remote is added and fetched from.
///
/// Please note that concurrent calls to [`GitIndex::new_cargo_default()`] (and related) will
/// automatically block and wait for each other, so only one instance will try to clone the
/// index while the others wait for completion.
///
/// This, however, only protects from itself; `cargo` cloning the index at the same time might
/// still interfere.
#[cfg(feature = "git")]
pub struct GitIndex {
    path: PathBuf,
    url: String,
    pub(crate) repo: gix::Repository,
    pub(crate) head_commit: gix::ObjectId,
}
132
133/// The Git based index implementation
134pub mod git;
135
136mod config;
137pub use config::IndexConfig;
138
139mod dedupe;
140mod dirs;
141pub use dirs::{local_path_and_canonical_url, local_path_and_canonical_url_with_hash_kind, HashKind};
142
143/// Re-exports in case you want to inspect specific error details
144pub mod error;
145#[doc(hidden)]
146#[cfg(feature = "parallel")]
147pub use error::CratesIterError;
148#[doc(hidden)]
149pub use error::Error;
150
/// Handle for a sparse HTTP index that re-uses Cargo's local disk caches.
///
/// At present only the local Cargo cache is consulted; this type does not access the network
/// in any way. For examples of how to update the local cache, see
/// [`examples/sparse_http_reqwest.rs`][reqwest] and [`examples/sparse_http_ureq.rs`][ureq].
///
/// [reqwest]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_reqwest.rs
/// [ureq]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_ureq.rs
#[derive(Debug)]
pub struct SparseIndex {
    // Filesystem path associated with this index (presumably the cache directory; confirm in `sparse`).
    path: PathBuf,
    // URL associated with this index, stored as an owned string.
    url: String,
}
164
165/// The sparse index implementation.
166pub mod sparse;
167/// The matching `http` types for use in the [`sparse`] API.
168#[cfg(feature = "sparse")]
169pub use http;
170
171mod names;
172pub use names::Names;
173
174mod types;
175pub use types::{Crate, Dependency, DependencyKind, Version};
176
177pub(crate) fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> + '_ {
178    struct Split<'a> {
179        haystack: &'a [u8],
180        needle: u8,
181    }
182
183    impl<'a> Iterator for Split<'a> {
184        type Item = &'a [u8];
185
186        #[inline]
187        fn next(&mut self) -> Option<&'a [u8]> {
188            if self.haystack.is_empty() {
189                return None;
190            }
191            let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
192                Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
193                None => (self.haystack, &[][..]),
194            };
195            self.haystack = remaining;
196            Some(ret)
197        }
198    }
199
200    Split { haystack, needle }
201}
202
/// Returns the number of bytes in `path`'s raw OS representation.
///
/// On Unix the path is an arbitrary byte sequence, so the count is exact even when the
/// path is not valid UTF-8.
#[cfg(unix)]
fn path_max_byte_len(path: &Path) -> usize {
    use std::os::unix::ffi::OsStrExt;

    let raw_bytes = path.as_os_str().as_bytes();
    raw_bytes.len()
}
208
/// Returns the UTF-8 byte length of `path`, or `0` when the path is not valid UTF-8.
///
/// This is the variant for non-Unix targets, where the raw OS bytes are not directly
/// accessible; paths that cannot be viewed as `&str` fall back to `0`.
#[cfg(not(unix))]
fn path_max_byte_len(path: &Path) -> usize {
    match path.to_str() {
        Some(utf8) => utf8.len(),
        None => 0,
    }
}