crates_index/lib.rs
1// Copyright 2015 Corey Farwell
2// Copyright 2015 Contributors of github.com/huonw/crates.io-graph
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Library for retrieving and interacting with the
17//! [crates.io index](https://github.com/rust-lang/crates.io-index).
18//!
19//! ## Examples
20//!
21//! ### Getting information about a single crate
22//!
23//! ```rust
24//! # #[cfg(all(not(debug_assertions), feature = "git"))]
25//! # {
26//! let index = crates_index::GitIndex::new_cargo_default()?;
27//! let serde_crate = index.crate_("serde").expect("you should handle errors here");
28//! println!("Serde is at v{}", serde_crate.highest_normal_version().unwrap().version());
29//! # }
30//! # Ok::<_, crates_index::Error>(())
31//! ```
32//!
33//! ### Iterating over *all* crates in the index
34//!
35//! ```rust
36//! # #[cfg(all(not(debug_assertions), feature = "parallel", feature = "git"))]
37//! # {
38//! let index = crates_index::GitIndex::new_cargo_default()?;
39//! for crate_ in index.crates() {
40//! let latest = crate_.most_recent_version();
41//! println!("crate name: {}", latest.name());
42//! println!("most recently released version: {}", latest.version());
43//! }
44//!
45//! // or faster:
46//! use rayon::prelude::*;
47//! index.crates_parallel().for_each(|crate_| {
48//! /* etc. */
49//! });
50//!
51//! # }
52//! # Ok::<_, crates_index::Error>(())
53//! ```
54//!
55//! ### Getting most recently published or yanked crates
56//!
57//! ```rust
58//! # #[cfg(feature = "git")]
59//! # {
60//! let index = crates_index::GitIndex::new_cargo_default()?;
61//!
62//! for c in index.changes()?.take(20) {
63//! let c = c?;
64//! println!("{} has changed in the index commit {}", c.crate_name(), c.commit_hex());
65//! }
66//!
67//! # }
68//! # Ok::<_, crates_index::Error>(())
69//! ```
70//!
71//! ## Auto-cloning and parallelism
72//!
//! When using any means of instantiating the [`GitIndex`] type, we will
//! clone the default crates index (or the given one) if no git
//! repository is present at the destination path.
76//!
77//! This operation is racy and opening the index concurrently can lead to errors
78//! as multiple threads may try to clone the index at the same time if it wasn't there yet.
79//!
80//! To prevent that, consider using synchronization primitives on application level that
81//! synchronize methods like [`GitIndex::new_cargo_default()`] and its siblings.
82//!
83//! ## Git Repository Performance
84//!
85//! By default, `gix` is compiled with `max-performance-safe`, which maximizes support for compilation environments but which
86//! may be slower as it uses a pure-Rust Zlib implementation.
//! To get the best possible performance, use the `git-index-performance` feature toggle.
88//!
89//! ## Using `rustls` instead of `openssl` when using the `git-https` feature in applications
90//!
91//! When using the `git-https` feature, a choice will be made for you that involves selecting the `curl` backend for making
92//! the `https` protocol available. As using a different backend isn't additive, as cargo features should be, one will have
93//! to resort to the following.
94//!
95//! * Change the `crates-index` dependency to `features = ["git-index", …(everything else *but* "git-https")]`
96//! * Add the `gix` dependency with `default-features = false` and `features = ["blocking-http-transport-reqwest-rust-tls"]`.
//!   Consider renaming the crate to `gix-for-configuration-only = { package = "gix", … }` to make the intent clear.
98//!
//! Please note that this should only be done in application manifests, which have the final say over the protocol and backend choices.
100//! ## Feature Flags
101#![cfg_attr(
102 feature = "document-features",
103 cfg_attr(doc, doc = ::document_features::document_features!())
104)]
105#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
106#![deny(unsafe_code, rust_2018_compatibility, missing_docs)]
107use std::path::{Path, PathBuf};
108
/// Handle for working with the crates.io-index git repository.
///
/// The index is kept as a "bare" git repository: files are read directly from the repo
/// rather than from a local checkout. Cargo's cache is used.
///
/// ### Instantiation
///
/// Creating an instance clones the crates-index automatically if it is not present yet.
/// If a repository already exists at the location but its remote does not match the desired
/// index URL, a new remote is added and fetched from.
///
/// Concurrent calls to [`GitIndex::new_cargo_default()`] (and related constructors) block and
/// wait for each other, so only one of them attempts the clone while the rest wait for it
/// to complete.
///
/// That protection only covers callers of this type, though: `cargo` cloning the index at
/// the same time might still interfere.
#[cfg(feature = "git")]
pub struct GitIndex {
    // NOTE(review): presumably the on-disk location of the repository — confirm in `git`.
    path: PathBuf,
    // NOTE(review): presumably the URL of the index this instance was opened for — confirm in `git`.
    url: String,

    // Handle to the underlying `gix` repository.
    pub(crate) repo: gix::Repository,
    // NOTE(review): presumably the id of the commit lookups are served from — confirm in `git`.
    pub(crate) head_commit: gix::ObjectId,
}
132
133/// The Git based index implementation
134pub mod git;
135
136mod config;
137pub use config::IndexConfig;
138
139mod dedupe;
140mod dirs;
141pub use dirs::{local_path_and_canonical_url, local_path_and_canonical_url_with_hash_kind, HashKind};
142
143/// Re-exports in case you want to inspect specific error details
144pub mod error;
145#[doc(hidden)]
146#[cfg(feature = "parallel")]
147pub use error::CratesIterError;
148#[doc(hidden)]
149pub use error::Error;
150
/// Handle for a sparse HTTP index that re-uses Cargo's local disk caches.
///
/// At present it only reads the local Cargo cache and never touches the network.
/// To see how the local cache can be updated, have a look at
/// [`examples/sparse_http_reqwest.rs`][reqwest] and [`examples/sparse_http_ureq.rs`][ureq].
///
/// [reqwest]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_reqwest.rs
/// [ureq]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_ureq.rs
#[derive(Debug)]
pub struct SparseIndex {
    // NOTE(review): presumably the directory of the local cache for this index — confirm in `sparse`.
    path: PathBuf,
    // NOTE(review): presumably the URL identifying the sparse index — confirm in `sparse`.
    url: String,
}
164
165/// The sparse index implementation.
166pub mod sparse;
167/// The matching `http` types for use in the [`sparse`] API.
168#[cfg(feature = "sparse")]
169pub use http;
170
171mod names;
172pub use names::Names;
173
174mod types;
175pub use types::{Crate, Dependency, DependencyKind, Version};
176
177pub(crate) fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> + '_ {
178 struct Split<'a> {
179 haystack: &'a [u8],
180 needle: u8,
181 }
182
183 impl<'a> Iterator for Split<'a> {
184 type Item = &'a [u8];
185
186 #[inline]
187 fn next(&mut self) -> Option<&'a [u8]> {
188 if self.haystack.is_empty() {
189 return None;
190 }
191 let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
192 Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
193 None => (self.haystack, &[][..]),
194 };
195 self.haystack = remaining;
196 Some(ret)
197 }
198 }
199
200 Split { haystack, needle }
201}
202
/// Returns the length in bytes of `path` as stored by the standard library.
///
/// This relies on [`std::ffi::OsStr::len`], which reports the size of the underlying storage
/// on every platform. For paths that are valid UTF-8 this equals the UTF-8 byte length, and on
/// unix it equals the length of the raw byte representation.
///
/// A single portable definition replaces the former per-platform variants; the non-unix one
/// went through `to_str()` and therefore reported `0` for any path that was not valid Unicode.
fn path_max_byte_len(path: &Path) -> usize {
    path.as_os_str().len()
}