tame_index/
utils.rs

1//! Provides several useful functions for determining the disk location of a
2//! remote registry index
3
4use crate::{Error, InvalidUrl, InvalidUrlError, PathBuf};
5
6pub mod flock;
7#[cfg(feature = "__git")]
8pub mod git;
9
10/// Returns the storage directory (in utf-8) used by Cargo, often known as
11/// `.cargo` or `CARGO_HOME`
12#[inline]
13pub fn cargo_home() -> Result<crate::PathBuf, crate::Error> {
14    Ok(crate::PathBuf::from_path_buf(home::cargo_home()?)?)
15}
16
/// Writes the lowercase hexadecimal representation of `input` into `output`
/// and returns the written bytes as a string slice
///
/// # Panics
///
/// Panics if `output` is not exactly twice as long as `input`
pub(crate) fn encode_hex<'out, const I: usize, const O: usize>(
    input: &[u8; I],
    output: &'out mut [u8; O],
) -> &'out str {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    assert_eq!(I * 2, O);

    // Each input byte becomes two output characters: high nibble, then low
    for (pair, &byte) in output.chunks_exact_mut(2).zip(input) {
        pair[0] = HEX_DIGITS[usize::from(byte >> 4)];
        pair[1] = HEX_DIGITS[usize::from(byte & 0xf)];
    }

    // SAFETY: the assertion above guarantees every byte of `output` was
    // overwritten in the loop, and only ASCII hex characters are ever written
    #[allow(unsafe_code)]
    unsafe {
        std::str::from_utf8_unchecked(output)
    }
}
38
/// The details for a remote url
///
/// Produced by [`url_to_local_dir`], this pairs the directory name cargo would
/// use for a registry with the url that was used to calculate it
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlDir {
    /// The unique directory name for the url, in the form `<host>-<hash>`
    pub dir_name: String,
    /// The canonical url for the remote url
    pub canonical: String,
}
46
47/// Canonicalizes a `git+` url the same as cargo.
48///
49/// This is similar to cargo's `CanonicalUrl`, which previously was only used for
50/// git+ url's, but since cargo 1.85.0 is now used as part of the hash for all
51/// sources. Note that cargo removes queries and fragments _only_ from git+ URLs
52/// and that happens before canonicalization, so this function does not handle them
53/// specifically as we only care about sparse and git registry URLs
54pub fn canonicalize_url(mut url: &str) -> Result<String, Error> {
55    let scheme_ind = url.find("://").map(|i| i + 3).ok_or_else(|| InvalidUrl {
56        url: url.to_owned(),
57        source: InvalidUrlError::MissingScheme,
58    })?;
59
60    // Could use the Url crate for this, but it's simple enough and we don't
61    // need to deal with every possible url (I hope...)
62    let (host, path_length) = match url[scheme_ind..].find('/') {
63        Some(end) => (
64            &url[scheme_ind..scheme_ind + end],
65            url.len() - (end + scheme_ind),
66        ),
67        None => (&url[scheme_ind..], 0),
68    };
69
70    // trim port
71    let host = host.split(':').next().unwrap();
72
73    if path_length > 1 && url.ends_with('/') {
74        url = &url[..url.len() - 1];
75    }
76
77    if url.ends_with(".git") {
78        url = &url[..url.len() - 4];
79    }
80
81    // cargo special cases github.com for reasons, so do the same
82    Ok(if host == "github.com" {
83        url.to_lowercase()
84    } else {
85        url.to_owned()
86    })
87}
88
89/// Converts a url into a relative path and its canonical form
90///
91/// Cargo uses a small algorithm to create unique directory names for any url
92/// so that they can be located in the same root without clashing
93///
94/// This function currently only supports 2 different URL kinds
95///
96/// * `(?:registry+)?<git registry url>`
97/// * `sparse+<sparse registry url>`
98#[allow(deprecated)]
99pub fn url_to_local_dir(url: &str, stable: bool) -> Result<UrlDir, Error> {
100    use std::hash::{Hash, Hasher, SipHasher};
101
102    // This is extremely irritating, but we need to use usize for the kind, which
103    // impacts the hash calculation, making it different based on pointer size.
104    //
105    // The reason for this is that cargo just uses #[derive(Hash)] for the SourceKind
106    // https://github.com/rust-lang/cargo/blob/88b4b3bcd3bbb66873734d97ae412a6bcf9b75ee/crates/cargo-util-schemas/src/core/source_kind.rs#L4-L5,
107    // which then uses https://doc.rust-lang.org/core/intrinsics/fn.discriminant_value.html
108    // to get the discriminant and add to the hash...and that is pointer width :(
109    //
110    // Note that these are isize instead of usize because contrary to what one
111    // would expect from the automatic discriminant assigned by rustc starting
112    // at 0 and incrementing by 1 each time...it's actually signed, which can
113    // be seen by overriding `Hasher::write_isize` and hashing a discriminant
114    //
115    // This is unfortunately a hard requirement because of https://github.com/rust-lang/rustc-stable-hash/blob/24e9848c89917abca155c8f854118e6d00ad4a30/src/stable_hasher.rs#L263-L299
116    // where it specializes _only_ isize to only write a u8 if the value is less
117    // than 0xff, something that doesn't happen for usize, which of course affects
118    // the calculated hash
119    const GIT_REGISTRY: isize = 2;
120    const SPARSE_REGISTRY: isize = 3;
121
122    // Ensure we have a registry or bare url
123    let (url, scheme_ind, kind) = {
124        let mut scheme_ind = url.find("://").ok_or_else(|| InvalidUrl {
125            url: url.to_owned(),
126            source: InvalidUrlError::MissingScheme,
127        })?;
128
129        let scheme_str = &url[..scheme_ind];
130
131        let (url, kind) = match scheme_str.split_once('+') {
132            Some(("sparse", _)) => (url, SPARSE_REGISTRY),
133            // If there is no scheme modifier, assume git registry, same as cargo
134            None => (url, GIT_REGISTRY),
135            Some(("registry", _)) => {
136                scheme_ind -= 9;
137                (&url[9..], GIT_REGISTRY)
138            }
139            Some((_, _)) => {
140                return Err(InvalidUrl {
141                    url: url.to_owned(),
142                    source: InvalidUrlError::UnknownSchemeModifier,
143                }
144                .into());
145            }
146        };
147
148        (url, scheme_ind + 3, kind)
149    };
150
151    let (dir_name, url) = if stable {
152        let canonical = canonicalize_url(url)?;
153
154        let hash = {
155            let mut hasher = rustc_stable_hash::StableSipHasher128::new();
156            kind.hash(&mut hasher);
157            canonical.hash(&mut hasher);
158            Hasher::finish(&hasher)
159        };
160
161        let mut raw_ident = [0u8; 16];
162        let ident = encode_hex(&hash.to_le_bytes(), &mut raw_ident);
163
164        let dir_name = {
165            let host = match url[scheme_ind..].find('/') {
166                Some(end) => &url[scheme_ind..scheme_ind + end],
167                None => &url[scheme_ind..],
168            };
169
170            // trim port
171            let host = host.split(':').next().unwrap();
172            host.split_once('@').map_or(host, |(_user, host)| host)
173        };
174
175        (format!("{dir_name}-{ident}"), canonical)
176    } else {
177        let hash = {
178            let mut hasher = SipHasher::new();
179            kind.hash(&mut hasher);
180            url.hash(&mut hasher);
181            hasher.finish()
182        };
183        let mut raw_ident = [0u8; 16];
184        let ident = encode_hex(&hash.to_le_bytes(), &mut raw_ident);
185
186        // Could use the Url crate for this, but it's simple enough and we don't
187        // need to deal with every possible url (I hope...)
188        let host = match url[scheme_ind..].find('/') {
189            Some(end) => &url[scheme_ind..scheme_ind + end],
190            None => &url[scheme_ind..],
191        };
192
193        // trim port
194        let host = host.split(':').next().unwrap();
195        let host = host.split_once('@').map_or(host, |(_user, host)| host);
196
197        (format!("{host}-{ident}"), url.to_owned())
198    };
199
200    Ok(UrlDir {
201        dir_name,
202        canonical: url,
203    })
204}
205
206/// Get the disk location of the specified url, as well as its canonical form
207///
208/// If not specified, the root directory is the user's default cargo home
209pub fn get_index_details(
210    url: &str,
211    root: Option<PathBuf>,
212    stable: bool,
213) -> Result<(PathBuf, String), Error> {
214    let url_dir = url_to_local_dir(url, stable)?;
215
216    let mut path = match root {
217        Some(path) => path,
218        None => cargo_home()?,
219    };
220
221    path.push("registry");
222    path.push("index");
223    path.push(url_dir.dir_name);
224
225    Ok((path, url_dir.canonical))
226}
227
228use std::io;
229
230/// Parses the output of `cargo -V` to get the semver
231///
232/// This handles the 2? cases that I am aware of
233///
234/// 1. Official cargo prints `cargo <semver>(?:-<channel>)? (<sha1[..7]> <date>)`
235/// 2. Non-official builds may drop the additional metadata and just print `cargo <semver>`
236#[inline]
237fn parse_cargo_semver(s: &str) -> Result<semver::Version, Error> {
238    let semver = s.trim().split(' ').nth(1).ok_or_else(|| {
239        io::Error::new(
240            io::ErrorKind::InvalidData,
241            "cargo version information was in an invalid format",
242        )
243    })?;
244
245    Ok(semver.parse()?)
246}
247
248/// Retrieves the current version of cargo being used
249pub fn cargo_version(working_dir: Option<&crate::Path>) -> Result<crate::Version, Error> {
250    let mut cargo = std::process::Command::new(
251        std::env::var_os("CARGO")
252            .as_deref()
253            .unwrap_or(std::ffi::OsStr::new("cargo")),
254    );
255
256    cargo.arg("-V");
257
258    if let Some(wd) = working_dir {
259        cargo.current_dir(wd);
260    }
261
262    cargo.stdout(std::process::Stdio::piped());
263
264    let output = cargo.output()?;
265    if !output.status.success() {
266        return Err(io::Error::new(
267            io::ErrorKind::Other,
268            "failed to request cargo version information",
269        )
270        .into());
271    }
272
273    let stdout = String::from_utf8(output.stdout)
274        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
275
276    parse_cargo_semver(&stdout)
277}
278
#[cfg(test)]
mod test {
    use super::{get_index_details, url_to_local_dir};
    use crate::PathBuf;

    /// Resolves `url` against an empty root, and checks both the relative
    /// index path and the url that come back
    #[track_caller]
    fn assert_details(url: &str, stable: bool, expected_path: &str, expected_url: &str) {
        let details = get_index_details(url, Some(PathBuf::new()), stable).unwrap();
        assert_eq!(
            details,
            (PathBuf::from(expected_path), expected_url.to_owned())
        );
    }

    /// The pre-1.85.0 hash depends on pointer width, these are the 64-bit values
    #[test]
    #[cfg(all(target_pointer_width = "64", target_endian = "little"))]
    fn matches_cargo() {
        const NON_CRATES_IO_GITHUB: &str = "https://github.com/EmbarkStudios/cargo-test-index";
        const NON_GITHUB_INDEX: &str =
            "https://dl.cloudsmith.io/public/embark/deny/cargo/index.git";

        // Without stable hashing the url is always returned as provided
        let cases = [
            (
                crate::CRATES_IO_INDEX,
                "registry/index/github.com-1ecc6299db9ec823",
            ),
            (
                crate::CRATES_IO_HTTP_INDEX,
                "registry/index/index.crates.io-6f17d22bba15001f",
            ),
            (
                NON_CRATES_IO_GITHUB,
                "registry/index/github.com-655148e0a865c9e0",
            ),
            (
                NON_GITHUB_INDEX,
                "registry/index/dl.cloudsmith.io-955e041deb7d37e6",
            ),
        ];

        for (url, expected_path) in cases {
            assert_details(url, false, expected_path, url);
        }

        // Just verifies that any non git+ or sparse+ url is treated as a git
        // registry for purposes of hashing
        const FAKE_REGISTRY: &str = "https://github.com/RustSec/advisory-db";

        let fake = url_to_local_dir(FAKE_REGISTRY, false).unwrap();
        assert_eq!(fake.dir_name, "github.com-a946fc29ac602819");
    }

    /// The 1.85.0+ hash is the same regardless of platform
    #[test]
    fn matches_cargo_1850() {
        let cases = [
            (
                crate::CRATES_IO_HTTP_INDEX,
                "registry/index/index.crates.io-1949cf8c6b5b557f",
                crate::CRATES_IO_HTTP_INDEX,
            ),
            (
                crate::CRATES_IO_INDEX,
                "registry/index/github.com-25cdd57fae9f0462",
                crate::CRATES_IO_INDEX,
            ),
            (
                "https://github.com/EmbarkStudios/cargo-test-index",
                "registry/index/github.com-513223c940e0f1e9",
                "https://github.com/embarkstudios/cargo-test-index",
            ),
            (
                "sparse+https://cargo.cloudsmith.io/embark/deny/",
                "registry/index/cargo.cloudsmith.io-2fc1f5411e6e72fd",
                "sparse+https://cargo.cloudsmith.io/embark/deny",
            ),
        ];

        for (url, expected_path, expected_url) in cases {
            assert_details(url, true, expected_path, expected_url);
        }
    }

    /// The pre-1.85.0 hash depends on pointer width, this is the 32-bit value
    #[test]
    #[cfg(all(target_pointer_width = "32", target_endian = "little"))]
    fn matches_cargo_32bit() {
        assert_details(
            crate::CRATES_IO_HTTP_INDEX,
            false,
            "registry/index/index.crates.io-1cd66030c949c28d",
            crate::CRATES_IO_HTTP_INDEX,
        );
    }

    /// Requires a cargo binary to be available when running the tests
    #[test]
    fn gets_cargo_version() {
        const MINIMUM: semver::Version = semver::Version::new(1, 70, 0);

        let found = super::cargo_version(None).unwrap();
        assert!(found >= MINIMUM);
    }

    #[test]
    fn parses_cargo_semver() {
        use super::parse_cargo_semver as pcs;

        // Official stable build
        let stable = pcs("cargo 1.71.0 (cfd3bbd8f 2023-06-08)\n").unwrap();
        assert_eq!(stable, semver::Version::new(1, 71, 0));

        // Official build with a channel
        let nightly = pcs("cargo 1.73.0-nightly (7ac9416d8 2023-07-24)\n").unwrap();
        assert_eq!(nightly, "1.73.0-nightly".parse().unwrap());

        // Non-official build without the additional metadata
        let bare = pcs("cargo 1.70.0\n").unwrap();
        assert_eq!(bare, semver::Version::new(1, 70, 0));
    }
}
409        );
410    }
411}