crates_index/
dirs.rs

1use crate::Error;
2
3/// Get the disk location of the specified `url`, as well as its canonical form,
4/// exactly as cargo would.
5///
6/// `cargo_home` is used to root the directory at specific location, if not
7/// specified `CARGO_HOME` or else the default cargo location is used as the root.
8pub fn local_path_and_canonical_url(
9    url: &str,
10    cargo_home: Option<&std::path::Path>,
11) -> Result<(std::path::PathBuf, String), Error> {
12    local_path_and_canonical_url_with_hash_kind(url, cargo_home, &DEFAULT_HASHER_KIND)
13}
14
15/// Like [`local_path_and_canonical_url`] but accepts [`HashKind`] for determining the crate index path.
16pub fn local_path_and_canonical_url_with_hash_kind(
17    url: &str,
18    cargo_home: Option<&std::path::Path>,
19    hash_kind: &HashKind,
20) -> Result<(std::path::PathBuf, String), Error> {
21    let (dir_name, canonical_url) = url_to_local_dir(url, hash_kind)?;
22
23    let mut path = match cargo_home {
24        Some(path) => path.to_owned(),
25        None => home::cargo_home()?,
26    };
27
28    path.push("registry");
29    path.push("index");
30    path.push(dir_name);
31
32    Ok((path, canonical_url))
33}
34
/// Appends the index directory prefix for `crate_name` to `accumulator`.
///
/// The prefix depends on the byte length of the name:
///
/// - 1 byte: `1`
/// - 2 bytes: `2`
/// - 3 bytes: `3`, `separator`, then the first byte of the name
/// - 4 or more: the first two bytes, `separator`, then the next two bytes
///
/// Bytes taken from the name are ASCII-lowercased. Returns `None` for an empty
/// name, in which case `accumulator` is left untouched.
pub(crate) fn crate_prefix(accumulator: &mut String, crate_name: &str, separator: char) -> Option<()> {
    // Pushes each byte as an ASCII-lowercased `char`.
    fn push_lowercase(out: &mut String, bytes: &[u8]) {
        for byte in bytes {
            out.push(byte.to_ascii_lowercase() as char);
        }
    }

    match crate_name.as_bytes() {
        [] => return None,
        [_] => accumulator.push('1'),
        [_, _] => accumulator.push('2'),
        three @ [_, _, _] => {
            accumulator.push('3');
            accumulator.push(separator);
            push_lowercase(accumulator, &three[..1]);
        }
        longer => {
            // At least four bytes are available in this arm.
            let (head, rest) = longer.split_at(2);
            push_lowercase(accumulator, head);
            accumulator.push(separator);
            push_lowercase(accumulator, &rest[..2]);
        }
    }

    Some(())
}
71
72pub(crate) fn crate_name_to_relative_path(crate_name: &str, separator: Option<char>) -> Option<String> {
73    let separator = separator.unwrap_or(std::path::MAIN_SEPARATOR);
74    let mut rel_path = String::with_capacity(crate_name.len() + 6);
75    crate_prefix(&mut rel_path, crate_name, separator)?;
76    rel_path.push(separator);
77    rel_path.extend(crate_name.as_bytes().iter().map(|c| c.to_ascii_lowercase() as char));
78
79    Some(rel_path)
80}
81
/// Matches https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/crates/cargo-util-schemas/src/core/source_kind.rs#L5
///
/// Numeric tag that is hashed together with the registry url when computing
/// the local index directory name.
type SourceKind = u64;
/// Tag used for urls that are not `sparse+http…` urls.
const SOURCE_KIND_REGISTRY: SourceKind = 2;
/// Tag used for `sparse+http…` urls.
// NOTE(review): `SPASE` looks like a misspelling of `SPARSE`; the name is kept
// as-is here because it is referenced by that spelling elsewhere in this file.
const SOURCE_KIND_SPASE_REGISTRY: SourceKind = 3;
86
/// Determine the crate registry hashing strategy for locating local crate indexes.
///
/// The common traits are derived so values can be logged, copied, compared and
/// used as map keys by callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashKind {
    /// Use the new hashing behavior introduced in Rust `1.85.0`.
    Stable,

    /// Use a hashing strategy that matches Cargo versions less than `1.85.0`
    Legacy,
}
95
// For now, this acts as a centralized place to change the default hash kind.
// Ideally it would be selected at compile time from the version of rustc, as
// a reasonable approximation of which hash implementation consumers will be
// using, but the required capability is not yet stable:
// https://github.com/rust-lang/rust/issues/64796
pub(crate) const DEFAULT_HASHER_KIND: HashKind = HashKind::Legacy;
101
102/// Converts a full url, eg https://github.com/rust-lang/crates.io-index, into
103/// the root directory name where cargo itself will fetch it on disk
104fn url_to_local_dir(url: &str, hash_kind: &HashKind) -> Result<(String, String), Error> {
105    #[allow(deprecated)]
106    fn legacy_hash_u64(url: &str, registry_kind: u64) -> u64 {
107        use std::hash::{Hash, Hasher, SipHasher};
108
109        let mut hasher = SipHasher::new_with_keys(0, 0);
110        // Registry
111        registry_kind.hash(&mut hasher);
112        // Url
113        url.hash(&mut hasher);
114        hasher.finish()
115    }
116
117    // Matches https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/src/cargo/util/hasher.rs#L6
118    fn stable_hash_u64(url: &str, registry_kind: u64) -> u64 {
119        use rustc_stable_hash::StableSipHasher128 as StableHasher;
120        use std::hash::{Hash, Hasher};
121
122        let mut hasher = StableHasher::new();
123
124        // Type has an impact in the `rustc_stable_hasher`.
125        (registry_kind as isize).hash(&mut hasher);
126
127        url.hash(&mut hasher);
128
129        Hasher::finish(&hasher)
130    }
131
132    // Matches https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/src/cargo/util/hex.rs#L6
133    fn to_hex(num: u64) -> String {
134        hex::encode(num.to_le_bytes())
135    }
136
137    let hash_u64 = match hash_kind {
138        HashKind::Stable => stable_hash_u64,
139        HashKind::Legacy => legacy_hash_u64,
140    };
141
142    let mut registry_kind = SOURCE_KIND_REGISTRY;
143
144    // Ensure we have a registry or bare url
145    let (url, scheme_ind) = {
146        let scheme_ind = url
147            .find("://")
148            .ok_or_else(|| Error::Url(format!("'{url}' is not a valid url")))?;
149
150        let scheme_str = &url[..scheme_ind];
151        if scheme_str.starts_with("sparse+http") {
152            registry_kind = SOURCE_KIND_SPASE_REGISTRY;
153            (url, scheme_ind)
154        } else if let Some(ind) = scheme_str.find('+') {
155            if &scheme_str[..ind] != "registry" {
156                return Err(Error::Url(format!("'{url}' is not a valid registry url")));
157            }
158
159            (&url[ind + 1..], scheme_ind - ind - 1)
160        } else {
161            (url, scheme_ind)
162        }
163    };
164
165    // Could use the Url crate for this, but it's simple enough and we don't
166    // need to deal with every possible url (I hope...)
167    let host = match url[scheme_ind + 3..].find('/') {
168        Some(end) => &url[scheme_ind + 3..scheme_ind + 3 + end],
169        None => &url[scheme_ind + 3..],
170    };
171
172    // trim port
173    let host = host.split(':').next().unwrap();
174
175    let (ident, url) = if registry_kind == SOURCE_KIND_REGISTRY {
176        // cargo special cases github.com for reasons, so do the same
177        let mut canonical = if host == "github.com" {
178            url.to_lowercase()
179        } else {
180            url.to_owned()
181        };
182
183        let ident = match hash_kind {
184            HashKind::Stable => {
185                // Locate the the first instance of params/fragments.
186                let mut params_index = {
187                    let question = canonical.find('?');
188                    let hash = canonical.rfind('#');
189
190                    question.zip(hash).map(|(q, h)| q.min(h)).or(question).or(hash)
191                };
192
193                // Attempt to trim `.git` from the end of url paths.
194                canonical = if let Some(idx) = params_index {
195                    let base_url = &canonical[..idx];
196                    let params = &canonical[idx..];
197
198                    if let Some(sanitized) = base_url.strip_suffix(".git") {
199                        params_index = Some(idx - 4);
200                        format!("{}{}", sanitized, params)
201                    } else {
202                        canonical
203                    }
204                } else {
205                    if canonical.ends_with(".git") {
206                        canonical.truncate(canonical.len() - 4);
207                    }
208                    canonical
209                };
210
211                let ident = to_hex(hash_u64(&canonical, registry_kind));
212
213                // Strip params
214                if let Some(idx) = params_index {
215                    canonical.truncate(canonical.len() - (canonical.len() - idx));
216                }
217
218                ident
219            }
220            HashKind::Legacy => {
221                // Chop off any query params/fragments
222                if let Some(hash) = canonical.rfind('#') {
223                    canonical.truncate(hash);
224                }
225
226                if let Some(query) = canonical.rfind('?') {
227                    canonical.truncate(query);
228                }
229
230                if canonical.ends_with('/') {
231                    canonical.pop();
232                }
233
234                let ident = to_hex(hash_u64(&canonical, registry_kind));
235
236                // Only GitHub (crates.io) repositories have their .git suffix truncated
237                if canonical.contains("github.com/") && canonical.ends_with(".git") {
238                    canonical.truncate(canonical.len() - 4);
239                }
240
241                ident
242            }
243        };
244
245        (ident, canonical)
246    } else {
247        (to_hex(hash_u64(url, registry_kind)), url.to_owned())
248    };
249
250    Ok((format!("{host}-{ident}"), url))
251}
252
#[cfg(test)]
mod test {
    use crate::dirs::HashKind;

    /// Asserts that `url` maps to `expected_dir` and `expected_url` under `kind`.
    #[track_caller]
    fn assert_local_dir(url: &str, kind: HashKind, expected_dir: &str, expected_url: &str) {
        assert_eq!(
            super::url_to_local_dir(url, &kind).unwrap(),
            (expected_dir.to_owned(), expected_url.to_owned())
        );
    }

    #[test]
    fn http_index_url_matches_cargo() {
        use crate::sparse::URL;

        assert_local_dir(URL, HashKind::Legacy, "index.crates.io-6f17d22bba15001f", URL);
        assert_local_dir(URL, HashKind::Stable, "index.crates.io-1949cf8c6b5b557f", URL);

        // I've confirmed this also works with a custom registry, unfortunately
        // that one includes a secret key as part of the url which would allow
        // anyone to publish to the registry, so uhh...here's a fake one instead
        let custom = "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index.git";
        assert_local_dir(
            custom,
            HashKind::Legacy,
            "dl.cloudsmith.io-ff79e51ddd2b38fd",
            "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index.git",
        );
        assert_local_dir(
            custom,
            HashKind::Stable,
            "dl.cloudsmith.io-5e6de3fada793d05",
            "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index",
        );
    }

    #[test]
    #[cfg(feature = "git")]
    fn git_url_matches_cargo() {
        use crate::git::URL;

        assert_local_dir(URL, HashKind::Legacy, "github.com-1ecc6299db9ec823", URL);
        assert_local_dir(URL, HashKind::Stable, "github.com-25cdd57fae9f0462", URL);

        // Ensure we actually strip off the irrelevant parts of a url, note that
        // the .git suffix is not part of the canonical url, but *is* used when hashing
        let decorated = format!("registry+{}.git?one=1&two=2#fragment", URL);
        assert_local_dir(&decorated, HashKind::Legacy, "github.com-c786010fb7ef2e6e", URL);
        assert_local_dir(&decorated, HashKind::Stable, "github.com-e78ed0bbfe5f35d7", URL);
    }
}