tame_index/index/
sparse.rs

1use super::{cache::ValidCacheEntry, FileLock, IndexCache};
2use crate::{Error, HttpError, IndexKrate, KrateName};
3
/// URL of the crates.io sparse HTTP index, including the `sparse+` prefix
/// that marks it as a sparse (rather than git) registry
pub const CRATES_IO_HTTP_INDEX: &str = "sparse+https://index.crates.io/";
6
7/// Wrapper around managing a sparse HTTP index, re-using Cargo's local disk caches.
8///
9/// This implementation does no network I/O at all. If you want to make requests
10/// to the remote index you may use the [`Self::make_remote_request`] and
11/// [`Self::parse_remote_response`] methods, or you can enable the `sparse` feature
12/// and and use [`RemoteSparseIndex`](crate::index::RemoteSparseIndex) or
13/// [`AsyncRemoteSparseIndex`](crate::index::AsyncRemoteSparseIndex)
14pub struct SparseIndex {
15    cache: IndexCache,
16    url: String,
17}
18
19impl SparseIndex {
20    /// Creates a new sparse index for the specified location
21    #[inline]
22    pub fn new(il: crate::index::IndexLocation<'_>) -> Result<Self, Error> {
23        if !il.url.is_sparse() {
24            return Err(crate::InvalidUrl {
25                url: il.url.as_str().to_owned(),
26                source: crate::InvalidUrlError::MissingSparse,
27            }
28            .into());
29        }
30
31        let (path, url) = il.into_parts()?;
32        Ok(Self {
33            cache: IndexCache::at_path(path),
34            url,
35        })
36    }
37
38    /// Get the configuration of the index.
39    ///
40    /// See the [cargo docs](https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration)
41    pub fn index_config(&self) -> Result<super::IndexConfig, Error> {
42        let path = self.cache.path.join("config.json");
43        let bytes = std::fs::read(&path).map_err(|err| Error::IoPath(err, path))?;
44
45        Ok(serde_json::from_slice(&bytes)?)
46    }
47
48    /// Get the URL that can be used to fetch the index entry for the specified
49    /// crate
50    ///
51    /// The body of a successful response for the returned URL can be parsed
52    /// via [`IndexKrate::from_slice`]
53    ///
54    /// See [`Self::make_remote_request`] for a way to make a complete request
55    #[inline]
56    pub fn crate_url(&self, name: KrateName<'_>) -> String {
57        let rel_path = name.relative_path(Some('/'));
58        format!("{}{rel_path}", self.url())
59    }
60
61    /// The HTTP url of the index
62    #[inline]
63    pub fn url(&self) -> &str {
64        self.url.strip_prefix("sparse+").unwrap_or(&self.url)
65    }
66
67    /// Gets the accessor to the local index cache
68    #[inline]
69    pub fn cache(&self) -> &IndexCache {
70        &self.cache
71    }
72
73    /// Attempts to read the locally cached crate information
74    #[inline]
75    pub fn cached_krate(
76        &self,
77        name: KrateName<'_>,
78        lock: &FileLock,
79    ) -> Result<Option<IndexKrate>, Error> {
80        self.cache.cached_krate(name, None, lock)
81    }
82
83    /// Creates an HTTP request that can be sent via your HTTP client of choice
84    /// to retrieve the current metadata for the specified crate
85    ///
86    /// If specified, the etag is used instead of the possible etag stored in
87    /// a local cache entry, resulting in no disk I/O being performed by this
88    /// method
89    ///
90    /// See [`Self::parse_remote_response`] processing the response from the remote
91    /// index
92    ///
93    /// It is highly recommended to assume HTTP/2 when making requests to remote
94    /// indices, at least crates.io
95    pub fn make_remote_request(
96        &self,
97        name: KrateName<'_>,
98        etag: Option<&str>,
99        lock: &FileLock,
100    ) -> Result<http::Request<()>, Error> {
101        use http::header;
102
103        let url = self.crate_url(name);
104
105        let mut req = http::Request::get(url);
106
107        {
108            let headers = req.headers_mut().unwrap();
109
110            // AFAICT this does not affect responses at the moment, but could in
111            // the future if there are changes to the protocol
112            headers.insert(
113                "cargo-protocol",
114                header::HeaderValue::from_static("version=1"),
115            );
116            // All index entries are just files with lines of JSON
117            headers.insert(
118                header::ACCEPT,
119                header::HeaderValue::from_static("text/plain"),
120            );
121            // We need to accept both identity and gzip, as otherwise cloudfront will
122            // always respond to requests with strong etag's, which will differ from
123            // cache entries generated by cargo
124            headers.insert(
125                header::ACCEPT_ENCODING,
126                header::HeaderValue::from_static("gzip"),
127            );
128
129            // If we have a local cache entry, include its version with the
130            // appropriate header, this allows the server to respond with a
131            // cached, or even better, empty response if its version matches
132            // the local one making the request/response loop basically free
133
134            // If we're unable to get the cache version we can just ignore setting the
135            // header, guaranteeing we'll get the full index contents if the crate exists
136            let set_cache_version = |headers: &mut header::HeaderMap| -> Option<()> {
137                let contents = self.cache.read_cache_file(name, lock).ok()??;
138                let valid = ValidCacheEntry::read(&contents).ok()?;
139
140                let (key, value) = valid.revision.split_once(':')?;
141                let value = header::HeaderValue::from_str(value.trim()).ok()?;
142                let name = if key == header::ETAG {
143                    header::IF_NONE_MATCH
144                } else if key == header::LAST_MODIFIED {
145                    header::IF_MODIFIED_SINCE
146                } else {
147                    // We could error here, but that's kind of pointless
148                    // since the response will be sent in full if we haven't
149                    // specified one of the above headers. Though it does
150                    // potentially indicate something weird is going on
151                    return None;
152                };
153
154                headers.insert(name, value);
155                None
156            };
157
158            if let Some(etag) = etag {
159                let hv =
160                    header::HeaderValue::from_str(etag.trim()).map_err(crate::HttpError::from)?;
161                headers.insert(header::IF_NONE_MATCH, hv);
162            } else {
163                // Use the etag (or last modified, though crates.io does not use this AFAICT)
164                // from the cache entry if it exists
165                let _ = set_cache_version(headers);
166            }
167        }
168
169        Ok(req.body(()).unwrap())
170    }
171
172    /// Process the response to a request created by [`Self::make_remote_request`]
173    ///
174    /// This handles both the scenario where the local cache is missing the specified
175    /// crate, or it is out of date, as well as the local entry being up to date
176    /// and can just be read from disk
177    ///
178    /// You may specify whether an updated index entry is written locally to the
179    /// cache or not
180    ///
181    /// Note that responses from sparse HTTP indices, at least crates.io, may
182    /// send responses with `gzip` compression, it is your responsibility to
183    /// decompress it before sending to this function
184    pub fn parse_remote_response(
185        &self,
186        name: KrateName<'_>,
187        response: http::Response<Vec<u8>>,
188        write_cache_entry: bool,
189        lock: &FileLock,
190    ) -> Result<Option<IndexKrate>, Error> {
191        use http::{header, StatusCode};
192        let (parts, body) = response.into_parts();
193
194        match parts.status {
195            // The server responded with the full contents of the index entry
196            StatusCode::OK => {
197                let krate = IndexKrate::from_slice(&body)?;
198
199                if write_cache_entry {
200                    // The same as cargo, prefer etag over last-modified
201                    let version = if let Some(etag) = parts.headers.get(header::ETAG) {
202                        etag.to_str()
203                            .ok()
204                            .map(|etag| format!("{}: {etag}", header::ETAG))
205                    } else if let Some(lm) = parts.headers.get(header::LAST_MODIFIED) {
206                        lm.to_str()
207                            .ok()
208                            .map(|lm| format!("{}: {lm}", header::LAST_MODIFIED))
209                    } else {
210                        None
211                    };
212
213                    let revision = version.unwrap_or_else(|| "Unknown".to_owned());
214
215                    // It's unfortunate if we can't write to the cache, but we
216                    // don't treat it as a hard error since we still have the
217                    // index metadata
218                    let _err = self.cache.write_to_cache(&krate, &revision, lock);
219                }
220
221                Ok(Some(krate))
222            }
223            // The local cache entry is up to date with the latest entry on the
224            // server, we can just return the local one
225            StatusCode::NOT_MODIFIED => self.cache.cached_krate(name, None, lock),
226            // The server requires authorization but the user didn't provide it
227            StatusCode::UNAUTHORIZED => Err(HttpError::StatusCode {
228                code: StatusCode::UNAUTHORIZED,
229                msg: "the request was not authorized",
230            }
231            .into()),
232            // The crate does not exist, or has been removed
233            StatusCode::NOT_FOUND
234            | StatusCode::GONE
235            | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => Ok(None),
236            code => Err(HttpError::StatusCode {
237                code,
238                msg: "the status code is invalid for this protocol",
239            }
240            .into()),
241        }
242    }
243}