tame_index/index/sparse.rs
1use super::{cache::ValidCacheEntry, FileLock, IndexCache};
2use crate::{Error, HttpError, IndexKrate, KrateName};
3
/// The default URL of the crates.io HTTP index
///
/// Note the leading `sparse+` marker: [`SparseIndex::url`] strips it to obtain
/// the plain HTTP URL that requests are made against.
pub const CRATES_IO_HTTP_INDEX: &str = "sparse+https://index.crates.io/";
6
/// Wrapper around managing a sparse HTTP index, re-using Cargo's local disk caches.
///
/// This implementation does no network I/O at all. If you want to make requests
/// to the remote index you may use the [`Self::make_remote_request`] and
/// [`Self::parse_remote_response`] methods, or you can enable the `sparse` feature
/// and use [`RemoteSparseIndex`](crate::index::RemoteSparseIndex) or
/// [`AsyncRemoteSparseIndex`](crate::index::AsyncRemoteSparseIndex)
pub struct SparseIndex {
    // Local on-disk cache of index entries, rooted at the path obtained from
    // the index location passed to `new`
    cache: IndexCache,
    // The index URL as obtained from the index location; `url()` removes a
    // leading `sparse+` before it is used for HTTP requests
    url: String,
}
18
19impl SparseIndex {
20 /// Creates a new sparse index for the specified location
21 #[inline]
22 pub fn new(il: crate::index::IndexLocation<'_>) -> Result<Self, Error> {
23 if !il.url.is_sparse() {
24 return Err(crate::InvalidUrl {
25 url: il.url.as_str().to_owned(),
26 source: crate::InvalidUrlError::MissingSparse,
27 }
28 .into());
29 }
30
31 let (path, url) = il.into_parts()?;
32 Ok(Self {
33 cache: IndexCache::at_path(path),
34 url,
35 })
36 }
37
38 /// Get the configuration of the index.
39 ///
40 /// See the [cargo docs](https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration)
41 pub fn index_config(&self) -> Result<super::IndexConfig, Error> {
42 let path = self.cache.path.join("config.json");
43 let bytes = std::fs::read(&path).map_err(|err| Error::IoPath(err, path))?;
44
45 Ok(serde_json::from_slice(&bytes)?)
46 }
47
48 /// Get the URL that can be used to fetch the index entry for the specified
49 /// crate
50 ///
51 /// The body of a successful response for the returned URL can be parsed
52 /// via [`IndexKrate::from_slice`]
53 ///
54 /// See [`Self::make_remote_request`] for a way to make a complete request
55 #[inline]
56 pub fn crate_url(&self, name: KrateName<'_>) -> String {
57 let rel_path = name.relative_path(Some('/'));
58 format!("{}{rel_path}", self.url())
59 }
60
61 /// The HTTP url of the index
62 #[inline]
63 pub fn url(&self) -> &str {
64 self.url.strip_prefix("sparse+").unwrap_or(&self.url)
65 }
66
67 /// Gets the accessor to the local index cache
68 #[inline]
69 pub fn cache(&self) -> &IndexCache {
70 &self.cache
71 }
72
73 /// Attempts to read the locally cached crate information
74 #[inline]
75 pub fn cached_krate(
76 &self,
77 name: KrateName<'_>,
78 lock: &FileLock,
79 ) -> Result<Option<IndexKrate>, Error> {
80 self.cache.cached_krate(name, None, lock)
81 }
82
83 /// Creates an HTTP request that can be sent via your HTTP client of choice
84 /// to retrieve the current metadata for the specified crate
85 ///
86 /// If specified, the etag is used instead of the possible etag stored in
87 /// a local cache entry, resulting in no disk I/O being performed by this
88 /// method
89 ///
90 /// See [`Self::parse_remote_response`] processing the response from the remote
91 /// index
92 ///
93 /// It is highly recommended to assume HTTP/2 when making requests to remote
94 /// indices, at least crates.io
95 pub fn make_remote_request(
96 &self,
97 name: KrateName<'_>,
98 etag: Option<&str>,
99 lock: &FileLock,
100 ) -> Result<http::Request<()>, Error> {
101 use http::header;
102
103 let url = self.crate_url(name);
104
105 let mut req = http::Request::get(url);
106
107 {
108 let headers = req.headers_mut().unwrap();
109
110 // AFAICT this does not affect responses at the moment, but could in
111 // the future if there are changes to the protocol
112 headers.insert(
113 "cargo-protocol",
114 header::HeaderValue::from_static("version=1"),
115 );
116 // All index entries are just files with lines of JSON
117 headers.insert(
118 header::ACCEPT,
119 header::HeaderValue::from_static("text/plain"),
120 );
121 // We need to accept both identity and gzip, as otherwise cloudfront will
122 // always respond to requests with strong etag's, which will differ from
123 // cache entries generated by cargo
124 headers.insert(
125 header::ACCEPT_ENCODING,
126 header::HeaderValue::from_static("gzip"),
127 );
128
129 // If we have a local cache entry, include its version with the
130 // appropriate header, this allows the server to respond with a
131 // cached, or even better, empty response if its version matches
132 // the local one making the request/response loop basically free
133
134 // If we're unable to get the cache version we can just ignore setting the
135 // header, guaranteeing we'll get the full index contents if the crate exists
136 let set_cache_version = |headers: &mut header::HeaderMap| -> Option<()> {
137 let contents = self.cache.read_cache_file(name, lock).ok()??;
138 let valid = ValidCacheEntry::read(&contents).ok()?;
139
140 let (key, value) = valid.revision.split_once(':')?;
141 let value = header::HeaderValue::from_str(value.trim()).ok()?;
142 let name = if key == header::ETAG {
143 header::IF_NONE_MATCH
144 } else if key == header::LAST_MODIFIED {
145 header::IF_MODIFIED_SINCE
146 } else {
147 // We could error here, but that's kind of pointless
148 // since the response will be sent in full if we haven't
149 // specified one of the above headers. Though it does
150 // potentially indicate something weird is going on
151 return None;
152 };
153
154 headers.insert(name, value);
155 None
156 };
157
158 if let Some(etag) = etag {
159 let hv =
160 header::HeaderValue::from_str(etag.trim()).map_err(crate::HttpError::from)?;
161 headers.insert(header::IF_NONE_MATCH, hv);
162 } else {
163 // Use the etag (or last modified, though crates.io does not use this AFAICT)
164 // from the cache entry if it exists
165 let _ = set_cache_version(headers);
166 }
167 }
168
169 Ok(req.body(()).unwrap())
170 }
171
172 /// Process the response to a request created by [`Self::make_remote_request`]
173 ///
174 /// This handles both the scenario where the local cache is missing the specified
175 /// crate, or it is out of date, as well as the local entry being up to date
176 /// and can just be read from disk
177 ///
178 /// You may specify whether an updated index entry is written locally to the
179 /// cache or not
180 ///
181 /// Note that responses from sparse HTTP indices, at least crates.io, may
182 /// send responses with `gzip` compression, it is your responsibility to
183 /// decompress it before sending to this function
184 pub fn parse_remote_response(
185 &self,
186 name: KrateName<'_>,
187 response: http::Response<Vec<u8>>,
188 write_cache_entry: bool,
189 lock: &FileLock,
190 ) -> Result<Option<IndexKrate>, Error> {
191 use http::{header, StatusCode};
192 let (parts, body) = response.into_parts();
193
194 match parts.status {
195 // The server responded with the full contents of the index entry
196 StatusCode::OK => {
197 let krate = IndexKrate::from_slice(&body)?;
198
199 if write_cache_entry {
200 // The same as cargo, prefer etag over last-modified
201 let version = if let Some(etag) = parts.headers.get(header::ETAG) {
202 etag.to_str()
203 .ok()
204 .map(|etag| format!("{}: {etag}", header::ETAG))
205 } else if let Some(lm) = parts.headers.get(header::LAST_MODIFIED) {
206 lm.to_str()
207 .ok()
208 .map(|lm| format!("{}: {lm}", header::LAST_MODIFIED))
209 } else {
210 None
211 };
212
213 let revision = version.unwrap_or_else(|| "Unknown".to_owned());
214
215 // It's unfortunate if we can't write to the cache, but we
216 // don't treat it as a hard error since we still have the
217 // index metadata
218 let _err = self.cache.write_to_cache(&krate, &revision, lock);
219 }
220
221 Ok(Some(krate))
222 }
223 // The local cache entry is up to date with the latest entry on the
224 // server, we can just return the local one
225 StatusCode::NOT_MODIFIED => self.cache.cached_krate(name, None, lock),
226 // The server requires authorization but the user didn't provide it
227 StatusCode::UNAUTHORIZED => Err(HttpError::StatusCode {
228 code: StatusCode::UNAUTHORIZED,
229 msg: "the request was not authorized",
230 }
231 .into()),
232 // The crate does not exist, or has been removed
233 StatusCode::NOT_FOUND
234 | StatusCode::GONE
235 | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => Ok(None),
236 code => Err(HttpError::StatusCode {
237 code,
238 msg: "the status code is invalid for this protocol",
239 }
240 .into()),
241 }
242 }
243}