tame_index/index/cache.rs
1//! Provides functionality for reading and writing cargo compatible .cache entries
2//! that can be wrapped by another index that has logic for fetching entries
3//! that aren't in the cache
4//!
5//! Cargo creates small cache entries for crates when they are accessed during
6//! any cargo operation that accesses a registry index (update/add/etc).
7//! Initially this was to accelerate accessing the contents of a bare clone of
8//! a git registry index as it skips accessing git blobs.
9//!
10//! Now with sparse HTTP indices, these .cache files are even more important as
11//! they allow skipping network access if in offline mode, as well as allowing
12//! responses from servers to tell the client they have the latest version if
13//! that crate has not been changed since it was last accessed.
14//!
15//! ```txt
16//! +-------------------+---------------------------+------------------+---+
17//! | cache version :u8 | index format version :u32 | revision :string | 0 |
18//! +-------------------+---------------------------+------------------+---+
19//! ```
20//!
21//! followed by 1+
22//!
23//! ```txt
24//! +----------------+---+-----------+---+
25//! | semver version | 0 | JSON blob | 0 |
26//! +----------------+---+-----------+---+
27//! ```
28
/// The cache version (cargo 1.54.0+) that cache entries are written with and
/// are required to have when read.
///
/// Cargo uses this value for nothing other than deciding whether a .cache
/// entry can be read or must be skipped (and most likely overwritten).
pub const CURRENT_CACHE_VERSION: u8 = 3;
/// The highest value of the `v` field of index entries supported by this crate
pub const INDEX_V_MAX: u32 = 2;
/// [`INDEX_V_MAX`] encoded as little-endian bytes, the form in which it is
/// stored in the header of a cache entry
const INDEX_V_MAX_BYTES: [u8; 4] = u32::to_le_bytes(INDEX_V_MAX);
38
39use super::FileLock;
40use crate::{CacheError, Error, IndexKrate, KrateName, PathBuf};
41
/// A borrowed view of a byte buffer that has passed the (partial) validation
/// required of a cache entry
pub struct ValidCacheEntry<'buffer> {
    /// The revision the cache entry was written with
    ///
    /// - git indices: the sha1 of the HEAD commit at the time the entry was
    ///   written
    /// - sparse indices: an HTTP header from the response that was last
    ///   written to disk, currently either `etag: <etag>` or
    ///   `last-modified: <timestamp>`
    pub revision: &'buffer str,
    /// The part of the buffer that holds the individual version entries of
    /// the cache entry
    pub version_entries: &'buffer [u8],
}
58
59impl<'buffer> ValidCacheEntry<'buffer> {
60 /// Attempts to read a cache entry from a block of bytes.
61 ///
62 /// This can fail for a few reasons
63 /// 1. The cache version does not match the version(s) supported
64 /// 2. The index version is higher than that supported
65 /// 3. There is not at least 1 version entry
66 pub fn read(mut buffer: &'buffer [u8]) -> Result<Self, CacheError> {
67 let cache_version = *buffer.first().ok_or(CacheError::InvalidCacheEntry)?;
68
69 match cache_version.cmp(&CURRENT_CACHE_VERSION) {
70 std::cmp::Ordering::Less => return Err(CacheError::OutdatedCacheVersion),
71 std::cmp::Ordering::Greater => return Err(CacheError::UnknownCacheVersion),
72 std::cmp::Ordering::Equal => {}
73 }
74
75 buffer = &buffer[1..];
76 let index_version = u32::from_le_bytes(
77 buffer
78 .get(0..4)
79 .ok_or(CacheError::InvalidCacheEntry)
80 .and_then(|b| b.try_into().map_err(|_e| CacheError::InvalidCacheEntry))?,
81 );
82
83 if INDEX_V_MAX > index_version {
84 return Err(CacheError::UnknownIndexVersion);
85 }
86
87 buffer = &buffer[4..];
88
89 let mut iter = split(buffer, 0);
90 let revision = std::str::from_utf8(iter.next().ok_or(CacheError::InvalidCacheEntry)?)
91 .map_err(|_e| CacheError::OutdatedRevision)?;
92
93 // Ensure there is at least one valid entry, it _should_ be impossible
94 // to have an empty cache entry since you can't publish something to an
95 // index and still have zero versions
96 let _version = iter.next().ok_or(CacheError::InvalidCacheEntry)?;
97 let _blob = iter.next().ok_or(CacheError::InvalidCacheEntry)?;
98
99 let version_entries = &buffer[revision.len() + 1..];
100
101 Ok(Self {
102 revision,
103 version_entries,
104 })
105 }
106
107 /// Deserializes this cache entry into a [`IndexKrate`]
108 ///
109 /// If specified, the `revision` will be used to ignore cache entries
110 /// that are outdated
111 pub fn to_krate(&self, revision: Option<&str>) -> Result<Option<IndexKrate>, Error> {
112 if let Some(iv) = revision {
113 if iv != self.revision {
114 return Ok(None);
115 }
116 }
117
118 Ok(Some(IndexKrate::from_cache(split(
119 self.version_entries,
120 0,
121 ))?))
122 }
123}
124
125impl IndexKrate {
126 /// Reads entries from the versions portion of a cache file
127 pub(crate) fn from_cache<'cache>(
128 mut iter: impl Iterator<Item = &'cache [u8]> + 'cache,
129 ) -> Result<Self, Error> {
130 let mut versions = Vec::new();
131
132 // Each entry is a tuple of (semver, version_json)
133 while iter.next().is_some() {
134 let version_slice = iter
135 .next()
136 .ok_or(Error::Cache(CacheError::InvalidCrateVersion))?;
137 let version: crate::IndexVersion = serde_json::from_slice(version_slice)?;
138 versions.push(version);
139 }
140
141 Ok(Self { versions })
142 }
143
144 /// Writes a cache entry with the specified revision to an [`std::io::Write`]
145 ///
146 /// Note this method creates its own internal [`std::io::BufWriter`], there
147 /// is no need to wrap it yourself
148 pub fn write_cache_entry<W: std::io::Write>(
149 &self,
150 writer: &mut W,
151 revision: &str,
152 ) -> Result<(), std::io::Error> {
153 use std::io::Write;
154
155 const SPLIT: &[u8] = &[0];
156
157 let mut w = std::io::BufWriter::new(writer);
158 w.write_all(&[CURRENT_CACHE_VERSION])?;
159 w.write_all(&INDEX_V_MAX_BYTES)?;
160 w.write_all(revision.as_bytes())?;
161 w.write_all(SPLIT)?;
162
163 // crates.io limits crate names to a maximum of 64 characters, but this
164 // only applies to crates.io and not any cargo index, so don't set a hard
165 // limit
166 let mut semver = String::with_capacity(64);
167
168 for iv in &self.versions {
169 semver.clear();
170 // SAFETY: the only way this would fail would be OOM
171 std::fmt::write(&mut semver, format_args!("{}", iv.version)).unwrap();
172 w.write_all(semver.as_bytes())?;
173 w.write_all(SPLIT)?;
174
175 serde_json::to_writer(&mut w, &iv)?;
176 w.write_all(SPLIT)?;
177 }
178
179 w.flush()
180 }
181}
182
183/// Gives an iterator over the specified buffer, where each item is split by the specified
184/// needle value
185pub fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> + '_ {
186 struct Split<'a> {
187 haystack: &'a [u8],
188 needle: u8,
189 }
190
191 impl<'a> Iterator for Split<'a> {
192 type Item = &'a [u8];
193
194 #[inline]
195 fn next(&mut self) -> Option<&'a [u8]> {
196 if self.haystack.is_empty() {
197 return None;
198 }
199 let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
200 Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
201 None => (self.haystack, &[][..]),
202 };
203 self.haystack = remaining;
204 Some(ret)
205 }
206 }
207
208 Split { haystack, needle }
209}
210
211/// The [`IndexCache`] allows access to the local cache entries for a remote index
212///
213/// This implementation does no network I/O whatsoever, but does do disk I/O
214pub struct IndexCache {
215 /// The root disk location of the local index
216 pub(super) path: PathBuf,
217}
218
219impl IndexCache {
220 /// Creates a local index exactly at the specified path
221 #[inline]
222 pub fn at_path(path: PathBuf) -> Self {
223 Self { path }
224 }
225
226 /// Reads a crate from the local cache of the index.
227 ///
228 /// You may optionally pass in the revision the cache entry is expected to
229 /// have, if it does match the cache entry will be ignored and an error returned
230 #[inline]
231 pub fn cached_krate(
232 &self,
233 name: KrateName<'_>,
234 revision: Option<&str>,
235 lock: &FileLock,
236 ) -> Result<Option<IndexKrate>, Error> {
237 let Some(contents) = self.read_cache_file(name, lock)? else {
238 return Ok(None);
239 };
240
241 let valid = ValidCacheEntry::read(&contents)?;
242 valid.to_krate(revision)
243 }
244
245 /// Writes the specified crate and revision to the cache
246 pub fn write_to_cache(
247 &self,
248 krate: &IndexKrate,
249 revision: &str,
250 _lock: &FileLock,
251 ) -> Result<PathBuf, Error> {
252 let name = krate.name().try_into()?;
253 let cache_path = self.cache_path(name);
254
255 std::fs::create_dir_all(cache_path.parent().unwrap())?;
256
257 let mut cache_file = match std::fs::File::create(&cache_path) {
258 Ok(cf) => cf,
259 Err(err) => return Err(Error::IoPath(err, cache_path)),
260 };
261
262 // It's unfortunate if this fails for some reason, but
263 // not writing the cache entry shouldn't stop the user
264 // from getting the crate's metadata
265 match krate.write_cache_entry(&mut cache_file, revision) {
266 Ok(_) => Ok(cache_path),
267 Err(err) => {
268 drop(cache_file);
269 // _attempt_ to delete the file, to clean up after ourselves
270 let _ = std::fs::remove_file(&cache_path);
271 Err(Error::IoPath(err, cache_path))
272 }
273 }
274 }
275
276 /// Gets the path the crate's cache file would be located at if it exists
277 #[inline]
278 pub fn cache_path(&self, name: KrateName<'_>) -> PathBuf {
279 let rel_path = name.relative_path(None);
280
281 // avoid realloc on each push
282 let mut cache_path = PathBuf::with_capacity(self.path.as_str().len() + 8 + rel_path.len());
283 cache_path.push(&self.path);
284 cache_path.push(".cache");
285 cache_path.push(rel_path);
286
287 cache_path
288 }
289
290 /// Attempts to read the cache entry for the specified crate
291 ///
292 /// It is recommended to use [`Self::cached_krate`]
293 #[inline]
294 pub fn read_cache_file(
295 &self,
296 name: KrateName<'_>,
297 _lock: &FileLock,
298 ) -> Result<Option<Vec<u8>>, Error> {
299 let cache_path = self.cache_path(name);
300
301 match std::fs::read(&cache_path) {
302 Ok(cb) => Ok(Some(cb)),
303 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
304 Err(err) => Err(Error::IoPath(err, cache_path)),
305 }
306 }
307}