tame_index/index/
cache.rs

1//! Provides functionality for reading and writing cargo compatible .cache entries
2//! that can be wrapped by another index that has logic for fetching entries
3//! that aren't in the cache
4//!
5//! Cargo creates small cache entries for crates when they are accessed during
6//! any cargo operation that accesses a registry index (update/add/etc).
7//! Initially this was to accelerate accessing the contents of a bare clone of
8//! a git registry index as it skips accessing git blobs.
9//!
10//! Now with sparse HTTP indices, these .cache files are even more important as
11//! they allow skipping network access if in offline mode, as well as allowing
12//! responses from servers to tell the client they have the latest version if
13//! that crate has not been changed since it was last accessed.
14//!
15//! ```txt
16//! +-------------------+---------------------------+------------------+---+
17//! | cache version :u8 | index format version :u32 | revision :string | 0 |
18//! +-------------------+---------------------------+------------------+---+
19//! ```
20//!
//! followed by one or more version entries, each laid out as
22//!
23//! ```txt
24//! +----------------+---+-----------+---+
25//! | semver version | 0 | JSON blob | 0 |
26//! +----------------+---+-----------+---+
27//! ```
28
/// The current (cargo 1.54.0+) cache version for cache entries.
///
/// This is the first byte of every cache entry. Its sole purpose is in
/// determining if cargo will read or skip (and probably overwrite) a .cache
/// entry.
pub const CURRENT_CACHE_VERSION: u8 = 3;
/// The maximum version of the `v` field in the index this crate supports
pub const INDEX_V_MAX: u32 = 2;
/// The little-endian byte representation of [`INDEX_V_MAX`], in the form it
/// is stored in the header of a cache entry
const INDEX_V_MAX_BYTES: [u8; 4] = INDEX_V_MAX.to_le_bytes();
38
39use super::FileLock;
40use crate::{CacheError, Error, IndexKrate, KrateName, PathBuf};
41
/// A wrapper around a byte buffer that has been (partially) validated to be a
/// valid cache entry
///
/// "Partially" means that only the header and the presence of the first
/// version entry have been checked; the JSON blobs of the individual versions
/// are not parsed until [`ValidCacheEntry::to_krate`] is called.
pub struct ValidCacheEntry<'buffer> {
    /// The cache entry's revision
    ///
    /// For git indices this will be the sha1 of the HEAD commit when the cache
    /// entry was written
    ///
    /// For sparse indices, this will be an HTTP header from the response that
    /// was last written to disk, which is currently either `etag: <etag>` or
    /// `last-modified: <timestamp>`
    pub revision: &'buffer str,
    /// Portion of the buffer containing the individual version entries for the
    /// cache entry, i.e. everything after the `0` byte terminating the revision
    pub version_entries: &'buffer [u8],
}
58
59impl<'buffer> ValidCacheEntry<'buffer> {
60    /// Attempts to read a cache entry from a block of bytes.
61    ///
62    /// This can fail for a few reasons
63    /// 1. The cache version does not match the version(s) supported
64    /// 2. The index version is higher than that supported
65    /// 3. There is not at least 1 version entry
66    pub fn read(mut buffer: &'buffer [u8]) -> Result<Self, CacheError> {
67        let cache_version = *buffer.first().ok_or(CacheError::InvalidCacheEntry)?;
68
69        match cache_version.cmp(&CURRENT_CACHE_VERSION) {
70            std::cmp::Ordering::Less => return Err(CacheError::OutdatedCacheVersion),
71            std::cmp::Ordering::Greater => return Err(CacheError::UnknownCacheVersion),
72            std::cmp::Ordering::Equal => {}
73        }
74
75        buffer = &buffer[1..];
76        let index_version = u32::from_le_bytes(
77            buffer
78                .get(0..4)
79                .ok_or(CacheError::InvalidCacheEntry)
80                .and_then(|b| b.try_into().map_err(|_e| CacheError::InvalidCacheEntry))?,
81        );
82
83        if INDEX_V_MAX > index_version {
84            return Err(CacheError::UnknownIndexVersion);
85        }
86
87        buffer = &buffer[4..];
88
89        let mut iter = split(buffer, 0);
90        let revision = std::str::from_utf8(iter.next().ok_or(CacheError::InvalidCacheEntry)?)
91            .map_err(|_e| CacheError::OutdatedRevision)?;
92
93        // Ensure there is at least one valid entry, it _should_ be impossible
94        // to have an empty cache entry since you can't publish something to an
95        // index and still have zero versions
96        let _version = iter.next().ok_or(CacheError::InvalidCacheEntry)?;
97        let _blob = iter.next().ok_or(CacheError::InvalidCacheEntry)?;
98
99        let version_entries = &buffer[revision.len() + 1..];
100
101        Ok(Self {
102            revision,
103            version_entries,
104        })
105    }
106
107    /// Deserializes this cache entry into a [`IndexKrate`]
108    ///
109    /// If specified, the `revision` will be used to ignore cache entries
110    /// that are outdated
111    pub fn to_krate(&self, revision: Option<&str>) -> Result<Option<IndexKrate>, Error> {
112        if let Some(iv) = revision {
113            if iv != self.revision {
114                return Ok(None);
115            }
116        }
117
118        Ok(Some(IndexKrate::from_cache(split(
119            self.version_entries,
120            0,
121        ))?))
122    }
123}
124
125impl IndexKrate {
126    /// Reads entries from the versions portion of a cache file
127    pub(crate) fn from_cache<'cache>(
128        mut iter: impl Iterator<Item = &'cache [u8]> + 'cache,
129    ) -> Result<Self, Error> {
130        let mut versions = Vec::new();
131
132        // Each entry is a tuple of (semver, version_json)
133        while iter.next().is_some() {
134            let version_slice = iter
135                .next()
136                .ok_or(Error::Cache(CacheError::InvalidCrateVersion))?;
137            let version: crate::IndexVersion = serde_json::from_slice(version_slice)?;
138            versions.push(version);
139        }
140
141        Ok(Self { versions })
142    }
143
144    /// Writes a cache entry with the specified revision to an [`std::io::Write`]
145    ///
146    /// Note this method creates its own internal [`std::io::BufWriter`], there
147    /// is no need to wrap it yourself
148    pub fn write_cache_entry<W: std::io::Write>(
149        &self,
150        writer: &mut W,
151        revision: &str,
152    ) -> Result<(), std::io::Error> {
153        use std::io::Write;
154
155        const SPLIT: &[u8] = &[0];
156
157        let mut w = std::io::BufWriter::new(writer);
158        w.write_all(&[CURRENT_CACHE_VERSION])?;
159        w.write_all(&INDEX_V_MAX_BYTES)?;
160        w.write_all(revision.as_bytes())?;
161        w.write_all(SPLIT)?;
162
163        // crates.io limits crate names to a maximum of 64 characters, but this
164        // only applies to crates.io and not any cargo index, so don't set a hard
165        // limit
166        let mut semver = String::with_capacity(64);
167
168        for iv in &self.versions {
169            semver.clear();
170            // SAFETY: the only way this would fail would be OOM
171            std::fmt::write(&mut semver, format_args!("{}", iv.version)).unwrap();
172            w.write_all(semver.as_bytes())?;
173            w.write_all(SPLIT)?;
174
175            serde_json::to_writer(&mut w, &iv)?;
176            w.write_all(SPLIT)?;
177        }
178
179        w.flush()
180    }
181}
182
183/// Gives an iterator over the specified buffer, where each item is split by the specified
184/// needle value
185pub fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> + '_ {
186    struct Split<'a> {
187        haystack: &'a [u8],
188        needle: u8,
189    }
190
191    impl<'a> Iterator for Split<'a> {
192        type Item = &'a [u8];
193
194        #[inline]
195        fn next(&mut self) -> Option<&'a [u8]> {
196            if self.haystack.is_empty() {
197                return None;
198            }
199            let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
200                Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
201                None => (self.haystack, &[][..]),
202            };
203            self.haystack = remaining;
204            Some(ret)
205        }
206    }
207
208    Split { haystack, needle }
209}
210
/// The [`IndexCache`] allows access to the local cache entries for a remote index
///
/// This implementation does no network I/O whatsoever, but does do disk I/O
pub struct IndexCache {
    /// The root disk location of the local index, the cache entries are
    /// located in the `.cache` directory beneath it
    pub(super) path: PathBuf,
}
218
219impl IndexCache {
220    /// Creates a local index exactly at the specified path
221    #[inline]
222    pub fn at_path(path: PathBuf) -> Self {
223        Self { path }
224    }
225
226    /// Reads a crate from the local cache of the index.
227    ///
228    /// You may optionally pass in the revision the cache entry is expected to
229    /// have, if it does match the cache entry will be ignored and an error returned
230    #[inline]
231    pub fn cached_krate(
232        &self,
233        name: KrateName<'_>,
234        revision: Option<&str>,
235        lock: &FileLock,
236    ) -> Result<Option<IndexKrate>, Error> {
237        let Some(contents) = self.read_cache_file(name, lock)? else {
238            return Ok(None);
239        };
240
241        let valid = ValidCacheEntry::read(&contents)?;
242        valid.to_krate(revision)
243    }
244
245    /// Writes the specified crate and revision to the cache
246    pub fn write_to_cache(
247        &self,
248        krate: &IndexKrate,
249        revision: &str,
250        _lock: &FileLock,
251    ) -> Result<PathBuf, Error> {
252        let name = krate.name().try_into()?;
253        let cache_path = self.cache_path(name);
254
255        std::fs::create_dir_all(cache_path.parent().unwrap())?;
256
257        let mut cache_file = match std::fs::File::create(&cache_path) {
258            Ok(cf) => cf,
259            Err(err) => return Err(Error::IoPath(err, cache_path)),
260        };
261
262        // It's unfortunate if this fails for some reason, but
263        // not writing the cache entry shouldn't stop the user
264        // from getting the crate's metadata
265        match krate.write_cache_entry(&mut cache_file, revision) {
266            Ok(_) => Ok(cache_path),
267            Err(err) => {
268                drop(cache_file);
269                // _attempt_ to delete the file, to clean up after ourselves
270                let _ = std::fs::remove_file(&cache_path);
271                Err(Error::IoPath(err, cache_path))
272            }
273        }
274    }
275
276    /// Gets the path the crate's cache file would be located at if it exists
277    #[inline]
278    pub fn cache_path(&self, name: KrateName<'_>) -> PathBuf {
279        let rel_path = name.relative_path(None);
280
281        // avoid realloc on each push
282        let mut cache_path = PathBuf::with_capacity(self.path.as_str().len() + 8 + rel_path.len());
283        cache_path.push(&self.path);
284        cache_path.push(".cache");
285        cache_path.push(rel_path);
286
287        cache_path
288    }
289
290    /// Attempts to read the cache entry for the specified crate
291    ///
292    /// It is recommended to use [`Self::cached_krate`]
293    #[inline]
294    pub fn read_cache_file(
295        &self,
296        name: KrateName<'_>,
297        _lock: &FileLock,
298    ) -> Result<Option<Vec<u8>>, Error> {
299        let cache_path = self.cache_path(name);
300
301        match std::fs::read(&cache_path) {
302            Ok(cb) => Ok(Some(cb)),
303            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
304            Err(err) => Err(Error::IoPath(err, cache_path)),
305        }
306    }
307}