1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
//! Provides functionality for reading and writing cargo compatible .cache entries
//! that can be wrapped by another index that has logic for fetching entries
//! that aren't in the cache
//!
//! Cargo creates small cache entries for crates when they are accessed during
//! any cargo operation that accesses a registry index (update/add/etc).
//! Initially this was to accelerate accessing the contents of a bare clone of
//! a git registry index as it skips accessing git blobs.
//!
//! Now with sparse HTTP indices, these .cache files are even more important as
//! they allow skipping network access if in offline mode, as well as allowing
//! responses from servers to tell the client they have the latest version if
//! that crate has not been changed since it was last accessed.
//!
//! ```txt
//! +-------------------+---------------------------+------------------+---+
//! | cache version :u8 | index format version :u32 | revision :string | 0 |
//! +-------------------+---------------------------+------------------+---+
//! ```
//!
//! followed by 1+
//!
//! ```txt
//! +----------------+---+-----------+---+
//! | semver version | 0 | JSON blob | 0 |
//! +----------------+---+-----------+---+
//! ```

/// The current (cargo 1.54.0+) cache version for cache entries.
///
/// This is the very first byte of every cache entry. Its sole purpose is in
/// determining if cargo will read or skip (and probably overwrite) a .cache
/// entry.
pub const CURRENT_CACHE_VERSION: u8 = 3;
/// The maximum version of the `v` field in the index this crate supports
pub const INDEX_V_MAX: u32 = 2;
/// The little-endian byte representation of [`INDEX_V_MAX`], which is the form
/// the index format version takes in the header of a cache entry
const INDEX_V_MAX_BYTES: [u8; 4] = INDEX_V_MAX.to_le_bytes();

use super::FileLock;
use crate::{CacheError, Error, IndexKrate, KrateName, PathBuf};

/// A wrapper around a byte buffer that has been (partially) validated to be a
/// valid cache entry
///
/// Both fields borrow directly from the buffer the entry was read from, so
/// the buffer must outlive this wrapper.
pub struct ValidCacheEntry<'buffer> {
    /// The cache entry's revision
    ///
    /// For git indices this will be the sha1 of the HEAD commit when the cache
    /// entry was written
    ///
    /// For sparse indices, this will be an HTTP header from the response that
    /// was last written to disk, which is currently either `etag: <etag>` or
    /// `last-modified: <timestamp>`
    pub revision: &'buffer str,
    /// Portion of the buffer containing the individual version entries for the
    /// cache entry, ie. everything after the `0` byte that terminates the
    /// revision
    pub version_entries: &'buffer [u8],
}

impl<'buffer> ValidCacheEntry<'buffer> {
    /// Attempts to read a cache entry from a block of bytes.
    ///
    /// Only the header and the _presence_ of the first version entry are
    /// checked here, the JSON blobs themselves are not deserialized until
    /// [`Self::to_krate`] is called.
    ///
    /// # Errors
    ///
    /// - [`CacheError::InvalidCacheEntry`] if the buffer is empty, is too
    ///   short to contain the header, or does not have at least 1 version entry
    /// - [`CacheError::OutdatedCacheVersion`] or
    ///   [`CacheError::UnknownCacheVersion`] if the cache version is lower, or
    ///   higher, than [`CURRENT_CACHE_VERSION`]
    /// - [`CacheError::UnknownIndexVersion`] if the index version is higher
    ///   than [`INDEX_V_MAX`]
    /// - [`CacheError::OutdatedRevision`] if the revision is not valid utf-8
    pub fn read(mut buffer: &'buffer [u8]) -> Result<Self, CacheError> {
        let cache_version = *buffer.first().ok_or(CacheError::InvalidCacheEntry)?;

        match cache_version.cmp(&CURRENT_CACHE_VERSION) {
            std::cmp::Ordering::Less => return Err(CacheError::OutdatedCacheVersion),
            std::cmp::Ordering::Greater => return Err(CacheError::UnknownCacheVersion),
            std::cmp::Ordering::Equal => {}
        }

        // Skip past the cache version byte, the index version is the 4 bytes
        // (little-endian) that immediately follow it
        buffer = &buffer[1..];
        let index_version = u32::from_le_bytes(
            buffer
                .get(0..4)
                .ok_or(CacheError::InvalidCacheEntry)
                .and_then(|b| b.try_into().map_err(|_e| CacheError::InvalidCacheEntry))?,
        );

        // Only index versions _newer_ than the maximum we understand are
        // rejected, entries written with an older index version are still
        // perfectly readable
        if index_version > INDEX_V_MAX {
            return Err(CacheError::UnknownIndexVersion);
        }

        // This can't panic, the `get(0..4)` above proved there are at least
        // 4 bytes remaining
        buffer = &buffer[4..];

        let mut iter = split(buffer, 0);
        let revision = std::str::from_utf8(iter.next().ok_or(CacheError::InvalidCacheEntry)?)
            .map_err(|_e| CacheError::OutdatedRevision)?;

        // Ensure there is at least one valid entry, it _should_ be impossible
        // to have an empty cache entry since you can't publish something to an
        // index and still have zero versions
        let _version = iter.next().ok_or(CacheError::InvalidCacheEntry)?;
        let _blob = iter.next().ok_or(CacheError::InvalidCacheEntry)?;

        // The version entries start right after the revision's 0 terminator.
        // The terminator is guaranteed to exist at this point, otherwise the
        // revision would have consumed the entire buffer and the checks above
        // would have failed
        let version_entries = &buffer[revision.len() + 1..];

        Ok(Self {
            revision,
            version_entries,
        })
    }

    /// Deserializes this cache entry into a [`IndexKrate`]
    ///
    /// If specified, the `revision` will be used to ignore cache entries
    /// that are outdated, in which case `Ok(None)` is returned
    ///
    /// # Errors
    ///
    /// Fails if any of the version entries cannot be deserialized
    pub fn to_krate(&self, revision: Option<&str>) -> Result<Option<IndexKrate>, Error> {
        if let Some(iv) = revision {
            if iv != self.revision {
                return Ok(None);
            }
        }

        Ok(Some(IndexKrate::from_cache(split(
            self.version_entries,
            0,
        ))?))
    }
}

impl IndexKrate {
    /// Reads entries from the versions portion of a cache file
    pub(crate) fn from_cache<'cache>(
        mut iter: impl Iterator<Item = &'cache [u8]> + 'cache,
    ) -> Result<Self, Error> {
        let mut versions = Vec::new();

        // Each entry is a tuple of (semver, version_json)
        while iter.next().is_some() {
            let version_slice = iter
                .next()
                .ok_or(Error::Cache(CacheError::InvalidCrateVersion))?;
            let version: crate::IndexVersion = serde_json::from_slice(version_slice)?;
            versions.push(version);
        }

        Ok(Self { versions })
    }

    /// Writes a cache entry with the specified revision to an [`std::io::Write`]
    ///
    /// Note this method creates its own internal [`std::io::BufWriter`], there
    /// is no need to wrap it yourself
    pub fn write_cache_entry<W: std::io::Write>(
        &self,
        writer: &mut W,
        revision: &str,
    ) -> Result<(), std::io::Error> {
        use std::io::Write;

        const SPLIT: &[u8] = &[0];

        let mut w = std::io::BufWriter::new(writer);
        w.write_all(&[CURRENT_CACHE_VERSION])?;
        w.write_all(&INDEX_V_MAX_BYTES)?;
        w.write_all(revision.as_bytes())?;
        w.write_all(SPLIT)?;

        // crates.io limits crate names to a maximum of 64 characters, but this
        // only applies to crates.io and not any cargo index, so don't set a hard
        // limit
        let mut semver = String::with_capacity(64);

        for iv in &self.versions {
            semver.clear();
            // SAFETY: the only way this would fail would be OOM
            std::fmt::write(&mut semver, format_args!("{}", iv.version)).unwrap();
            w.write_all(semver.as_bytes())?;
            w.write_all(SPLIT)?;

            serde_json::to_writer(&mut w, &iv)?;
            w.write_all(SPLIT)?;
        }

        w.flush()
    }
}

/// Returns an iterator that yields the pieces of `haystack` delimited by
/// `needle`
///
/// The needle itself is never part of a yielded piece. Iteration ends as soon
/// as there are no bytes left, so a trailing needle does not produce a final
/// empty piece, and an empty haystack yields nothing at all.
pub fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> + '_ {
    /// Iterator state, the delimiter and the bytes that haven't been yielded
    struct Pieces<'h> {
        rest: &'h [u8],
        needle: u8,
    }

    impl<'h> Iterator for Pieces<'h> {
        type Item = &'h [u8];

        #[inline]
        fn next(&mut self) -> Option<Self::Item> {
            let rest = self.rest;
            if rest.is_empty() {
                return None;
            }

            let piece = if let Some(idx) = memchr::memchr(self.needle, rest) {
                let (head, tail) = rest.split_at(idx);
                // `tail` begins with the needle, step over it
                self.rest = &tail[1..];
                head
            } else {
                // No more delimiters, hand out everything that is left
                std::mem::take(&mut self.rest)
            };

            Some(piece)
        }
    }

    Pieces {
        rest: haystack,
        needle,
    }
}

/// The [`IndexCache`] allows access to the local cache entries for a remote index
///
/// This implementation does no network I/O whatsoever, but does do disk I/O
pub struct IndexCache {
    /// The root disk location of the local index
    ///
    /// Cache entries are located in the `.cache` directory beneath this path
    pub(super) path: PathBuf,
}

impl IndexCache {
    /// Creates a local index exactly at the specified path
    #[inline]
    pub fn at_path(path: PathBuf) -> Self {
        Self { path }
    }

    /// Reads a crate from the local cache of the index.
    ///
    /// You may optionally pass in the revision the cache entry is expected to
    /// have, if it does match the cache entry will be ignored and an error returned
    #[inline]
    pub fn cached_krate(
        &self,
        name: KrateName<'_>,
        revision: Option<&str>,
        lock: &FileLock,
    ) -> Result<Option<IndexKrate>, Error> {
        let Some(contents) = self.read_cache_file(name, lock)? else {
            return Ok(None);
        };

        let valid = ValidCacheEntry::read(&contents)?;
        valid.to_krate(revision)
    }

    /// Writes the specified crate and revision to the cache
    pub fn write_to_cache(
        &self,
        krate: &IndexKrate,
        revision: &str,
        _lock: &FileLock,
    ) -> Result<PathBuf, Error> {
        let name = krate.name().try_into()?;
        let cache_path = self.cache_path(name);

        std::fs::create_dir_all(cache_path.parent().unwrap())?;

        let mut cache_file = match std::fs::File::create(&cache_path) {
            Ok(cf) => cf,
            Err(err) => return Err(Error::IoPath(err, cache_path)),
        };

        // It's unfortunate if this fails for some reason, but
        // not writing the cache entry shouldn't stop the user
        // from getting the crate's metadata
        match krate.write_cache_entry(&mut cache_file, revision) {
            Ok(_) => Ok(cache_path),
            Err(err) => {
                drop(cache_file);
                // _attempt_ to delete the file, to clean up after ourselves
                let _ = std::fs::remove_file(&cache_path);
                Err(Error::IoPath(err, cache_path))
            }
        }
    }

    /// Gets the path the crate's cache file would be located at if it exists
    #[inline]
    pub fn cache_path(&self, name: KrateName<'_>) -> PathBuf {
        let rel_path = name.relative_path(None);

        // avoid realloc on each push
        let mut cache_path = PathBuf::with_capacity(self.path.as_str().len() + 8 + rel_path.len());
        cache_path.push(&self.path);
        cache_path.push(".cache");
        cache_path.push(rel_path);

        cache_path
    }

    /// Attempts to read the cache entry for the specified crate
    ///
    /// It is recommended to use [`Self::cached_krate`]
    #[inline]
    pub fn read_cache_file(
        &self,
        name: KrateName<'_>,
        _lock: &FileLock,
    ) -> Result<Option<Vec<u8>>, Error> {
        let cache_path = self.cache_path(name);

        match std::fs::read(&cache_path) {
            Ok(cb) => Ok(Some(cb)),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(err) => Err(Error::IoPath(err, cache_path)),
        }
    }
}