tame_index/
krate.rs

1//! Provides types for the structured metadata stored in a registry index
2
3mod dedupe;
4
5use crate::Error;
6use dedupe::DedupeContext;
7use semver::Version;
8use serde::{Deserialize, Serialize};
9use smol_str::SmolStr;
10use std::{collections::BTreeMap, sync::Arc};
11
/// Maps each feature name to the list of features that it enables
pub type FeatureMap = BTreeMap<String, Vec<String>>;
14
15/// A single version of a crate (package) published to the index
16#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq)]
17pub struct IndexVersion {
18    /// [Name](https://doc.rust-lang.org/cargo/reference/manifest.html#the-name-field)
19    pub name: SmolStr,
20    /// [Version](https://doc.rust-lang.org/cargo/reference/manifest.html#the-version-field)
21    #[serde(rename = "vers")]
22    pub version: SmolStr,
23    /// [Dependencies](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html)
24    pub deps: Arc<[IndexDependency]>,
25    /// The SHA-256 for this crate version's tarball
26    #[serde(rename = "cksum")]
27    pub checksum: Chksum,
28    /// [Features](https://doc.rust-lang.org/cargo/reference/features.html)
29    features: Arc<FeatureMap>,
30    /// Version 2 of the index includes this field
31    /// <https://rust-lang.github.io/rfcs/3143-cargo-weak-namespaced-features.html#index-changes>
32    #[serde(default, skip_serializing_if = "Option::is_none")]
33    features2: Option<Arc<FeatureMap>>,
34    /// Whether the crate is yanked from the remote index or not
35    #[serde(default)]
36    pub yanked: bool,
37    /// [Links](https://doc.rust-lang.org/cargo/reference/manifest.html#the-links-field)
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub links: Option<Box<SmolStr>>,
40    /// [Rust Version](https://doc.rust-lang.org/cargo/reference/manifest.html#the-rust-version-field)
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub rust_version: Option<SmolStr>,
43    /// The index version, 1 if not set, v2 indicates presence of feature2 field
44    #[serde(skip_serializing_if = "Option::is_none")]
45    v: Option<u32>,
46}
47
48impl IndexVersion {
49    /// Test functionality
50    #[doc(hidden)]
51    pub fn fake(name: &str, version: impl Into<SmolStr>) -> Self {
52        Self {
53            name: name.into(),
54            version: version.into(),
55            deps: Arc::new([]),
56            features: Arc::default(),
57            features2: None,
58            links: None,
59            rust_version: None,
60            checksum: Chksum(Default::default()),
61            yanked: false,
62            v: None,
63        }
64    }
65
66    /// Dependencies for this version
67    #[inline]
68    pub fn dependencies(&self) -> &[IndexDependency] {
69        &self.deps
70    }
71
72    /// Checksum of the package for this version
73    ///
74    /// SHA256 of the .crate file
75    #[inline]
76    pub fn checksum(&self) -> &[u8; 32] {
77        &self.checksum.0
78    }
79
80    /// Explicit feature set for this crate.
81    ///
82    /// This list is not exhaustive, because any optional dependency becomes a
83    /// feature automatically.
84    ///
85    /// `default` is a special feature name for implicitly enabled features.
86    #[inline]
87    pub fn features(&self) -> impl Iterator<Item = (&String, &Vec<String>)> {
88        self.features.iter().chain(
89            self.features2
90                .as_ref()
91                .map(|f| f.iter())
92                .into_iter()
93                .flatten(),
94        )
95    }
96
97    /// Exclusivity flag. If this is a sys crate, it informs it
98    /// conflicts with any other crate with the same links string.
99    ///
100    /// It does not involve linker or libraries in any way.
101    #[inline]
102    pub fn links(&self) -> Option<&str> {
103        self.links.as_ref().map(|s| s.as_str())
104    }
105
106    /// Whether this version was [yanked](http://doc.crates.io/crates-io.html#cargo-yank) from the
107    /// index
108    #[inline]
109    pub fn is_yanked(&self) -> bool {
110        self.yanked
111    }
112
113    /// Required version of rust
114    ///
115    /// Corresponds to `package.rust-version`.
116    ///
117    /// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
118    /// can be `None` if published before then or if not set in the manifest.
119    #[inline]
120    pub fn rust_version(&self) -> Option<&str> {
121        self.rust_version.as_deref()
122    }
123
124    /// Retrieves the URL this crate version's tarball can be downloaded from
125    #[inline]
126    pub fn download_url(&self, index: &crate::index::IndexConfig) -> Option<String> {
127        Some(index.download_url(self.name.as_str().try_into().ok()?, self.version.as_ref()))
128    }
129}
130
131/// A single dependency of a specific crate version
132#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq, Hash)]
133pub struct IndexDependency {
134    /// Dependency's arbitrary nickname (it may be an alias). Use [`Self::crate_name`] for actual crate name.
135    pub name: SmolStr,
136    /// The version requirement, as a string
137    pub req: SmolStr,
138    /// Double indirection to remove size from this struct, since the features are rarely set
139    pub features: Box<Box<[String]>>,
140    /// If it is an optional dependency
141    pub optional: bool,
142    /// True if the default features are enabled
143    pub default_features: bool,
144    /// Cfg expression applied to the dependency
145    pub target: Option<Box<SmolStr>>,
146    /// The kind of the dependency
147    #[serde(skip_serializing_if = "Option::is_none")]
148    pub kind: Option<DependencyKind>,
149    /// The name of the actual crate, if it was renamed in the crate's manifest
150    #[serde(skip_serializing_if = "Option::is_none")]
151    pub package: Option<Box<SmolStr>>,
152}
153
154impl IndexDependency {
155    /// Gets the version requirement for the dependency as a [`semver::VersionReq`]
156    #[inline]
157    pub fn version_requirement(&self) -> semver::VersionReq {
158        self.req.parse().unwrap()
159    }
160
161    /// Features unconditionally enabled when using this dependency, in addition
162    /// to [`Self::has_default_features`] and features enabled through the
163    /// parent crate's feature list.
164    #[inline]
165    pub fn features(&self) -> &[String] {
166        &self.features
167    }
168
169    /// If it's optional, it implies a feature of its [`Self::name`], and
170    /// can be enabled through the parent crate's features.
171    #[inline]
172    pub fn is_optional(&self) -> bool {
173        self.optional
174    }
175
176    /// If `true` (default), enable `default` feature of this dependency
177    #[inline]
178    pub fn has_default_features(&self) -> bool {
179        self.default_features
180    }
181
182    /// This dependency is only used when compiling for this `cfg` expression
183    #[inline]
184    pub fn target(&self) -> Option<&str> {
185        self.target.as_ref().map(|s| s.as_str())
186    }
187
188    /// The kind of the dependency
189    #[inline]
190    pub fn kind(&self) -> DependencyKind {
191        self.kind.unwrap_or_default()
192    }
193
194    /// Set if dependency's crate name is different from the `name` (alias)
195    #[inline]
196    pub fn package(&self) -> Option<&str> {
197        self.package.as_ref().map(|s| s.as_str())
198    }
199
200    /// Returns the name of the crate providing the dependency.
201    /// This is equivalent to `name()` unless `self.package()`
202    /// is not `None`, in which case it's equal to `self.package()`.
203    ///
204    /// Basically, you can define a dependency in your `Cargo.toml`
205    /// like this:
206    ///
207    /// ```toml
208    /// serde_lib = { version = "1", package = "serde" }
209    /// ```
210    ///
211    /// ...which means that it uses the crate `serde` but imports
212    /// it under the name `serde_lib`.
213    #[inline]
214    pub fn crate_name(&self) -> &str {
215        match &self.package {
216            Some(s) => s,
217            None => &self.name,
218        }
219    }
220}
221
222/// Section in which this dependency was defined
223#[derive(Debug, Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Hash, Default)]
224#[serde(rename_all = "lowercase")]
225pub enum DependencyKind {
226    /// Used at run time
227    #[default]
228    Normal,
229    /// Not fetched and not used, except for when used direclty in a workspace
230    Dev,
231    /// Used at build time, not available at run time
232    Build,
233}
234
235/// A whole crate with all its versions
236#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq)]
237pub struct IndexKrate {
238    /// All versions of the crate, sorted chronologically by when it was published
239    pub versions: Vec<IndexVersion>,
240}
241
242impl IndexKrate {
243    /// The highest version as per semantic versioning specification
244    ///
245    /// Note this may be a pre-release or yanked, use [`Self::highest_normal_version`]
246    /// to filter to the highest version that is not one of those
247    #[inline]
248    pub fn highest_version(&self) -> &IndexVersion {
249        self.versions
250            .iter()
251            .max_by_key(|v| Version::parse(&v.version).ok())
252            // SAFETY: Versions inside the index will always adhere to
253            // semantic versioning. If a crate is inside the index, at
254            // least one version is available.
255            .unwrap()
256    }
257
258    /// Returns crate version with the highest version number according to semver,
259    /// but excludes pre-release and yanked versions.
260    ///
261    /// 0.x.y versions are included.
262    ///
263    /// May return `None` if the crate has only pre-release or yanked versions.
264    #[inline]
265    pub fn highest_normal_version(&self) -> Option<&IndexVersion> {
266        self.versions
267            .iter()
268            .filter_map(|v| {
269                if v.is_yanked() {
270                    return None;
271                }
272
273                v.version
274                    .parse::<Version>()
275                    .ok()
276                    .filter(|v| v.pre.is_empty())
277                    .map(|vs| (v, vs))
278            })
279            .max_by(|a, b| a.1.cmp(&b.1))
280            .map(|(v, _vs)| v)
281    }
282
283    /// The crate's unique registry name. Case-sensitive, mostly.
284    #[inline]
285    pub fn name(&self) -> &str {
286        &self.versions[0].name
287    }
288
289    /// The last release by date, even if it's yanked or less than highest version.
290    ///
291    /// See [`Self::highest_normal_version`]
292    #[inline]
293    pub fn most_recent_version(&self) -> &IndexVersion {
294        &self.versions[self.versions.len() - 1]
295    }
296
297    /// First version ever published. May be yanked.
298    ///
299    /// It is not guaranteed to be the lowest version number.
300    #[inline]
301    pub fn earliest_version(&self) -> &IndexVersion {
302        &self.versions[0]
303    }
304}
305
306impl IndexKrate {
307    /// Parse an index file with all of crate's versions.
308    ///
309    /// The file must contain at least one version.
310    #[inline]
311    pub fn new(index_path: impl AsRef<crate::Path>) -> Result<Self, Error> {
312        let lines = std::fs::read(index_path.as_ref())?;
313        Self::from_slice(&lines)
314    }
315
316    /// Parse a crate from in-memory JSON-lines data
317    #[inline]
318    pub fn from_slice(bytes: &[u8]) -> Result<Self, Error> {
319        let mut dedupe = DedupeContext::default();
320        Self::from_slice_with_context(bytes, &mut dedupe)
321    }
322
323    /// Parse a [`Self`] file from in-memory JSON data
324    pub(crate) fn from_slice_with_context(
325        mut bytes: &[u8],
326        dedupe: &mut DedupeContext,
327    ) -> Result<Self, Error> {
328        use crate::index::cache::split;
329        // Trim last newline(s) so we don't need to special case the split
330        while bytes.last() == Some(&b'\n') {
331            bytes = &bytes[..bytes.len() - 1];
332        }
333
334        let num_versions = split(bytes, b'\n').count();
335        let mut versions = Vec::with_capacity(num_versions);
336        for line in split(bytes, b'\n') {
337            let mut version: IndexVersion = serde_json::from_slice(line)?;
338
339            // Many versions have identical dependencies and features
340            dedupe.deps(&mut version.deps);
341            dedupe.features(&mut version.features);
342
343            if let Some(features2) = &mut version.features2 {
344                dedupe.features(features2);
345            }
346
347            versions.push(version);
348        }
349
350        if versions.is_empty() {
351            return Err(Error::NoCrateVersions);
352        }
353
354        Ok(Self { versions })
355    }
356
357    /// Writes this crate into a JSON-lines formatted buffer
358    ///
359    /// Note this creates its own internal [`std::io::BufWriter`], there is no
360    /// need to wrap it in your own
361    pub fn write_json_lines<W: std::io::Write>(&self, writer: &mut W) -> Result<(), Error> {
362        use std::io::{BufWriter, Write};
363
364        let mut w = BufWriter::new(writer);
365        for iv in &self.versions {
366            serde_json::to_writer(&mut w, &iv)?;
367            w.write_all(b"\n")?;
368        }
369
370        Ok(w.flush()?)
371    }
372}
373
/// The raw 32 bytes of a SHA-256 checksum, which cargo uses to verify the
/// contents of a crate's tarball
#[derive(Clone, PartialEq, Eq)]
pub struct Chksum(pub [u8; 32]);
378
379use std::fmt;
380
381impl fmt::Debug for Chksum {
382    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383        let mut hex = [0; 64];
384        let hs = crate::utils::encode_hex(&self.0, &mut hex);
385
386        f.debug_struct("Chksum").field("sha-256", &hs).finish()
387    }
388}
389
390impl fmt::Display for Chksum {
391    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
392        let mut hex = [0; 64];
393        let hs = crate::utils::encode_hex(&self.0, &mut hex);
394
395        f.write_str(hs)
396    }
397}
398
/// The ways parsing a sha-256 hex string can fail
#[derive(Debug)]
pub enum ChksumParseError {
    /// The string did not have the expected length, carries the actual length
    InvalidLength(usize),
    /// The string contained a character that is not a hex digit, carries that
    /// character
    InvalidValue(char),
}

impl std::error::Error for ChksumParseError {}

impl fmt::Display for ChksumParseError {
    /// Writes a human readable description of the parse failure
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::InvalidValue(c) => write!(f, "encountered non-hex character '{c}'"),
            Self::InvalidLength(len) => {
                write!(f, "expected string with length 64 but got length {len}")
            }
        }
    }
}
420
421impl std::str::FromStr for Chksum {
422    type Err = ChksumParseError;
423
424    fn from_str(data: &str) -> Result<Self, Self::Err> {
425        if data.len() != 64 {
426            return Err(ChksumParseError::InvalidLength(data.len()));
427        }
428
429        let mut array = [0u8; 32];
430
431        for (ind, chunk) in data.as_bytes().chunks(2).enumerate() {
432            #[inline]
433            fn parse_hex(b: u8) -> Result<u8, ChksumParseError> {
434                Ok(match b {
435                    b'A'..=b'F' => b - b'A' + 10,
436                    b'a'..=b'f' => b - b'a' + 10,
437                    b'0'..=b'9' => b - b'0',
438                    c => {
439                        return Err(ChksumParseError::InvalidValue(c as char));
440                    }
441                })
442            }
443
444            let mut cur = parse_hex(chunk[0])?;
445            cur <<= 4;
446            cur |= parse_hex(chunk[1])?;
447
448            array[ind] = cur;
449        }
450
451        Ok(Self(array))
452    }
453}
454
455impl<'de> Deserialize<'de> for Chksum {
456    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
457    where
458        D: serde::Deserializer<'de>,
459    {
460        use serde::de::Error;
461        struct HexStrVisitor;
462
463        impl<'de> serde::de::Visitor<'de> for HexStrVisitor {
464            type Value = Chksum;
465
466            fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
467                write!(f, "a hex encoded string")
468            }
469
470            fn visit_str<E: Error>(self, data: &str) -> Result<Self::Value, E> {
471                data.parse().map_err(|err| match err {
472                    ChksumParseError::InvalidLength(len) => {
473                        serde::de::Error::invalid_length(len, &"a string with 64 characters")
474                    }
475                    ChksumParseError::InvalidValue(c) => serde::de::Error::invalid_value(
476                        serde::de::Unexpected::Char(c),
477                        &"a hexadecimal character",
478                    ),
479                })
480            }
481
482            fn visit_borrowed_str<E: Error>(self, data: &'de str) -> Result<Self::Value, E> {
483                self.visit_str(data)
484            }
485        }
486
487        deserializer.deserialize_str(HexStrVisitor)
488    }
489}
490
491impl Serialize for Chksum {
492    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
493    where
494        S: serde::Serializer,
495    {
496        let mut raw = [0u8; 64];
497        let s = crate::utils::encode_hex(&self.0, &mut raw);
498        serializer.serialize_str(s)
499    }
500}
501
#[cfg(test)]
mod test {
    use super::{IndexKrate, IndexVersion};

    /// Checks the version selection helpers against a crate whose
    /// publication order differs from its semver order
    #[test]
    fn krate_versions() {
        // The tail is ordered on purpose: a release, then a later
        // pre-release entry, then a yanked release as the most recent
        let published = [
            "0.1.0",
            "0.1.1",
            "0.1.0",
            "0.2.0",
            "0.3.0",
            "0.4.0",
            "0.4.0-alpha.00",
        ];

        let mut versions: Vec<IndexVersion> = published
            .iter()
            .map(|vers| IndexVersion::fake("vers", *vers))
            .collect();

        let mut yanked = IndexVersion::fake("vers", "0.5.0");
        yanked.yanked = true;
        versions.push(yanked);

        let ik = IndexKrate { versions };

        assert_eq!(ik.earliest_version().version, "0.1.0");
        assert_eq!(ik.most_recent_version().version, "0.5.0");
        assert_eq!(ik.highest_version().version, "0.5.0");
        assert_eq!(ik.highest_normal_version().unwrap().version, "0.4.0");
    }
}