oci_spec/distribution/
reference.rs

1use std::fmt;
2use std::str::FromStr;
3use std::{convert::TryFrom, sync::OnceLock};
4
5use regex::{Regex, RegexBuilder};
6use serde::{Deserialize, Serialize};
7use thiserror::Error;
8
/// Maximum total number of characters allowed in a repository name.
const NAME_TOTAL_LENGTH_MAX: usize = 255;

/// Legacy Docker Hub domain; parsing normalizes it to [`DOCKER_HUB_DOMAIN`].
const DOCKER_HUB_DOMAIN_LEGACY: &str = "index.docker.io";
/// Domain assumed when a reference does not start with a registry host.
const DOCKER_HUB_DOMAIN: &str = "docker.io";
/// Namespace prepended to single-component repository names on Docker Hub.
const DOCKER_HUB_OFFICIAL_REPO_NAME: &str = "library";
/// Tag assumed when a reference carries neither a tag nor a digest.
const DEFAULT_TAG: &str = "latest";

/// Full grammar of a supported reference.
///
/// The expression is anchored at both ends and exposes three capturing groups:
/// group 1 is the name (optional domain plus path), group 2 the tag and
/// group 3 the digest. The pieces are concatenated at compile time, so the
/// resulting pattern is a single `&'static str`.
const REFERENCE_REGEXP: &str = concat!(
    // Group 1 opens here. Optional domain: dot-separated host labels, an
    // optional numeric port and the `/` that separates it from the path.
    r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])(?:(?:\.(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]))+)?(?::[0-9]+)?/)?",
    // First path component: lowercase alphanumerics joined by `.`, `_`, `__`
    // or runs of `-`.
    r"[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?",
    // Any further `/`-separated path components; group 1 closes here.
    r"(?:(?:/[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?)+)?)",
    // Group 2: optional `:tag`, at most 128 characters.
    r"(?::([\w][\w.-]{0,127}))?",
    // Group 3: optional `@algorithm:hex` digest with at least 32 hex digits.
    r"(?:@([A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}))?$",
);
19
20fn reference_regexp() -> &'static Regex {
21    static RE: OnceLock<Regex> = OnceLock::new();
22    RE.get_or_init(|| {
23        RegexBuilder::new(REFERENCE_REGEXP)
24            .size_limit(10 * (1 << 21))
25            .build()
26            .unwrap()
27    })
28}
29
30/// Reasons that parsing a string as a Reference can fail.
31#[derive(Debug, Error, PartialEq, Eq)]
32pub enum ParseError {
33    /// Will be returned if digest is ill-formed
34    #[error("invalid checksum digest format")]
35    DigestInvalidFormat,
36    /// Will be returned if digest does not have a correct lenght
37    #[error("invalid checksum digest length")]
38    DigestInvalidLength,
39    /// Will be returned for an unknown digest algorithm
40    #[error("unsupported digest algorithm")]
41    DigestUnsupported,
42    /// Will be returned for an uppercase character in repository name
43    #[error("repository name must be lowercase")]
44    NameContainsUppercase,
45    /// Will be returned if a name is empty
46    #[error("repository name must have at least one component")]
47    NameEmpty,
48    /// Will be returned if a name is too long
49    #[error("repository name must not be more than {NAME_TOTAL_LENGTH_MAX} characters")]
50    NameTooLong,
51    /// Will be returned if a reference is ill-formed
52    #[error("invalid reference format")]
53    ReferenceInvalidFormat,
54    /// Will be returned if a tag is ill-formed
55    #[error("invalid tag format")]
56    TagInvalidFormat,
57}
58
59/// Reference provides a general type to represent any way of referencing images within an OCI registry.
60///
61/// # Examples
62///
63/// Parsing a tagged image reference:
64///
65/// ```
66/// use oci_spec::distribution::Reference;
67///
68/// let reference: Reference = "docker.io/library/hello-world:latest".parse().unwrap();
69///
70/// assert_eq!("docker.io/library/hello-world:latest", reference.whole().as_str());
71/// assert_eq!("docker.io", reference.registry());
72/// assert_eq!("library/hello-world", reference.repository());
73/// assert_eq!(Some("latest"), reference.tag());
74/// assert_eq!(None, reference.digest());
75/// ```
76#[derive(Clone, Hash, PartialEq, Eq, Debug, Serialize, Deserialize)]
77pub struct Reference {
78    registry: String,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mirror_registry: Option<String>,
81    repository: String,
82    #[serde(skip_serializing_if = "Option::is_none")]
83    tag: Option<String>,
84    #[serde(skip_serializing_if = "Option::is_none")]
85    digest: Option<String>,
86}
87
88impl Reference {
89    /// Create a Reference with a registry, repository and tag.
90    pub fn with_tag(registry: String, repository: String, tag: String) -> Self {
91        Self {
92            registry,
93            mirror_registry: None,
94            repository,
95            tag: Some(tag),
96            digest: None,
97        }
98    }
99
100    /// Create a Reference with a registry, repository and digest.
101    pub fn with_digest(registry: String, repository: String, digest: String) -> Self {
102        Self {
103            registry,
104            mirror_registry: None,
105            repository,
106            tag: None,
107            digest: Some(digest),
108        }
109    }
110
111    /// Clone the Reference for the same image with a new digest.
112    pub fn clone_with_digest(&self, digest: String) -> Self {
113        Self {
114            registry: self.registry.clone(),
115            mirror_registry: self.mirror_registry.clone(),
116            repository: self.repository.clone(),
117            tag: None,
118            digest: Some(digest),
119        }
120    }
121
122    /// Set a pull mirror registry for this reference.
123    ///
124    /// The mirror registry will be used to resolve the image, the original registry
125    /// is available via the [`Reference::namespace`] function.
126    ///
127    /// The original registry will be sent with the `ns` query parameter to the mirror registry.
128    /// The `ns` query parameter is currently not part of the stable OCI Distribution Spec yet,
129    /// but is being discussed to be added and is already used by some other implementations
130    /// (for example containerd). So be aware that this feature might not work with all registries.
131    ///
132    /// Since this is not part of the stable OCI Distribution Spec yet, this feature is exempt from
133    /// semver backwards compatibility guarantees and might change in the future.
134    #[doc(hidden)]
135    pub fn set_mirror_registry(&mut self, registry: String) {
136        self.mirror_registry = Some(registry);
137    }
138
139    /// Resolve the registry address of a given `Reference`.
140    ///
141    /// Some registries, such as docker.io, uses a different address for the actual
142    /// registry. This function implements such redirection.
143    ///
144    /// If a mirror registry is set, it will be used instead of the original registry.
145    pub fn resolve_registry(&self) -> &str {
146        match (self.registry(), self.mirror_registry.as_deref()) {
147            (_, Some(mirror_registry)) => mirror_registry,
148            ("docker.io", None) => "index.docker.io",
149            (registry, None) => registry,
150        }
151    }
152
153    /// Returns the name of the registry.
154    pub fn registry(&self) -> &str {
155        &self.registry
156    }
157
158    /// Returns the name of the repository.
159    pub fn repository(&self) -> &str {
160        &self.repository
161    }
162
163    /// Returns the object's tag, if present.
164    pub fn tag(&self) -> Option<&str> {
165        self.tag.as_deref()
166    }
167
168    /// Returns the object's digest, if present.
169    pub fn digest(&self) -> Option<&str> {
170        self.digest.as_deref()
171    }
172
173    /// Returns the original registry when pulled via a mirror.
174    ///
175    /// Since this is not part of the stable OCI Distribution Spec yet, this feature is exempt from
176    /// semver backwards compatibility guarantees and might change in the future.
177    #[doc(hidden)]
178    pub fn namespace(&self) -> Option<&str> {
179        if self.mirror_registry.is_some() {
180            Some(self.registry())
181        } else {
182            None
183        }
184    }
185
186    /// Returns the full repository name and path.
187    fn full_name(&self) -> String {
188        if self.registry() == "" {
189            self.repository().to_string()
190        } else {
191            format!("{}/{}", self.registry(), self.repository())
192        }
193    }
194
195    /// Returns the whole reference.
196    pub fn whole(&self) -> String {
197        let mut s = self.full_name();
198        if let Some(t) = self.tag() {
199            if !s.is_empty() {
200                s.push(':');
201            }
202            s.push_str(t);
203        }
204        if let Some(d) = self.digest() {
205            if !s.is_empty() {
206                s.push('@');
207            }
208            s.push_str(d);
209        }
210        s
211    }
212}
213
214impl fmt::Display for Reference {
215    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
216        write!(f, "{}", self.whole())
217    }
218}
219
220impl FromStr for Reference {
221    type Err = ParseError;
222
223    fn from_str(s: &str) -> Result<Self, Self::Err> {
224        Reference::try_from(s)
225    }
226}
227
228impl TryFrom<String> for Reference {
229    type Error = ParseError;
230
231    fn try_from(s: String) -> Result<Self, Self::Error> {
232        if s.is_empty() {
233            return Err(ParseError::NameEmpty);
234        }
235        let captures = match reference_regexp().captures(&s) {
236            Some(caps) => caps,
237            None => {
238                return Err(ParseError::ReferenceInvalidFormat);
239            }
240        };
241        let name = &captures[1];
242        let mut tag = captures.get(2).map(|m| m.as_str().to_owned());
243        let digest = captures.get(3).map(|m| m.as_str().to_owned());
244        if tag.is_none() && digest.is_none() {
245            tag = Some(DEFAULT_TAG.into());
246        }
247        let (registry, repository) = split_domain(name);
248        let reference = Reference {
249            registry,
250            mirror_registry: None,
251            repository,
252            tag,
253            digest,
254        };
255        if reference.repository().len() > NAME_TOTAL_LENGTH_MAX {
256            return Err(ParseError::NameTooLong);
257        }
258        // Digests much always be hex-encoded, ensuring that their hex portion will always be
259        // size*2
260        if let Some(digest) = reference.digest() {
261            match digest.split_once(':') {
262                None => return Err(ParseError::DigestInvalidFormat),
263                Some(("sha256", digest)) => {
264                    if digest.len() != 64 {
265                        return Err(ParseError::DigestInvalidLength);
266                    }
267                }
268                Some(("sha384", digest)) => {
269                    if digest.len() != 96 {
270                        return Err(ParseError::DigestInvalidLength);
271                    }
272                }
273                Some(("sha512", digest)) => {
274                    if digest.len() != 128 {
275                        return Err(ParseError::DigestInvalidLength);
276                    }
277                }
278                Some((_, _)) => return Err(ParseError::DigestUnsupported),
279            }
280        }
281        Ok(reference)
282    }
283}
284
285impl TryFrom<&str> for Reference {
286    type Error = ParseError;
287    fn try_from(string: &str) -> Result<Self, Self::Error> {
288        TryFrom::try_from(string.to_owned())
289    }
290}
291
292impl From<Reference> for String {
293    fn from(reference: Reference) -> Self {
294        reference.whole()
295    }
296}
297
298/// Splits a repository name to domain and remotename string.
299/// If no valid domain is found, the default domain is used. Repository name
300/// needs to be already validated before.
301///
302/// This function is a Rust rewrite of the official Go code used by Docker:
303/// https://github.com/distribution/distribution/blob/41a0452eea12416aaf01bceb02a924871e964c67/reference/normalize.go#L87-L104
304fn split_domain(name: &str) -> (String, String) {
305    let mut domain: String;
306    let mut remainder: String;
307
308    match name.split_once('/') {
309        None => {
310            domain = DOCKER_HUB_DOMAIN.into();
311            remainder = name.into();
312        }
313        Some((left, right)) => {
314            if !(left.contains('.') || left.contains(':')) && left != "localhost" {
315                domain = DOCKER_HUB_DOMAIN.into();
316                remainder = name.into();
317            } else {
318                domain = left.into();
319                remainder = right.into();
320            }
321        }
322    }
323    if domain == DOCKER_HUB_DOMAIN_LEGACY {
324        domain = DOCKER_HUB_DOMAIN.into();
325    }
326    if domain == DOCKER_HUB_DOMAIN && !remainder.contains('/') {
327        remainder = format!("{}/{}", DOCKER_HUB_OFFICIAL_REPO_NAME, remainder);
328    }
329
330    (domain, remainder)
331}
332
#[cfg(test)]
mod test {
    use super::*;

    /// Table-driven tests for parsing references and resolving registries.
    mod parse {
        use super::*;
        use rstest::rstest;

        /// Every input must parse and expose exactly the expected components.
        #[rstest(input, registry, repository, tag, digest, whole,
            case("busybox", "docker.io", "library/busybox", Some("latest"), None, "docker.io/library/busybox:latest"),
            case("test.com:tag", "docker.io", "library/test.com", Some("tag"), None, "docker.io/library/test.com:tag"),
            case("test.com:5000", "docker.io", "library/test.com", Some("5000"), None, "docker.io/library/test.com:5000"),
            case("test.com/repo:tag", "test.com", "repo", Some("tag"), None, "test.com/repo:tag"),
            case("test:5000/repo", "test:5000", "repo", Some("latest"), None, "test:5000/repo:latest"),
            case("test:5000/repo:tag", "test:5000", "repo", Some("tag"), None, "test:5000/repo:tag"),
            case("test:5000/repo@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "test:5000", "repo", None, Some("sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "test:5000/repo@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("test:5000/repo:tag@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "test:5000", "repo", Some("tag"), Some("sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "test:5000/repo:tag@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("lowercase:Uppercase", "docker.io", "library/lowercase", Some("Uppercase"), None, "docker.io/library/lowercase:Uppercase"),
            case("sub-dom1.foo.com/bar/baz/quux", "sub-dom1.foo.com", "bar/baz/quux", Some("latest"), None, "sub-dom1.foo.com/bar/baz/quux:latest"),
            case("sub-dom1.foo.com/bar/baz/quux:some-long-tag", "sub-dom1.foo.com", "bar/baz/quux", Some("some-long-tag"), None, "sub-dom1.foo.com/bar/baz/quux:some-long-tag"),
            case("b.gcr.io/test.example.com/my-app:test.example.com", "b.gcr.io", "test.example.com/my-app", Some("test.example.com"), None, "b.gcr.io/test.example.com/my-app:test.example.com"),
            // ☃.com in punycode
            case("xn--n3h.com/myimage:xn--n3h.com", "xn--n3h.com", "myimage", Some("xn--n3h.com"), None, "xn--n3h.com/myimage:xn--n3h.com"),
            // 🐳.com in punycode
            case("xn--7o8h.com/myimage:xn--7o8h.com@sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "xn--7o8h.com", "myimage", Some("xn--7o8h.com"), Some("sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "xn--7o8h.com/myimage:xn--7o8h.com@sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("foo_bar.com:8080", "docker.io", "library/foo_bar.com", Some("8080"), None, "docker.io/library/foo_bar.com:8080" ),
            case("foo/foo_bar.com:8080", "docker.io", "foo/foo_bar.com", Some("8080"), None, "docker.io/foo/foo_bar.com:8080"),
            case("opensuse/leap:15.3", "docker.io", "opensuse/leap", Some("15.3"), None, "docker.io/opensuse/leap:15.3"),
        )]
        fn parse_good_reference(
            input: &str,
            registry: &str,
            repository: &str,
            tag: Option<&str>,
            digest: Option<&str>,
            whole: &str,
        ) {
            println!("input: {}", input);
            let parsed = Reference::try_from(input).expect("could not parse reference");
            println!("{} -> {:?}", input, parsed);

            assert_eq!(registry, parsed.registry());
            assert_eq!(repository, parsed.repository());
            assert_eq!(tag, parsed.tag());
            assert_eq!(digest, parsed.digest());
            assert_eq!(whole, parsed.whole());
        }

        /// Every input must be rejected with exactly the expected error.
        #[rstest(input, err,
            case("", ParseError::NameEmpty),
            case(":justtag", ParseError::ReferenceInvalidFormat),
            case("@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::ReferenceInvalidFormat),
            case("repo@sha256:ffffffffffffffffffffffffffffffffff", ParseError::DigestInvalidLength),
            case("validname@invaliddigest:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::DigestUnsupported),
            // FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
            case("Uppercase:tag", ParseError::ReferenceInvalidFormat),
            // FIXME: "Uppercase" is incorrectly handled as a domain-name here, and therefore passes.
            // https://github.com/docker/distribution/blob/master/reference/reference_test.go#L104-L109
            // case("Uppercase/lowercase:tag", ParseError::NameContainsUppercase),
            // FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
            case("test:5000/Uppercase/lowercase:tag", ParseError::ReferenceInvalidFormat),
            case("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ParseError::NameTooLong),
            case("aa/asdf$$^/aa", ParseError::ReferenceInvalidFormat)
        )]
        fn parse_bad_reference(input: &str, err: ParseError) {
            let failure = Reference::try_from(input).unwrap_err();
            assert_eq!(failure, err)
        }

        /// Checks registry resolution before and after a mirror registry is set.
        #[rstest(
            input,
            registry,
            resolved_registry,
            whole,
            case(
                "busybox",
                "docker.io",
                "index.docker.io",
                "docker.io/library/busybox:latest"
            ),
            case("test.com/repo:tag", "test.com", "test.com", "test.com/repo:tag"),
            case("test:5000/repo", "test:5000", "test:5000", "test:5000/repo:latest"),
            case(
                "sub-dom1.foo.com/bar/baz/quux",
                "sub-dom1.foo.com",
                "sub-dom1.foo.com",
                "sub-dom1.foo.com/bar/baz/quux:latest"
            ),
            case(
                "b.gcr.io/test.example.com/my-app:test.example.com",
                "b.gcr.io",
                "b.gcr.io",
                "b.gcr.io/test.example.com/my-app:test.example.com"
            )
        )]
        fn test_mirror_registry(input: &str, registry: &str, resolved_registry: &str, whole: &str) {
            let mut parsed = Reference::try_from(input).expect("could not parse reference");

            // Without a mirror the registry resolves normally and no namespace is reported.
            assert_eq!(resolved_registry, parsed.resolve_registry());
            assert_eq!(registry, parsed.registry());
            assert_eq!(None, parsed.namespace());
            assert_eq!(whole, parsed.whole());

            // With a mirror only resolution and the namespace change.
            parsed.set_mirror_registry("docker.mirror.io".to_owned());
            assert_eq!("docker.mirror.io", parsed.resolve_registry());
            assert_eq!(registry, parsed.registry());
            assert_eq!(Some(registry), parsed.namespace());
            assert_eq!(whole, parsed.whole());
        }
    }
}