iri_string/
resolve.rs

1//! URI and IRI resolvers.
2//!
3//! # IRI resolution can fail without WHATWG URL Standard serialization
4//!
5//! ## Pure RFC 3986 algorithm
6//!
7//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
8//! Below are examples:
9//!
10//! * base=`scheme:`, ref=`.///bar`.
11//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
12//! * base=`scheme:foo`, ref=`.///bar`.
13//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
14//! * base=`scheme:`, ref=`/..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
16//! * base=`scheme:foo/bar`, ref=`..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
18//!
19//! IRI without authority (note that this is different from "with empty authority")
20//! cannot have a path starting with `//`, since it is ambiguous and can be
21//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
22//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
23//! authority, not a path.
24//!
25//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
26//! cases.
27//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
31//!
32//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
33//! failure. Currently no cases are known to fail when at least one of the base
34//! IRI or the relative IRI contains authorities.
35//!
36//! If you want this kind of abnormal IRI resolution to succeed and to be
37//! idempotent, check the resolution result using
38//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
39//!
40//! ## WHATWG serialization
41//!
42//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
43//! algorithm for this kind of result, and it makes IRI resolution (and even
44//! normalization) infallible and idempotent.
45//!
46//! IRI resolution and normalization provided by this crate automatically
47//! applies this special rule if necessary, so they are infallible. If you want
48//! to detect resolution/normalization failure, use
49//! [`Normalized::ensure_rfc3986_normalizable`] method.
50//!
51//! ## Examples
52//!
53//! ```
54//! # #[cfg(feature = "alloc")] {
55//! use iri_string::format::ToDedicatedString;
56//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
57//!
58//! let base = IriAbsoluteStr::new("scheme:")?;
59//! {
60//!     let reference = IriReferenceStr::new(".///not-a-host")?;
61//!     let result = reference.resolve_against(base);
62//!     assert!(result.ensure_rfc3986_normalizable().is_err());
63//!     assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
64//! }
65//!
66//! {
67//!     let reference2 = IriReferenceStr::new("/..//not-a-host")?;
68//!     // Resulting string will be `scheme://not-a-host`, but `not-a-host`
69//!     // should be a path segment, not a host. So, the semantically correct
70//!     // target IRI cannot be represented by RFC 3986 IRI resolution.
71//!     let result2 = reference2.resolve_against(base);
72//!     assert!(result2.ensure_rfc3986_normalizable().is_err());
73//!
74//!     // Algorithm defined in WHATWG URL Standard addresses this case.
75//!     assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
76//! }
77//! # }
78//! # Ok::<_, iri_string::validate::Error>(())
79//! ```
80
81use crate::components::RiReferenceComponents;
82use crate::normalize::{NormalizationInput, Normalized};
83use crate::spec::Spec;
84use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
85
86/// A resolver against the fixed base.
87#[derive(Debug, Clone, Copy)]
88pub struct FixedBaseResolver<'a, S: Spec> {
89    /// Components of the base IRI.
90    base_components: RiReferenceComponents<'a, S>,
91}
92
93impl<'a, S: Spec> FixedBaseResolver<'a, S> {
94    /// Creates a new resolver with the given base.
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// # use iri_string::validate::Error;
100    /// # // `ToDedicatedString` is available only when
101    /// # // `alloc` feature is enabled.
102    /// #[cfg(feature = "alloc")] {
103    /// use iri_string::format::ToDedicatedString;
104    /// use iri_string::resolve::FixedBaseResolver;
105    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
106    ///
107    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
108    /// let resolver = FixedBaseResolver::new(base);
109    ///
110    /// let reference = IriReferenceStr::new("../there")?;
111    /// let resolved = resolver.resolve(reference);
112    ///
113    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
114    /// # }
115    /// # Ok::<_, Error>(())
116    /// ```
117    #[must_use]
118    pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
119        Self {
120            base_components: RiReferenceComponents::from(base.as_ref()),
121        }
122    }
123
124    /// Returns the base.
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use iri_string::resolve::FixedBaseResolver;
130    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
131    ///
132    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
133    /// let resolver = FixedBaseResolver::new(base);
134    ///
135    /// assert_eq!(resolver.base(), base);
136    /// # Ok::<_, iri_string::validate::Error>(())
137    /// ```
138    #[must_use]
139    pub fn base(&self) -> &'a RiAbsoluteStr<S> {
140        // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
141        // and the type of `base_components` does not allow modification of the
142        // content after it is created.
143        unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) }
144    }
145}
146
147/// Components getters.
148///
149/// These getters are more efficient than calling through the result of `.base()`.
150impl<S: Spec> FixedBaseResolver<'_, S> {
151    /// Returns the scheme.
152    ///
153    /// The following colon is truncated.
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// # use iri_string::validate::Error;
159    /// use iri_string::resolve::FixedBaseResolver;
160    /// use iri_string::types::IriAbsoluteStr;
161    ///
162    /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
163    /// let resolver = FixedBaseResolver::new(base);
164    ///
165    /// assert_eq!(resolver.scheme_str(), "http");
166    /// assert_eq!(base.scheme_str(), "http");
167    /// # Ok::<_, Error>(())
168    /// ```
169    #[inline]
170    #[must_use]
171    pub fn scheme_str(&self) -> &str {
172        self.base_components
173            .scheme_str()
174            .expect("[validity] absolute IRI should have the scheme part")
175    }
176
177    /// Returns the authority.
178    ///
179    /// The leading `//` is truncated.
180    ///
181    /// # Examples
182    ///
183    /// ```
184    /// # use iri_string::validate::Error;
185    /// use iri_string::resolve::FixedBaseResolver;
186    /// use iri_string::types::IriAbsoluteStr;
187    ///
188    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
189    /// let resolver = FixedBaseResolver::new(base);
190    ///
191    /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
192    /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
193    /// # Ok::<_, Error>(())
194    /// ```
195    #[inline]
196    #[must_use]
197    pub fn authority_str(&self) -> Option<&str> {
198        self.base_components.authority_str()
199    }
200
201    /// Returns the path.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// # use iri_string::validate::Error;
207    /// use iri_string::resolve::FixedBaseResolver;
208    /// use iri_string::types::IriAbsoluteStr;
209    ///
210    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
211    /// let resolver = FixedBaseResolver::new(base);
212    ///
213    /// assert_eq!(resolver.path_str(), "/base/");
214    /// assert_eq!(base.path_str(), "/base/");
215    /// # Ok::<_, Error>(())
216    /// ```
217    #[inline]
218    #[must_use]
219    pub fn path_str(&self) -> &str {
220        self.base_components.path_str()
221    }
222
223    /// Returns the query.
224    ///
225    /// The leading question mark (`?`) is truncated.
226    ///
227    /// # Examples
228    ///
229    /// ```
230    /// # use iri_string::validate::Error;
231    /// use iri_string::resolve::FixedBaseResolver;
232    /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
233    ///
234    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
235    /// let resolver = FixedBaseResolver::new(base);
236    /// let query = IriQueryStr::new("query")?;
237    ///
238    /// assert_eq!(resolver.query(), Some(query));
239    /// assert_eq!(base.query(), Some(query));
240    /// # Ok::<_, Error>(())
241    /// ```
242    #[inline]
243    #[must_use]
244    pub fn query(&self) -> Option<&RiQueryStr<S>> {
245        let query_raw = self.query_str()?;
246        let query = RiQueryStr::new(query_raw)
247            .expect("[validity] must be valid query if present in an absolute-IRI");
248        Some(query)
249    }
250
251    /// Returns the query in a raw string slice.
252    ///
253    /// The leading question mark (`?`) is truncated.
254    ///
255    /// # Examples
256    ///
257    /// ```
258    /// # use iri_string::validate::Error;
259    /// use iri_string::resolve::FixedBaseResolver;
260    /// use iri_string::types::IriAbsoluteStr;
261    ///
262    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
263    /// let resolver = FixedBaseResolver::new(base);
264    ///
265    /// assert_eq!(resolver.query_str(), Some("query"));
266    /// assert_eq!(base.query_str(), Some("query"));
267    /// # Ok::<_, Error>(())
268    /// ```
269    #[inline]
270    #[must_use]
271    pub fn query_str(&self) -> Option<&str> {
272        self.base_components.query_str()
273    }
274}
275
276impl<'a, S: Spec> FixedBaseResolver<'a, S> {
277    /// Resolves the given reference against the fixed base.
278    ///
279    /// The task returned by this method does **not** normalize the resolution
280    /// result. However, `..` and `.` are recognized even when they are
281    /// percent-encoded.
282    ///
283    /// # Failures
284    ///
285    /// This function itself does not fail, but resolution algorithm defined by
286    /// RFC 3986 can fail. In that case, serialization algorithm defined by
287    /// WHATWG URL Standard would be automatically applied.
288    ///
289    /// See the documentation of [`Normalized`].
290    ///
291    /// # Examples
292    ///
293    /// ```
294    /// # use iri_string::validate::Error;
295    /// # // `ToDedicatedString` is available only when
296    /// # // `alloc` feature is enabled.
297    /// # #[cfg(feature = "alloc")] {
298    /// use iri_string::format::ToDedicatedString;
299    /// use iri_string::resolve::FixedBaseResolver;
300    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
301    ///
302    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
303    /// let resolver = FixedBaseResolver::new(base);
304    ///
305    /// let reference = IriReferenceStr::new("../there")?;
306    /// let resolved = resolver.resolve(reference);
307    ///
308    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
309    /// # }
310    /// # Ok::<_, Error>(())
311    /// ```
312    ///
313    /// Note that `..` and `.` path segments are recognized even when they are
314    /// percent-encoded.
315    ///
316    /// ```
317    /// # use iri_string::validate::Error;
318    /// # // `ToDedicatedString` is available only when
319    /// # // `alloc` feature is enabled.
320    /// # #[cfg(feature = "alloc")] {
321    /// use iri_string::format::ToDedicatedString;
322    /// use iri_string::resolve::FixedBaseResolver;
323    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
324    ///
325    /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
326    /// let resolver = FixedBaseResolver::new(base);
327    ///
328    /// // `%2e%2e` is recognized as `..`.
329    /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
330    /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
331    /// let resolved = resolver.resolve(reference);
332    ///
333    /// // Resolved but not normalized.
334    /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
335    /// # }
336    /// # Ok::<_, Error>(())
337    /// ```
338    #[inline]
339    #[must_use]
340    pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
341        let input = NormalizationInput::with_resolution_params(&self.base_components, reference);
342        Normalized::from_input(input)
343    }
344}