iri_string/resolve.rs
1//! URI and IRI resolvers.
2//!
3//! # IRI resolution can fail without WHATWG URL Standard serialization
4//!
5//! ## Pure RFC 3986 algorithm
6//!
7//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
8//! Below are examples:
9//!
//! * base=`scheme:`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:`, ref=`/..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//! * base=`scheme:foo/bar`, ref=`..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
18//!
19//! IRI without authority (note that this is different from "with empty authority")
20//! cannot have a path starting with `//`, since it is ambiguous and can be
21//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
22//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
23//! authority, not a path.
24//!
25//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
26//! cases.
27//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority. This would be rare in the wild, since many people would use
//! IRIs with an authority part, such as `http://...`.
31//!
32//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
33//! failure. Currently no cases are known to fail when at least one of the base
34//! IRI or the relative IRI contains authorities.
35//!
36//! If you want this kind of abnormal IRI resolution to succeed and to be
37//! idempotent, check the resolution result using
38//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
39//!
40//! ## WHATWG serialization
41//!
//! To handle IRI resolution failure, the WHATWG URL Standard defines a
//! serialization algorithm for this kind of result, which makes IRI resolution
//! (and even normalization) infallible and idempotent.
45//!
//! IRI resolution and normalization provided by this crate automatically
//! apply this special rule if necessary, so they are infallible. If you want
//! to detect resolution/normalization failure, use the
//! [`Normalized::ensure_rfc3986_normalizable`] method.
50//!
51//! ## Examples
52//!
53//! ```
54//! # #[cfg(feature = "alloc")] {
55//! use iri_string::format::ToDedicatedString;
56//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
57//!
58//! let base = IriAbsoluteStr::new("scheme:")?;
59//! {
60//! let reference = IriReferenceStr::new(".///not-a-host")?;
61//! let result = reference.resolve_against(base);
62//! assert!(result.ensure_rfc3986_normalizable().is_err());
63//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
64//! }
65//!
66//! {
67//! let reference2 = IriReferenceStr::new("/..//not-a-host")?;
68//! // Resulting string will be `scheme://not-a-host`, but `not-a-host`
69//! // should be a path segment, not a host. So, the semantically correct
70//! // target IRI cannot be represented by RFC 3986 IRI resolution.
71//! let result2 = reference2.resolve_against(base);
72//! assert!(result2.ensure_rfc3986_normalizable().is_err());
73//!
74//! // Algorithm defined in WHATWG URL Standard addresses this case.
75//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
76//! }
77//! # }
78//! # Ok::<_, iri_string::validate::Error>(())
79//! ```
80
81use crate::components::RiReferenceComponents;
82use crate::normalize::{NormalizationInput, Normalized};
83use crate::spec::Spec;
84use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
85
86/// A resolver against the fixed base.
87#[derive(Debug, Clone, Copy)]
88pub struct FixedBaseResolver<'a, S: Spec> {
89 /// Components of the base IRI.
90 base_components: RiReferenceComponents<'a, S>,
91}
92
93impl<'a, S: Spec> FixedBaseResolver<'a, S> {
94 /// Creates a new resolver with the given base.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// # use iri_string::validate::Error;
100 /// # // `ToDedicatedString` is available only when
101 /// # // `alloc` feature is enabled.
102 /// #[cfg(feature = "alloc")] {
103 /// use iri_string::format::ToDedicatedString;
104 /// use iri_string::resolve::FixedBaseResolver;
105 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
106 ///
107 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
108 /// let resolver = FixedBaseResolver::new(base);
109 ///
110 /// let reference = IriReferenceStr::new("../there")?;
111 /// let resolved = resolver.resolve(reference);
112 ///
113 /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
114 /// # }
115 /// # Ok::<_, Error>(())
116 /// ```
117 #[must_use]
118 pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
119 Self {
120 base_components: RiReferenceComponents::from(base.as_ref()),
121 }
122 }
123
124 /// Returns the base.
125 ///
126 /// # Examples
127 ///
128 /// ```
129 /// use iri_string::resolve::FixedBaseResolver;
130 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
131 ///
132 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
133 /// let resolver = FixedBaseResolver::new(base);
134 ///
135 /// assert_eq!(resolver.base(), base);
136 /// # Ok::<_, iri_string::validate::Error>(())
137 /// ```
138 #[must_use]
139 pub fn base(&self) -> &'a RiAbsoluteStr<S> {
140 // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
141 // and the type of `base_components` does not allow modification of the
142 // content after it is created.
143 unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) }
144 }
145}
146
147/// Components getters.
148///
149/// These getters are more efficient than calling through the result of `.base()`.
150impl<S: Spec> FixedBaseResolver<'_, S> {
151 /// Returns the scheme.
152 ///
153 /// The following colon is truncated.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// # use iri_string::validate::Error;
159 /// use iri_string::resolve::FixedBaseResolver;
160 /// use iri_string::types::IriAbsoluteStr;
161 ///
162 /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
163 /// let resolver = FixedBaseResolver::new(base);
164 ///
165 /// assert_eq!(resolver.scheme_str(), "http");
166 /// assert_eq!(base.scheme_str(), "http");
167 /// # Ok::<_, Error>(())
168 /// ```
169 #[inline]
170 #[must_use]
171 pub fn scheme_str(&self) -> &str {
172 self.base_components
173 .scheme_str()
174 .expect("[validity] absolute IRI should have the scheme part")
175 }
176
177 /// Returns the authority.
178 ///
179 /// The leading `//` is truncated.
180 ///
181 /// # Examples
182 ///
183 /// ```
184 /// # use iri_string::validate::Error;
185 /// use iri_string::resolve::FixedBaseResolver;
186 /// use iri_string::types::IriAbsoluteStr;
187 ///
188 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
189 /// let resolver = FixedBaseResolver::new(base);
190 ///
191 /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
192 /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
193 /// # Ok::<_, Error>(())
194 /// ```
195 #[inline]
196 #[must_use]
197 pub fn authority_str(&self) -> Option<&str> {
198 self.base_components.authority_str()
199 }
200
201 /// Returns the path.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// # use iri_string::validate::Error;
207 /// use iri_string::resolve::FixedBaseResolver;
208 /// use iri_string::types::IriAbsoluteStr;
209 ///
210 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
211 /// let resolver = FixedBaseResolver::new(base);
212 ///
213 /// assert_eq!(resolver.path_str(), "/base/");
214 /// assert_eq!(base.path_str(), "/base/");
215 /// # Ok::<_, Error>(())
216 /// ```
217 #[inline]
218 #[must_use]
219 pub fn path_str(&self) -> &str {
220 self.base_components.path_str()
221 }
222
223 /// Returns the query.
224 ///
225 /// The leading question mark (`?`) is truncated.
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// # use iri_string::validate::Error;
231 /// use iri_string::resolve::FixedBaseResolver;
232 /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
233 ///
234 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
235 /// let resolver = FixedBaseResolver::new(base);
236 /// let query = IriQueryStr::new("query")?;
237 ///
238 /// assert_eq!(resolver.query(), Some(query));
239 /// assert_eq!(base.query(), Some(query));
240 /// # Ok::<_, Error>(())
241 /// ```
242 #[inline]
243 #[must_use]
244 pub fn query(&self) -> Option<&RiQueryStr<S>> {
245 let query_raw = self.query_str()?;
246 let query = RiQueryStr::new(query_raw)
247 .expect("[validity] must be valid query if present in an absolute-IRI");
248 Some(query)
249 }
250
251 /// Returns the query in a raw string slice.
252 ///
253 /// The leading question mark (`?`) is truncated.
254 ///
255 /// # Examples
256 ///
257 /// ```
258 /// # use iri_string::validate::Error;
259 /// use iri_string::resolve::FixedBaseResolver;
260 /// use iri_string::types::IriAbsoluteStr;
261 ///
262 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
263 /// let resolver = FixedBaseResolver::new(base);
264 ///
265 /// assert_eq!(resolver.query_str(), Some("query"));
266 /// assert_eq!(base.query_str(), Some("query"));
267 /// # Ok::<_, Error>(())
268 /// ```
269 #[inline]
270 #[must_use]
271 pub fn query_str(&self) -> Option<&str> {
272 self.base_components.query_str()
273 }
274}
275
276impl<'a, S: Spec> FixedBaseResolver<'a, S> {
277 /// Resolves the given reference against the fixed base.
278 ///
279 /// The task returned by this method does **not** normalize the resolution
280 /// result. However, `..` and `.` are recognized even when they are
281 /// percent-encoded.
282 ///
283 /// # Failures
284 ///
285 /// This function itself does not fail, but resolution algorithm defined by
286 /// RFC 3986 can fail. In that case, serialization algorithm defined by
287 /// WHATWG URL Standard would be automatically applied.
288 ///
289 /// See the documentation of [`Normalized`].
290 ///
291 /// # Examples
292 ///
293 /// ```
294 /// # use iri_string::validate::Error;
295 /// # // `ToDedicatedString` is available only when
296 /// # // `alloc` feature is enabled.
297 /// # #[cfg(feature = "alloc")] {
298 /// use iri_string::format::ToDedicatedString;
299 /// use iri_string::resolve::FixedBaseResolver;
300 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
301 ///
302 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
303 /// let resolver = FixedBaseResolver::new(base);
304 ///
305 /// let reference = IriReferenceStr::new("../there")?;
306 /// let resolved = resolver.resolve(reference);
307 ///
308 /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
309 /// # }
310 /// # Ok::<_, Error>(())
311 /// ```
312 ///
313 /// Note that `..` and `.` path segments are recognized even when they are
314 /// percent-encoded.
315 ///
316 /// ```
317 /// # use iri_string::validate::Error;
318 /// # // `ToDedicatedString` is available only when
319 /// # // `alloc` feature is enabled.
320 /// # #[cfg(feature = "alloc")] {
321 /// use iri_string::format::ToDedicatedString;
322 /// use iri_string::resolve::FixedBaseResolver;
323 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
324 ///
325 /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
326 /// let resolver = FixedBaseResolver::new(base);
327 ///
328 /// // `%2e%2e` is recognized as `..`.
329 /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
330 /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
331 /// let resolved = resolver.resolve(reference);
332 ///
333 /// // Resolved but not normalized.
334 /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
335 /// # }
336 /// # Ok::<_, Error>(())
337 /// ```
338 #[inline]
339 #[must_use]
340 pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
341 let input = NormalizationInput::with_resolution_params(&self.base_components, reference);
342 Normalized::from_input(input)
343 }
344}