iri_string/
percent_encode.rs

1//! Percent encoding.
2
3use core::fmt::{self, Write as _};
4use core::marker::PhantomData;
5
6use crate::parser::char;
7use crate::spec::{IriSpec, Spec, UriSpec};
8
/// A proxy to percent-encode a string as a part of a URI.
///
/// This is [`PercentEncoded`] with the spec parameter fixed to [`UriSpec`].
pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;
11
/// A proxy to percent-encode a string as a part of an IRI.
///
/// This is [`PercentEncoded`] with the spec parameter fixed to [`IriSpec`].
pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
14
/// Context for percent encoding.
///
/// The context selects which characters are written as is and which ones are
/// replaced by percent-encoded triplets when the proxy is formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
    /// Encode the string as a reg-name (usually called a "hostname").
    RegName,
    /// Encode the string as a user name or a password (inside the `userinfo` component).
    ///
    /// A colon (`:`) will always be encoded in this context.
    UserOrPassword,
    /// Encode the string as a path segment.
    ///
    /// A slash (`/`) will be encoded to `%2F`.
    PathSegment,
    /// Encode the string as path segments joined with `/`.
    ///
    /// A slash (`/`) will be used as is.
    Path,
    /// Encode the string as a query string (without the `?` prefix).
    Query,
    /// Encode the string as a fragment string (without the `#` prefix).
    Fragment,
    /// Encode all characters except for `unreserved` characters.
    Unreserve,
    /// Encode characters only if they cannot appear anywhere in an IRI reference.
    ///
    /// `%` character will be always encoded.
    Character,
}
42
/// A proxy to percent-encode a string.
///
/// The proxy only stores the raw value and the encoding context; the encoded
/// text is produced when the proxy is formatted through [`core::fmt::Display`].
///
/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
/// You can use them to make the expression simpler, for example write
/// `PercentEncodedForUri::from_path(foo)` instead of
/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
#[derive(Debug, Clone, Copy)]
pub struct PercentEncoded<T, S> {
    /// Source string context.
    context: Context,
    /// Raw string before being encoded.
    raw: T,
    /// Spec.
    ///
    /// Only a marker: no value of type `S` is stored.
    _spec: PhantomData<fn() -> S>,
}
58
59impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
60    /// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
61    ///
62    /// # Examples
63    ///
64    /// ```
65    /// # #[cfg(feature = "alloc")] {
66    /// use iri_string::percent_encode::PercentEncoded;
67    /// use iri_string::spec::UriSpec;
68    ///
69    /// let raw = "alpha.\u{03B1}.example.com";
70    /// let encoded = "alpha.%CE%B1.example.com";
71    /// assert_eq!(
72    ///     PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
73    ///     encoded
74    /// );
75    /// # }
76    /// ```
77    pub fn from_reg_name(raw: T) -> Self {
78        Self {
79            context: Context::RegName,
80            raw,
81            _spec: PhantomData,
82        }
83    }
84
85    /// Creates an encoded string from a raw user name (inside `userinfo` component).
86    ///
87    /// # Examples
88    ///
89    /// ```
90    /// # #[cfg(feature = "alloc")] {
91    /// use iri_string::percent_encode::PercentEncoded;
92    /// use iri_string::spec::UriSpec;
93    ///
94    /// let raw = "user:\u{03B1}";
95    /// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
96    /// let encoded = "user%3A%CE%B1";
97    /// assert_eq!(
98    ///     PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
99    ///     encoded
100    /// );
101    /// # }
102    /// ```
103    pub fn from_user(raw: T) -> Self {
104        Self {
105            context: Context::UserOrPassword,
106            raw,
107            _spec: PhantomData,
108        }
109    }
110
111    /// Creates an encoded string from a raw user name (inside `userinfo` component).
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// # #[cfg(feature = "alloc")] {
117    /// use iri_string::percent_encode::PercentEncoded;
118    /// use iri_string::spec::UriSpec;
119    ///
120    /// let raw = "password:\u{03B1}";
121    /// // The first `:` will be interpreted as a delimiter, and the colon
122    /// // inside the password will be the first one if the user name is empty,
123    /// // so colons will be escaped.
124    /// let encoded = "password%3A%CE%B1";
125    /// assert_eq!(
126    ///     PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
127    ///     encoded
128    /// );
129    /// # }
130    /// ```
131    pub fn from_password(raw: T) -> Self {
132        Self {
133            context: Context::UserOrPassword,
134            raw,
135            _spec: PhantomData,
136        }
137    }
138
139    /// Creates an encoded string from a raw path segment.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// # #[cfg(feature = "alloc")] {
145    /// use iri_string::percent_encode::PercentEncoded;
146    /// use iri_string::spec::UriSpec;
147    ///
148    /// let raw = "alpha/\u{03B1}?#";
149    /// // Note that `/` is encoded to `%2F`.
150    /// let encoded = "alpha%2F%CE%B1%3F%23";
151    /// assert_eq!(
152    ///     PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
153    ///     encoded
154    /// );
155    /// # }
156    /// ```
157    pub fn from_path_segment(raw: T) -> Self {
158        Self {
159            context: Context::PathSegment,
160            raw,
161            _spec: PhantomData,
162        }
163    }
164
165    /// Creates an encoded string from a raw path.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// # #[cfg(feature = "alloc")] {
171    /// use iri_string::percent_encode::PercentEncoded;
172    /// use iri_string::spec::UriSpec;
173    ///
174    /// let raw = "alpha/\u{03B1}?#";
175    /// // Note that `/` is NOT percent encoded.
176    /// let encoded = "alpha/%CE%B1%3F%23";
177    /// assert_eq!(
178    ///     PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
179    ///     encoded
180    /// );
181    /// # }
182    /// ```
183    pub fn from_path(raw: T) -> Self {
184        Self {
185            context: Context::Path,
186            raw,
187            _spec: PhantomData,
188        }
189    }
190
191    /// Creates an encoded string from a raw query.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// # #[cfg(feature = "alloc")] {
197    /// use iri_string::percent_encode::PercentEncoded;
198    /// use iri_string::spec::UriSpec;
199    ///
200    /// let raw = "alpha/\u{03B1}?#";
201    /// let encoded = "alpha/%CE%B1?%23";
202    /// assert_eq!(
203    ///     PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
204    ///     encoded
205    /// );
206    /// # }
207    /// ```
208    pub fn from_query(raw: T) -> Self {
209        Self {
210            context: Context::Query,
211            raw,
212            _spec: PhantomData,
213        }
214    }
215
216    /// Creates an encoded string from a raw fragment.
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// # #[cfg(feature = "alloc")] {
222    /// use iri_string::percent_encode::PercentEncoded;
223    /// use iri_string::spec::UriSpec;
224    ///
225    /// let raw = "alpha/\u{03B1}?#";
226    /// let encoded = "alpha/%CE%B1?%23";
227    /// assert_eq!(
228    ///     PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
229    ///     encoded
230    /// );
231    /// # }
232    /// ```
233    pub fn from_fragment(raw: T) -> Self {
234        Self {
235            context: Context::Fragment,
236            raw,
237            _spec: PhantomData,
238        }
239    }
240
241    /// Creates a string consists of only `unreserved` string and percent-encoded triplets.
242    ///
243    /// # Examples
244    ///
245    /// ```
246    /// # #[cfg(feature = "alloc")] {
247    /// use iri_string::percent_encode::PercentEncoded;
248    /// use iri_string::spec::UriSpec;
249    ///
250    /// let unreserved = "%a0-._~\u{03B1}";
251    /// let unreserved_encoded = "%25a0-._~%CE%B1";
252    /// assert_eq!(
253    ///     PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
254    ///     unreserved_encoded
255    /// );
256    ///
257    /// let reserved = ":/?#[]@ !$&'()*+,;=";
258    /// let reserved_encoded =
259    ///     "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
260    /// assert_eq!(
261    ///     PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
262    ///     reserved_encoded
263    /// );
264    /// # }
265    /// ```
266    #[inline]
267    #[must_use]
268    pub fn unreserve(raw: T) -> Self {
269        Self {
270            context: Context::Unreserve,
271            raw,
272            _spec: PhantomData,
273        }
274    }
275
276    /// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
277    ///
278    /// `%` character will be always encoded. In other words, this conversion
279    /// is not aware of percent-encoded triplets.
280    ///
281    /// Note that this encoding process does not guarantee that the resulting
282    /// string is a valid IRI reference.
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// # #[cfg(feature = "alloc")] {
288    /// use iri_string::percent_encode::PercentEncoded;
289    /// use iri_string::spec::UriSpec;
290    ///
291    /// let unreserved = "%a0-._~\u{03B1}";
292    /// let unreserved_encoded = "%25a0-._~%CE%B1";
293    /// assert_eq!(
294    ///     PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
295    ///     unreserved_encoded
296    /// );
297    ///
298    /// let reserved = ":/?#[]@ !$&'()*+,;=";
299    /// // Note that `%20` cannot appear directly in an IRI reference.
300    /// let expected = ":/?#[]@%20!$&'()*+,;=";
301    /// assert_eq!(
302    ///     PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
303    ///     expected
304    /// );
305    /// # }
306    /// ```
307    #[inline]
308    #[must_use]
309    pub fn characters(raw: T) -> Self {
310        Self {
311            context: Context::Character,
312            raw,
313            _spec: PhantomData,
314        }
315    }
316}
317
318impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
319    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
320        /// Filter that encodes a character before written if necessary.
321        struct Filter<'a, 'b, S> {
322            /// Encoding context.
323            context: Context,
324            /// Writer.
325            writer: &'a mut fmt::Formatter<'b>,
326            /// Spec.
327            _spec: PhantomData<fn() -> S>,
328        }
329        impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
330            fn write_str(&mut self, s: &str) -> fmt::Result {
331                s.chars().try_for_each(|c| self.write_char(c))
332            }
333            fn write_char(&mut self, c: char) -> fmt::Result {
334                let is_valid_char = match (self.context, c.is_ascii()) {
335                    (Context::RegName, true) => char::is_ascii_regname(c as u8),
336                    (Context::RegName, false) => char::is_nonascii_regname::<S>(c),
337                    (Context::UserOrPassword, true) => {
338                        c != ':' && char::is_ascii_userinfo_ipvfutureaddr(c as u8)
339                    }
340                    (Context::UserOrPassword, false) => char::is_nonascii_userinfo::<S>(c),
341                    (Context::PathSegment, true) => char::is_ascii_pchar(c as u8),
342                    (Context::PathSegment, false) => S::is_nonascii_char_unreserved(c),
343                    (Context::Path, true) => c == '/' || char::is_ascii_pchar(c as u8),
344                    (Context::Path, false) => S::is_nonascii_char_unreserved(c),
345                    (Context::Query, true) => c == '/' || char::is_ascii_frag_query(c as u8),
346                    (Context::Query, false) => char::is_nonascii_query::<S>(c),
347                    (Context::Fragment, true) => c == '/' || char::is_ascii_frag_query(c as u8),
348                    (Context::Fragment, false) => char::is_nonascii_fragment::<S>(c),
349                    (Context::Unreserve, true) => char::is_ascii_unreserved(c as u8),
350                    (Context::Unreserve, false) => S::is_nonascii_char_unreserved(c),
351                    (Context::Character, true) => char::is_ascii_unreserved_or_reserved(c as u8),
352                    (Context::Character, false) => {
353                        S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
354                    }
355                };
356                if is_valid_char {
357                    self.writer.write_char(c)
358                } else {
359                    write_pct_encoded_char(&mut self.writer, c)
360                }
361            }
362        }
363        let mut filter = Filter {
364            context: self.context,
365            writer: f,
366            _spec: PhantomData::<fn() -> S>,
367        };
368        write!(filter, "{}", self.raw)
369    }
370}
371
/// Percent-encodes the given character and writes it.
///
/// The character is encoded to UTF-8 first, and every resulting byte is
/// written as a `%XX` triplet with uppercase hexadecimal digits.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` occupies at most four bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for byte in c.encode_utf8(&mut utf8).bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}