iri_string/percent_encode.rs
1//! Percent encoding.
2
3use core::fmt::{self, Write as _};
4use core::marker::PhantomData;
5
6use crate::parser::char;
7use crate::spec::{IriSpec, Spec, UriSpec};
8
/// A proxy to percent-encode a string as a part of a URI.
///
/// This is [`PercentEncoded`] with the spec parameter fixed to [`UriSpec`].
pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;

/// A proxy to percent-encode a string as a part of an IRI.
///
/// This is [`PercentEncoded`] with the spec parameter fixed to [`IriSpec`].
pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
14
/// Context for percent encoding.
///
/// The context selects which characters are written as is and which ones are
/// replaced by percent-encoded triplets when a [`PercentEncoded`] is formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
    /// Encode the string as a reg-name (usually called a "hostname").
    RegName,
    /// Encode the string as a user name or a password (inside the `userinfo` component).
    ///
    /// A colon (`:`) will always be encoded, since the first colon inside
    /// `userinfo` is interpreted as the delimiter.
    UserOrPassword,
    /// Encode the string as a path segment.
    ///
    /// A slash (`/`) will be encoded to `%2F`.
    PathSegment,
    /// Encode the string as path segments joined with `/`.
    ///
    /// A slash (`/`) will be used as is.
    Path,
    /// Encode the string as a query string (without the `?` prefix).
    ///
    /// A slash (`/`) will be used as is.
    Query,
    /// Encode the string as a fragment string (without the `#` prefix).
    ///
    /// A slash (`/`) will be used as is.
    Fragment,
    /// Encode all characters except for `unreserved` characters.
    Unreserve,
    /// Encode characters only if they cannot appear anywhere in an IRI reference.
    ///
    /// `%` character will be always encoded.
    Character,
}
42
/// A proxy to percent-encode a string.
///
/// The value only stores the raw source and the encoding context; the
/// percent-encoded text is produced when the value is formatted through its
/// [`core::fmt::Display`] implementation.
///
/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
/// You can use them to make the expression simpler, for example write
/// `PercentEncodedForUri::from_path(foo)` instead of
/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
#[derive(Debug, Clone, Copy)]
pub struct PercentEncoded<T, S> {
    /// Source string context.
    context: Context,
    /// Raw string before being encoded.
    raw: T,
    /// Spec.
    ///
    /// Only a marker: no value of type `S` is stored.
    _spec: PhantomData<fn() -> S>,
}
58
59impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
60 /// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
61 ///
62 /// # Examples
63 ///
64 /// ```
65 /// # #[cfg(feature = "alloc")] {
66 /// use iri_string::percent_encode::PercentEncoded;
67 /// use iri_string::spec::UriSpec;
68 ///
69 /// let raw = "alpha.\u{03B1}.example.com";
70 /// let encoded = "alpha.%CE%B1.example.com";
71 /// assert_eq!(
72 /// PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
73 /// encoded
74 /// );
75 /// # }
76 /// ```
77 pub fn from_reg_name(raw: T) -> Self {
78 Self {
79 context: Context::RegName,
80 raw,
81 _spec: PhantomData,
82 }
83 }
84
85 /// Creates an encoded string from a raw user name (inside `userinfo` component).
86 ///
87 /// # Examples
88 ///
89 /// ```
90 /// # #[cfg(feature = "alloc")] {
91 /// use iri_string::percent_encode::PercentEncoded;
92 /// use iri_string::spec::UriSpec;
93 ///
94 /// let raw = "user:\u{03B1}";
95 /// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
96 /// let encoded = "user%3A%CE%B1";
97 /// assert_eq!(
98 /// PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
99 /// encoded
100 /// );
101 /// # }
102 /// ```
103 pub fn from_user(raw: T) -> Self {
104 Self {
105 context: Context::UserOrPassword,
106 raw,
107 _spec: PhantomData,
108 }
109 }
110
111 /// Creates an encoded string from a raw user name (inside `userinfo` component).
112 ///
113 /// # Examples
114 ///
115 /// ```
116 /// # #[cfg(feature = "alloc")] {
117 /// use iri_string::percent_encode::PercentEncoded;
118 /// use iri_string::spec::UriSpec;
119 ///
120 /// let raw = "password:\u{03B1}";
121 /// // The first `:` will be interpreted as a delimiter, and the colon
122 /// // inside the password will be the first one if the user name is empty,
123 /// // so colons will be escaped.
124 /// let encoded = "password%3A%CE%B1";
125 /// assert_eq!(
126 /// PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
127 /// encoded
128 /// );
129 /// # }
130 /// ```
131 pub fn from_password(raw: T) -> Self {
132 Self {
133 context: Context::UserOrPassword,
134 raw,
135 _spec: PhantomData,
136 }
137 }
138
139 /// Creates an encoded string from a raw path segment.
140 ///
141 /// # Examples
142 ///
143 /// ```
144 /// # #[cfg(feature = "alloc")] {
145 /// use iri_string::percent_encode::PercentEncoded;
146 /// use iri_string::spec::UriSpec;
147 ///
148 /// let raw = "alpha/\u{03B1}?#";
149 /// // Note that `/` is encoded to `%2F`.
150 /// let encoded = "alpha%2F%CE%B1%3F%23";
151 /// assert_eq!(
152 /// PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
153 /// encoded
154 /// );
155 /// # }
156 /// ```
157 pub fn from_path_segment(raw: T) -> Self {
158 Self {
159 context: Context::PathSegment,
160 raw,
161 _spec: PhantomData,
162 }
163 }
164
165 /// Creates an encoded string from a raw path.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// # #[cfg(feature = "alloc")] {
171 /// use iri_string::percent_encode::PercentEncoded;
172 /// use iri_string::spec::UriSpec;
173 ///
174 /// let raw = "alpha/\u{03B1}?#";
175 /// // Note that `/` is NOT percent encoded.
176 /// let encoded = "alpha/%CE%B1%3F%23";
177 /// assert_eq!(
178 /// PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
179 /// encoded
180 /// );
181 /// # }
182 /// ```
183 pub fn from_path(raw: T) -> Self {
184 Self {
185 context: Context::Path,
186 raw,
187 _spec: PhantomData,
188 }
189 }
190
191 /// Creates an encoded string from a raw query.
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// # #[cfg(feature = "alloc")] {
197 /// use iri_string::percent_encode::PercentEncoded;
198 /// use iri_string::spec::UriSpec;
199 ///
200 /// let raw = "alpha/\u{03B1}?#";
201 /// let encoded = "alpha/%CE%B1?%23";
202 /// assert_eq!(
203 /// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
204 /// encoded
205 /// );
206 /// # }
207 /// ```
208 pub fn from_query(raw: T) -> Self {
209 Self {
210 context: Context::Query,
211 raw,
212 _spec: PhantomData,
213 }
214 }
215
216 /// Creates an encoded string from a raw fragment.
217 ///
218 /// # Examples
219 ///
220 /// ```
221 /// # #[cfg(feature = "alloc")] {
222 /// use iri_string::percent_encode::PercentEncoded;
223 /// use iri_string::spec::UriSpec;
224 ///
225 /// let raw = "alpha/\u{03B1}?#";
226 /// let encoded = "alpha/%CE%B1?%23";
227 /// assert_eq!(
228 /// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
229 /// encoded
230 /// );
231 /// # }
232 /// ```
233 pub fn from_fragment(raw: T) -> Self {
234 Self {
235 context: Context::Fragment,
236 raw,
237 _spec: PhantomData,
238 }
239 }
240
241 /// Creates a string consists of only `unreserved` string and percent-encoded triplets.
242 ///
243 /// # Examples
244 ///
245 /// ```
246 /// # #[cfg(feature = "alloc")] {
247 /// use iri_string::percent_encode::PercentEncoded;
248 /// use iri_string::spec::UriSpec;
249 ///
250 /// let unreserved = "%a0-._~\u{03B1}";
251 /// let unreserved_encoded = "%25a0-._~%CE%B1";
252 /// assert_eq!(
253 /// PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
254 /// unreserved_encoded
255 /// );
256 ///
257 /// let reserved = ":/?#[]@ !$&'()*+,;=";
258 /// let reserved_encoded =
259 /// "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
260 /// assert_eq!(
261 /// PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
262 /// reserved_encoded
263 /// );
264 /// # }
265 /// ```
266 #[inline]
267 #[must_use]
268 pub fn unreserve(raw: T) -> Self {
269 Self {
270 context: Context::Unreserve,
271 raw,
272 _spec: PhantomData,
273 }
274 }
275
276 /// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
277 ///
278 /// `%` character will be always encoded. In other words, this conversion
279 /// is not aware of percent-encoded triplets.
280 ///
281 /// Note that this encoding process does not guarantee that the resulting
282 /// string is a valid IRI reference.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// # #[cfg(feature = "alloc")] {
288 /// use iri_string::percent_encode::PercentEncoded;
289 /// use iri_string::spec::UriSpec;
290 ///
291 /// let unreserved = "%a0-._~\u{03B1}";
292 /// let unreserved_encoded = "%25a0-._~%CE%B1";
293 /// assert_eq!(
294 /// PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
295 /// unreserved_encoded
296 /// );
297 ///
298 /// let reserved = ":/?#[]@ !$&'()*+,;=";
299 /// // Note that `%20` cannot appear directly in an IRI reference.
300 /// let expected = ":/?#[]@%20!$&'()*+,;=";
301 /// assert_eq!(
302 /// PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
303 /// expected
304 /// );
305 /// # }
306 /// ```
307 #[inline]
308 #[must_use]
309 pub fn characters(raw: T) -> Self {
310 Self {
311 context: Context::Character,
312 raw,
313 _spec: PhantomData,
314 }
315 }
316}
317
318impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
319 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
320 /// Filter that encodes a character before written if necessary.
321 struct Filter<'a, 'b, S> {
322 /// Encoding context.
323 context: Context,
324 /// Writer.
325 writer: &'a mut fmt::Formatter<'b>,
326 /// Spec.
327 _spec: PhantomData<fn() -> S>,
328 }
329 impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
330 fn write_str(&mut self, s: &str) -> fmt::Result {
331 s.chars().try_for_each(|c| self.write_char(c))
332 }
333 fn write_char(&mut self, c: char) -> fmt::Result {
334 let is_valid_char = match (self.context, c.is_ascii()) {
335 (Context::RegName, true) => char::is_ascii_regname(c as u8),
336 (Context::RegName, false) => char::is_nonascii_regname::<S>(c),
337 (Context::UserOrPassword, true) => {
338 c != ':' && char::is_ascii_userinfo_ipvfutureaddr(c as u8)
339 }
340 (Context::UserOrPassword, false) => char::is_nonascii_userinfo::<S>(c),
341 (Context::PathSegment, true) => char::is_ascii_pchar(c as u8),
342 (Context::PathSegment, false) => S::is_nonascii_char_unreserved(c),
343 (Context::Path, true) => c == '/' || char::is_ascii_pchar(c as u8),
344 (Context::Path, false) => S::is_nonascii_char_unreserved(c),
345 (Context::Query, true) => c == '/' || char::is_ascii_frag_query(c as u8),
346 (Context::Query, false) => char::is_nonascii_query::<S>(c),
347 (Context::Fragment, true) => c == '/' || char::is_ascii_frag_query(c as u8),
348 (Context::Fragment, false) => char::is_nonascii_fragment::<S>(c),
349 (Context::Unreserve, true) => char::is_ascii_unreserved(c as u8),
350 (Context::Unreserve, false) => S::is_nonascii_char_unreserved(c),
351 (Context::Character, true) => char::is_ascii_unreserved_or_reserved(c as u8),
352 (Context::Character, false) => {
353 S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
354 }
355 };
356 if is_valid_char {
357 self.writer.write_char(c)
358 } else {
359 write_pct_encoded_char(&mut self.writer, c)
360 }
361 }
362 }
363 let mut filter = Filter {
364 context: self.context,
365 writer: f,
366 _spec: PhantomData::<fn() -> S>,
367 };
368 write!(filter, "{}", self.raw)
369 }
370}
371
/// Percent-encodes the given character and writes it.
///
/// Every byte of the UTF-8 encoding of `c` is written as a `%XX` triplet
/// with uppercase hexadecimal digits. Returns the first error reported by
/// `writer`, if any.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` takes at most 4 bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for &byte in c.encode_utf8(&mut utf8).as_bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}