// actix_router/quoter.rs

1/// Partial percent-decoding.
2///
3/// Performs percent-decoding on a slice but can selectively skip decoding certain sequences.
4///
5/// # Examples
6/// ```
7/// # use actix_router::Quoter;
8/// // + is set as a protected character and will not be decoded...
9/// let q = Quoter::new(&[], b"+");
10///
11/// // ...but the other encoded characters (like the hyphen below) will.
12/// assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
13/// ```
14pub struct Quoter {
15    /// Simple bit-map of protected values in the 0-127 ASCII range.
16    protected_table: AsciiBitmap,
17}
18
19impl Quoter {
20    /// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
21    ///
22    /// The first argument is ignored but is kept for backward compatibility.
23    ///
24    /// # Panics
25    /// Panics if any of the `protected` bytes are not in the 0-127 ASCII range.
26    pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
27        let mut protected_table = AsciiBitmap::default();
28
29        // prepare protected table
30        for &ch in protected {
31            protected_table.set_bit(ch);
32        }
33
34        Quoter { protected_table }
35    }
36
37    /// Decodes the next escape sequence, if any, and advances `val`.
38    #[inline(always)]
39    fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
40        for i in 0..val.len() {
41            if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
42                if let Some(ch) = hex_pair_to_char(*p1, *p2)
43                    // ignore protected ascii bytes
44                    .filter(|&ch| !(ch < 128 && self.protected_table.bit_at(ch)))
45                {
46                    *val = rem;
47                    return Some((prev, ch));
48                }
49            }
50        }
51
52        None
53    }
54
55    /// Partially percent-decodes the given bytes.
56    ///
57    /// Escape sequences of the protected set are *not* decoded.
58    ///
59    /// Returns `None` when no modification to the original bytes was required.
60    ///
61    /// Invalid/incomplete percent-encoding sequences are passed unmodified.
62    pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
63        let mut remaining = val;
64
65        // early return indicates that no percent-encoded sequences exist and we can skip allocation
66        let (pre, decoded_char) = self.decode_next(&mut remaining)?;
67
68        // decoded output will always be shorter than the input
69        let mut decoded = Vec::<u8>::with_capacity(val.len());
70
71        // push first segment and decoded char
72        decoded.extend_from_slice(pre);
73        decoded.push(decoded_char);
74
75        // decode and push rest of segments and decoded chars
76        while let Some((prev, ch)) = self.decode_next(&mut remaining) {
77            // this ugly conditional achieves +50% perf in cases where this is a tight loop.
78            if !prev.is_empty() {
79                decoded.extend_from_slice(prev);
80            }
81            decoded.push(ch);
82        }
83
84        decoded.extend_from_slice(remaining);
85
86        Some(decoded)
87    }
88
89    pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
90        self.requote(val.as_bytes())
91            .map(|data| String::from_utf8_lossy(&data).into_owned())
92    }
93}
94
/// Decodes an ASCII hex-encoded pair to an integer.
///
/// `d1` is the digit for the high nibble and `d2` the digit for the low nibble. Both upper- and
/// lower-case hex digits are accepted.
///
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
///
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
#[inline(always)]
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
    let d_high = char::from(d1).to_digit(16)?;
    let d_low = char::from(d2).to_digit(16)?;

    // both digits are below 16, so the casts are lossless;
    // left shift high nibble by 4 bits and merge in the low nibble
    Some(((d_high as u8) << 4) | (d_low as u8))
}

/// Fixed-size bit-map holding one flag per byte value in the 0-127 ASCII range.
#[derive(Debug, Default, Clone)]
struct AsciiBitmap {
    /// 16 bytes x 8 bits = 128 flags; the flag for byte `ch` is bit `ch & 7` of `array[ch >> 3]`.
    array: [u8; 16],
}

impl AsciiBitmap {
    /// Sets bit in given bit-map to 1=true.
    ///
    /// # Panics
    /// Panics if `ch` is outside the 0-127 ASCII range.
    fn set_bit(&mut self, ch: u8) {
        // explicit check so the panic names the offending byte instead of a bare index error
        assert!(
            ch.is_ascii(),
            "AsciiBitmap only covers bytes in the 0-127 range, got {ch}"
        );

        let mask = 1u8 << (ch & 0b111);
        self.array[usize::from(ch >> 3)] |= mask;
    }

    /// Returns true if the bit for `ch` is set in the bit-map.
    ///
    /// # Panics
    /// Panics if `ch` is outside the 0-127 ASCII range.
    fn bit_at(&self, ch: u8) -> bool {
        // explicit check so the panic names the offending byte instead of a bare index error
        assert!(
            ch.is_ascii(),
            "AsciiBitmap only covers bytes in the 0-127 range, got {ch}"
        );

        let mask = 1u8 << (ch & 0b111);
        self.array[usize::from(ch >> 3)] & mask != 0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: the first argument to `Quoter::new` is ignored, so values such as `b"%+"` below
    // have no effect; only the second (protected) set matters.

    /// Protected bytes stay encoded while everything else is decoded.
    #[test]
    fn custom_quoter() {
        let q = Quoter::new(b"", b"+");
        assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
        assert_eq!(q.requote(b"/a%2Bc"), None);

        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
        // protection applies regardless of the hex digits' case
        assert_eq!(q.requote(b"/a%2fb"), None);
        assert_eq!(q.requote(b"/a%2Fb"), None);
        assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
    }

    /// Escapes above the ASCII range are decoded; raw non-ASCII bytes pass through.
    #[test]
    fn non_ascii() {
        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
        assert_eq!(q.requote(b"/a\xfe\xffb"), None);
    }

    /// Invalid or incomplete escape sequences are left as-is.
    #[test]
    fn invalid_sequences() {
        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%2x%2X%%"), None);
        assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
    }

    /// Input without any escape sequence yields `None` (no allocation needed).
    #[test]
    fn quoter_no_modification() {
        let q = Quoter::new(b"", b"");
        assert_eq!(q.requote(b"/abc/../efg"), None);
    }
}