picky_asn1/
restricted_string.rs

1use serde::{de, ser};
2use std::error::Error;
3use std::fmt;
4use std::marker::PhantomData;
5use std::ops::Deref;
6use std::str::FromStr;
7
8// === CharSetError === //
9
/// Error returned when a byte sequence is rejected by a [`CharSet`] check.
#[derive(Debug)]
pub struct CharSetError;

impl Error for CharSetError {}

impl fmt::Display for CharSetError {
    /// Writes the fixed message `invalid charset`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // No trailing newline: callers embed this message in their own output
        // (logs, error chains) and a stray `\n` would break their formatting.
        f.write_str("invalid charset")
    }
}
20
21// === CharSet === //
22
/// Describes which byte sequences are acceptable for one restricted string type.
pub trait CharSet {
    /// Short label identifying the charset.
    const NAME: &'static str;

    /// Returns `true` when `data` is a valid string for this charset, `false` otherwise.
    fn check(data: &[u8]) -> bool;
}
29
30// === RestrictedString === //
31
/// A byte string whose content is constrained by the charset `C`.
///
/// `data` holds the raw bytes; `marker` only ties the value to `C` at the type
/// level and stores nothing. Because the trait impls below are derived, each of
/// them is only available when `C` implements the same trait.
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RestrictedString<C> {
    data: Vec<u8>,
    marker: PhantomData<C>,
}
38
39impl<C: CharSet> RestrictedString<C> {
40    /// Create a new RestrictedString without CharSet validation.
41    ///
42    /// # Safety
43    ///
44    /// You have to make sure the right CharSet is used.
45    pub unsafe fn new_unchecked<V>(data: V) -> Self
46    where
47        V: Into<Vec<u8>>,
48    {
49        RestrictedString {
50            data: data.into(),
51            marker: PhantomData,
52        }
53    }
54
55    pub fn new<V>(data: V) -> Result<Self, CharSetError>
56    where
57        V: Into<Vec<u8>>,
58    {
59        let data = data.into();
60        if !C::check(&data) {
61            return Err(CharSetError);
62        };
63        Ok(RestrictedString {
64            data,
65            marker: PhantomData,
66        })
67    }
68
69    /// Converts into underlying bytes.
70    pub fn into_bytes(self) -> Vec<u8> {
71        self.data
72    }
73
74    /// Returns underlying bytes.
75    pub fn as_bytes(&self) -> &[u8] {
76        &self.data
77    }
78}
79
80impl<C: CharSet> Deref for RestrictedString<C> {
81    type Target = [u8];
82
83    fn deref(&self) -> &Self::Target {
84        &self.data
85    }
86}
87
88impl<C: CharSet> AsRef<[u8]> for RestrictedString<C> {
89    fn as_ref(&self) -> &[u8] {
90        &self.data
91    }
92}
93
94impl<C: CharSet> fmt::Debug for RestrictedString<C> {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        write!(f, "{}(", C::NAME)?;
97        if let Ok(utf8) = std::str::from_utf8(&self.data) {
98            fmt::Debug::fmt(utf8, f)?;
99        } else {
100            write!(f, "0x")?;
101            self.data.iter().try_for_each(|byte| write!(f, "{byte:02X}"))?;
102        }
103        write!(f, ")")?;
104
105        Ok(())
106    }
107}
108
109impl<C: CharSet> fmt::Display for RestrictedString<C> {
110    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
111        fmt::Display::fmt(&String::from_utf8_lossy(&self.data), fmt)
112    }
113}
114
115impl<C: CharSet> From<RestrictedString<C>> for Vec<u8> {
116    fn from(rs: RestrictedString<C>) -> Self {
117        rs.into_bytes()
118    }
119}
120
121impl<'de, C> de::Deserialize<'de> for RestrictedString<C>
122where
123    C: CharSet,
124{
125    fn deserialize<D>(deserializer: D) -> Result<RestrictedString<C>, D::Error>
126    where
127        D: de::Deserializer<'de>,
128    {
129        struct Visitor<C>(std::marker::PhantomData<C>);
130
131        impl<'de, C> de::Visitor<'de> for Visitor<C>
132        where
133            C: CharSet,
134        {
135            type Value = RestrictedString<C>;
136
137            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
138                formatter.write_str("a valid buffer representing a restricted string")
139            }
140
141            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
142            where
143                E: de::Error,
144            {
145                self.visit_byte_buf(v.to_vec())
146            }
147
148            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
149            where
150                E: de::Error,
151            {
152                RestrictedString::new(v).map_err(|_| {
153                    E::invalid_value(
154                        de::Unexpected::Other("invalid charset"),
155                        &"a buffer representing a string using the right charset",
156                    )
157                })
158            }
159        }
160
161        deserializer.deserialize_byte_buf(Visitor(std::marker::PhantomData))
162    }
163}
164
165impl<C> ser::Serialize for RestrictedString<C> {
166    fn serialize<S>(&self, serializer: S) -> Result<<S as ser::Serializer>::Ok, <S as ser::Serializer>::Error>
167    where
168        S: ser::Serializer,
169    {
170        serializer.serialize_bytes(&self.data)
171    }
172}
173
174// === NumericString === //
175
176/// 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, and SPACE
177#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
178pub struct NumericCharSet;
179
180impl CharSet for NumericCharSet {
181    const NAME: &'static str = "NUMERIC";
182
183    fn check(data: &[u8]) -> bool {
184        for &c in data {
185            if c != b' ' && !c.is_ascii_digit() {
186                return false;
187            }
188        }
189        true
190    }
191}
192
193pub type NumericString = RestrictedString<NumericCharSet>;
194
195impl NumericString {
196    pub fn from_string(s: String) -> Result<Self, CharSetError> {
197        Self::new(s.into_bytes())
198    }
199
200    pub fn as_utf8(&self) -> &str {
201        core::str::from_utf8(self.as_bytes()).expect("valid UTF-8 subset")
202    }
203
204    pub fn into_string(self) -> String {
205        String::from_utf8(self.into_bytes()).expect("valid UTF-8 subset")
206    }
207}
208
209impl FromStr for NumericString {
210    type Err = CharSetError;
211
212    fn from_str(s: &str) -> Result<Self, Self::Err> {
213        Self::new(s.as_bytes())
214    }
215}
216
217// === PrintableString === //
218
219/// a-z, A-Z, ' () +,-.?:/= and SPACE
220#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
221pub struct PrintableCharSet;
222
223impl CharSet for PrintableCharSet {
224    const NAME: &'static str = "PRINTABLE";
225
226    fn check(data: &[u8]) -> bool {
227        for &c in data {
228            if !(c.is_ascii_alphanumeric()
229                || c == b' '
230                || c == b'\''
231                || c == b'('
232                || c == b')'
233                || c == b'+'
234                || c == b','
235                || c == b'-'
236                || c == b'.'
237                || c == b'/'
238                || c == b':'
239                || c == b'='
240                || c == b'?')
241            {
242                return false;
243            }
244        }
245        true
246    }
247}
248
249pub type PrintableString = RestrictedString<PrintableCharSet>;
250
251impl PrintableString {
252    pub fn from_string(s: String) -> Result<Self, CharSetError> {
253        Self::new(s.into_bytes())
254    }
255
256    pub fn as_utf8(&self) -> &str {
257        core::str::from_utf8(self.as_bytes()).expect("valid UTF-8 subset")
258    }
259
260    pub fn into_string(self) -> String {
261        String::from_utf8(self.into_bytes()).expect("valid UTF-8 subset")
262    }
263}
264
265impl FromStr for PrintableString {
266    type Err = CharSetError;
267
268    fn from_str(s: &str) -> Result<Self, Self::Err> {
269        Self::new(s.as_bytes())
270    }
271}
272
273// === Utf8String === //
274
275/// any character from a recognized alphabet (including ASCII control characters)
276#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
277pub struct Utf8CharSet;
278
279impl CharSet for Utf8CharSet {
280    const NAME: &'static str = "UTF8";
281
282    fn check(data: &[u8]) -> bool {
283        std::str::from_utf8(data).is_ok()
284    }
285}
286
287pub type Utf8String = RestrictedString<Utf8CharSet>;
288
289impl Utf8String {
290    pub fn from_string(s: String) -> Result<Self, CharSetError> {
291        Self::new(s.into_bytes())
292    }
293
294    pub fn as_utf8(&self) -> &str {
295        core::str::from_utf8(self.as_bytes()).expect("valid UTF-8 subset")
296    }
297
298    pub fn into_string(self) -> String {
299        String::from_utf8(self.into_bytes()).expect("valid UTF-8 subset")
300    }
301}
302
303impl FromStr for Utf8String {
304    type Err = CharSetError;
305
306    fn from_str(s: &str) -> Result<Self, Self::Err> {
307        Self::new(s.as_bytes())
308    }
309}
310
311// === IA5String === //
312
313/// First 128 ASCII characters (values from `0x00` to `0x7F`)
314/// Used to represent ISO 646 (IA5) characters.
315#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
316pub struct Ia5CharSet;
317
318impl CharSet for Ia5CharSet {
319    const NAME: &'static str = "IA5";
320
321    fn check(data: &[u8]) -> bool {
322        for &c in data {
323            if !c.is_ascii() {
324                return false;
325            }
326        }
327        true
328    }
329}
330
331pub type Ia5String = RestrictedString<Ia5CharSet>;
332
333#[deprecated = "Use IA5String instead"]
334pub use Ia5String as IA5String;
335
336impl Ia5String {
337    pub fn from_string(s: String) -> Result<Self, CharSetError> {
338        Self::new(s.into_bytes())
339    }
340
341    pub fn as_utf8(&self) -> &str {
342        core::str::from_utf8(self.as_bytes()).expect("valid UTF-8 subset")
343    }
344
345    pub fn into_string(self) -> String {
346        String::from_utf8(self.into_bytes()).expect("valid UTF-8 subset")
347    }
348}
349
350impl FromStr for Ia5String {
351    type Err = CharSetError;
352
353    fn from_str(s: &str) -> Result<Self, Self::Err> {
354        Self::new(s.as_bytes())
355    }
356}
357
358// === BmpString === //
359
360#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
361pub struct BmpCharSet;
362
363impl CharSet for BmpCharSet {
364    const NAME: &'static str = "BMP";
365
366    fn check(data: &[u8]) -> bool {
367        // BMP strings are two-byte characters
368        if data.len() % 2 != 0 {
369            return false;
370        }
371
372        let chunk_it = data.chunks_exact(2);
373        debug_assert!(chunk_it.remainder().is_empty());
374
375        // Characters are encoded in big-endian
376        let u16_it = chunk_it.map(|code_unit| u16::from_be_bytes([code_unit[0], code_unit[1]]));
377
378        let mut count = 0;
379
380        for res in char::decode_utf16(u16_it) {
381            if res.is_err() {
382                return false;
383            }
384
385            count += 1;
386        }
387
388        // Unlike UTF-16, BMP encoding is not a variable-length encoding.
389        // (i.e.: BMP is only the first plane, "plane 0", of the Unicode standard.)
390        count == data.len() / 2
391    }
392}
393
394pub type BmpString = RestrictedString<BmpCharSet>;
395
396#[deprecated = "Use BmpString instead"]
397pub use BmpString as BMPString;
398
399impl BmpString {
400    pub fn to_utf8(&self) -> String {
401        let chunk_it = self.as_bytes().chunks_exact(2);
402        debug_assert!(chunk_it.remainder().is_empty());
403        let u16_it = chunk_it.map(|code_unit| u16::from_be_bytes([code_unit[0], code_unit[1]]));
404        char::decode_utf16(u16_it)
405            .map(|res| res.expect("valid code point"))
406            .collect()
407    }
408}
409
410impl FromStr for BmpString {
411    type Err = CharSetError;
412
413    fn from_str(s: &str) -> Result<Self, Self::Err> {
414        let data: Vec<u8> = s.encode_utf16().flat_map(|code_unit| code_unit.to_be_bytes()).collect();
415        Self::new(data)
416    }
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed-script sample: valid UTF-8, rejected by every narrower charset.
    const MIXED_SCRIPT: &str = "1224na÷日本語はむずかちー−×—«BUeisuteurnt";

    #[test]
    fn valid_printable_string() {
        let parsed: Result<PrintableString, _> = "29INRUSAET3snre?:=tanui83  9283019".parse();
        parsed.expect("valid string");
    }

    #[test]
    fn invalid_printable_string() {
        let parsed: Result<PrintableString, _> = MIXED_SCRIPT.parse();
        assert!(parsed.is_err());
    }

    #[test]
    fn valid_numeric_string() {
        let parsed: Result<NumericString, _> = "2983  9283019".parse();
        parsed.expect("valid string");
    }

    #[test]
    fn invalid_numeric_string() {
        let parsed: Result<NumericString, _> = MIXED_SCRIPT.parse();
        assert!(parsed.is_err());
    }

    #[test]
    fn valid_ia5_string() {
        let parsed: Result<Ia5String, _> = "BUeisuteurnt".parse();
        parsed.expect("valid string");
    }

    #[test]
    fn invalid_ia5_string() {
        let parsed: Result<Ia5String, _> = "BUéisuteurnt".parse();
        assert!(parsed.is_err());
    }

    #[test]
    fn valid_utf8_string() {
        let parsed: Result<Utf8String, _> = MIXED_SCRIPT.parse();
        parsed.expect("valid string");
    }

    #[test]
    fn valid_bmp_string() {
        // Round trip through the `FromStr` encoder.
        let from_text = BmpString::from_str("语言处理").expect("valid BMP string");
        assert_eq!(from_text.to_utf8(), "语言处理");

        // Pre-encoded big-endian buffers.
        let certificate_template = vec![
            0x00, 0x43, 0x00, 0x65, 0x00, 0x72, 0x00, 0x74, 0x00, 0x69, 0x00, 0x66, 0x00, 0x69, 0x00, 0x63, 0x00,
            0x61, 0x00, 0x74, 0x00, 0x65, 0x00, 0x54, 0x00, 0x65, 0x00, 0x6d, 0x00, 0x70, 0x00, 0x6c, 0x00, 0x61,
            0x00, 0x74, 0x00, 0x65,
        ];
        let decoded = BmpString::new(certificate_template).expect("valid BMP string");
        assert_eq!(decoded.to_utf8(), "CertificateTemplate");

        let user = vec![0x00, 0x55, 0x00, 0x73, 0x00, 0x65, 0x00, 0x72];
        let decoded = BmpString::new(user).expect("valid BMP string");
        assert_eq!(decoded.to_utf8(), "User");
    }

    #[test]
    fn invalid_bmp_string() {
        // Raw UTF-8 bytes are not a valid BMP buffer.
        let result = BmpString::new(MIXED_SCRIPT.as_bytes());
        assert!(result.is_err());
    }
}