hickory_proto/rr/domain/
label.rs

1// Copyright 2015-2018 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Labels are used as the internal components of a Name.
9//!
10//! A label is stored internally as ascii, where all unicode characters are converted to punycode internally.
11
12use alloc::{string::String, vec::Vec};
13use core::borrow::Borrow;
14use core::cmp::{Ordering, PartialEq};
15use core::fmt::{self, Debug, Display, Formatter, Write};
16use core::hash::{Hash, Hasher};
17
18use idna::uts46::{AsciiDenyList, DnsLength, Hyphens, Uts46};
19use tinyvec::TinyVec;
20use tracing::debug;
21
22use crate::error::*;
23
24const WILDCARD: &[u8] = b"*";
25const IDNA_PREFIX: &[u8] = b"xn--";
26
27/// Labels are always stored as ASCII, unicode characters must be encoded with punycode
28#[derive(Clone, Eq)]
29pub struct Label(TinyVec<[u8; 24]>);
30
31impl Label {
32    /// These must only be ASCII, with unicode encoded to PunyCode, or other such transformation.
33    ///
34    /// This uses the bytes as raw ascii values, with nothing escaped on the wire.
35    /// Generally users should use `from_str` or `from_ascii`
36    pub fn from_raw_bytes(bytes: &[u8]) -> ProtoResult<Self> {
37        // Check for label validity.
38        // RFC 2181, Section 11 "Name Syntax".
39        // > The length of any one label is limited to between 1 and 63 octets.
40        if bytes.is_empty() {
41            return Err("Label requires a minimum length of 1".into());
42        }
43        if bytes.len() > 63 {
44            return Err(ProtoErrorKind::LabelBytesTooLong(bytes.len()).into());
45        };
46        Ok(Self(TinyVec::from(bytes)))
47    }
48
49    /// Translates this string into IDNA safe name, encoding to punycode as necessary.
50    pub fn from_utf8(s: &str) -> ProtoResult<Self> {
51        if s.as_bytes() == WILDCARD {
52            return Ok(Self::wildcard());
53        }
54
55        // special case for SRV type records
56        if s.starts_with('_') {
57            return Self::from_ascii(s);
58        }
59
60        // length don't exceeding 63 is done in `from_ascii`
61        // on puny encoded string
62        // idna error are opaque so early failure is not possible.
63        match Uts46::new().to_ascii(
64            s.as_bytes(),
65            AsciiDenyList::STD3,
66            Hyphens::Allow,
67            DnsLength::Ignore,
68        ) {
69            Ok(puny) => Self::from_ascii(&puny),
70            e => Err(format!("Label contains invalid characters: {e:?}").into()),
71        }
72    }
73
74    /// Takes the ascii string and returns a new label.
75    ///
76    /// This will return an Error if the label is not an ascii string
77    pub fn from_ascii(s: &str) -> ProtoResult<Self> {
78        if s.len() > 63 {
79            return Err(ProtoErrorKind::LabelBytesTooLong(s.len()).into());
80        }
81
82        if s.as_bytes() == WILDCARD {
83            return Ok(Self::wildcard());
84        }
85
86        if !s.is_empty()
87            && s.is_ascii()
88            && s.chars().take(1).all(|c| is_safe_ascii(c, true, false))
89            && s.chars().skip(1).all(|c| is_safe_ascii(c, false, false))
90        {
91            Self::from_raw_bytes(s.as_bytes())
92        } else {
93            Err(format!("Malformed label: {s}").into())
94        }
95    }
96
97    /// Returns a new Label of the Wildcard, i.e. "*"
98    pub fn wildcard() -> Self {
99        Self(TinyVec::from(WILDCARD))
100    }
101
102    /// Converts this label to lowercase
103    pub fn to_lowercase(&self) -> Self {
104        // TODO: replace case conversion when (ascii_ctype #39658) stabilizes
105        if let Some((idx, _)) = self
106            .0
107            .iter()
108            .enumerate()
109            .find(|&(_, c)| *c != c.to_ascii_lowercase())
110        {
111            let mut lower_label: Vec<u8> = self.0.to_vec();
112            lower_label[idx..].make_ascii_lowercase();
113            Self(TinyVec::from(lower_label.as_slice()))
114        } else {
115            self.clone()
116        }
117    }
118
119    /// Returns true if this label is the wildcard, '*', label
120    pub fn is_wildcard(&self) -> bool {
121        self.as_bytes() == WILDCARD
122    }
123
124    /// Returns the length in bytes of this label
125    pub fn len(&self) -> usize {
126        self.0.len()
127    }
128
129    /// True if the label contains no characters
130    pub fn is_empty(&self) -> bool {
131        self.0.is_empty()
132    }
133
134    /// Returns the raw bytes of the label, this is good for writing to the wire.
135    ///
136    /// See [`Display`] for presentation version (unescaped from punycode, etc)
137    pub fn as_bytes(&self) -> &[u8] {
138        &self.0
139    }
140
141    /// Performs the equivalence operation disregarding case
142    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
143        self.0.eq_ignore_ascii_case(&other.0)
144    }
145
146    /// compares with the other label, ignoring case
147    pub fn cmp_with_f<F: LabelCmp>(&self, other: &Self) -> Ordering {
148        let s = self.0.iter();
149        let o = other.0.iter();
150
151        for (s, o) in s.zip(o) {
152            match F::cmp_u8(*s, *o) {
153                Ordering::Equal => continue,
154                not_eq => return not_eq,
155            }
156        }
157
158        self.0.len().cmp(&other.0.len())
159    }
160
161    /// Performs the conversion to utf8 from IDNA as necessary, see `fmt` for more details
162    pub fn to_utf8(&self) -> String {
163        format!("{self}")
164    }
165
166    /// Converts this label to safe ascii, escaping characters as necessary
167    ///
168    /// If this is an IDNA, punycode, label, then the xn-- prefix will be maintained as ascii
169    pub fn to_ascii(&self) -> String {
170        let mut ascii = String::with_capacity(self.as_bytes().len());
171
172        self.write_ascii(&mut ascii)
173            .expect("should never fail to write a new string");
174        ascii
175    }
176
177    /// Writes this label to safe ascii, escaping characters as necessary
178    pub fn write_ascii<W: Write>(&self, f: &mut W) -> Result<(), fmt::Error> {
179        // We can't guarantee that the same input will always translate to the same output
180        fn escape_non_ascii<W: Write>(
181            byte: u8,
182            f: &mut W,
183            is_first: bool,
184        ) -> Result<(), fmt::Error> {
185            let to_triple_escape = |ch: u8| format!("\\{ch:03o}");
186            let to_single_escape = |ch: char| format!("\\{ch}");
187
188            match char::from(byte) {
189                c if is_safe_ascii(c, is_first, true) => f.write_char(c)?,
190                // it's not a control and is printable as well as inside the standard ascii range
191                c if byte > b'\x20' && byte < b'\x7f' => f.write_str(&to_single_escape(c))?,
192                _ => f.write_str(&to_triple_escape(byte))?,
193            }
194
195            Ok(())
196        }
197
198        // traditional ascii case...
199        let mut chars = self.as_bytes().iter();
200        if let Some(ch) = chars.next() {
201            escape_non_ascii(*ch, f, true)?;
202        }
203
204        for ch in chars {
205            escape_non_ascii(*ch, f, false)?;
206        }
207
208        Ok(())
209    }
210}
211
212impl AsRef<[u8]> for Label {
213    fn as_ref(&self) -> &[u8] {
214        self.as_bytes()
215    }
216}
217
218impl Borrow<[u8]> for Label {
219    fn borrow(&self) -> &[u8] {
220        &self.0
221    }
222}
223
/// Decides whether `c` may appear unescaped in a label.
///
/// * `is_first` - true if `c` is the first character of the label; a dash is rejected there,
///   while the wildcard `*` is only accepted there
/// * `for_encoding` - true when checking characters for escaped output, in which case a dot is
///   not considered safe
///
/// Non-ASCII characters are never safe.
fn is_safe_ascii(c: char, is_first: bool, for_encoding: bool) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }

    match c {
        '-' => !is_first,     // dash is allowed, but not as the leading character
        '_' => true,          // SRV like labels
        '*' => is_first,      // wildcard
        '.' => !for_encoding, // needed to allow dots, for things like email addresses
        _ => false,
    }
}
235
236impl Display for Label {
237    /// outputs characters in a safe string manner.
238    ///
239    /// if the string is punycode, i.e. starts with `xn--`, otherwise it translates to a safe ascii string
240    ///   escaping characters as necessary.
241    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
242        if self.as_bytes().starts_with(IDNA_PREFIX) {
243            // this should never be outside the ascii codes...
244            let label = String::from_utf8_lossy(self.borrow());
245            let (label, e) =
246                Uts46::new().to_unicode(label.as_bytes(), AsciiDenyList::EMPTY, Hyphens::Allow);
247
248            if e.is_ok() {
249                return f.write_str(&label);
250            } else {
251                debug!(
252                    "xn-- prefixed string did not translate via IDNA properly: {:?}",
253                    e
254                )
255            }
256        }
257
258        // it wasn't known to be utf8
259        self.write_ascii(f)
260    }
261}
262
263impl Debug for Label {
264    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
265        let label = String::from_utf8_lossy(self.borrow());
266        f.write_str(&label)
267    }
268}
269
270impl PartialEq<Self> for Label {
271    fn eq(&self, other: &Self) -> bool {
272        self.eq_ignore_ascii_case(other)
273    }
274}
275
276impl PartialOrd<Self> for Label {
277    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
278        Some(self.cmp(other))
279    }
280}
281
282impl Ord for Label {
283    fn cmp(&self, other: &Self) -> Ordering {
284        self.cmp_with_f::<CaseInsensitive>(other)
285    }
286}
287
288impl Hash for Label {
289    fn hash<H>(&self, state: &mut H)
290    where
291        H: Hasher,
292    {
293        for b in self.borrow() as &[u8] {
294            state.write_u8(b.to_ascii_lowercase());
295        }
296    }
297}
298
/// Strategy used to compare the individual bytes of two labels, e.g. with or without regard to
/// case
pub trait LabelCmp {
    /// Compares a byte from the left label with a byte from the right label.
    ///
    /// This should mimic the cmp method from [`PartialOrd`]
    fn cmp_u8(l: u8, r: u8) -> Ordering;
}
304
305/// For case sensitive comparisons
306pub(super) struct CaseSensitive;
307
308impl LabelCmp for CaseSensitive {
309    fn cmp_u8(l: u8, r: u8) -> Ordering {
310        l.cmp(&r)
311    }
312}
313
314/// For case insensitive comparisons
315pub(super) struct CaseInsensitive;
316
317impl LabelCmp for CaseInsensitive {
318    fn cmp_u8(l: u8, r: u8) -> Ordering {
319        l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase())
320    }
321}
322
323/// Conversion into a Label
324pub trait IntoLabel: Sized {
325    /// Convert this into Label
326    fn into_label(self) -> ProtoResult<Label>;
327}
328
329impl IntoLabel for &Label {
330    fn into_label(self) -> ProtoResult<Label> {
331        Ok(self.clone())
332    }
333}
334
335impl IntoLabel for Label {
336    fn into_label(self) -> ProtoResult<Label> {
337        Ok(self)
338    }
339}
340
341impl IntoLabel for &str {
342    fn into_label(self) -> ProtoResult<Label> {
343        Label::from_utf8(self)
344    }
345}
346
347impl IntoLabel for String {
348    fn into_label(self) -> ProtoResult<Label> {
349        Label::from_utf8(&self)
350    }
351}
352
353impl IntoLabel for &[u8] {
354    fn into_label(self) -> ProtoResult<Label> {
355        Label::from_raw_bytes(self)
356    }
357}
358
359impl IntoLabel for Vec<u8> {
360    fn into_label(self) -> ProtoResult<Label> {
361        Label::from_raw_bytes(&self)
362    }
363}
364
#[cfg(test)]
mod tests {
    #![allow(clippy::dbg_macro, clippy::print_stdout)]

    use alloc::string::ToString;
    #[cfg(feature = "std")]
    use std::{eprintln, println};

    use super::*;

    /// Unicode input is punycode encoded, ASCII input is kept as is.
    #[test]
    fn test_encoding() {
        assert_eq!(
            Label::from_utf8("abc").unwrap(),
            Label::from_raw_bytes(b"abc").unwrap()
        );
        // case insensitive, this works...
        assert_eq!(
            Label::from_utf8("ABC").unwrap(),
            Label::from_raw_bytes(b"ABC").unwrap()
        );
        assert_eq!(
            Label::from_utf8("🦀").unwrap(),
            Label::from_raw_bytes(b"xn--zs9h").unwrap()
        );
        assert_eq!(
            Label::from_utf8("rust-🦀-icon").unwrap(),
            Label::from_raw_bytes(b"xn--rust--icon-9447i").unwrap()
        );
        assert_eq!(
            Label::from_ascii("ben.fry").unwrap(),
            Label::from_raw_bytes(b"ben.fry").unwrap()
        );
        assert_eq!(Label::from_utf8("🦀").unwrap().to_utf8(), "🦀");
        assert_eq!(Label::from_utf8("🦀").unwrap().to_ascii(), "xn--zs9h");
    }

    /// Asserts that `error` is a `LabelBytesTooLong` error reporting exactly `len` bytes.
    fn assert_panic_label_too_long(error: ProtoResult<Label>, len: usize) {
        // poor man's debugging, since ProtoResult doesn't implement PartialEq due to ssl errors.
        #[cfg(feature = "std")]
        eprintln!("{error:?}");
        assert!(error.is_err());
        match error.unwrap_err().kind() {
            ProtoErrorKind::LabelBytesTooLong(n) if *n == len => (),
            ProtoErrorKind::LabelBytesTooLong(n) => {
                // report both values, the reported size alone does not explain the failure
                panic!("LabelBytesTooLong reported a size of {n}, but {len} was expected")
            }
            _ => panic!("Should have returned a LabelTooLongError"),
        }
    }

    #[test]
    fn test_label_too_long_ascii_with_utf8() {
        let label_too_long = "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_utf8(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji() {
        // only emoji, too long once punycode encoded
        let emoji_case = "💜🦀🏖️🖥️😨🚀✨🤖💚🦾🦿😱😨✉️👺📚💻🗓️🤡🦀😈🚀💀⚡🦄";
        let error = Label::from_utf8(emoji_case);
        assert_panic_label_too_long(error, 64);
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji_mixed() {
        // too long mixed 65
        // Something international to say
        // "Hello I like autumn coffee 🦀 interesting"
        let emoji_case = "こんにちは-I-mögen-jesień-café-🦀-intéressant";
        let error = Label::from_utf8(emoji_case);
        assert_panic_label_too_long(error, 65);
    }

    #[test]
    fn test_label_too_long_utf8_puny_mixed() {
        // edge case, 64 octets long.
        // xn--testwithalonglabelinutf8tofitin63octetsisagoodhabit-f2106cqb
        let edge_case = "🦀testwithalonglabelinutf8tofitin63octetsisagoodhabit🦀";
        let error = Label::from_utf8(edge_case);
        assert_panic_label_too_long(error, 64);
    }

    #[test]
    fn test_label_too_long_raw() {
        let label_too_long = b"alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_raw_bytes(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    #[test]
    fn test_label_too_long_ascii() {
        let label_too_long = "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_ascii(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    /// Punycode labels are displayed as unicode.
    #[test]
    fn test_decoding() {
        assert_eq!(Label::from_raw_bytes(b"abc").unwrap().to_string(), "abc");
        assert_eq!(
            Label::from_raw_bytes(b"xn--zs9h").unwrap().to_string(),
            "🦀"
        );
        assert_eq!(
            Label::from_raw_bytes(b"xn--rust--icon-9447i")
                .unwrap()
                .to_string(),
            "rust-🦀-icon"
        );
    }

    /// Non-ASCII input must be rejected by `from_ascii`.
    #[test]
    fn test_from_ascii_adversial_utf8() {
        let expect_err = Label::from_ascii("🦀");
        assert!(expect_err.is_err());
    }

    #[test]
    fn test_to_lowercase() {
        assert_ne!(Label::from_ascii("ABC").unwrap().to_string(), "abc");
        assert_ne!(Label::from_ascii("abcDEF").unwrap().to_string(), "abcdef");
        assert_eq!(
            Label::from_ascii("ABC").unwrap().to_lowercase().to_string(),
            "abc"
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .to_lowercase()
                .to_string(),
            "abcdef"
        );
    }

    #[test]
    fn test_to_cmp_f() {
        assert_eq!(
            Label::from_ascii("ABC")
                .unwrap()
                .cmp_with_f::<CaseInsensitive>(&Label::from_ascii("abc").unwrap()),
            Ordering::Equal
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .cmp_with_f::<CaseInsensitive>(&Label::from_ascii("abcdef").unwrap()),
            Ordering::Equal
        );
        assert_eq!(
            Label::from_ascii("ABC")
                .unwrap()
                .cmp_with_f::<CaseSensitive>(&Label::from_ascii("abc").unwrap()),
            Ordering::Less
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .cmp_with_f::<CaseSensitive>(&Label::from_ascii("abcdef").unwrap()),
            Ordering::Less
        );
    }

    /// Every pair is expected to order as left < right.
    #[test]
    fn test_partial_cmp() {
        let comparisons: Vec<(Label, Label)> = vec![
            (
                Label::from_raw_bytes(b"yljkjljk").unwrap(),
                Label::from_raw_bytes(b"Z").unwrap(),
            ),
            (
                Label::from_raw_bytes(b"Z").unwrap(),
                Label::from_raw_bytes(b"zABC").unwrap(),
            ),
            (
                Label::from_raw_bytes(&[1]).unwrap(),
                Label::from_raw_bytes(b"*").unwrap(),
            ),
            (
                Label::from_raw_bytes(b"*").unwrap(),
                Label::from_raw_bytes(&[200]).unwrap(),
            ),
        ];

        for (left, right) in comparisons {
            #[cfg(feature = "std")]
            println!("left: {left}, right: {right}");
            assert_eq!(left.cmp(&right), Ordering::Less);
        }
    }

    #[test]
    fn test_is_wildcard() {
        assert!(Label::from_raw_bytes(b"*").unwrap().is_wildcard());
        assert!(Label::from_ascii("*").unwrap().is_wildcard());
        assert!(Label::from_utf8("*").unwrap().is_wildcard());
        assert!(!Label::from_raw_bytes(b"abc").unwrap().is_wildcard());
    }

    /// Bytes that are not safe ascii are escaped in the presentation form.
    #[test]
    fn test_ascii_escape() {
        assert_eq!(
            Label::from_raw_bytes(&[0o200]).unwrap().to_string(),
            "\\200"
        );
        assert_eq!(
            Label::from_raw_bytes(&[0o001]).unwrap().to_string(),
            "\\001"
        );
        assert_eq!(Label::from_ascii(".").unwrap().to_ascii(), "\\.");
        assert_eq!(
            Label::from_ascii("ben.fry").unwrap().to_string(),
            "ben\\.fry"
        );
        assert_eq!(Label::from_raw_bytes(&[0o200]).unwrap().to_ascii(), "\\200");
    }
}
586}