hickory_proto/rr/domain/
label.rs

1// Copyright 2015-2018 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Labels are used as the internal components of a Name.
9//!
10//! A label is stored internally as ascii, where all unicode characters are converted to punycode internally.
11
12#[allow(clippy::useless_attribute)]
13#[allow(unused)]
14#[allow(deprecated)]
15use std::ascii::AsciiExt;
16use std::borrow::Borrow;
17use std::cmp::{Ordering, PartialEq};
18use std::fmt::{self, Debug, Display, Formatter, Write};
19use std::hash::{Hash, Hasher};
20
21use idna::uts46::{AsciiDenyList, DnsLength, Hyphens, Uts46};
22use tinyvec::TinyVec;
23use tracing::debug;
24
25use crate::error::*;
26
/// Raw bytes of the wildcard label, `*`.
const WILDCARD: &[u8] = b"*";
/// Prefix that marks a label as IDNA (punycode) encoded; `Display` checks for it before
/// attempting to decode the label back to unicode.
const IDNA_PREFIX: &[u8] = b"xn--";
29
/// A single label, i.e. one component of a Name.
///
/// Labels are always stored as ASCII, unicode characters must be encoded with punycode.
/// The bytes are held in a `TinyVec` backed by a 24-byte array.
///
/// `Eq` is derived here, while `PartialEq`, `Ord` and `Hash` are implemented by hand in this
/// file and all ignore ASCII case.
#[derive(Clone, Eq)]
pub struct Label(TinyVec<[u8; 24]>);
33
34impl Label {
35    /// These must only be ASCII, with unicode encoded to PunyCode, or other such transformation.
36    ///
37    /// This uses the bytes as raw ascii values, with nothing escaped on the wire.
38    /// Generally users should use `from_str` or `from_ascii`
39    pub fn from_raw_bytes(bytes: &[u8]) -> ProtoResult<Self> {
40        // Check for label validity.
41        // RFC 2181, Section 11 "Name Syntax".
42        // > The length of any one label is limited to between 1 and 63 octets.
43        if bytes.is_empty() {
44            return Err("Label requires a minimum length of 1".into());
45        }
46        if bytes.len() > 63 {
47            return Err(ProtoErrorKind::LabelBytesTooLong(bytes.len()).into());
48        };
49        Ok(Self(TinyVec::from(bytes)))
50    }
51
52    /// Translates this string into IDNA safe name, encoding to punycode as necessary.
53    pub fn from_utf8(s: &str) -> ProtoResult<Self> {
54        if s.as_bytes() == WILDCARD {
55            return Ok(Self::wildcard());
56        }
57
58        // special case for SRV type records
59        if s.starts_with('_') {
60            return Self::from_ascii(s);
61        }
62
63        // length don't exceeding 63 is done in `from_ascii`
64        // on puny encoded string
65        // idna error are opaque so early failure is not possible.
66        match Uts46::new().to_ascii(
67            s.as_bytes(),
68            AsciiDenyList::STD3,
69            Hyphens::Allow,
70            DnsLength::Ignore,
71        ) {
72            Ok(puny) => Self::from_ascii(&puny),
73            e => Err(format!("Label contains invalid characters: {e:?}").into()),
74        }
75    }
76
77    /// Takes the ascii string and returns a new label.
78    ///
79    /// This will return an Error if the label is not an ascii string
80    pub fn from_ascii(s: &str) -> ProtoResult<Self> {
81        if s.len() > 63 {
82            return Err(ProtoErrorKind::LabelBytesTooLong(s.len()).into());
83        }
84
85        if s.as_bytes() == WILDCARD {
86            return Ok(Self::wildcard());
87        }
88
89        if !s.is_empty()
90            && s.is_ascii()
91            && s.chars().take(1).all(|c| is_safe_ascii(c, true, false))
92            && s.chars().skip(1).all(|c| is_safe_ascii(c, false, false))
93        {
94            Self::from_raw_bytes(s.as_bytes())
95        } else {
96            Err(format!("Malformed label: {s}").into())
97        }
98    }
99
100    /// Returns a new Label of the Wildcard, i.e. "*"
101    pub fn wildcard() -> Self {
102        Self(TinyVec::from(WILDCARD))
103    }
104
105    /// Converts this label to lowercase
106    pub fn to_lowercase(&self) -> Self {
107        // TODO: replace case conversion when (ascii_ctype #39658) stabilizes
108        if let Some((idx, _)) = self
109            .0
110            .iter()
111            .enumerate()
112            .find(|&(_, c)| *c != c.to_ascii_lowercase())
113        {
114            let mut lower_label: Vec<u8> = self.0.to_vec();
115            lower_label[idx..].make_ascii_lowercase();
116            Self(TinyVec::from(lower_label.as_slice()))
117        } else {
118            self.clone()
119        }
120    }
121
122    /// Returns true if this label is the wildcard, '*', label
123    pub fn is_wildcard(&self) -> bool {
124        self.as_bytes() == WILDCARD
125    }
126
127    /// Returns the lenght in bytes of this label
128    pub fn len(&self) -> usize {
129        self.0.len()
130    }
131
132    /// True if the label contains no characters
133    pub fn is_empty(&self) -> bool {
134        self.0.is_empty()
135    }
136
137    /// Returns the raw bytes of the label, this is good for writing to the wire.
138    ///
139    /// See [`Display`] for presentation version (unescaped from punycode, etc)
140    pub fn as_bytes(&self) -> &[u8] {
141        &self.0
142    }
143
144    /// Performs the equivalence operation disregarding case
145    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
146        self.0.eq_ignore_ascii_case(&other.0)
147    }
148
149    /// compares with the other label, ignoring case
150    pub fn cmp_with_f<F: LabelCmp>(&self, other: &Self) -> Ordering {
151        let s = self.0.iter();
152        let o = other.0.iter();
153
154        for (s, o) in s.zip(o) {
155            match F::cmp_u8(*s, *o) {
156                Ordering::Equal => continue,
157                not_eq => return not_eq,
158            }
159        }
160
161        self.0.len().cmp(&other.0.len())
162    }
163
164    /// Performs the conversion to utf8 from IDNA as necessary, see `fmt` for more details
165    pub fn to_utf8(&self) -> String {
166        format!("{self}")
167    }
168
169    /// Converts this label to safe ascii, escaping characters as necessary
170    ///
171    /// If this is an IDNA, punycode, label, then the xn-- prefix will be maintained as ascii
172    pub fn to_ascii(&self) -> String {
173        let mut ascii = String::with_capacity(self.as_bytes().len());
174
175        self.write_ascii(&mut ascii)
176            .expect("should never fail to write a new string");
177        ascii
178    }
179
180    /// Writes this label to safe ascii, escaping characters as necessary
181    pub fn write_ascii<W: Write>(&self, f: &mut W) -> Result<(), fmt::Error> {
182        // We can't guarantee that the same input will always translate to the same output
183        fn escape_non_ascii<W: Write>(
184            byte: u8,
185            f: &mut W,
186            is_first: bool,
187        ) -> Result<(), fmt::Error> {
188            let to_triple_escape = |ch: u8| format!("\\{ch:03o}");
189            let to_single_escape = |ch: char| format!("\\{ch}");
190
191            match char::from(byte) {
192                c if is_safe_ascii(c, is_first, true) => f.write_char(c)?,
193                // it's not a control and is printable as well as inside the standard ascii range
194                c if byte > b'\x20' && byte < b'\x7f' => f.write_str(&to_single_escape(c))?,
195                _ => f.write_str(&to_triple_escape(byte))?,
196            }
197
198            Ok(())
199        }
200
201        // traditional ascii case...
202        let mut chars = self.as_bytes().iter();
203        if let Some(ch) = chars.next() {
204            escape_non_ascii(*ch, f, true)?;
205        }
206
207        for ch in chars {
208            escape_non_ascii(*ch, f, false)?;
209        }
210
211        Ok(())
212    }
213}
214
215impl AsRef<[u8]> for Label {
216    fn as_ref(&self) -> &[u8] {
217        self.as_bytes()
218    }
219}
220
221impl Borrow<[u8]> for Label {
222    fn borrow(&self) -> &[u8] {
223        &self.0
224    }
225}
226
/// Decides whether `c` may appear unescaped in a label.
///
/// * `is_first` - true when `c` is the first character of the label; a dash is rejected
///   there, and the wildcard `*` is only accepted there.
/// * `for_encoding` - true when deciding what to escape on output, in which case `.` is
///   not considered safe.
///
/// ASCII letters and digits as well as `_` are always accepted; anything outside of ASCII
/// is always rejected.
fn is_safe_ascii(c: char, is_first: bool, for_encoding: bool) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }

    match c {
        // SRV like labels
        '_' => true,
        // dash is allowed, just not in front
        '-' => !is_first,
        // wildcard
        '*' => is_first,
        // needed to allow dots, for things like email addresses
        '.' => !for_encoding,
        _ => false,
    }
}
238
239impl Display for Label {
240    /// outputs characters in a safe string manner.
241    ///
242    /// if the string is punycode, i.e. starts with `xn--`, otherwise it translates to a safe ascii string
243    ///   escaping characters as necessary.
244    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
245        if self.as_bytes().starts_with(IDNA_PREFIX) {
246            // this should never be outside the ascii codes...
247            let label = String::from_utf8_lossy(self.borrow());
248            let (label, e) =
249                Uts46::new().to_unicode(label.as_bytes(), AsciiDenyList::EMPTY, Hyphens::Allow);
250
251            if e.is_ok() {
252                return f.write_str(&label);
253            } else {
254                debug!(
255                    "xn-- prefixed string did not translate via IDNA properly: {:?}",
256                    e
257                )
258            }
259        }
260
261        // it wasn't known to be utf8
262        self.write_ascii(f)
263    }
264}
265
266impl Debug for Label {
267    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
268        let label = String::from_utf8_lossy(self.borrow());
269        f.write_str(&label)
270    }
271}
272
273impl PartialEq<Self> for Label {
274    fn eq(&self, other: &Self) -> bool {
275        self.eq_ignore_ascii_case(other)
276    }
277}
278
279impl PartialOrd<Self> for Label {
280    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
281        Some(self.cmp(other))
282    }
283}
284
285impl Ord for Label {
286    fn cmp(&self, other: &Self) -> Ordering {
287        self.cmp_with_f::<CaseInsensitive>(other)
288    }
289}
290
291impl Hash for Label {
292    fn hash<H>(&self, state: &mut H)
293    where
294        H: Hasher,
295    {
296        for b in self.borrow() as &[u8] {
297            state.write_u8(b.to_ascii_lowercase());
298        }
299    }
300}
301
/// Label comparison trait for case sensitive or insensitive comparisons
///
/// Implementations are selected through the type parameter of `Label::cmp_with_f`.
pub trait LabelCmp {
    /// Orders a single pair of label bytes; this should mimic the `cmp` method from [`Ord`]
    fn cmp_u8(l: u8, r: u8) -> Ordering;
}
307
308/// For case sensitive comparisons
309pub(super) struct CaseSensitive;
310
311impl LabelCmp for CaseSensitive {
312    fn cmp_u8(l: u8, r: u8) -> Ordering {
313        l.cmp(&r)
314    }
315}
316
317/// For case insensitive comparisons
318pub(super) struct CaseInsensitive;
319
320impl LabelCmp for CaseInsensitive {
321    fn cmp_u8(l: u8, r: u8) -> Ordering {
322        l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase())
323    }
324}
325
/// Conversion into a Label
///
/// Implemented in this file for `Label`, `&Label`, `&str`, `String`, `&[u8]` and `Vec<u8>`.
pub trait IntoLabel: Sized {
    /// Convert this into Label, returning an error if the value is not a valid label
    fn into_label(self) -> ProtoResult<Label>;
}
331
332impl IntoLabel for &Label {
333    fn into_label(self) -> ProtoResult<Label> {
334        Ok(self.clone())
335    }
336}
337
impl IntoLabel for Label {
    /// Returns the label itself; this never fails.
    fn into_label(self) -> ProtoResult<Label> {
        Ok(self)
    }
}
343
impl IntoLabel for &str {
    /// Treats the string as utf8 and converts it with `Label::from_utf8`.
    fn into_label(self) -> ProtoResult<Label> {
        Label::from_utf8(self)
    }
}
349
350impl IntoLabel for String {
351    fn into_label(self) -> ProtoResult<Label> {
352        Label::from_utf8(&self)
353    }
354}
355
impl IntoLabel for &[u8] {
    /// Uses the bytes verbatim, via `Label::from_raw_bytes`.
    fn into_label(self) -> ProtoResult<Label> {
        Label::from_raw_bytes(self)
    }
}
361
362impl IntoLabel for Vec<u8> {
363    fn into_label(self) -> ProtoResult<Label> {
364        Label::from_raw_bytes(&self)
365    }
366}
367
#[cfg(test)]
mod tests {
    #![allow(clippy::dbg_macro, clippy::print_stdout)]

    use super::*;

    #[test]
    fn test_encoding() {
        assert_eq!(
            Label::from_utf8("abc").unwrap(),
            Label::from_raw_bytes(b"abc").unwrap()
        );
        // case insensitive, this works...
        assert_eq!(
            Label::from_utf8("ABC").unwrap(),
            Label::from_raw_bytes(b"ABC").unwrap()
        );
        assert_eq!(
            Label::from_utf8("🦀").unwrap(),
            Label::from_raw_bytes(b"xn--zs9h").unwrap()
        );
        assert_eq!(
            Label::from_utf8("rust-🦀-icon").unwrap(),
            Label::from_raw_bytes(b"xn--rust--icon-9447i").unwrap()
        );
        assert_eq!(
            Label::from_ascii("ben.fry").unwrap(),
            Label::from_raw_bytes(b"ben.fry").unwrap()
        );
        assert_eq!(Label::from_utf8("🦀").unwrap().to_utf8(), "🦀");
        assert_eq!(Label::from_utf8("🦀").unwrap().to_ascii(), "xn--zs9h");
    }

    /// Asserts that `error` is an `Err` of kind `LabelBytesTooLong` which reports exactly
    /// `len` as the length of the rejected label.
    fn assert_panic_label_too_long(error: ProtoResult<Label>, len: usize) {
        // poor man's debug, since ProtoResult doesn't implement PartialEq due to ssl errors.
        eprintln!("{error:?}");
        assert!(error.is_err());
        match *error.unwrap_err().kind() {
            ProtoErrorKind::LabelBytesTooLong(n) if n == len => (),
            ProtoErrorKind::LabelBytesTooLong(n) => {
                // report both values, so that a failure shows what was actually returned
                panic!(
                    "LabelTooLongError reported a label size of {}, expected {}.",
                    n, len
                )
            }
            _ => panic!("Should have returned a LabelTooLongError"),
        }
    }

    #[test]
    fn test_label_too_long_ascii_with_utf8() {
        let label_too_long = "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_utf8(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji() {
        // emoji only: too long once punycode encoded (the reported length is pinned at 64)
        let emoji_case = "💜🦀🏖️🖥️😨🚀✨🤖💚🦾🦿😱😨✉️👺📚💻🗓️🤡🦀😈🚀💀⚡🦄";
        let error = Label::from_utf8(emoji_case);
        assert_panic_label_too_long(error, 64);
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji_mixed() {
        // too long mixed 65
        // Something international to say
        // "Hello I like autumn coffee 🦀 interesting"
        let emoji_case = "こんにちは-I-mögen-jesień-café-🦀-intéressant";
        let error = Label::from_utf8(emoji_case);
        assert_panic_label_too_long(error, 65);
    }

    #[test]
    fn test_label_too_long_utf8_puny_mixed() {
        // edge case, 64 octets long once encoded:
        // xn--testwithalonglabelinutf8tofitin63octetsisagoodhabit-f2106cqb
        let edge_case = "🦀testwithalonglabelinutf8tofitin63octetsisagoodhabit🦀";
        let error = Label::from_utf8(edge_case);
        assert_panic_label_too_long(error, 64);
    }

    #[test]
    fn test_label_too_long_raw() {
        let label_too_long = b"alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_raw_bytes(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    #[test]
    fn test_label_too_long_ascii() {
        let label_too_long = "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";
        let error = Label::from_ascii(label_too_long);
        assert_panic_label_too_long(error, label_too_long.len());
    }

    #[test]
    fn test_decoding() {
        assert_eq!(Label::from_raw_bytes(b"abc").unwrap().to_string(), "abc");
        assert_eq!(
            Label::from_raw_bytes(b"xn--zs9h").unwrap().to_string(),
            "🦀"
        );
        assert_eq!(
            Label::from_raw_bytes(b"xn--rust--icon-9447i")
                .unwrap()
                .to_string(),
            "rust-🦀-icon"
        );
    }

    #[test]
    fn test_from_ascii_adversial_utf8() {
        // non-ascii input must be rejected by the ascii constructor
        let expect_err = Label::from_ascii("🦀");
        assert!(expect_err.is_err());
    }

    #[test]
    fn test_to_lowercase() {
        // the stored case is preserved until `to_lowercase` is called explicitly
        assert_ne!(Label::from_ascii("ABC").unwrap().to_string(), "abc");
        assert_ne!(Label::from_ascii("abcDEF").unwrap().to_string(), "abcdef");
        assert_eq!(
            Label::from_ascii("ABC").unwrap().to_lowercase().to_string(),
            "abc"
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .to_lowercase()
                .to_string(),
            "abcdef"
        );
    }

    #[test]
    fn test_to_cmp_f() {
        assert_eq!(
            Label::from_ascii("ABC")
                .unwrap()
                .cmp_with_f::<CaseInsensitive>(&Label::from_ascii("abc").unwrap()),
            Ordering::Equal
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .cmp_with_f::<CaseInsensitive>(&Label::from_ascii("abcdef").unwrap()),
            Ordering::Equal
        );
        assert_eq!(
            Label::from_ascii("ABC")
                .unwrap()
                .cmp_with_f::<CaseSensitive>(&Label::from_ascii("abc").unwrap()),
            Ordering::Less
        );
        assert_eq!(
            Label::from_ascii("abcDEF")
                .unwrap()
                .cmp_with_f::<CaseSensitive>(&Label::from_ascii("abcdef").unwrap()),
            Ordering::Less
        );
    }

    #[test]
    fn test_partial_cmp() {
        // every pair is (smaller, larger) under the case insensitive ordering
        let comparisons: Vec<(Label, Label)> = vec![
            (
                Label::from_raw_bytes(b"yljkjljk").unwrap(),
                Label::from_raw_bytes(b"Z").unwrap(),
            ),
            (
                Label::from_raw_bytes(b"Z").unwrap(),
                Label::from_raw_bytes(b"zABC").unwrap(),
            ),
            (
                Label::from_raw_bytes(&[1]).unwrap(),
                Label::from_raw_bytes(b"*").unwrap(),
            ),
            (
                Label::from_raw_bytes(b"*").unwrap(),
                Label::from_raw_bytes(&[200]).unwrap(),
            ),
        ];

        for (left, right) in comparisons {
            println!("left: {left}, right: {right}");
            assert_eq!(left.cmp(&right), Ordering::Less);
        }
    }

    #[test]
    fn test_is_wildcard() {
        assert!(Label::from_raw_bytes(b"*").unwrap().is_wildcard());
        assert!(Label::from_ascii("*").unwrap().is_wildcard());
        assert!(Label::from_utf8("*").unwrap().is_wildcard());
        assert!(!Label::from_raw_bytes(b"abc").unwrap().is_wildcard());
    }

    #[test]
    fn test_ascii_escape() {
        // bytes without a printable form are written as a backslash and three octal digits
        assert_eq!(
            Label::from_raw_bytes(&[0o200]).unwrap().to_string(),
            "\\200"
        );
        assert_eq!(
            Label::from_raw_bytes(&[0o001]).unwrap().to_string(),
            "\\001"
        );
        assert_eq!(Label::from_ascii(".").unwrap().to_ascii(), "\\.");
        assert_eq!(
            Label::from_ascii("ben.fry").unwrap().to_string(),
            "ben\\.fry"
        );
        assert_eq!(Label::from_raw_bytes(&[0o200]).unwrap().to_ascii(), "\\200");
    }
}