1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
//! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
//! a string conforms to

use crate::mixed_script::AugmentedScriptSet;
use crate::GeneralSecurityProfile;
use unicode_script::Script;

#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
/// The [Restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from the most restrictive level to the least restrictive one.
/// The derived `PartialOrd`/`Ord` follow declaration order, so `ASCIIOnly` is the smallest
/// value and `Unrestricted` the largest; a string that conforms to a level also passes a
/// `<=` comparison against every later (more permissive) level.
pub enum RestrictionLevel {
    /// Every character of the string is in the ASCII range.
    ///
    /// See <https://www.unicode.org/reports/tr39/#ascii_only>
    ASCIIOnly,
    /// The string is not ASCII-only, but its characters share at least one common script.
    ///
    /// See <https://www.unicode.org/reports/tr39/#single_script>
    SingleScript,
    /// The string mixes Latin with one of the CJK script combinations permitted by the
    /// specification.
    ///
    /// See <https://www.unicode.org/reports/tr39/#highly_restrictive>
    HighlyRestrictive,
    /// The string mixes Latin with one other Recommended script, excluding Cyrillic and
    /// Greek.
    ///
    /// See <https://www.unicode.org/reports/tr39/#moderately_restrictive>
    ModeratelyRestrictive,
    /// The string satisfies the identifier profile, but none of the stricter script
    /// constraints.
    ///
    /// See <https://www.unicode.org/reports/tr39/#minimally_restrictive>
    MinimallyRestrictive,
    /// The string contains at least one character rejected by the identifier profile.
    ///
    /// See <https://www.unicode.org/reports/tr39/#unrestricted>
    Unrestricted,
}

/// Queries about the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// of a string-like value.
pub trait RestrictionLevelDetection: Sized {
    /// Compute the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
    /// that `self` conforms to.
    ///
    /// Identifier well-formedness is _not_ verified here, because applications differ in
    /// what they consider a well-formed identifier.
    fn detect_restriction_level(self) -> RestrictionLevel;

    /// Report whether `self` conforms to `level`, i.e. whether its detected
    /// [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
    /// is `level` or a level that orders before it.
    ///
    /// Identifier well-formedness is _not_ verified here, because applications differ in
    /// what they consider a well-formed identifier.
    fn check_restriction_level(self, level: RestrictionLevel) -> bool {
        let detected = self.detect_restriction_level();
        // Levels order from most to least restrictive, so anything at or below `level` passes.
        level >= detected
    }
}

impl RestrictionLevelDetection for &'_ str {
    fn detect_restriction_level(self) -> RestrictionLevel {
        let mut ascii_only = true;
        let mut set = AugmentedScriptSet::default();
        let mut exclude_latin_set = AugmentedScriptSet::default();
        for ch in self.chars() {
            if !GeneralSecurityProfile::identifier_allowed(ch) {
                return RestrictionLevel::Unrestricted;
            }
            if !ch.is_ascii() {
                ascii_only = false;
            }
            let ch_set = ch.into();
            set.intersect_with(ch_set);
            if !ch_set.base.contains_script(Script::Latin) {
                exclude_latin_set.intersect_with(ch_set);
            }
        }

        if ascii_only {
            return RestrictionLevel::ASCIIOnly;
        } else if !set.is_empty() {
            return RestrictionLevel::SingleScript;
        } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
            return RestrictionLevel::HighlyRestrictive;
        } else if exclude_latin_set.base.len() == 1 {
            let script = exclude_latin_set.base.iter().next().unwrap();
            if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
                return RestrictionLevel::ModeratelyRestrictive;
            }
        }
        return RestrictionLevel::MinimallyRestrictive;
    }
}