//! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
//! a string conforms to.
use crate::mixed_script::AugmentedScriptSet;
use crate::GeneralSecurityProfile;
use unicode_script::Script;
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
/// The [Restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from the most restrictive level (`ASCIIOnly`) to the
/// least restrictive one (`Unrestricted`). The derived `PartialOrd`/`Ord`
/// compare variants in declaration order, so a "smaller" value is a stricter
/// level. Reordering the variants would change the result of every ordering
/// comparison between levels.
pub enum RestrictionLevel {
/// ASCII-Only level; see <https://www.unicode.org/reports/tr39/#ascii_only>
ASCIIOnly,
/// Single Script level; see <https://www.unicode.org/reports/tr39/#single_script>
SingleScript,
/// Highly Restrictive level; see <https://www.unicode.org/reports/tr39/#highly_restrictive>
HighlyRestrictive,
/// Moderately Restrictive level; see <https://www.unicode.org/reports/tr39/#moderately_restrictive>
ModeratelyRestrictive,
/// Minimally Restrictive level; see <https://www.unicode.org/reports/tr39/#minimally_restrictive>
MinimallyRestrictive,
/// Unrestricted level; see <https://www.unicode.org/reports/tr39/#unrestricted>
Unrestricted,
}
/// Restriction-level queries for string-like values, following
/// [UTS #39 Restriction-Level Detection](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection).
///
/// Both methods take `self` by value, hence the `Sized` bound.
pub trait RestrictionLevelDetection: Sized {
    /// Compute the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
    /// of `self`.
    ///
    /// Identifier well-formedness is _not_ checked here, because different
    /// applications may have different notions of well-formedness.
    fn detect_restriction_level(self) -> RestrictionLevel;

    /// Report whether `self` satisfies the given
    /// [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection).
    ///
    /// Returns `true` when the level reported by
    /// [`detect_restriction_level`](Self::detect_restriction_level) compares
    /// less than or equal to `level` under `RestrictionLevel`'s ordering.
    ///
    /// Identifier well-formedness is _not_ checked here, because different
    /// applications may have different notions of well-formedness.
    fn check_restriction_level(self, level: RestrictionLevel) -> bool {
        let detected = self.detect_restriction_level();
        detected <= level
    }
}
impl RestrictionLevelDetection for &'_ str {
fn detect_restriction_level(self) -> RestrictionLevel {
let mut ascii_only = true;
let mut set = AugmentedScriptSet::default();
let mut exclude_latin_set = AugmentedScriptSet::default();
for ch in self.chars() {
if !GeneralSecurityProfile::identifier_allowed(ch) {
return RestrictionLevel::Unrestricted;
}
if !ch.is_ascii() {
ascii_only = false;
}
let ch_set = ch.into();
set.intersect_with(ch_set);
if !ch_set.base.contains_script(Script::Latin) {
exclude_latin_set.intersect_with(ch_set);
}
}
if ascii_only {
return RestrictionLevel::ASCIIOnly;
} else if !set.is_empty() {
return RestrictionLevel::SingleScript;
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
return RestrictionLevel::HighlyRestrictive;
} else if exclude_latin_set.base.len() == 1 {
let script = exclude_latin_set.base.iter().next().unwrap();
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
return RestrictionLevel::ModeratelyRestrictive;
}
}
return RestrictionLevel::MinimallyRestrictive;
}
}