unic_ucd_normal/
canonical_combining_class.rs

1// Copyright 2015 The Servo Project Developers.
2// Copyright 2017 The UNIC Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! Accessor for `Canonical_Combining_Class` (ccc) property
13//!
14//! Reference: <http://unicode.org/reports/tr44/#Canonical_Combining_Class_Values>
15
16use core::fmt;
17
18use unic_char_property::{CharProperty, NumericCharProperty, TotalCharProperty};
19
/// The `Canonical_Combining_Class` (*ccc*) value of a Unicode character.
///
/// This is a newtype over the numeric class value (`u8`). The derived
/// comparison traits order and compare values by that number, and the derived
/// `Default` is the class with value `0`.
///
/// * <http://unicode.org/reports/tr44/#Canonical_Combining_Class>
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CanonicalCombiningClass(u8);
25
26impl CharProperty for CanonicalCombiningClass {
27    fn prop_abbr_name() -> &'static str {
28        "ccc"
29    }
30
31    fn prop_long_name() -> &'static str {
32        "Canonical_Combining_Class"
33    }
34
35    fn prop_human_name() -> &'static str {
36        "Canonical Combining Class"
37    }
38}
39
40impl TotalCharProperty for CanonicalCombiningClass {
41    fn of(ch: char) -> Self {
42        Self::of(ch)
43    }
44}
45
46impl NumericCharProperty<u8> for CanonicalCombiningClass {
47    /// Get numeric value for character property value
48    fn number(&self) -> u8 {
49        Self::number(self)
50    }
51}
52
/// Lookup data for the `Canonical_Combining_Class` property, kept in a
/// separate table file and pulled in at compile time.
mod data {
    // NOTE(review): these imports are presumably also relied upon by the
    // expression brought in through `include!` below — confirm against the
    // table file before removing or renaming either of them.
    use super::CanonicalCombiningClass;
    use unic_char_property::tables::CharDataTable;
    /// Table from characters to their `CanonicalCombiningClass`; the value is
    /// the expression stored in `../tables/canonical_combining_class_values.rsv`.
    pub const CANONICAL_COMBINING_CLASS_VALUES: CharDataTable<CanonicalCombiningClass> =
        include!("../tables/canonical_combining_class_values.rsv");
}
59
60#[cfg_attr(rustfmt, rustfmt_skip)]  // We want the consts ordered by value.
61#[allow(non_upper_case_globals)]
62impl CanonicalCombiningClass {
63    /// Find the character `Canonical_Combining_Class` property value.
64    pub fn of(ch: char) -> CanonicalCombiningClass {
65        data::CANONICAL_COMBINING_CLASS_VALUES.find_or_default(ch)
66    }
67
68    // == Named values ==
69    // TODO(DOCS): Add reference.
70    /// Spacing and enclosing marks; also many vowel and consonant signs, even if nonspacing
71    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0);
72    /// Marks which overlay a base letter or symbol
73    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1);
74    /// Diacritic nukta marks in Brahmi-derived scripts
75    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7);
76    /// Hiragana/Katakana voicing marks
77    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8);
78    /// Viramas
79    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9);
80    /// Marks attached at the bottom left
81    pub const AttatchedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200);
82    /// Marks attached directly below
83    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202);
84    /// Marks attached at the bottom right
85    pub const AttachedBelowRight: CanonicalCombiningClass = CanonicalCombiningClass(204);
86    /// Marks attached to the left
87    pub const AttachedLeft: CanonicalCombiningClass = CanonicalCombiningClass(208);
88    /// Marks attached to the right
89    pub const AttachedRight: CanonicalCombiningClass = CanonicalCombiningClass(210);
90    /// Marks attached at the top left
91    pub const AttachedAboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(212);
92    /// Marks attached directly above
93    pub const AttatchedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214);
94    /// Marks attached at the top right
95    pub const AttatchedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216);
96    /// Distinct marks at the bottom left
97    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218);
98    /// Distinct marks directly below
99    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220);
100    /// Distinct marks at the bottom right
101    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222);
102    /// Distinct marks to the left
103    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224);
104    /// Distinct marks to the right
105    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226);
106    /// Distinct marks at the top left
107    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228);
108    /// Distinct marks directly above
109    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230);
110    /// Distinct marks at the top right
111    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232);
112    /// Distinct marks subtending two bases
113    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233);
114    /// Distinct marks extending above two bases
115    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234);
116    /// Greek iota subscript only
117    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240);
118}
119
120impl fmt::Display for CanonicalCombiningClass {
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        write!(f, "{}", self.number())
123    }
124}
125
126impl CanonicalCombiningClass {
127    /// Get numeric `Canonical_Combining_Class` value
128    pub fn number(&self) -> u8 {
129        self.0
130    }
131
132    /// If the *ccc* has value `Not_Reordered` (`0`).
133    pub fn is_not_reordered(&self) -> bool {
134        self.0 == 0
135    }
136
137    /// If the *ccc* any value other than `Not_Reordered` (`0`).
138    pub fn is_reordered(&self) -> bool {
139        self.0 != 0
140    }
141}
142
#[cfg(test)]
mod tests {
    //! Spot checks of `CanonicalCombiningClass::of` against known characters,
    //! plus checks of the value accessors and predicates.

    use super::CanonicalCombiningClass as CCC;

    #[test]
    fn test_ascii() {
        assert_eq!(CCC::of('\u{0000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0040}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0041}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0062}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{007F}'), CCC::NotReordered);
    }

    #[test]
    fn test_bmp() {
        // Combining Diacritical Marks
        assert_eq!(CCC::of('\u{0300}'), CCC::Above);
        assert_eq!(CCC::of('\u{0314}'), CCC::Above);
        assert_eq!(CCC::of('\u{0315}'), CCC::AboveRight);
        assert_eq!(CCC::of('\u{0316}'), CCC::Below);
        assert_eq!(CCC::of('\u{0319}'), CCC::Below);

        // Hebrew
        assert_eq!(CCC::of('\u{0590}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{05D0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{05D1}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{05FF}'), CCC::NotReordered);

        // Arabic
        assert_eq!(CCC::of('\u{0600}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0627}'), CCC::NotReordered);
        // These marks have fixed-position classes with no named constant.
        assert_eq!(CCC::of('\u{064B}'), CCC(27));
        assert_eq!(CCC::of('\u{064C}'), CCC(28));
        assert_eq!(CCC::of('\u{064D}'), CCC(29));
        assert_eq!(CCC::of('\u{064E}'), CCC(30));
        assert_eq!(CCC::of('\u{064F}'), CCC(31));
        assert_eq!(CCC::of('\u{0650}'), CCC(32));
        assert_eq!(CCC::of('\u{0651}'), CCC(33));
        assert_eq!(CCC::of('\u{0652}'), CCC(34));

        assert_eq!(CCC::of('\u{07BF}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{07C0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{085F}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0860}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{0870}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{089F}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{08A0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{08FF}'), CCC::Above);

        //  Currency Symbols
        assert_eq!(CCC::of('\u{20A0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{20CF}'), CCC::NotReordered);

        // Alphabetic Presentation Forms (Hebrew part) and Arabic Presentation Forms
        assert_eq!(CCC::of('\u{FB1D}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FB4F}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FB50}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDCF}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDF0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDFF}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FE70}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FEFE}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FEFF}'), CCC::NotReordered);

        // noncharacters
        assert_eq!(CCC::of('\u{FDD0}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDD1}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDEE}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FDEF}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FFFE}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{FFFF}'), CCC::NotReordered);
    }

    #[test]
    fn test_smp() {
        assert_eq!(CCC::of('\u{10000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{101fc}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{101fd}'), CCC::Below);
        assert_eq!(CCC::of('\u{101fe}'), CCC::NotReordered);

        assert_eq!(CCC::of('\u{1e000}'), CCC::Above);

        assert_eq!(CCC::of('\u{1e949}'), CCC::Above);
        assert_eq!(CCC::of('\u{1e94a}'), CCC::Nukta);
        assert_eq!(CCC::of('\u{1e94b}'), CCC::NotReordered);

        assert_eq!(CCC::of('\u{1efff}'), CCC::NotReordered);

        // noncharacters
        assert_eq!(CCC::of('\u{1fffe}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{1ffff}'), CCC::NotReordered);
    }

    #[test]
    fn test_unassigned_planes() {
        assert_eq!(CCC::of('\u{30000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{40000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{50000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{60000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{70000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{80000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{90000}'), CCC::NotReordered);
        assert_eq!(CCC::of('\u{a0000}'), CCC::NotReordered);
    }

    #[test]
    fn test_number() {
        assert_eq!(CCC::of('\u{0000}').number(), 0);
        assert_eq!(CCC::of('\u{0300}').number(), 230);
        assert_eq!(CCC::of('\u{0315}').number(), 232);
        assert_eq!(CCC::of('\u{1e94a}').number(), 7);
    }

    #[test]
    fn test_predicates() {
        // Named values
        assert!(CCC::NotReordered.is_not_reordered());
        assert!(!CCC::NotReordered.is_reordered());
        assert!(CCC::Above.is_reordered());
        assert!(!CCC::Above.is_not_reordered());

        // Looked-up values
        assert!(CCC::of('\u{0041}').is_not_reordered());
        assert!(!CCC::of('\u{0041}').is_reordered());
        assert!(CCC::of('\u{0300}').is_reordered());
        assert!(!CCC::of('\u{0300}').is_not_reordered());
    }

    #[test]
    fn test_default() {
        assert_eq!(CCC::default(), CCC::NotReordered);
        assert_eq!(CCC::default().number(), 0);
    }
}