unicode_ccc/
lib.rs

1/*!
2
3This library implements
4[Unicode Canonical Combining Class](https://unicode.org/reports/tr44/#Canonical_Combining_Class_Values) detection.
5
6```rust
7use unicode_ccc::*;
8
9assert_eq!(get_canonical_combining_class('A'), CanonicalCombiningClass::NotReordered);
10assert_eq!(get_canonical_combining_class('\u{0A3C}'), CanonicalCombiningClass::Nukta);
11assert_eq!(get_canonical_combining_class('\u{18A9}'), CanonicalCombiningClass::AboveLeft);
12```
13
14*/
15
16#![no_std]
17
18#![forbid(unsafe_code)]
19
/// The Unicode version targeted by this crate, as a `(major, minor, update)` tuple.
pub const UNICODE_VERSION: (u8, u8, u8) = (16, 0, 0);
22
/// Character Canonical Combining Class.
///
/// Each variant's discriminant is the numeric `Canonical_Combining_Class` value, so
/// `class as u8` yields the number used by the Unicode Character Database.
///
/// Variants named `CCCnn` are the fixed-position classes, which have no descriptive
/// name; the comments inside the enum note the script each group belongs to.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum CanonicalCombiningClass {
    /// Class 0 (`Not_Reordered`): the character does not take part in canonical reordering.
    NotReordered = 0,
    /// Class 1 (`Overlay`): marks which overlay a base letter or symbol.
    Overlay = 1,
    /// Class 6 (`Han_Reading`): diacritic reading marks for CJK unified ideographs.
    HanReading = 6,
    /// Class 7 (`Nukta`): diacritic nukta marks.
    Nukta = 7,
    /// Class 8 (`Kana_Voicing`): Hiragana/Katakana voicing marks.
    KanaVoicing = 8,
    /// Class 9 (`Virama`): viramas.
    Virama = 9,
    // Hebrew
    CCC10 = 10,
    CCC11 = 11,
    CCC12 = 12,
    CCC13 = 13,
    CCC14 = 14,
    CCC15 = 15,
    CCC16 = 16,
    CCC17 = 17,
    CCC18 = 18,
    CCC19 = 19,
    CCC20 = 20,
    CCC21 = 21,
    CCC22 = 22,
    CCC23 = 23,
    CCC24 = 24,
    CCC25 = 25,
    CCC26 = 26,
    // Arabic
    CCC27 = 27,
    CCC28 = 28,
    CCC29 = 29,
    CCC30 = 30,
    CCC31 = 31,
    CCC32 = 32,
    CCC33 = 33,
    CCC34 = 34,
    CCC35 = 35,
    // Syriac
    CCC36 = 36,
    // Telugu
    CCC84 = 84,
    CCC91 = 91,
    // Thai
    CCC103 = 103,
    CCC107 = 107,
    // Lao
    CCC118 = 118,
    CCC122 = 122,
    // Tibetan
    CCC129 = 129,
    CCC130 = 130,
    CCC132 = 132,
    /// Class 200 (`Attached_Below_Left`): marks attached at the bottom left.
    AttachedBelowLeft = 200,
    /// Class 202 (`Attached_Below`): marks attached directly below.
    AttachedBelow = 202,
    /// Class 214 (`Attached_Above`): marks attached directly above.
    AttachedAbove = 214,
    /// Class 216 (`Attached_Above_Right`): marks attached at the top right.
    AttachedAboveRight = 216,
    /// Class 218 (`Below_Left`): distinct marks at the bottom left.
    BelowLeft = 218,
    /// Class 220 (`Below`): distinct marks directly below.
    Below = 220,
    /// Class 222 (`Below_Right`): distinct marks at the bottom right.
    BelowRight = 222,
    /// Class 224 (`Left`): distinct marks to the left.
    Left = 224,
    /// Class 226 (`Right`): distinct marks to the right.
    Right = 226,
    /// Class 228 (`Above_Left`): distinct marks at the top left.
    AboveLeft = 228,
    /// Class 230 (`Above`): distinct marks directly above.
    Above = 230,
    /// Class 232 (`Above_Right`): distinct marks at the top right.
    AboveRight = 232,
    /// Class 233 (`Double_Below`): distinct marks subtending two bases.
    DoubleBelow = 233,
    /// Class 234 (`Double_Above`): distinct marks extending above two bases.
    DoubleAbove = 234,
    /// Class 240 (`Iota_Subscript`): Greek iota subscript.
    IotaSubscript = 240,
}
91
92/// Returns a Canonical Combining Class of a character.
93///
94/// Based on <https://www.unicode.org/Public/14.0.0/ucd/extracted/DerivedCombiningClass.txt>.
95pub fn get_canonical_combining_class(c: char) -> CanonicalCombiningClass {
96    use CanonicalCombiningClass::*;
97
98    match c as u32 {
99        0x0334..=0x0338 => Overlay,
100        0x1CD4 => Overlay,
101        0x1CE2..=0x1CE8 => Overlay,
102        0x20D2..=0x20D3 => Overlay,
103        0x20D8..=0x20DA => Overlay,
104        0x20E5..=0x20E6 => Overlay,
105        0x20EA..=0x20EB => Overlay,
106        0x10A39 => Overlay,
107        0x16AF0..=0x16AF4 => Overlay,
108        0x1BC9E => Overlay,
109        0x1D167..=0x1D169 => Overlay,
110        0x16FF0..=0x16FF1 => HanReading,
111        0x093C => Nukta,
112        0x09BC => Nukta,
113        0x0A3C => Nukta,
114        0x0ABC => Nukta,
115        0x0B3C => Nukta,
116        0x0C3C => Nukta,
117        0x0CBC => Nukta,
118        0x1037 => Nukta,
119        0x1B34 => Nukta,
120        0x1BE6 => Nukta,
121        0x1C37 => Nukta,
122        0xA9B3 => Nukta,
123        0x110BA => Nukta,
124        0x11173 => Nukta,
125        0x111CA => Nukta,
126        0x11236 => Nukta,
127        0x112E9 => Nukta,
128        0x1133B..=0x1133C => Nukta,
129        0x11446 => Nukta,
130        0x114C3 => Nukta,
131        0x115C0 => Nukta,
132        0x116B7 => Nukta,
133        0x1183A => Nukta,
134        0x11943 => Nukta,
135        0x11D42 => Nukta,
136        0x1E94A => Nukta,
137        0x3099..=0x309A => KanaVoicing,
138        0x094D => Virama,
139        0x09CD => Virama,
140        0x0A4D => Virama,
141        0x0ACD => Virama,
142        0x0B4D => Virama,
143        0x0BCD => Virama,
144        0x0C4D => Virama,
145        0x0CCD => Virama,
146        0x0D3B..=0x0D3C => Virama,
147        0x0D4D => Virama,
148        0x0DCA => Virama,
149        0x0E3A => Virama,
150        0x0EBA => Virama,
151        0x0F84 => Virama,
152        0x1039..=0x103A => Virama,
153        0x1714 => Virama,
154        0x1715 => Virama,
155        0x1734 => Virama,
156        0x17D2 => Virama,
157        0x1A60 => Virama,
158        0x1B44 => Virama,
159        0x1BAA => Virama,
160        0x1BAB => Virama,
161        0x1BF2..=0x1BF3 => Virama,
162        0x2D7F => Virama,
163        0xA806 => Virama,
164        0xA82C => Virama,
165        0xA8C4 => Virama,
166        0xA953 => Virama,
167        0xA9C0 => Virama,
168        0xAAF6 => Virama,
169        0xABED => Virama,
170        0x10A3F => Virama,
171        0x11046 => Virama,
172        0x11070 => Virama,
173        0x1107F => Virama,
174        0x110B9 => Virama,
175        0x11133..=0x11134 => Virama,
176        0x111C0 => Virama,
177        0x11235 => Virama,
178        0x112EA => Virama,
179        0x1134D => Virama,
180        0x113CE => Virama,
181        0x113CF => Virama,
182        0x113D0 => Virama,
183        0x11442 => Virama,
184        0x114C2 => Virama,
185        0x115BF => Virama,
186        0x1163F => Virama,
187        0x116B6 => Virama,
188        0x1172B => Virama,
189        0x11839 => Virama,
190        0x1193D => Virama,
191        0x1193E => Virama,
192        0x119E0 => Virama,
193        0x11A34 => Virama,
194        0x11A47 => Virama,
195        0x11A99 => Virama,
196        0x11C3F => Virama,
197        0x11D44..=0x11D45 => Virama,
198        0x11D97 => Virama,
199        0x11F41 => Virama,
200        0x11F42 => Virama,
201        0x1612F => Virama,
202        0x05B0 => CCC10,
203        0x05B1 => CCC11,
204        0x05B2 => CCC12,
205        0x05B3 => CCC13,
206        0x05B4 => CCC14,
207        0x05B5 => CCC15,
208        0x05B6 => CCC16,
209        0x05B7 => CCC17,
210        0x05B8 => CCC18,
211        0x05C7 => CCC18,
212        0x05B9..=0x05BA => CCC19,
213        0x05BB => CCC20,
214        0x05BC => CCC21,
215        0x05BD => CCC22,
216        0x05BF => CCC23,
217        0x05C1 => CCC24,
218        0x05C2 => CCC25,
219        0xFB1E => CCC26,
220        0x064B => CCC27,
221        0x08F0 => CCC27,
222        0x064C => CCC28,
223        0x08F1 => CCC28,
224        0x064D => CCC29,
225        0x08F2 => CCC29,
226        0x0618 => CCC30,
227        0x064E => CCC30,
228        0x0619 => CCC31,
229        0x064F => CCC31,
230        0x061A => CCC32,
231        0x0650 => CCC32,
232        0x0651 => CCC33,
233        0x0652 => CCC34,
234        0x0670 => CCC35,
235        0x0711 => CCC36,
236        0x0C55 => CCC84,
237        0x0C56 => CCC91,
238        0x0E38..=0x0E39 => CCC103,
239        0x0E48..=0x0E4B => CCC107,
240        0x0EB8..=0x0EB9 => CCC118,
241        0x0EC8..=0x0ECB => CCC122,
242        0x0F71 => CCC129,
243        0x0F72 => CCC130,
244        0x0F7A..=0x0F7D => CCC130,
245        0x0F80 => CCC130,
246        0x0F74 => CCC132,
247        0x0321..=0x0322 => AttachedBelow,
248        0x0327..=0x0328 => AttachedBelow,
249        0x1DD0 => AttachedBelow,
250        0x1DCE => AttachedAbove,
251        0x031B => AttachedAboveRight,
252        0x0F39 => AttachedAboveRight,
253        0x1D165..=0x1D166 => AttachedAboveRight,
254        0x1D16E..=0x1D172 => AttachedAboveRight,
255        0x1DFA => BelowLeft,
256        0x302A => BelowLeft,
257        0x0316..=0x0319 => Below,
258        0x031C..=0x0320 => Below,
259        0x0323..=0x0326 => Below,
260        0x0329..=0x0333 => Below,
261        0x0339..=0x033C => Below,
262        0x0347..=0x0349 => Below,
263        0x034D..=0x034E => Below,
264        0x0353..=0x0356 => Below,
265        0x0359..=0x035A => Below,
266        0x0591 => Below,
267        0x0596 => Below,
268        0x059B => Below,
269        0x05A2..=0x05A7 => Below,
270        0x05AA => Below,
271        0x05C5 => Below,
272        0x0655..=0x0656 => Below,
273        0x065C => Below,
274        0x065F => Below,
275        0x06E3 => Below,
276        0x06EA => Below,
277        0x06ED => Below,
278        0x0731 => Below,
279        0x0734 => Below,
280        0x0737..=0x0739 => Below,
281        0x073B..=0x073C => Below,
282        0x073E => Below,
283        0x0742 => Below,
284        0x0744 => Below,
285        0x0746 => Below,
286        0x0748 => Below,
287        0x07F2 => Below,
288        0x07FD => Below,
289        0x0859..=0x085B => Below,
290        0x0899..=0x089B => Below,
291        0x08CF..=0x08D3 => Below,
292        0x08E3 => Below,
293        0x08E6 => Below,
294        0x08E9 => Below,
295        0x08ED..=0x08EF => Below,
296        0x08F6 => Below,
297        0x08F9..=0x08FA => Below,
298        0x0952 => Below,
299        0x0F18..=0x0F19 => Below,
300        0x0F35 => Below,
301        0x0F37 => Below,
302        0x0FC6 => Below,
303        0x108D => Below,
304        0x193B => Below,
305        0x1A18 => Below,
306        0x1A7F => Below,
307        0x1AB5..=0x1ABA => Below,
308        0x1ABD => Below,
309        0x1ABF..=0x1AC0 => Below,
310        0x1AC3..=0x1AC4 => Below,
311        0x1ACA => Below,
312        0x1B6C => Below,
313        0x1CD5..=0x1CD9 => Below,
314        0x1CDC..=0x1CDF => Below,
315        0x1CED => Below,
316        0x1DC2 => Below,
317        0x1DCA => Below,
318        0x1DCF => Below,
319        0x1DF9 => Below,
320        0x1DFD => Below,
321        0x1DFF => Below,
322        0x20E8 => Below,
323        0x20EC..=0x20EF => Below,
324        0xA92B..=0xA92D => Below,
325        0xAAB4 => Below,
326        0xFE27..=0xFE2D => Below,
327        0x101FD => Below,
328        0x102E0 => Below,
329        0x10A0D => Below,
330        0x10A3A => Below,
331        0x10AE6 => Below,
332        0x10EFD..=0x10EFF => Below,
333        0x10F46..=0x10F47 => Below,
334        0x10F4B => Below,
335        0x10F4D..=0x10F50 => Below,
336        0x10F83 => Below,
337        0x10F85 => Below,
338        0x1D17B..=0x1D182 => Below,
339        0x1D18A..=0x1D18B => Below,
340        0x1E4EE => Below,
341        0x1E5EF => Below,
342        0x1E8D0..=0x1E8D6 => Below,
343        0x059A => BelowRight,
344        0x05AD => BelowRight,
345        0x1939 => BelowRight,
346        0x302D => BelowRight,
347        0x302E..=0x302F => Left,
348        0x1D16D => Right,
349        0x05AE => AboveLeft,
350        0x18A9 => AboveLeft,
351        0x1DF7..=0x1DF8 => AboveLeft,
352        0x302B => AboveLeft,
353        0x0300..=0x0314 => Above,
354        0x033D..=0x0344 => Above,
355        0x0346 => Above,
356        0x034A..=0x034C => Above,
357        0x0350..=0x0352 => Above,
358        0x0357 => Above,
359        0x035B => Above,
360        0x0363..=0x036F => Above,
361        0x0483..=0x0487 => Above,
362        0x0592..=0x0595 => Above,
363        0x0597..=0x0599 => Above,
364        0x059C..=0x05A1 => Above,
365        0x05A8..=0x05A9 => Above,
366        0x05AB..=0x05AC => Above,
367        0x05AF => Above,
368        0x05C4 => Above,
369        0x0610..=0x0617 => Above,
370        0x0653..=0x0654 => Above,
371        0x0657..=0x065B => Above,
372        0x065D..=0x065E => Above,
373        0x06D6..=0x06DC => Above,
374        0x06DF..=0x06E2 => Above,
375        0x06E4 => Above,
376        0x06E7..=0x06E8 => Above,
377        0x06EB..=0x06EC => Above,
378        0x0730 => Above,
379        0x0732..=0x0733 => Above,
380        0x0735..=0x0736 => Above,
381        0x073A => Above,
382        0x073D => Above,
383        0x073F..=0x0741 => Above,
384        0x0743 => Above,
385        0x0745 => Above,
386        0x0747 => Above,
387        0x0749..=0x074A => Above,
388        0x07EB..=0x07F1 => Above,
389        0x07F3 => Above,
390        0x0816..=0x0819 => Above,
391        0x081B..=0x0823 => Above,
392        0x0825..=0x0827 => Above,
393        0x0829..=0x082D => Above,
394        0x0897..=0x0898 => Above,
395        0x089C..=0x089F => Above,
396        0x08CA..=0x08CE => Above,
397        0x08D4..=0x08E1 => Above,
398        0x08E4..=0x08E5 => Above,
399        0x08E7..=0x08E8 => Above,
400        0x08EA..=0x08EC => Above,
401        0x08F3..=0x08F5 => Above,
402        0x08F7..=0x08F8 => Above,
403        0x08FB..=0x08FF => Above,
404        0x0951 => Above,
405        0x0953..=0x0954 => Above,
406        0x09FE => Above,
407        0x0F82..=0x0F83 => Above,
408        0x0F86..=0x0F87 => Above,
409        0x135D..=0x135F => Above,
410        0x17DD => Above,
411        0x193A => Above,
412        0x1A17 => Above,
413        0x1A75..=0x1A7C => Above,
414        0x1AB0..=0x1AB4 => Above,
415        0x1ABB..=0x1ABC => Above,
416        0x1AC1..=0x1AC2 => Above,
417        0x1AC5..=0x1AC9 => Above,
418        0x1ACB..=0x1ACE => Above,
419        0x1B6B => Above,
420        0x1B6D..=0x1B73 => Above,
421        0x1CD0..=0x1CD2 => Above,
422        0x1CDA..=0x1CDB => Above,
423        0x1CE0 => Above,
424        0x1CF4 => Above,
425        0x1CF8..=0x1CF9 => Above,
426        0x1DC0..=0x1DC1 => Above,
427        0x1DC3..=0x1DC9 => Above,
428        0x1DCB..=0x1DCC => Above,
429        0x1DD1..=0x1DF5 => Above,
430        0x1DFB => Above,
431        0x1DFE => Above,
432        0x20D0..=0x20D1 => Above,
433        0x20D4..=0x20D7 => Above,
434        0x20DB..=0x20DC => Above,
435        0x20E1 => Above,
436        0x20E7 => Above,
437        0x20E9 => Above,
438        0x20F0 => Above,
439        0x2CEF..=0x2CF1 => Above,
440        0x2DE0..=0x2DFF => Above,
441        0xA66F => Above,
442        0xA674..=0xA67D => Above,
443        0xA69E..=0xA69F => Above,
444        0xA6F0..=0xA6F1 => Above,
445        0xA8E0..=0xA8F1 => Above,
446        0xAAB0 => Above,
447        0xAAB2..=0xAAB3 => Above,
448        0xAAB7..=0xAAB8 => Above,
449        0xAABE..=0xAABF => Above,
450        0xAAC1 => Above,
451        0xFE20..=0xFE26 => Above,
452        0xFE2E..=0xFE2F => Above,
453        0x10376..=0x1037A => Above,
454        0x10A0F => Above,
455        0x10A38 => Above,
456        0x10AE5 => Above,
457        0x10D24..=0x10D27 => Above,
458        0x10D69..=0x10D6D => Above,
459        0x10EAB..=0x10EAC => Above,
460        0x10F48..=0x10F4A => Above,
461        0x10F4C => Above,
462        0x10F82 => Above,
463        0x10F84 => Above,
464        0x11100..=0x11102 => Above,
465        0x11366..=0x1136C => Above,
466        0x11370..=0x11374 => Above,
467        0x1145E => Above,
468        0x16B30..=0x16B36 => Above,
469        0x1D185..=0x1D189 => Above,
470        0x1D1AA..=0x1D1AD => Above,
471        0x1D242..=0x1D244 => Above,
472        0x1E000..=0x1E006 => Above,
473        0x1E008..=0x1E018 => Above,
474        0x1E01B..=0x1E021 => Above,
475        0x1E023..=0x1E024 => Above,
476        0x1E026..=0x1E02A => Above,
477        0x1E08F => Above,
478        0x1E130..=0x1E136 => Above,
479        0x1E2AE => Above,
480        0x1E2EC..=0x1E2EF => Above,
481        0x1E4EF => Above,
482        0x1E5EE => Above,
483        0x1E944..=0x1E949 => Above,
484        0x0315 => AboveRight,
485        0x031A => AboveRight,
486        0x0358 => AboveRight,
487        0x1DF6 => AboveRight,
488        0x302C => AboveRight,
489        0x1E4EC..=0x1E4ED => AboveRight,
490        0x035C => DoubleBelow,
491        0x035F => DoubleBelow,
492        0x0362 => DoubleBelow,
493        0x1DFC => DoubleBelow,
494        0x035D..=0x035E => DoubleAbove,
495        0x0360..=0x0361 => DoubleAbove,
496        0x1DCD => DoubleAbove,
497        0x0345 => IotaSubscript,
498        _ => NotReordered,
499    }
500}