unic_ucd_normal/
gen_cat.rs

1// Copyright 2015 The Servo Project Developers.
2// Copyright 2017 The UNIC Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12#[cfg(not(feature = "unic-ucd-category"))]
13mod mark {
14    use unic_char_property::tables::CharDataTable;
15
16    const GENERAL_CATEGORY_MARK: CharDataTable<()> =
17        include!("../tables/general_category_mark.rsv");
18
19    /// Return whether the given character is a combining mark (`General_Category=Mark`)
20    pub fn is_combining_mark(c: char) -> bool {
21        CharDataTable::<()>::find(&GENERAL_CATEGORY_MARK, c).is_some()
22    }
23}
24
#[cfg(feature = "unic-ucd-category")]
mod mark {
    // Delegating implementation: compiled only when the `unic-ucd-category`
    // feature is enabled, so the answer comes from that crate's `GeneralCategory`.
    use unic_ucd_category::GeneralCategory;

    /// Tell whether `c` is a combining mark (`General_Category=Mark`).
    ///
    /// Returns whatever `GeneralCategory::of(c).is_mark()` reports.
    pub fn is_combining_mark(c: char) -> bool {
        let category = GeneralCategory::of(c);
        category.is_mark()
    }
}
36
37pub use self::mark::is_combining_mark;
38
#[cfg(test)]
mod tests {
    use super::*;
    use core::char;

    /// No ASCII code point (U+0000 through U+007F) may be reported as a combining mark.
    #[test]
    fn test_is_combining_mark_ascii() {
        // The upper bound of the range is exclusive, so it must be 0x80 for the
        // loop to also cover U+007F (DEL), the last ASCII code point.
        for cp in 0..0x80 {
            // Every value below 0x80 is a valid Unicode scalar value, so this cannot fail.
            let c = char::from_u32(cp).unwrap();
            assert!(
                !is_combining_mark(c),
                "U+{:04X} must not be a combining mark",
                cp
            );
        }
    }

    // TODO: Add more tests for edge cases, Hangul comp/decomp, etc

    /// Spot-checks for characters that must be reported as combining marks.
    #[test]
    fn test_is_combining_mark_misc() {
        // https://github.com/unicode-rs/unicode-normalization/issues/16
        // U+11C3A BHAIKSUKI VOWEL SIGN O
        // Category: Mark, Nonspacing [Mn]
        assert!(is_combining_mark('\u{11C3A}'));

        // U+11C3F BHAIKSUKI SIGN VIRAMA
        // Category: Mark, Nonspacing [Mn]
        assert!(is_combining_mark('\u{11C3F}'));
    }
}