unic_ucd_normal/
gen_cat.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Copyright 2015 The Servo Project Developers.
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#[cfg(not(feature = "unic-ucd-category"))]
mod mark {
    use unic_char_property::tables::CharDataTable;

    const GENERAL_CATEGORY_MARK: CharDataTable<()> =
        include!("../tables/general_category_mark.rsv");

    /// Return whether the given character is a combining mark (`General_Category=Mark`)
    pub fn is_combining_mark(c: char) -> bool {
        CharDataTable::<()>::find(&GENERAL_CATEGORY_MARK, c).is_some()
    }
}

#[cfg(feature = "unic-ucd-category")]
mod mark {
    use unic_ucd_category;

    use self::unic_ucd_category::GeneralCategory;

    /// Return whether the given character is a combining mark (`General_Category=Mark`)
    pub fn is_combining_mark(c: char) -> bool {
        GeneralCategory::of(c).is_mark()
    }
}

pub use self::mark::is_combining_mark;

#[cfg(test)]
mod tests {
    use super::*;
    use core::char;

    #[test]
    fn test_is_combining_mark_ascii() {
        for cp in 0..0x7f {
            assert!(!is_combining_mark(char::from_u32(cp).unwrap()));
        }
    }

    // TODO: Add more tests for edge cases, Hangul comp/decomp, etc

    #[test]
    fn test_is_combining_mark_misc() {
        // https://github.com/unicode-rs/unicode-normalization/issues/16
        // U+11C3A BHAIKSUKI VOWEL SIGN O
        // Category: Mark, Nonspacing [Mn]
        assert!(is_combining_mark('\u{11C3A}'));

        // U+11C3F BHAIKSUKI SIGN VIRAMA
        // Category: Mark, Nonspacing [Mn]
        assert!(is_combining_mark('\u{11C3F}'));
    }
}