unic_ucd_normal/gen_cat.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
// Copyright 2015 The Servo Project Developers.
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[cfg(not(feature = "unic-ucd-category"))]
mod mark {
    use unic_char_property::tables::CharDataTable;

    // Table pulled in at compile time from the `.rsv` data file. The value
    // type is `()`, so a successful lookup carries no payload: only the
    // presence or absence of an entry matters.
    const GENERAL_CATEGORY_MARK: CharDataTable<()> =
        include!("../tables/general_category_mark.rsv");

    /// Return whether the given character is a combining mark (`General_Category=Mark`)
    ///
    /// This variant is compiled when the `unic-ucd-category` feature is
    /// disabled; it answers the question by looking `c` up in the bundled
    /// `GENERAL_CATEGORY_MARK` table.
    pub fn is_combining_mark(c: char) -> bool {
        let entry = GENERAL_CATEGORY_MARK.find(c);
        entry.is_some()
    }
}
#[cfg(feature = "unic-ucd-category")]
mod mark {
    use unic_ucd_category::GeneralCategory;

    /// Return whether the given character is a combining mark (`General_Category=Mark`)
    ///
    /// This variant is compiled when the `unic-ucd-category` feature is
    /// enabled; it delegates to `GeneralCategory::of` and asks the resulting
    /// category whether it is a mark.
    pub fn is_combining_mark(c: char) -> bool {
        let category = GeneralCategory::of(c);
        category.is_mark()
    }
}
// Re-export `is_combining_mark` from whichever `mark` module was selected by
// the `unic-ucd-category` feature gate, so callers see one stable path.
pub use self::mark::is_combining_mark;
#[cfg(test)]
mod tests {
    use super::*;

    use core::char;

    /// No ASCII code point (U+0000 through U+007F inclusive) is a combining mark.
    #[test]
    fn test_is_combining_mark_ascii() {
        // The upper bound is exclusive, so 0x80 is required to include
        // U+007F (DELETE), the last ASCII code point.
        for cp in 0..0x80 {
            // Every value below 0x80 is a valid scalar value, so this cannot fail.
            let c = char::from_u32(cp).unwrap();
            assert!(
                !is_combining_mark(c),
                "U+{:04X} must not be reported as a combining mark",
                cp
            );
        }
    }

    // TODO: Add more tests for edge cases, Hangul comp/decomp, etc

    /// Regression checks for supplementary-plane nonspacing marks.
    #[test]
    fn test_is_combining_mark_misc() {
        // https://github.com/unicode-rs/unicode-normalization/issues/16

        // U+11C3A BHAIKSUKI VOWEL SIGN O
        // Category: Mark, Nonspacing [Mn]
        assert!(
            is_combining_mark('\u{11C3A}'),
            "U+11C3A must be reported as a combining mark"
        );

        // U+11C3F BHAIKSUKI SIGN VIRAMA
        // Category: Mark, Nonspacing [Mn]
        assert!(
            is_combining_mark('\u{11C3F}'),
            "U+11C3F must be reported as a combining mark"
        );
    }
}