ucd_util/ideograph.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
/// A set of ranges that corresponds to the set of all ideograph codepoints.
///
/// These ranges are defined in Unicode 4.8 Table 4-13.
pub const RANGE_IDEOGRAPH: &'static [(u32, u32)] = &[
(0x3400, 0x4DB5),
(0x4E00, 0x9FD5),
(0x4E00, 0x9FD5),
(0x20000, 0x2A6D6),
(0x2A700, 0x2B734),
(0x2B740, 0x2B81D),
(0x2B820, 0x2CEA1),
(0x17000, 0x187EC),
(0xF900, 0xFA6D),
(0xFA70, 0xFAD9),
(0x2F800, 0x2FA1D),
];
/// Return the character name of the given ideograph codepoint.
///
/// This operation is only defined on ideographic codepoints. This includes
/// precisely the following inclusive ranges:
///
/// * `3400..4DB5`
/// * `4E00..9FD5`
/// * `20000..2A6D6`
/// * `2A700..2B734`
/// * `2B740..2B81D`
/// * `2B820..2CEA1`
/// * `17000..187EC`
/// * `F900..FA6D`
/// * `FA70..FAD9`
/// * `2F800..2FA1D`
///
/// If the given codepoint is not in any of the above ranges, then `None` is
/// returned.
///
/// This implements the algorithm described in Unicode 4.8.
pub fn ideograph_name(cp: u32) -> Option<String> {
// This match should be in sync with the `RANGE_IDEOGRAPH` constant.
match cp {
0x3400..=0x4DB5
| 0x4E00..=0x9FD5
| 0x20000..=0x2A6D6
| 0x2A700..=0x2B734
| 0x2B740..=0x2B81D
| 0x2B820..=0x2CEA1 => {
Some(format!("CJK UNIFIED IDEOGRAPH-{:04X}", cp))
}
0x17000..=0x187EC => Some(format!("TANGUT IDEOGRAPH-{:04X}", cp)),
0xF900..=0xFA6D | 0xFA70..=0xFAD9 | 0x2F800..=0x2FA1D => {
Some(format!("CJK COMPATIBILITY IDEOGRAPH-{:04X}", cp))
}
_ => None,
}
}
#[cfg(test)]
mod tests {
use super::ideograph_name;
#[test]
fn name() {
assert_eq!(
ideograph_name(0x4E00).unwrap(),
"CJK UNIFIED IDEOGRAPH-4E00"
);
assert_eq!(
ideograph_name(0x9FD5).unwrap(),
"CJK UNIFIED IDEOGRAPH-9FD5"
);
assert_eq!(ideograph_name(0x17000).unwrap(), "TANGUT IDEOGRAPH-17000");
assert_eq!(
ideograph_name(0xF900).unwrap(),
"CJK COMPATIBILITY IDEOGRAPH-F900"
);
}
#[test]
fn invalid() {
assert!(ideograph_name(0).is_none());
}
}