unic_char_basics/
noncharacter.rs

1// Copyright 2018 The UNIC Project Developers.
2//
3// See the COPYRIGHT file at the top-level directory of this distribution.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Unicode Noncharacters
12//!
13//! *Unicode Noncharacters* are the Unicode code-points permanently reserved in the Unicode Standard
14//! for internal use in applications. These code-points should not be used in text storage or
15//! exchange.
16//!
17//! Since Unicode 3.1.0, the list of 66 Unicode Noncharacters is *stabilized* by the Unicode
18//! Standard and will never change.
19//!
20//! References:
21//! - https://www.unicode.org/faq/private_use.html#noncharacters
22//! - https://www.unicode.org/policies/stability_policy.html#Property_Value
23
/// Returns `true` when `codepoint` is a *Unicode Noncharacter*.
///
/// The noncharacters are the contiguous range `U+FDD0..=U+FDEF` together with
/// the last two code-points of every plane (`U+nFFFE` and `U+nFFFF` for planes
/// 0 through 16).
///
/// - https://www.unicode.org/faq/private_use.html#noncharacters
pub fn is_noncharacter(codepoint: char) -> bool {
    match codepoint as u32 {
        // The 32 contiguous noncharacters located in the BMP.
        0xfdd0..=0xfdef => true,
        // The two code-points closing each plane have their low 16 bits equal
        // to 0xFFFE or 0xFFFF. Masking with 0xFFFE discards both the plane
        // number and the lowest bit, so one comparison covers every plane.
        // A `char` never exceeds U+10FFFF, so no plane beyond 16 can match.
        scalar => (scalar & 0xfffe) == 0xfffe,
    }
}
50
#[cfg(test)]
mod tests {
    use super::is_noncharacter;

    /// Sample code-points for which `is_noncharacter` must return `false`.
    const NOT_NONCHARACTERS: &[char] = &[
        // Plane 0 (BMP)
        '\u{0}',
        '\u{20}',
        '\u{41}',
        '\u{80}',
        '\u{200c}',
        '\u{d7ff}',
        '\u{e000}',
        '\u{e001}',
        '\u{f8fe}',
        '\u{f8ff}',
        '\u{f900}',
        // Immediate neighbours of the U+FDD0..=U+FDEF range
        '\u{fdcf}',
        '\u{fdf0}',
        // Just below the end of the BMP
        '\u{fff0}',
        '\u{fffc}',
        '\u{fffd}',
        // Plane 1 (SMP)
        '\u{1_0000}',
        '\u{1_0001}',
        '\u{1_fffd}',
        // Plane 14 (SSP)
        '\u{e_0000}',
        '\u{e_0001}',
        '\u{e_fffd}',
        // Plane 15 (PUA-A)
        '\u{f_0000}',
        '\u{f_0001}',
        '\u{f_fffd}',
        // Plane 16 (PUA-B)
        '\u{10_0000}',
        '\u{10_0001}',
        '\u{10_fffd}',
    ];

    /// Sample code-points for which `is_noncharacter` must return `true`.
    const NONCHARACTERS: &[char] = &[
        // Plane 0 (BMP)
        '\u{fdd0}',
        '\u{fdd1}',
        '\u{fdee}',
        '\u{fdef}',
        '\u{fffe}',
        '\u{ffff}',
        // Plane 1 (SMP)
        '\u{1_fffe}',
        '\u{1_ffff}',
        // Plane 14 (SSP)
        '\u{e_fffe}',
        '\u{e_ffff}',
        // Plane 15 (PUA-A)
        '\u{f_fffe}',
        '\u{f_ffff}',
        // Plane 16 (PUA-B)
        '\u{10_fffe}',
        '\u{10_ffff}',
    ];

    #[test]
    fn test_sample_codepoints() {
        for &sample in NOT_NONCHARACTERS {
            assert!(
                !is_noncharacter(sample),
                "U+{:04X} must not be reported as a noncharacter",
                sample as u32
            );
        }

        for &sample in NONCHARACTERS {
            assert!(
                is_noncharacter(sample),
                "U+{:04X} must be reported as a noncharacter",
                sample as u32
            );
        }
    }
}