unic_normal/
lib.rs

1// Copyright 2012-2015 The Rust Project Developers.
2// Copyright 2017 The UNIC Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12#![warn(
13    bad_style,
14    missing_debug_implementations,
15    missing_docs,
16    unconditional_recursion
17)]
18#![forbid(unsafe_code)]
19
20//! # UNIC — Unicode Normalization Forms
21//!
22//! A component of [`unic`: Unicode and Internationalization Crates for Rust](/unic/).
23//!
24//! This UNIC component implements algorithms from [Unicode Standard Annex #15 - Unicode
25//! Normalization Forms](http://unicode.org/reports/tr15/).
26//!
27//! ```rust
28//! extern crate unic_normal;
29//!
30//! use unic_normal::StrNormalForm;
31//!
32//! fn main() {
33//!     let s = "ÅΩ";
34//!     let c = s.nfc().collect::<String>();
35//!     assert_eq!(c, "ÅΩ");
36//! }
37//! ```
38
39mod decompose;
40mod recompose;
41
42use std::str::Chars;
43
44pub use crate::decompose::Decompositions;
45pub use crate::recompose::Recompositions;
46pub use unic_ucd_normal::UNICODE_VERSION;
47
48mod pkg_info;
49pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};
50
51/// Methods for iterating over strings while applying Unicode normalizations
52/// as described in
53/// [Unicode Standard Annex #15](https://www.unicode.org/reports/tr15/).
54pub trait StrNormalForm<I: Iterator<Item = char>> {
55    /// Returns an iterator over the string in Unicode Normalization Form D
56    /// (canonical decomposition).
57    fn nfd(self) -> Decompositions<I>;
58
59    /// Returns an iterator over the string in Unicode Normalization Form KD
60    /// (compatibility decomposition).
61    fn nfkd(self) -> Decompositions<I>;
62
63    /// An Iterator over the string in Unicode Normalization Form C
64    /// (canonical decomposition followed by canonical composition).
65    fn nfc(self) -> Recompositions<I>;
66
67    /// An Iterator over the string in Unicode Normalization Form KC
68    /// (compatibility decomposition followed by canonical composition).
69    fn nfkc(self) -> Recompositions<I>;
70}
71
72impl<'a> StrNormalForm<Chars<'a>> for &'a str {
73    #[inline]
74    fn nfd(self) -> Decompositions<Chars<'a>> {
75        decompose::new_canonical(self.chars())
76    }
77
78    #[inline]
79    fn nfkd(self) -> Decompositions<Chars<'a>> {
80        decompose::new_compatible(self.chars())
81    }
82
83    #[inline]
84    fn nfc(self) -> Recompositions<Chars<'a>> {
85        recompose::new_canonical(self.chars())
86    }
87
88    #[inline]
89    fn nfkc(self) -> Recompositions<Chars<'a>> {
90        recompose::new_compatible(self.chars())
91    }
92}
93
94impl<I: Iterator<Item = char>> StrNormalForm<I> for I {
95    #[inline]
96    fn nfd(self) -> Decompositions<I> {
97        decompose::new_canonical(self)
98    }
99
100    #[inline]
101    fn nfkd(self) -> Decompositions<I> {
102        decompose::new_compatible(self)
103    }
104
105    #[inline]
106    fn nfc(self) -> Recompositions<I> {
107        recompose::new_canonical(self)
108    }
109
110    #[inline]
111    fn nfkc(self) -> Recompositions<I> {
112        recompose::new_compatible(self)
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::StrNormalForm;

    /// Asserts that normalizing `$input` with the method `$form` produces
    /// `$expected` through both implementations of `StrNormalForm`:
    ///
    /// 1. the `&str` implementation, with the result rendered by
    ///    `to_string()`;
    /// 2. the blanket implementation for `char` iterators, with the result
    ///    gathered by `collect::<String>()`.
    macro_rules! assert_form_eq {
        ($form:ident, $input:expr, $expected:expr) => {{
            assert_eq!($input.$form().to_string(), $expected);
            // Feed the characters through an iterator type other than
            // `std::str::Chars` (here `std::vec::IntoIter<char>`) so the
            // blanket implementation is exercised as well.
            assert_eq!(
                $input
                    .chars()
                    .collect::<Vec<char>>()
                    .into_iter()
                    .$form()
                    .collect::<String>(),
                $expected
            );
        }};
    }

    #[test]
    fn test_nfd() {
        macro_rules! nfd_eq {
            ($input: expr, $expected: expr) => {
                assert_form_eq!(nfd, $input, $expected)
            };
        }
        nfd_eq!("abc", "abc");
        nfd_eq!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
        nfd_eq!("\u{2026}", "\u{2026}");
        nfd_eq!("\u{2126}", "\u{3a9}");
        nfd_eq!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
        nfd_eq!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
        nfd_eq!("a\u{301}", "a\u{301}");
        nfd_eq!("\u{301}a", "\u{301}a");
        nfd_eq!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
        nfd_eq!("\u{ac1c}", "\u{1100}\u{1162}");
    }

    #[test]
    fn test_nfkd() {
        macro_rules! nfkd_eq {
            ($input: expr, $expected: expr) => {
                assert_form_eq!(nfkd, $input, $expected)
            };
        }
        nfkd_eq!("abc", "abc");
        nfkd_eq!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
        nfkd_eq!("\u{2026}", "...");
        nfkd_eq!("\u{2126}", "\u{3a9}");
        nfkd_eq!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
        nfkd_eq!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
        nfkd_eq!("a\u{301}", "a\u{301}");
        nfkd_eq!("\u{301}a", "\u{301}a");
        nfkd_eq!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
        nfkd_eq!("\u{ac1c}", "\u{1100}\u{1162}");
    }

    #[test]
    fn test_nfc() {
        macro_rules! nfc_eq {
            ($input: expr, $expected: expr) => {
                assert_form_eq!(nfc, $input, $expected)
            };
        }
        nfc_eq!("abc", "abc");
        nfc_eq!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
        nfc_eq!("\u{2026}", "\u{2026}");
        nfc_eq!("\u{2126}", "\u{3a9}");
        nfc_eq!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
        nfc_eq!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
        nfc_eq!("a\u{301}", "\u{e1}");
        nfc_eq!("\u{301}a", "\u{301}a");
        nfc_eq!("\u{d4db}", "\u{d4db}");
        nfc_eq!("\u{ac1c}", "\u{ac1c}");
        nfc_eq!(
            "a\u{300}\u{305}\u{315}\u{5ae}b",
            "\u{e0}\u{5ae}\u{305}\u{315}b"
        );
    }

    #[test]
    fn test_nfkc() {
        macro_rules! nfkc_eq {
            ($input: expr, $expected: expr) => {
                assert_form_eq!(nfkc, $input, $expected)
            };
        }
        nfkc_eq!("abc", "abc");
        nfkc_eq!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
        nfkc_eq!("\u{2026}", "...");
        nfkc_eq!("\u{2126}", "\u{3a9}");
        nfkc_eq!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
        nfkc_eq!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
        nfkc_eq!("a\u{301}", "\u{e1}");
        nfkc_eq!("\u{301}a", "\u{301}a");
        nfkc_eq!("\u{d4db}", "\u{d4db}");
        nfkc_eq!("\u{ac1c}", "\u{ac1c}");
        nfkc_eq!(
            "a\u{300}\u{305}\u{315}\u{5ae}b",
            "\u{e0}\u{5ae}\u{305}\u{315}b"
        );
    }
}