unicode_normalization_alignments/
lib.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Unicode character composition and decomposition utilities
12//! as described in
13//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
14//!
15//! ```rust
16//! extern crate unicode_normalization_alignments;
17//!
18//! use unicode_normalization_alignments::char::compose;
19//! use unicode_normalization_alignments::UnicodeNormalization;
20//!
21//! fn main() {
22//!     assert_eq!(compose('A','\u{30a}'), Some('Å'));
23//!
24//!     let s = "ÅΩ";
25//!     let c = s.nfc().map(|c| c.0).collect::<String>();
26//!     assert_eq!(c, "ÅΩ");
27//! }
28//! ```
29//!
30//! # crates.io
31//!
32//! You can use this package in your project by adding the following
33//! to your `Cargo.toml`:
34//!
35//! ```toml
36//! [dependencies]
37//! unicode-normalization-alignments = "0.1.12"
38//! ```
39
40#![deny(missing_docs, unsafe_code)]
41#![doc(
42    html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
43    html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
44)]
45
46extern crate smallvec;
47
48pub use decompose::Decompositions;
49pub use quick_check::{
50    is_nfc, is_nfc_quick, is_nfc_stream_safe, is_nfc_stream_safe_quick, is_nfd, is_nfd_quick,
51    is_nfd_stream_safe, is_nfd_stream_safe_quick, is_nfkc, is_nfkc_quick, is_nfkd, is_nfkd_quick,
52    IsNormalized,
53};
54pub use recompose::Recompositions;
55use std::str::Chars;
56pub use stream_safe::StreamSafe;
57pub use tables::UNICODE_VERSION;
58
59mod decompose;
60mod lookups;
61mod normalize;
62mod perfect_hash;
63mod quick_check;
64mod recompose;
65mod stream_safe;
66mod tables;
67
68#[doc(hidden)]
69pub mod __test_api;
70#[cfg(test)]
71mod test;
72
73/// Methods for composing and decomposing characters.
74pub mod char {
75    pub use normalize::{compose, decompose_canonical, decompose_compatible};
76
77    pub use lookups::{canonical_combining_class, is_combining_mark};
78}
79
80/// Methods for iterating over strings while applying Unicode normalizations
81/// as described in
82/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
83pub trait UnicodeNormalization<I: Iterator<Item = char>> {
84    /// Returns an iterator over the string in Unicode Normalization Form D
85    /// (canonical decomposition).
86    #[inline]
87    fn nfd(self) -> Decompositions<I>;
88
89    /// Returns an iterator over the string in Unicode Normalization Form KD
90    /// (compatibility decomposition).
91    #[inline]
92    fn nfkd(self) -> Decompositions<I>;
93
94    /// An Iterator over the string in Unicode Normalization Form C
95    /// (canonical decomposition followed by canonical composition).
96    #[inline]
97    fn nfc(self) -> Recompositions<I>;
98
99    /// An Iterator over the string in Unicode Normalization Form KC
100    /// (compatibility decomposition followed by canonical composition).
101    #[inline]
102    fn nfkc(self) -> Recompositions<I>;
103
104    /// An Iterator over the string with Conjoining Grapheme Joiner characters
105    /// inserted according to the Stream-Safe Text Process (UAX15-D4)
106    #[inline]
107    fn stream_safe(self) -> StreamSafe<I>;
108}
109
110impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
111    #[inline]
112    fn nfd(self) -> Decompositions<Chars<'a>> {
113        decompose::new_canonical(self.chars())
114    }
115
116    #[inline]
117    fn nfkd(self) -> Decompositions<Chars<'a>> {
118        decompose::new_compatible(self.chars())
119    }
120
121    #[inline]
122    fn nfc(self) -> Recompositions<Chars<'a>> {
123        recompose::new_canonical(self.chars())
124    }
125
126    #[inline]
127    fn nfkc(self) -> Recompositions<Chars<'a>> {
128        recompose::new_compatible(self.chars())
129    }
130
131    #[inline]
132    fn stream_safe(self) -> StreamSafe<Chars<'a>> {
133        StreamSafe::new(self.chars())
134    }
135}
136
137impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
138    #[inline]
139    fn nfd(self) -> Decompositions<I> {
140        decompose::new_canonical(self)
141    }
142
143    #[inline]
144    fn nfkd(self) -> Decompositions<I> {
145        decompose::new_compatible(self)
146    }
147
148    #[inline]
149    fn nfc(self) -> Recompositions<I> {
150        recompose::new_canonical(self)
151    }
152
153    #[inline]
154    fn nfkc(self) -> Recompositions<I> {
155        recompose::new_compatible(self)
156    }
157
158    #[inline]
159    fn stream_safe(self) -> StreamSafe<I> {
160        StreamSafe::new(self)
161    }
162}