1use any_ascii::any_ascii_char;
11use core::iter::FusedIterator;
12
/// An iterator over the lexical form of a single `char`.
///
/// It yields either one stored `char` (unchanged) or the bytes of a static
/// byte slice, each converted to a lowercase ASCII `char`. Once exhausted it
/// keeps returning `None`.
///
/// Values of this type are returned by [`iterate_lexical_char`] and
/// [`iterate_lexical_char_only_alnum`].
#[derive(Debug, Clone)]
pub struct LexicalChar(CharOrSlice);

impl LexicalChar {
    /// Creates an iterator that yields exactly `c`, without any conversion.
    #[inline]
    fn from_char(c: char) -> Self {
        LexicalChar(CharOrSlice::Char(c))
    }

    /// Creates an iterator over the bytes of `s`; each byte is yielded as a
    /// lowercased `char`.
    #[inline]
    fn from_slice(s: &'static [u8]) -> Self {
        LexicalChar(CharOrSlice::Slice(s))
    }

    /// Creates an iterator that yields nothing.
    #[inline]
    fn empty() -> Self {
        LexicalChar(CharOrSlice::Slice(&[]))
    }

    /// Shared access to the underlying state.
    #[inline]
    fn inner(&self) -> &CharOrSlice {
        &self.0
    }

    /// Mutable access to the underlying state.
    #[inline]
    fn inner_mut(&mut self) -> &mut CharOrSlice {
        &mut self.0
    }
}

/// Internal state of a [`LexicalChar`].
#[derive(Debug, Clone)]
enum CharOrSlice {
    /// A single character that has not been yielded yet.
    Char(char),
    /// The bytes that have not been yielded yet (may be empty).
    Slice(&'static [u8]),
}

impl Iterator for LexicalChar {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self.inner_mut() {
            &mut CharOrSlice::Char(c) => {
                // The single char is consumed; switch to the empty state.
                *self = LexicalChar::empty();
                Some(c)
            }
            CharOrSlice::Slice(slice) => {
                let (&first, rest) = slice.split_first()?;
                *slice = rest;
                Some((first as char).to_ascii_lowercase())
            }
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match self.inner() {
            CharOrSlice::Char(_) => 1,
            CharOrSlice::Slice(bytes) => bytes.len(),
        };
        (remaining, Some(remaining))
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        if n == 0 {
            return self.next();
        }
        match self.inner_mut() {
            CharOrSlice::Slice(slice) => match slice.get(n) {
                Some(&byte) => {
                    // Skip the `n` preceding bytes *and* the returned one.
                    // `n < slice.len()` here, so `n + 1` cannot overflow.
                    *slice = &slice[n + 1..];
                    Some((byte as char).to_ascii_lowercase())
                }
                None => {
                    // `nth` past the end consumes everything that is left.
                    *slice = &[];
                    None
                }
            },
            CharOrSlice::Char(_) => {
                // `n >= 1` skips the only item, so the iterator is exhausted.
                *self = LexicalChar::empty();
                None
            }
        }
    }
}

// After the last item the state is an empty slice, so `next` keeps
// returning `None`.
impl FusedIterator for LexicalChar {}

// `size_hint` always reports the exact number of remaining items.
impl ExactSizeIterator for LexicalChar {}

impl DoubleEndedIterator for LexicalChar {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        match self.inner_mut() {
            &mut CharOrSlice::Char(c) => {
                *self = LexicalChar::empty();
                Some(c)
            }
            CharOrSlice::Slice(slice) => {
                // Read the last byte *before* shrinking the slice; indexing
                // the shortened slice at the old position would panic.
                let (&last, rest) = slice.split_last()?;
                *slice = rest;
                Some((last as char).to_ascii_lowercase())
            }
        }
    }
}
122
123#[inline]
126pub fn iterate_lexical_char(c: char) -> LexicalChar {
127 if c.is_ascii() {
128 LexicalChar::from_char(c.to_ascii_lowercase())
129 } else if c.is_alphanumeric() {
130 match any_ascii_char(c) {
131 s if s.is_empty() => LexicalChar::from_char(c),
132 s => LexicalChar::from_slice(s.as_bytes()),
133 }
134 } else if combining_diacritical(&c) {
135 LexicalChar::empty()
136 } else {
137 LexicalChar::from_char(c)
138 }
139}
140
141#[inline]
144pub fn iterate_lexical_char_only_alnum(c: char) -> LexicalChar {
145 if c.is_ascii() {
146 if c.is_ascii_alphanumeric() {
147 LexicalChar::from_char(c.to_ascii_lowercase())
148 } else {
149 LexicalChar::empty()
150 }
151 } else if c.is_alphanumeric() {
152 match any_ascii_char(c) {
153 s if s.is_empty() => LexicalChar::from_char(c),
154 s => LexicalChar::from_slice(s.as_bytes()),
155 }
156 } else {
157 LexicalChar::empty()
158 }
159}
160
/// Returns `true` if `c` lies in the range `U+0300..=U+036F`
/// (the "Combining Diacritical Marks" Unicode block).
#[inline]
fn combining_diacritical(c: &char) -> bool {
    ('\u{300}'..='\u{36F}').contains(c)
}
166
167pub fn iterate_lexical(s: &'_ str) -> impl Iterator<Item = char> + '_ {
170 s.chars().flat_map(iterate_lexical_char)
171}
172
173pub fn iterate_lexical_only_alnum(s: &'_ str) -> impl Iterator<Item = char> + '_ {
176 s.chars().flat_map(iterate_lexical_char_only_alnum)
177}
178
#[test]
#[cfg(feature = "std")]
fn test_iteration() {
    // (input, expected output of `iterate_lexical`), checked in order.
    let cases: [(&'static str, &'static str); 8] = [
        ("Hello, world!", "hello, world!"),
        ("Ω A æ b ö ß é", "o a ae b o ss e"),
        ("3½/⅝ £ → € ®™", "31/2/5/8 £ → € ®™"),
        ("»@« 15% ¡¹!", "»@« 15% ¡1!"),
        ("🎉🦄☣", "🎉🦄☣"),
        ("北亰", "beijing"),
        ("ΣΣΣ", "sss"),
        ("à", "a"),
    ];

    for &(input, expected) in cases.iter() {
        let actual: String = iterate_lexical(input).collect();
        assert_eq!(&actual, expected);
    }
}
195
#[test]
#[cfg(feature = "std")]
fn test_iteration_only_alnum() {
    // (input, expected output of `iterate_lexical_only_alnum`), checked in order.
    let cases: [(&'static str, &'static str); 8] = [
        ("Hello, world!", "helloworld"),
        ("Ω A æ b ö ß é", "oaaebosse"),
        ("3½/⅝ £ → € ®™", "31/25/8"),
        ("»@« 15% ¡¹!", "151"),
        ("🎉🦄☣", ""),
        ("北亰", "beijing"),
        ("ΣΣΣ", "sss"),
        ("à", "a"),
    ];

    for &(input, expected) in cases.iter() {
        let actual: String = iterate_lexical_only_alnum(input).collect();
        assert_eq!(&actual, expected);
    }
}