lexical_sort/
cmp.rs

1use crate::iter::{iterate_lexical, iterate_lexical_only_alnum};
2use core::cmp::Ordering;
3
4macro_rules! cmp_ascii_digits {
5    (first_digits($lhs:ident, $rhs:ident), iterators($iter1:ident, $iter2:ident)) => {
6        let mut n1 = ascii_to_u64($lhs);
7        let mut n2 = ascii_to_u64($rhs);
8        loop {
9            match (
10                $iter1.peek().copied().filter(|c| c.is_ascii_digit()),
11                $iter2.peek().copied().filter(|c| c.is_ascii_digit()),
12            ) {
13                (Some(lhs), Some(rhs)) => {
14                    n1 = n1 * 10 + ascii_to_u64(lhs);
15                    n2 = n2 * 10 + ascii_to_u64(rhs);
16                    let _ = $iter1.next();
17                    let _ = $iter2.next();
18                }
19                (Some(_), None) => return Ordering::Greater,
20                (None, Some(_)) => return Ordering::Less,
21                (None, None) => {
22                    if n1 != n2 {
23                        return n1.cmp(&n2);
24                    } else {
25                        break;
26                    }
27                }
28            }
29        }
30    };
31}
32
33#[inline]
34fn ascii_to_u64(c: char) -> u64 {
35    (c as u64) - (b'0' as u64)
36}
37
38#[inline]
39fn ret_ordering(lhs: char, rhs: char) -> Ordering {
40    let is_lhs_alnum = lhs.is_alphanumeric();
41    let is_rhs_alnum = rhs.is_alphanumeric();
42
43    let result = if is_lhs_alnum == is_rhs_alnum {
44        lhs.cmp(&rhs)
45    } else if is_lhs_alnum {
46        Ordering::Greater
47    } else {
48        Ordering::Less
49    };
50    result
51}
52
53/// Compares strings lexicographically
54///
55/// For example, `"a" < "ä" < "aa"`
56pub fn lexical_cmp(lhs: &str, rhs: &str) -> Ordering {
57    let mut iter1 = iterate_lexical(lhs);
58    let mut iter2 = iterate_lexical(rhs);
59
60    loop {
61        match (iter1.next(), iter2.next()) {
62            (Some(lhs), Some(rhs)) => {
63                if lhs != rhs {
64                    return ret_ordering(lhs, rhs);
65                }
66            }
67            (Some(_), None) => return Ordering::Greater,
68            (None, Some(_)) => return Ordering::Less,
69            (None, None) => return lhs.cmp(&rhs),
70        }
71    }
72}
73
74/// Compares strings lexicographically, skipping non-alphanumeric characters
75///
76/// For example, `"a" < " ä" < "ä" < "aa"`
77pub fn lexical_only_alnum_cmp(s1: &str, s2: &str) -> Ordering {
78    let mut iter1 = iterate_lexical_only_alnum(s1);
79    let mut iter2 = iterate_lexical_only_alnum(s2);
80
81    loop {
82        match (iter1.next(), iter2.next()) {
83            (Some(lhs), Some(rhs)) => {
84                if lhs != rhs {
85                    return lhs.cmp(&rhs);
86                }
87            }
88            (Some(_), None) => return Ordering::Greater,
89            (None, Some(_)) => return Ordering::Less,
90            (None, None) => return s1.cmp(&s2),
91        }
92    }
93}
94
95/// Compares strings naturally and lexicographically
96///
97/// For example, `"a" < "ä" < "aa"`, `"50" < "100"`
98pub fn natural_lexical_cmp(s1: &str, s2: &str) -> Ordering {
99    let mut iter1 = iterate_lexical(s1).peekable();
100    let mut iter2 = iterate_lexical(s2).peekable();
101
102    loop {
103        match (iter1.next(), iter2.next()) {
104            (Some(lhs), Some(rhs)) => {
105                if lhs.is_ascii_digit() && rhs.is_ascii_digit() {
106                    cmp_ascii_digits!(first_digits(lhs, rhs), iterators(iter1, iter2));
107                } else if lhs != rhs {
108                    return ret_ordering(lhs, rhs);
109                }
110            }
111            (Some(_), None) => return Ordering::Greater,
112            (None, Some(_)) => return Ordering::Less,
113            (None, None) => return s1.cmp(&s2),
114        }
115    }
116}
117
118/// Compares strings naturally and lexicographically, skipping non-alphanumeric characters
119///
120/// For example, `"a" < " ä" < "ä" < "aa"`, `"50" < "100"`
121pub fn natural_lexical_only_alnum_cmp(s1: &str, s2: &str) -> Ordering {
122    let mut iter1 = iterate_lexical_only_alnum(s1).peekable();
123    let mut iter2 = iterate_lexical_only_alnum(s2).peekable();
124
125    loop {
126        match (iter1.next(), iter2.next()) {
127            (Some(lhs), Some(rhs)) => {
128                if lhs.is_ascii_digit() && rhs.is_ascii_digit() {
129                    cmp_ascii_digits!(first_digits(lhs, rhs), iterators(iter1, iter2));
130                } else if lhs != rhs {
131                    return lhs.cmp(&rhs);
132                }
133            }
134            (Some(_), None) => return Ordering::Greater,
135            (None, Some(_)) => return Ordering::Less,
136            (None, None) => return s1.cmp(&s2),
137        }
138    }
139}
140
141/// Compares strings naturally
142///
143/// For example, `"50" < "100"`
144pub fn natural_cmp(s1: &str, s2: &str) -> Ordering {
145    let mut iter1 = s1.chars().peekable();
146    let mut iter2 = s2.chars().peekable();
147
148    loop {
149        match (iter1.next(), iter2.next()) {
150            (Some(lhs), Some(rhs)) => {
151                if lhs.is_ascii_digit() && rhs.is_ascii_digit() {
152                    cmp_ascii_digits!(first_digits(lhs, rhs), iterators(iter1, iter2));
153                } else if lhs != rhs {
154                    return lhs.cmp(&rhs);
155                }
156            }
157            (Some(_), None) => return Ordering::Greater,
158            (None, Some(_)) => return Ordering::Less,
159            (None, None) => return Ordering::Equal,
160        }
161    }
162}
163
164/// Compares strings naturally, skipping non-alphanumeric characters
165///
166/// For example, `"a" < " b" < "b"`, `"50" < "100"`
167pub fn natural_only_alnum_cmp(s1: &str, s2: &str) -> Ordering {
168    let mut iter1 = s1.chars().filter(|c| c.is_alphanumeric()).peekable();
169    let mut iter2 = s2.chars().filter(|c| c.is_alphanumeric()).peekable();
170
171    loop {
172        match (iter1.next(), iter2.next()) {
173            (Some(lhs), Some(rhs)) => {
174                if lhs.is_ascii_digit() && rhs.is_ascii_digit() {
175                    cmp_ascii_digits!(first_digits(lhs, rhs), iterators(iter1, iter2));
176                } else if lhs != rhs {
177                    return lhs.cmp(&rhs);
178                }
179            }
180            (Some(_), None) => return Ordering::Greater,
181            (None, Some(_)) => return Ordering::Less,
182            (None, None) => return s1.cmp(&s2),
183        }
184    }
185}
186
187/// Compares strings, skipping non-alphanumeric characters
188///
189/// For example, `"a" < " b" < "b"`
190pub fn only_alnum_cmp(s1: &str, s2: &str) -> Ordering {
191    let mut iter1 = s1.chars().filter(|c| c.is_alphanumeric());
192    let mut iter2 = s2.chars().filter(|c| c.is_alphanumeric());
193
194    loop {
195        match (iter1.next(), iter2.next()) {
196            (Some(lhs), Some(rhs)) => {
197                if lhs != rhs {
198                    return lhs.cmp(&rhs);
199                }
200            }
201            (Some(_), None) => return Ordering::Greater,
202            (None, Some(_)) => return Ordering::Less,
203            (None, None) => return s1.cmp(&s2),
204        }
205    }
206}
207
208/// Compares strings (not lexicographically or naturally, doesn't skip non-alphanumeric characters)
209///
210/// For example, `"B" < "a" < "b" < "ä"`
211pub fn cmp(s1: &str, s2: &str) -> Ordering {
212    let mut iter1 = s1.chars();
213    let mut iter2 = s2.chars();
214
215    loop {
216        match (iter1.next(), iter2.next()) {
217            (Some(lhs), Some(rhs)) => {
218                if lhs != rhs {
219                    return lhs.cmp(&rhs);
220                }
221            }
222            (Some(_), None) => return Ordering::Greater,
223            (None, Some(_)) => return Ordering::Less,
224            (None, None) => return Ordering::Equal,
225        }
226    }
227}
228
229#[cfg(test)]
230mod tests {
231    use super::*;
232
233    fn make_test(desc: &'static str, algo: impl Fn(&str, &str) -> Ordering) -> impl Fn(&str, &str) {
234        move |lhs, rhs| {
235            let success = algo(lhs, rhs) == Ordering::Less;
236            assert!(success, "{} comparison {:?} < {:?} failed", desc, lhs, rhs);
237
238            let success = algo(rhs, lhs) == Ordering::Greater;
239            assert!(success, "{} comparison {:?} > {:?} failed", desc, rhs, lhs);
240        }
241    }
242
243    #[test]
244    fn test_cmp() {
245        let ordered = make_test("Cmp", cmp);
246
247        ordered("aaa", "aaaa");
248        ordered("aaa", "aab");
249        ordered("AAb", "aaa");
250        ordered("aab", "äáa");
251        ordered("aaa", "äáb");
252
253        ordered("T-20", "T-5");
254        ordered("T-5", "Ŧ-5");
255    }
256
257    #[test]
258    fn test_only_alnum() {
259        let ordered = make_test("Only-alnum", only_alnum_cmp);
260
261        ordered("aaa", "aaaa");
262        ordered("aaa", "aab");
263        ordered("AAb", "aaa");
264        ordered("aab", "äáa");
265        ordered("aaa", "äáb");
266
267        ordered("_ad", "_æ");
268        ordered("_ae", "_æ");
269        ordered("_ae_", "_æ");
270        ordered("_af", "_æ");
271
272        ordered("T-20", "T-5");
273        ordered("T-5", "Ŧ-5");
274    }
275
276    #[test]
277    fn test_lexical() {
278        let ordered = make_test("Lexical", lexical_cmp);
279
280        ordered("aaa", "aaaa");
281        ordered("aaa", "aab");
282        ordered("aaa", "AAb");
283        ordered("äáa", "aab");
284        ordered("aaa", "äáb");
285
286        ordered("_ad", "_æ");
287        ordered("_ae", "_æ");
288        ordered("_æ", "_ae_");
289        ordered("_æ", "_af");
290
291        ordered("T-20", "T-5");
292        ordered("T-5", "Ŧ-5");
293    }
294
295    #[test]
296    fn test_lexical_only_alnum() {
297        let ordered = make_test("Lexical, only-alnum", lexical_only_alnum_cmp);
298
299        ordered("aaa", "aaaa");
300        ordered("aaa", "aab");
301        ordered("aaa", "AAb");
302        ordered("äáa", "aab");
303        ordered("aaa", "äáb");
304
305        ordered("_ad", "_æ");
306        ordered("_ae", "_æ");
307        ordered("_ae_", "_æ");
308        ordered("_æ", "_af");
309
310        ordered("T20", "T-21");
311        ordered("T-21", "T22");
312        ordered("T-21", "T3");
313    }
314
315    #[test]
316    fn test_natural() {
317        let ordered = make_test("Natural", natural_cmp);
318
319        ordered("1", "10");
320        ordered("10", "15");
321        ordered("150", "220");
322        ordered("334", "335");
323        ordered("433", "533");
324
325        ordered("T-1", "T-5");
326        ordered("T-27", "T5");
327        ordered("T-27a", "T27b");
328
329        ordered("T-27", "Ŧ-5");
330        ordered("T-5", "Ŧ-27");
331        ordered("T-5", "Ŧ-5");
332    }
333
334    #[test]
335    fn test_natural_only_alnum() {
336        let ordered = make_test("Natural, only-alnum", natural_only_alnum_cmp);
337
338        ordered("aaa", "aaaa");
339        ordered("aaa", "aab");
340        ordered("AAb", "aaa");
341        ordered("aab", "äáa");
342        ordered("aaa", "äáb");
343
344        ordered("_ad", "_æ");
345        ordered("_ae", "_æ");
346        ordered("_ae_", "_æ");
347        ordered("_af", "_æ");
348
349        ordered("T20", "T-21");
350        ordered("T-21", "T22");
351        ordered("T3", "T-21");
352    }
353
354    #[test]
355    fn test_natural_lexical() {
356        let ordered = make_test("Natural, lexical", natural_lexical_cmp);
357
358        ordered("1", "10");
359        ordered("10", "15");
360        ordered("150", "220");
361        ordered("334", "335");
362        ordered("433", "533");
363
364        ordered("T-1", "T-5");
365        ordered("T-5", "T-27");
366        ordered("T-27a", "T-27b");
367
368        ordered("Ŧ-5", "T-27");
369        ordered("T-5", "Ŧ-27");
370        ordered("T-5", "Ŧ-5");
371    }
372
373    #[test]
374    fn test_natural_lexical_only_alnum() {
375        let ordered = make_test(
376            "Natural, lexical, only-alnum",
377            natural_lexical_only_alnum_cmp,
378        );
379
380        ordered("1", "10");
381        ordered("10", "15");
382        ordered("150", "220");
383        ordered("334", "335");
384        ordered("433", "533");
385
386        ordered("T-1", "T-5");
387        ordered("T5", "T-27");
388        ordered("T-27a", "T27b");
389
390        ordered("Ŧ-5", "T-27");
391        ordered("T-5", "Ŧ-27");
392        ordered("T-5", "Ŧ-5");
393    }
394}