lexical_sort/
lib.rs

1//! This is a library to compare and sort strings (or file paths) **lexicographically**. This
2//! means that non-ASCII characters such as `á` or `ß` are treated like their closest ASCII
3//! character: `á` is treated as `a`, `ß` is treated as `ss`, etc.
4//!
5//! Lexical comparisons are case-insensitive. Alphanumeric characters are sorted after all other
6//! characters (punctuation, whitespace, special characters, emojis, ...).
7//!
8//! It is possible to enable **natural sorting**, which also handles ASCII numbers. For example,
9//! `50` is less than `100` with natural sorting turned on. It's also possible to skip
10//! characters that aren't alphanumeric, so e.g. `f-5` is next to `f5`.
11//!
12//! If different strings have the same ASCII representation (e.g. `"Foo"` and `"fóò"`), it
13//! falls back to the default method from the standard library, so sorting is deterministic.
14//!
15//! <table><tr><td>
16//! <b>NOTE</b>: This crate doesn't attempt to be correct for every locale, but it should work
17//! reasonably well for a wide range of locales, while providing excellent performance.
18//! </td></tr></table>
19//!
20//! ## Usage
21//!
22//! To sort strings or paths, you can use the `StringSort` or `PathSort` trait:
23//!
24//! ```rust
25//! # #[cfg(feature = "std")] {
26//! use lexical_sort::{StringSort, natural_lexical_cmp};
27//!
28//! let mut strings = vec!["ß", "é", "100", "hello", "world", "50", ".", "B!"];
29//! strings.string_sort_unstable(natural_lexical_cmp);
30//!
31//! assert_eq!(&strings, &[".", "50", "100", "B!", "é", "hello", "ß", "world"]);
32//! # }
33//! ```
34//!
35//! There are eight comparison functions:
36//!
37//! | Function                         | lexico­graphical | natural | skips non-alphanumeric chars |
38//! | -------------------------------- |:---------------:|:-------:|:----------------------------:|
39//! | `cmp`                            |                 |         |                              |
40//! | `only_alnum_cmp`                 |                 |         | yes                          |
41//! | `lexical_cmp`                    | yes             |         |                              |
42//! | `lexical_only_alnum_cmp`         | yes             |         | yes                          |
43//! | `natural_cmp`                    |                 | yes     |                              |
44//! | `natural_only_alnum_cmp`         |                 | yes     | yes                          |
45//! | `natural_lexical_cmp`            | yes             | yes     |                              |
46//! | `natural_lexical_­only_alnum_cmp` | yes             | yes     | yes                          |
47//!
48//! Note that only the functions that sort lexicographically are case insensitive.
49
50#![cfg_attr(not(feature = "std"), no_std)]
51
52mod cmp;
53pub mod iter;
54
55pub use cmp::{
56    cmp, lexical_cmp, lexical_only_alnum_cmp, natural_cmp, natural_lexical_cmp,
57    natural_lexical_only_alnum_cmp, natural_only_alnum_cmp, only_alnum_cmp,
58};
59
60use core::cmp::Ordering;
61#[cfg(feature = "std")]
62use std::path::Path;
63
64/// A trait to sort strings. This is a convenient wrapper for the standard library sort functions.
65///
66/// This trait is implemented for all slices whose inner type implements `AsRef<str>`.
67///
68/// ## Example
69///
70/// ```rust
71/// use lexical_sort::StringSort;
72///
73/// let slice = &mut ["Hello", " world", "!"];
74/// slice.string_sort_unstable(lexical_sort::natural_lexical_cmp);
75///
76/// // or trim the strings before comparing:
77/// slice.string_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
78/// ```
79///
80/// If you want to sort file paths or OsStrings, use the `PathSort` trait instead.
81pub trait StringSort {
82    /// Sorts the items using the provided comparison function.
83    ///
84    /// **This is a stable sort, which is often not required**.
85    /// You can use `string_sort_unstable` instead.
86    ///
87    /// ## Example
88    ///
89    /// ```rust
90    /// use lexical_sort::StringSort;
91    ///
92    /// let slice = &mut ["Lorem", "ipsum", "dolor", "sit", "amet"];
93    /// slice.string_sort(lexical_sort::natural_lexical_cmp);
94    ///
95    /// assert_eq!(slice, &["amet", "dolor", "ipsum", "Lorem", "sit"]);
96    /// ```
97    fn string_sort(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
98
99    /// Sorts the items using the provided comparison function.
100    ///
101    /// This sort is unstable: The original order of equal strings is not preserved.
102    /// It is slightly more efficient than the stable alternative.
103    ///
104    /// ## Example
105    ///
106    /// ```rust
107    /// use lexical_sort::StringSort;
108    ///
109    /// let slice = &mut ["The", "quick", "brown", "fox"];
110    /// slice.string_sort_unstable(lexical_sort::natural_lexical_cmp);
111    ///
112    /// assert_eq!(slice, &["brown", "fox", "quick", "The"]);
113    /// ```
114    fn string_sort_unstable(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
115
116    /// Sorts the items using the provided comparison function and another function that is
117    /// applied to each string before the comparison. This can be used to trim the strings.
118    ///
119    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
120    /// In this case you should use `[_]::sort_by()` directly.
121    ///
122    /// **This is a stable sort, which is often not required**.
123    /// You can use `string_sort_unstable` instead.
124    ///
125    /// ## Example
126    ///
127    /// ```rust
128    /// use lexical_sort::StringSort;
129    ///
130    /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
131    /// slice.string_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
132    ///
133    /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
134    /// ```
135    fn string_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
136    where
137        Cmp: FnMut(&str, &str) -> Ordering,
138        Map: FnMut(&str) -> &str;
139
140    /// Sorts the items using the provided comparison function and another function that is
141    /// applied to each string before the comparison. This can be used to trim the strings.
142    ///
143    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
144    /// In this case you should use `[_]::sort_by()` directly.
145    ///
146    /// This sort is unstable: The original order of equal strings is not preserved.
147    /// It is slightly more efficient than the stable alternative.
148    ///
149    /// ## Example
150    ///
151    /// ```rust
152    /// use lexical_sort::StringSort;
153    ///
154    /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
155    /// slice.string_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
156    ///
157    /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
158    /// ```
159    fn string_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
160    where
161        Cmp: FnMut(&str, &str) -> Ordering,
162        Map: FnMut(&str) -> &str;
163}
164
165impl<A: AsRef<str>> StringSort for [A] {
166    fn string_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
167        self.sort_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
168    }
169
170    fn string_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
171        self.sort_unstable_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
172    }
173
174    fn string_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
175    where
176        Cmp: FnMut(&str, &str) -> Ordering,
177        Map: FnMut(&str) -> &str,
178    {
179        self.sort_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
180    }
181
182    fn string_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
183    where
184        Cmp: FnMut(&str, &str) -> Ordering,
185        Map: FnMut(&str) -> &str,
186    {
187        self.sort_unstable_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
188    }
189}
190
191/// A trait to sort paths and OsStrings. This is a convenient wrapper for the standard library
192/// sort functions.
193///
194/// This trait is implemented for all slices whose inner type implements `AsRef<Path>`.
195///
196/// ## Example
197///
198/// ```rust
199/// # use std::path::Path;
200/// use lexical_sort::PathSort;
201///
202/// let slice: &mut [&Path] = &mut ["Hello".as_ref(), " world".as_ref(), "!".as_ref()];
203/// slice.path_sort_unstable(lexical_sort::natural_lexical_cmp);
204///
205/// // or trim the strings before comparing:
206/// slice.path_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
207/// ```
208///
209/// If you want to sort regular strings, use the `StringSort` trait instead.
210#[cfg(feature = "std")]
211pub trait PathSort {
212    /// Sorts the items using the provided comparison function.
213    ///
214    /// **This is a stable sort, which is often not required**.
215    /// You can use `path_sort_unstable` instead.
216    ///
217    /// ## Example
218    ///
219    /// ```rust
220    /// # use std::path::Path;
221    /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
222    /// use lexical_sort::PathSort;
223    ///
224    /// let mut vec: Vec<&Path> = paths(&["Lorem", "ipsum", "dolor", "sit", "amet"]);
225    /// vec.path_sort(lexical_sort::natural_lexical_cmp);
226    ///
227    /// assert_eq!(vec, paths(&["amet", "dolor", "ipsum", "Lorem", "sit"]));
228    /// ```
229    fn path_sort(&mut self, comparator: impl FnMut(&str, &str) -> Ordering);
230
231    /// Sorts the items using the provided comparison function.
232    ///
233    /// This sort is unstable: The original order of equal strings is not preserved.
234    /// It is slightly more efficient than the stable alternative.
235    ///
236    /// ## Example
237    ///
238    /// ```rust
239    /// # use std::path::Path;
240    /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
241    /// use lexical_sort::PathSort;
242    ///
243    /// let mut vec: Vec<&Path> = paths(&["The", "quick", "brown", "fox"]);
244    /// vec.path_sort_unstable(lexical_sort::natural_lexical_cmp);
245    ///
246    /// assert_eq!(vec, paths(&["brown", "fox", "quick", "The"]));
247    /// ```
248    fn path_sort_unstable(&mut self, comparator: impl FnMut(&str, &str) -> Ordering);
249
250    /// Sorts the items using the provided comparison function and another function that is
251    /// applied to each string before the comparison. This can be used to trim the strings.
252    ///
253    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
254    /// In this case you should use `[_]::sort_by()` directly. You'll need to call
255    /// `to_string_lossy()` or `to_str().unwrap()` to convert a `Path` or `OsStr` to a `&str` first.
256    ///
257    /// **This is a stable sort, which is often not required**.
258    /// You can use `path_sort_unstable` instead.
259    ///
260    /// ## Example
261    ///
262    /// ```rust
263    /// # use std::path::Path;
264    /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
265    /// use lexical_sort::PathSort;
266    ///
267    /// let mut vec: Vec<&Path> = paths(&["Eeny", " meeny", " miny", " moe"]);
268    /// vec.path_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
269    ///
270    /// assert_eq!(vec, paths(&["Eeny", " meeny", " miny", " moe"]));
271    /// ```
272    fn path_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
273    where
274        Cmp: FnMut(&str, &str) -> Ordering,
275        Map: FnMut(&str) -> &str;
276
277    /// Sorts the items using the provided comparison function and another function that is
278    /// applied to each string before the comparison. This can be used to trim the strings.
279    ///
280    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
281    /// In this case you should use `[_]::sort_by()` directly. You'll need to call
282    /// `to_string_lossy()` or `to_str().unwrap()` to convert a `Path` or `OsStr` to a `&str` first.
283    ///
284    /// This sort is unstable: The original order of equal strings is not preserved.
285    /// It is slightly more efficient than the stable alternative.
286    ///
287    /// ## Example
288    ///
289    /// ```rust
290    /// # use std::path::Path;
291    /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
292    /// use lexical_sort::PathSort;
293    ///
294    /// let mut vec: Vec<&Path> = paths(&["Eeny", " meeny", " miny", " moe"]);
295    /// vec.path_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
296    ///
297    /// assert_eq!(vec, paths(&["Eeny", " meeny", " miny", " moe"]));
298    /// ```
299    fn path_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
300    where
301        Cmp: FnMut(&str, &str) -> Ordering,
302        Map: FnMut(&str) -> &str;
303}
304
305#[cfg(feature = "std")]
306impl<A: AsRef<Path>> PathSort for [A] {
307    fn path_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
308        self.sort_by(|lhs, rhs| {
309            cmp(
310                &lhs.as_ref().to_string_lossy(),
311                &rhs.as_ref().to_string_lossy(),
312            )
313        });
314    }
315
316    fn path_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
317        self.sort_unstable_by(|lhs, rhs| {
318            cmp(
319                &lhs.as_ref().to_string_lossy(),
320                &rhs.as_ref().to_string_lossy(),
321            )
322        });
323    }
324
325    fn path_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
326    where
327        Cmp: FnMut(&str, &str) -> Ordering,
328        Map: FnMut(&str) -> &str,
329    {
330        self.sort_by(|lhs, rhs| {
331            cmp(
332                map(&lhs.as_ref().to_string_lossy()),
333                map(&rhs.as_ref().to_string_lossy()),
334            )
335        });
336    }
337
338    fn path_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
339    where
340        Cmp: FnMut(&str, &str) -> Ordering,
341        Map: FnMut(&str) -> &str,
342    {
343        self.sort_unstable_by(|lhs, rhs| {
344            cmp(
345                map(&lhs.as_ref().to_string_lossy()),
346                map(&rhs.as_ref().to_string_lossy()),
347            )
348        });
349    }
350}
351
352#[test]
353fn test_sort() {
354    macro_rules! assert_lexically_sorted {
355        ($T:ident, $array:expr, natural = $natural:expr) => {{
356            let mut sorted = $array.clone();
357            if $natural {
358                sorted.$T(natural_lexical_cmp);
359            } else {
360                sorted.$T(lexical_cmp);
361            }
362
363            assert_eq!($array, sorted);
364        }};
365    }
366
367    let strings = [
368        "-", "-$", "-a", "100", "50", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
369    ];
370    let strings_nat = [
371        "-", "-$", "-a", "50", "100", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
372    ];
373
374    assert_lexically_sorted!(string_sort, strings, natural = false);
375    assert_lexically_sorted!(string_sort, strings_nat, natural = true);
376
377    #[cfg(feature = "std")]
378    {
379        let paths: Vec<&Path> = strings.iter().map(|s| Path::new(s)).collect();
380        let paths_nat: Vec<&Path> = strings_nat.iter().map(|s| Path::new(s)).collect();
381
382        assert_lexically_sorted!(path_sort, paths, natural = false);
383        assert_lexically_sorted!(path_sort, paths_nat, natural = true);
384    }
385}