lexical_sort/lib.rs
1//! This is a library to compare and sort strings (or file paths) **lexicographically**. This
2//! means that non-ASCII characters such as `á` or `ß` are treated like their closest ASCII
3//! character: `á` is treated as `a`, `ß` is treated as `ss`, etc.
4//!
5//! Lexical comparisons are case-insensitive. Alphanumeric characters are sorted after all other
6//! characters (punctuation, whitespace, special characters, emojis, ...).
7//!
8//! It is possible to enable **natural sorting**, which also handles ASCII numbers. For example,
9//! `50` is less than `100` with natural sorting turned on. It's also possible to skip
10//! characters that aren't alphanumeric, so e.g. `f-5` is next to `f5`.
11//!
12//! If different strings have the same ASCII representation (e.g. `"Foo"` and `"fóò"`), it
13//! falls back to the default method from the standard library, so sorting is deterministic.
14//!
15//! <table><tr><td>
16//! <b>NOTE</b>: This crate doesn't attempt to be correct for every locale, but it should work
17//! reasonably well for a wide range of locales, while providing excellent performance.
18//! </td></tr></table>
19//!
20//! ## Usage
21//!
22//! To sort strings or paths, you can use the `StringSort` or `PathSort` trait:
23//!
24//! ```rust
25//! # #[cfg(feature = "std")] {
26//! use lexical_sort::{StringSort, natural_lexical_cmp};
27//!
28//! let mut strings = vec!["ß", "é", "100", "hello", "world", "50", ".", "B!"];
29//! strings.string_sort_unstable(natural_lexical_cmp);
30//!
31//! assert_eq!(&strings, &[".", "50", "100", "B!", "é", "hello", "ß", "world"]);
32//! # }
33//! ```
34//!
35//! There are eight comparison functions:
36//!
37//! | Function | lexicographical | natural | skips non-alphanumeric chars |
38//! | -------------------------------- |:---------------:|:-------:|:----------------------------:|
39//! | `cmp` | | | |
40//! | `only_alnum_cmp` | | | yes |
41//! | `lexical_cmp` | yes | | |
42//! | `lexical_only_alnum_cmp` | yes | | yes |
43//! | `natural_cmp` | | yes | |
44//! | `natural_only_alnum_cmp` | | yes | yes |
45//! | `natural_lexical_cmp` | yes | yes | |
46//! | `natural_lexical_only_alnum_cmp` | yes | yes | yes |
47//!
48//! Note that only the functions that sort lexicographically are case insensitive.
49
50#![cfg_attr(not(feature = "std"), no_std)]
51
52mod cmp;
53pub mod iter;
54
55pub use cmp::{
56 cmp, lexical_cmp, lexical_only_alnum_cmp, natural_cmp, natural_lexical_cmp,
57 natural_lexical_only_alnum_cmp, natural_only_alnum_cmp, only_alnum_cmp,
58};
59
60use core::cmp::Ordering;
61#[cfg(feature = "std")]
62use std::path::Path;
63
64/// A trait to sort strings. This is a convenient wrapper for the standard library sort functions.
65///
66/// This trait is implemented for all slices whose inner type implements `AsRef<str>`.
67///
68/// ## Example
69///
70/// ```rust
71/// use lexical_sort::StringSort;
72///
73/// let slice = &mut ["Hello", " world", "!"];
74/// slice.string_sort_unstable(lexical_sort::natural_lexical_cmp);
75///
76/// // or trim the strings before comparing:
77/// slice.string_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
78/// ```
79///
80/// If you want to sort file paths or OsStrings, use the `PathSort` trait instead.
81pub trait StringSort {
82 /// Sorts the items using the provided comparison function.
83 ///
84 /// **This is a stable sort, which is often not required**.
85 /// You can use `string_sort_unstable` instead.
86 ///
87 /// ## Example
88 ///
89 /// ```rust
90 /// use lexical_sort::StringSort;
91 ///
92 /// let slice = &mut ["Lorem", "ipsum", "dolor", "sit", "amet"];
93 /// slice.string_sort(lexical_sort::natural_lexical_cmp);
94 ///
95 /// assert_eq!(slice, &["amet", "dolor", "ipsum", "Lorem", "sit"]);
96 /// ```
97 fn string_sort(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
98
99 /// Sorts the items using the provided comparison function.
100 ///
101 /// This sort is unstable: The original order of equal strings is not preserved.
102 /// It is slightly more efficient than the stable alternative.
103 ///
104 /// ## Example
105 ///
106 /// ```rust
107 /// use lexical_sort::StringSort;
108 ///
109 /// let slice = &mut ["The", "quick", "brown", "fox"];
110 /// slice.string_sort_unstable(lexical_sort::natural_lexical_cmp);
111 ///
112 /// assert_eq!(slice, &["brown", "fox", "quick", "The"]);
113 /// ```
114 fn string_sort_unstable(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
115
116 /// Sorts the items using the provided comparison function and another function that is
117 /// applied to each string before the comparison. This can be used to trim the strings.
118 ///
119 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
120 /// In this case you should use `[_]::sort_by()` directly.
121 ///
122 /// **This is a stable sort, which is often not required**.
123 /// You can use `string_sort_unstable` instead.
124 ///
125 /// ## Example
126 ///
127 /// ```rust
128 /// use lexical_sort::StringSort;
129 ///
130 /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
131 /// slice.string_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
132 ///
133 /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
134 /// ```
135 fn string_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
136 where
137 Cmp: FnMut(&str, &str) -> Ordering,
138 Map: FnMut(&str) -> &str;
139
140 /// Sorts the items using the provided comparison function and another function that is
141 /// applied to each string before the comparison. This can be used to trim the strings.
142 ///
143 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
144 /// In this case you should use `[_]::sort_by()` directly.
145 ///
146 /// This sort is unstable: The original order of equal strings is not preserved.
147 /// It is slightly more efficient than the stable alternative.
148 ///
149 /// ## Example
150 ///
151 /// ```rust
152 /// use lexical_sort::StringSort;
153 ///
154 /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
155 /// slice.string_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
156 ///
157 /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
158 /// ```
159 fn string_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
160 where
161 Cmp: FnMut(&str, &str) -> Ordering,
162 Map: FnMut(&str) -> &str;
163}
164
165impl<A: AsRef<str>> StringSort for [A] {
166 fn string_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
167 self.sort_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
168 }
169
170 fn string_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
171 self.sort_unstable_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
172 }
173
174 fn string_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
175 where
176 Cmp: FnMut(&str, &str) -> Ordering,
177 Map: FnMut(&str) -> &str,
178 {
179 self.sort_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
180 }
181
182 fn string_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
183 where
184 Cmp: FnMut(&str, &str) -> Ordering,
185 Map: FnMut(&str) -> &str,
186 {
187 self.sort_unstable_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
188 }
189}
190
191/// A trait to sort paths and OsStrings. This is a convenient wrapper for the standard library
192/// sort functions.
193///
194/// This trait is implemented for all slices whose inner type implements `AsRef<Path>`.
195///
196/// ## Example
197///
198/// ```rust
199/// # use std::path::Path;
200/// use lexical_sort::PathSort;
201///
202/// let slice: &mut [&Path] = &mut ["Hello".as_ref(), " world".as_ref(), "!".as_ref()];
203/// slice.path_sort_unstable(lexical_sort::natural_lexical_cmp);
204///
205/// // or trim the strings before comparing:
206/// slice.path_sort_unstable_by(lexical_sort::natural_lexical_cmp, str::trim_start);
207/// ```
208///
209/// If you want to sort regular strings, use the `StringSort` trait instead.
210#[cfg(feature = "std")]
211pub trait PathSort {
212 /// Sorts the items using the provided comparison function.
213 ///
214 /// **This is a stable sort, which is often not required**.
215 /// You can use `path_sort_unstable` instead.
216 ///
217 /// ## Example
218 ///
219 /// ```rust
220 /// # use std::path::Path;
221 /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
222 /// use lexical_sort::PathSort;
223 ///
224 /// let mut vec: Vec<&Path> = paths(&["Lorem", "ipsum", "dolor", "sit", "amet"]);
225 /// vec.path_sort(lexical_sort::natural_lexical_cmp);
226 ///
227 /// assert_eq!(vec, paths(&["amet", "dolor", "ipsum", "Lorem", "sit"]));
228 /// ```
229 fn path_sort(&mut self, comparator: impl FnMut(&str, &str) -> Ordering);
230
231 /// Sorts the items using the provided comparison function.
232 ///
233 /// This sort is unstable: The original order of equal strings is not preserved.
234 /// It is slightly more efficient than the stable alternative.
235 ///
236 /// ## Example
237 ///
238 /// ```rust
239 /// # use std::path::Path;
240 /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
241 /// use lexical_sort::PathSort;
242 ///
243 /// let mut vec: Vec<&Path> = paths(&["The", "quick", "brown", "fox"]);
244 /// vec.path_sort_unstable(lexical_sort::natural_lexical_cmp);
245 ///
246 /// assert_eq!(vec, paths(&["brown", "fox", "quick", "The"]));
247 /// ```
248 fn path_sort_unstable(&mut self, comparator: impl FnMut(&str, &str) -> Ordering);
249
250 /// Sorts the items using the provided comparison function and another function that is
251 /// applied to each string before the comparison. This can be used to trim the strings.
252 ///
253 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
254 /// In this case you should use `[_]::sort_by()` directly. You'll need to call
255 /// `to_string_lossy()` or `to_str().unwrap()` to convert a `Path` or `OsStr` to a `&str` first.
256 ///
257 /// **This is a stable sort, which is often not required**.
258 /// You can use `path_sort_unstable` instead.
259 ///
260 /// ## Example
261 ///
262 /// ```rust
263 /// # use std::path::Path;
264 /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
265 /// use lexical_sort::PathSort;
266 ///
267 /// let mut vec: Vec<&Path> = paths(&["Eeny", " meeny", " miny", " moe"]);
268 /// vec.path_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
269 ///
270 /// assert_eq!(vec, paths(&["Eeny", " meeny", " miny", " moe"]));
271 /// ```
272 fn path_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
273 where
274 Cmp: FnMut(&str, &str) -> Ordering,
275 Map: FnMut(&str) -> &str;
276
277 /// Sorts the items using the provided comparison function and another function that is
278 /// applied to each string before the comparison. This can be used to trim the strings.
279 ///
280 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
281 /// In this case you should use `[_]::sort_by()` directly. You'll need to call
282 /// `to_string_lossy()` or `to_str().unwrap()` to convert a `Path` or `OsStr` to a `&str` first.
283 ///
284 /// This sort is unstable: The original order of equal strings is not preserved.
285 /// It is slightly more efficient than the stable alternative.
286 ///
287 /// ## Example
288 ///
289 /// ```rust
290 /// # use std::path::Path;
291 /// # fn paths<'a>(s: &'a[&'a str]) -> Vec<&'a Path> { s.iter().map(Path::new).collect() }
292 /// use lexical_sort::PathSort;
293 ///
294 /// let mut vec: Vec<&Path> = paths(&["Eeny", " meeny", " miny", " moe"]);
295 /// vec.path_sort_by(lexical_sort::natural_lexical_cmp, str::trim_start);
296 ///
297 /// assert_eq!(vec, paths(&["Eeny", " meeny", " miny", " moe"]));
298 /// ```
299 fn path_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
300 where
301 Cmp: FnMut(&str, &str) -> Ordering,
302 Map: FnMut(&str) -> &str;
303}
304
305#[cfg(feature = "std")]
306impl<A: AsRef<Path>> PathSort for [A] {
307 fn path_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
308 self.sort_by(|lhs, rhs| {
309 cmp(
310 &lhs.as_ref().to_string_lossy(),
311 &rhs.as_ref().to_string_lossy(),
312 )
313 });
314 }
315
316 fn path_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
317 self.sort_unstable_by(|lhs, rhs| {
318 cmp(
319 &lhs.as_ref().to_string_lossy(),
320 &rhs.as_ref().to_string_lossy(),
321 )
322 });
323 }
324
325 fn path_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
326 where
327 Cmp: FnMut(&str, &str) -> Ordering,
328 Map: FnMut(&str) -> &str,
329 {
330 self.sort_by(|lhs, rhs| {
331 cmp(
332 map(&lhs.as_ref().to_string_lossy()),
333 map(&rhs.as_ref().to_string_lossy()),
334 )
335 });
336 }
337
338 fn path_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
339 where
340 Cmp: FnMut(&str, &str) -> Ordering,
341 Map: FnMut(&str) -> &str,
342 {
343 self.sort_unstable_by(|lhs, rhs| {
344 cmp(
345 map(&lhs.as_ref().to_string_lossy()),
346 map(&rhs.as_ref().to_string_lossy()),
347 )
348 });
349 }
350}
351
352#[test]
353fn test_sort() {
354 macro_rules! assert_lexically_sorted {
355 ($T:ident, $array:expr, natural = $natural:expr) => {{
356 let mut sorted = $array.clone();
357 if $natural {
358 sorted.$T(natural_lexical_cmp);
359 } else {
360 sorted.$T(lexical_cmp);
361 }
362
363 assert_eq!($array, sorted);
364 }};
365 }
366
367 let strings = [
368 "-", "-$", "-a", "100", "50", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
369 ];
370 let strings_nat = [
371 "-", "-$", "-a", "50", "100", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
372 ];
373
374 assert_lexically_sorted!(string_sort, strings, natural = false);
375 assert_lexically_sorted!(string_sort, strings_nat, natural = true);
376
377 #[cfg(feature = "std")]
378 {
379 let paths: Vec<&Path> = strings.iter().map(|s| Path::new(s)).collect();
380 let paths_nat: Vec<&Path> = strings_nat.iter().map(|s| Path::new(s)).collect();
381
382 assert_lexically_sorted!(path_sort, paths, natural = false);
383 assert_lexically_sorted!(path_sort, paths_nat, natural = true);
384 }
385}