gix_utils/str.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
use std::borrow::Cow;
use std::ffi::OsStr;
use std::path::Path;
/// Assure that `s` is precomposed, i.e. `ä` is a single code-point, and not two i.e. `a` and `<umlaut>`.
///
/// At the expense of extra-compute, it does nothing if there is no work to be done, returning the original input without allocating.
pub fn precompose(s: Cow<'_, str>) -> Cow<'_, str> {
use unicode_normalization::UnicodeNormalization;
if s.as_ref().nfc().cmp(s.as_ref().chars()).is_eq() {
s
} else {
Cow::Owned(s.as_ref().nfc().collect())
}
}
/// Assure that `s` is decomposed, i.e. `ä` turns into `a` and `<umlaut>`.
///
/// At the expense of extra-compute, it does nothing if there is no work to be done, returning the original input without allocating.
pub fn decompose(s: Cow<'_, str>) -> Cow<'_, str> {
use unicode_normalization::UnicodeNormalization;
if s.as_ref().nfd().cmp(s.as_ref().chars()).is_eq() {
s
} else {
Cow::Owned(s.as_ref().nfd().collect())
}
}
/// Return the precomposed version of `path`, or `path` itself if it contained illformed unicode,
/// or if the unicode version didn't contains decomposed unicode.
/// Otherwise, similar to [`precompose()`]
pub fn precompose_path(path: Cow<'_, Path>) -> Cow<'_, Path> {
match path.to_str() {
None => path,
Some(maybe_decomposed) => match precompose(maybe_decomposed.into()) {
Cow::Borrowed(_) => path,
Cow::Owned(precomposed) => Cow::Owned(precomposed.into()),
},
}
}
/// Return the precomposed version of `name`, or `name` itself if it contained illformed unicode,
/// or if the unicode version didn't contains decomposed unicode.
/// Otherwise, similar to [`precompose()`]
pub fn precompose_os_string(name: Cow<'_, OsStr>) -> Cow<'_, OsStr> {
match name.to_str() {
None => name,
Some(maybe_decomposed) => match precompose(maybe_decomposed.into()) {
Cow::Borrowed(_) => name,
Cow::Owned(precomposed) => Cow::Owned(precomposed.into()),
},
}
}
/// Return the precomposed version of `s`, or `s` itself if it contained illformed unicode,
/// or if the unicode version didn't contains decomposed unicode.
/// Otherwise, similar to [`precompose()`]
#[cfg(feature = "bstr")]
pub fn precompose_bstr(s: Cow<'_, bstr::BStr>) -> Cow<'_, bstr::BStr> {
use bstr::ByteSlice;
match s.to_str().ok() {
None => s,
Some(maybe_decomposed) => match precompose(maybe_decomposed.into()) {
Cow::Borrowed(_) => s,
Cow::Owned(precomposed) => Cow::Owned(precomposed.into()),
},
}
}