gix_path/
convert.rs

1use std::path::Component;
2use std::{
3    borrow::Cow,
4    ffi::{OsStr, OsString},
5    path::{Path, PathBuf},
6};
7
8use bstr::{BStr, BString};
9
/// The error returned by the fallible conversions in this module, like [`try_into_bstr()`] or
/// [`try_from_byte_slice()`], when the input cannot be converted from or to UTF-8 encoded bytes.
#[derive(Debug)]
pub struct Utf8Error;

impl std::fmt::Display for Utf8Error {
    /// Write the fixed, human-readable description of the conversion failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const MESSAGE: &str = "Could not convert to UTF8 or from UTF8 due to ill-formed input";
        f.write_str(MESSAGE)
    }
}

impl std::error::Error for Utf8Error {}
21
22/// Like [`into_bstr()`], but takes `OsStr` as input for a lossless, but fallible, conversion.
23pub fn os_str_into_bstr(path: &OsStr) -> Result<&BStr, Utf8Error> {
24    let path = try_into_bstr(Cow::Borrowed(path.as_ref()))?;
25    match path {
26        Cow::Borrowed(path) => Ok(path),
27        Cow::Owned(_) => unreachable!("borrowed cows stay borrowed"),
28    }
29}
30
31/// Like [`into_bstr()`], but takes `OsString` as input for a lossless, but fallible, conversion.
32pub fn os_string_into_bstring(path: OsString) -> Result<BString, Utf8Error> {
33    let path = try_into_bstr(Cow::Owned(path.into()))?;
34    match path {
35        Cow::Borrowed(_path) => unreachable!("borrowed cows stay borrowed"),
36        Cow::Owned(path) => Ok(path),
37    }
38}
39
40/// Like [`into_bstr()`], but takes `Cow<OsStr>` as input for a lossless, but fallible, conversion.
41pub fn try_os_str_into_bstr(path: Cow<'_, OsStr>) -> Result<Cow<'_, BStr>, Utf8Error> {
42    match path {
43        Cow::Borrowed(path) => os_str_into_bstr(path).map(Cow::Borrowed),
44        Cow::Owned(path) => os_string_into_bstring(path).map(Cow::Owned),
45    }
46}
47
48/// Convert the given path either into its raw bytes on unix or its UTF8 encoded counterpart on windows.
49///
50/// On windows, if the source Path contains ill-formed, lone surrogates, the UTF-8 conversion will fail
51/// causing `Utf8Error` to be returned.
52pub fn try_into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Result<Cow<'a, BStr>, Utf8Error> {
53    let path = path.into();
54    let path_str = match path {
55        Cow::Owned(path) => Cow::Owned({
56            #[cfg(unix)]
57            let p: BString = {
58                use std::os::unix::ffi::OsStringExt;
59                path.into_os_string().into_vec().into()
60            };
61            #[cfg(target_os = "wasi")]
62            let p: BString = {
63                use std::os::wasi::ffi::OsStringExt;
64                path.into_os_string().into_vec().into()
65            };
66            #[cfg(not(any(unix, target_os = "wasi")))]
67            let p: BString = path.into_os_string().into_string().map_err(|_| Utf8Error)?.into();
68            p
69        }),
70        Cow::Borrowed(path) => Cow::Borrowed({
71            #[cfg(unix)]
72            let p: &BStr = {
73                use std::os::unix::ffi::OsStrExt;
74                path.as_os_str().as_bytes().into()
75            };
76            #[cfg(target_os = "wasi")]
77            let p: &BStr = {
78                use std::os::wasi::ffi::OsStrExt;
79                path.as_os_str().as_bytes().into()
80            };
81            #[cfg(not(any(unix, target_os = "wasi")))]
82            let p: &BStr = path.to_str().ok_or(Utf8Error)?.as_bytes().into();
83            p
84        }),
85    };
86    Ok(path_str)
87}
88
89/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on windows.
90pub fn into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Cow<'a, BStr> {
91    try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8")
92}
93
94/// Join `path` to `base` such that they are separated with a `/`, i.e. `base/path`.
95pub fn join_bstr_unix_pathsep<'a, 'b>(base: impl Into<Cow<'a, BStr>>, path: impl Into<&'b BStr>) -> Cow<'a, BStr> {
96    let mut base = base.into();
97    if !base.is_empty() && base.last() != Some(&b'/') {
98        base.to_mut().push(b'/');
99    }
100    base.to_mut().extend_from_slice(path.into());
101    base
102}
103
104/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on unix.
105///
106/// On windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before. There are some potential
107/// git versions and windows installation which produce mal-formed UTF-16 if certain emojies are in the path. It's as rare as
108/// it sounds, but possible.
109pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> {
110    #[cfg(unix)]
111    let p = {
112        use std::os::unix::ffi::OsStrExt;
113        OsStr::from_bytes(input).as_ref()
114    };
115    #[cfg(target_os = "wasi")]
116    let p: &Path = {
117        use std::os::wasi::ffi::OsStrExt;
118        OsStr::from_bytes(input).as_ref()
119    };
120    #[cfg(not(any(unix, target_os = "wasi")))]
121    let p = Path::new(std::str::from_utf8(input).map_err(|_| Utf8Error)?);
122    Ok(p)
123}
124
125/// Similar to [`from_byte_slice()`], but takes either borrowed or owned `input`.
126pub fn try_from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Result<Cow<'a, Path>, Utf8Error> {
127    let input = input.into();
128    match input {
129        Cow::Borrowed(input) => try_from_byte_slice(input).map(Cow::Borrowed),
130        Cow::Owned(input) => try_from_bstring(input).map(Cow::Owned),
131    }
132}
133
134/// Similar to [`try_from_bstr()`], but **panics** if malformed surrogates are encountered on windows.
135pub fn from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Cow<'a, Path> {
136    try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8")
137}
138
139/// Similar to [`try_from_bstr()`], but takes and produces owned data.
140pub fn try_from_bstring(input: impl Into<BString>) -> Result<PathBuf, Utf8Error> {
141    let input = input.into();
142    #[cfg(unix)]
143    let p = {
144        use std::os::unix::ffi::OsStringExt;
145        std::ffi::OsString::from_vec(input.into()).into()
146    };
147    #[cfg(target_os = "wasi")]
148    let p: PathBuf = {
149        use std::os::wasi::ffi::OsStringExt;
150        std::ffi::OsString::from_vec(input.into()).into()
151    };
152    #[cfg(not(any(unix, target_os = "wasi")))]
153    let p = {
154        use bstr::ByteVec;
155        PathBuf::from(
156            {
157                let v: Vec<_> = input.into();
158                v
159            }
160            .into_string()
161            .map_err(|_| Utf8Error)?,
162        )
163    };
164    Ok(p)
165}
166
167/// Similar to [`try_from_bstring()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
168pub fn from_bstring(input: impl Into<BString>) -> PathBuf {
169    try_from_bstring(input).expect("well-formed UTF-8 on windows")
170}
171
172/// Similar to [`try_from_byte_slice()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
173pub fn from_byte_slice(input: &[u8]) -> &Path {
174    try_from_byte_slice(input).expect("well-formed UTF-8 on windows")
175}
176
177fn replace<'a>(path: impl Into<Cow<'a, BStr>>, find: u8, replace: u8) -> Cow<'a, BStr> {
178    let path = path.into();
179    match path {
180        Cow::Owned(mut path) => {
181            for b in path.iter_mut().filter(|b| **b == find) {
182                *b = replace;
183            }
184            path.into()
185        }
186        Cow::Borrowed(path) => {
187            if !path.contains(&find) {
188                return path.into();
189            }
190            let mut path = path.to_owned();
191            for b in path.iter_mut().filter(|b| **b == find) {
192                *b = replace;
193            }
194            path.into()
195        }
196    }
197}
198
199/// Assures the given bytes use the native path separator.
200pub fn to_native_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
201    #[cfg(not(windows))]
202    let p = to_unix_separators(path);
203    #[cfg(windows)]
204    let p = to_windows_separators(path);
205    p
206}
207
208/// Convert paths with slashes to backslashes on windows and do nothing on unix, but **panics** if malformed surrogates are encountered on windows.
209pub fn to_native_path_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, std::path::Path> {
210    #[cfg(not(windows))]
211    {
212        crate::from_bstr(path)
213    }
214    #[cfg(windows)]
215    {
216        crate::from_bstr(to_windows_separators(path))
217    }
218}
219
220/// Replaces windows path separators with slashes, but only do so on windows.
221pub fn to_unix_separators_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
222    #[cfg(windows)]
223    {
224        replace(path, b'\\', b'/')
225    }
226    #[cfg(not(windows))]
227    {
228        path.into()
229    }
230}
231
232/// Replaces windows path separators with slashes, unconditionally.
233///
234/// **Note** Do not use these and prefer the conditional versions of this method.
235// TODO: use https://lib.rs/crates/path-slash to handle escapes
236pub fn to_unix_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
237    replace(path, b'\\', b'/')
238}
239
240/// Find backslashes and replace them with slashes, which typically resembles a unix path, unconditionally.
241///
242/// **Note** Do not use these and prefer the conditional versions of this method.
243// TODO: use https://lib.rs/crates/path-slash to handle escapes
244pub fn to_windows_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
245    replace(path, b'/', b'\\')
246}
247
/// Resolve relative components virtually without accessing the file system, e.g. turn `a/./b/c/.././..` into `a`,
/// without keeping intermediate `..`, and `/a/../b/..` becomes `/`.
/// If the input path was relative and ends up being the `current_dir`, `.` is returned instead of the full path
/// to `current_dir`.
/// Paths without any `..` component are returned as is, hence single `.` components as well as duplicate
/// separators are left untouched in that case.
///
/// This is particularly useful when manipulating paths that are based on user input, and not resolving intermediate
/// symlinks keeps the path similar to what the user provided. If that's not desirable, use
/// [`realpath()`](crate::realpath()) instead.
///
/// Note that we might access the `current_dir` if we run out of path components to pop off, which is expected to be
/// absolute as typical return value of `std::env::current_dir()` or `gix_fs::current_dir(…)` when
/// `core.precomposeUnicode` is known.
/// As a `current_dir` like `/c` can be exhausted by paths like `../../r`, `None` will be returned to indicate the
/// inability to produce a logically consistent path.
pub fn normalize<'a>(path: Cow<'a, Path>, current_dir: &Path) -> Option<Cow<'a, Path>> {
    let has_parent_dir = path.components().any(|c| c == Component::ParentDir);
    if !has_parent_dir {
        return Some(path);
    }

    let was_relative = path.is_relative();
    // The current directory may be consumed only once, running out of components again is an error.
    let mut remaining_current_dir = Some(current_dir);
    let mut resolved = PathBuf::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                let is_exhausted = resolved.as_os_str().is_empty() || resolved == Path::new(".");
                if is_exhausted {
                    resolved.push(remaining_current_dir.take()?);
                }
                if !resolved.pop() {
                    return None;
                }
            }
            other => resolved.push(other),
        }
    }

    let is_current_dir = resolved.as_os_str().is_empty() || resolved == current_dir;
    Some(if is_current_dir && was_relative {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(resolved)
    })
}
292
/// Rebuild the worktree-relative `relative_path` to be relative to `prefix`, which is the worktree-relative
/// path equivalent to the position of the user, or current working directory.
/// This is a no-op if `prefix` is empty, and `.` is returned if both paths are equal.
///
/// Note that both `relative_path` and `prefix` are assumed to be [normalized](normalize()), and failure to do so
/// will lead to incorrect results.
///
/// Note that both input paths are expected to be equal in terms of case too, as comparisons will be case-sensitive.
///
/// # Panics
///
/// In debug builds, if either path contains components that aren't plain names.
pub fn relativize_with_prefix<'a>(relative_path: &'a Path, prefix: &Path) -> Cow<'a, Path> {
    if prefix.as_os_str().is_empty() {
        return Cow::Borrowed(relative_path);
    }
    debug_assert!(
        relative_path.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but relative_path was not"
    );
    debug_assert!(
        prefix.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but prefix was not"
    );

    let mut relativized = PathBuf::new();
    let mut remaining = relative_path.components().peekable();
    let mut still_matching = true;
    for prefix_component in prefix.components() {
        if still_matching {
            if let (Component::Normal(expected), Some(Component::Normal(actual))) =
                (prefix_component, remaining.peek().copied())
            {
                still_matching = expected == actual;
                if still_matching {
                    // Shared leading component: it's dropped from both sides.
                    remaining.next();
                    continue;
                }
            }
        }
        // Each prefix component that isn't shared has to be left by going up one level.
        relativized.push(Component::ParentDir);
    }
    relativized.extend(remaining);

    if relativized.as_os_str().is_empty() {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(relativized)
    }
}