gix_path/
convert.rs

1use std::path::Component;
2use std::{
3    borrow::Cow,
4    ffi::{OsStr, OsString},
5    path::{Path, PathBuf},
6};
7
8use bstr::{BStr, BString};
9
/// The error returned by the fallible conversions in this module, like [`try_into_bstr()`],
/// when the input can't be converted from or to UTF-8 encoded bytes.
#[derive(Debug)]
pub struct Utf8Error;

impl std::fmt::Display for Utf8Error {
    /// Write a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const MESSAGE: &str = "Could not convert to UTF8 or from UTF8 due to ill-formed input";
        f.write_str(MESSAGE)
    }
}

impl std::error::Error for Utf8Error {}
21
22/// Like [`into_bstr()`], but takes `OsStr` as input for a lossless, but fallible, conversion.
23pub fn os_str_into_bstr(path: &OsStr) -> Result<&BStr, Utf8Error> {
24    let path = try_into_bstr(Cow::Borrowed(path.as_ref()))?;
25    match path {
26        Cow::Borrowed(path) => Ok(path),
27        Cow::Owned(_) => unreachable!("borrowed cows stay borrowed"),
28    }
29}
30
31/// Like [`into_bstr()`], but takes `OsString` as input for a lossless, but fallible, conversion.
32pub fn os_string_into_bstring(path: OsString) -> Result<BString, Utf8Error> {
33    let path = try_into_bstr(Cow::Owned(path.into()))?;
34    match path {
35        Cow::Borrowed(_path) => unreachable!("borrowed cows stay borrowed"),
36        Cow::Owned(path) => Ok(path),
37    }
38}
39
40/// Like [`into_bstr()`], but takes `Cow<OsStr>` as input for a lossless, but fallible, conversion.
41pub fn try_os_str_into_bstr(path: Cow<'_, OsStr>) -> Result<Cow<'_, BStr>, Utf8Error> {
42    match path {
43        Cow::Borrowed(path) => os_str_into_bstr(path).map(Cow::Borrowed),
44        Cow::Owned(path) => os_string_into_bstring(path).map(Cow::Owned),
45    }
46}
47
48/// Convert the given path either into its raw bytes on Unix or its UTF-8 encoded counterpart on Windows.
49///
50/// On Windows, if the source `Path`` contains ill-formed, lone surrogates, the UTF-8 conversion will fail
51/// causing `Utf8Error` to be returned.
52pub fn try_into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Result<Cow<'a, BStr>, Utf8Error> {
53    let path = path.into();
54    let path_str = match path {
55        Cow::Owned(path) => Cow::Owned({
56            #[cfg(unix)]
57            let p: BString = {
58                use std::os::unix::ffi::OsStringExt;
59                path.into_os_string().into_vec().into()
60            };
61            #[cfg(target_os = "wasi")]
62            let p: BString = {
63                use std::os::wasi::ffi::OsStringExt;
64                path.into_os_string().into_vec().into()
65            };
66            #[cfg(not(any(unix, target_os = "wasi")))]
67            let p: BString = path.into_os_string().into_string().map_err(|_| Utf8Error)?.into();
68            p
69        }),
70        Cow::Borrowed(path) => Cow::Borrowed({
71            #[cfg(unix)]
72            let p: &BStr = {
73                use std::os::unix::ffi::OsStrExt;
74                path.as_os_str().as_bytes().into()
75            };
76            #[cfg(target_os = "wasi")]
77            let p: &BStr = {
78                use std::os::wasi::ffi::OsStrExt;
79                path.as_os_str().as_bytes().into()
80            };
81            #[cfg(not(any(unix, target_os = "wasi")))]
82            let p: &BStr = path.to_str().ok_or(Utf8Error)?.as_bytes().into();
83            p
84        }),
85    };
86    Ok(path_str)
87}
88
89/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on Windows.
90pub fn into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Cow<'a, BStr> {
91    try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8")
92}
93
94/// Join `path` to `base` such that they are separated with a `/`, i.e. `base/path`.
95pub fn join_bstr_unix_pathsep<'a, 'b>(base: impl Into<Cow<'a, BStr>>, path: impl Into<&'b BStr>) -> Cow<'a, BStr> {
96    let mut base = base.into();
97    if !base.is_empty() && base.last() != Some(&b'/') {
98        base.to_mut().push(b'/');
99    }
100    base.to_mut().extend_from_slice(path.into());
101    base
102}
103
104/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on Unix.
105///
106/// On Windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before.
107/// There are some potential Git versions and Windows installations which produce malformed UTF-16
108/// if certain emojis are in the path. It's as rare as it sounds, but possible.
109pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> {
110    #[cfg(unix)]
111    let p = {
112        use std::os::unix::ffi::OsStrExt;
113        OsStr::from_bytes(input).as_ref()
114    };
115    #[cfg(target_os = "wasi")]
116    let p: &Path = {
117        use std::os::wasi::ffi::OsStrExt;
118        OsStr::from_bytes(input).as_ref()
119    };
120    #[cfg(not(any(unix, target_os = "wasi")))]
121    let p = Path::new(std::str::from_utf8(input).map_err(|_| Utf8Error)?);
122    Ok(p)
123}
124
125/// Similar to [`from_byte_slice()`], but takes either borrowed or owned `input`.
126pub fn try_from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Result<Cow<'a, Path>, Utf8Error> {
127    let input = input.into();
128    match input {
129        Cow::Borrowed(input) => try_from_byte_slice(input).map(Cow::Borrowed),
130        Cow::Owned(input) => try_from_bstring(input).map(Cow::Owned),
131    }
132}
133
134/// Similar to [`try_from_bstr()`], but **panics** if malformed surrogates are encountered on Windows.
135pub fn from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Cow<'a, Path> {
136    try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8")
137}
138
139/// Similar to [`try_from_bstr()`], but takes and produces owned data.
140pub fn try_from_bstring(input: impl Into<BString>) -> Result<PathBuf, Utf8Error> {
141    let input = input.into();
142    #[cfg(unix)]
143    let p = {
144        use std::os::unix::ffi::OsStringExt;
145        std::ffi::OsString::from_vec(input.into()).into()
146    };
147    #[cfg(target_os = "wasi")]
148    let p: PathBuf = {
149        use std::os::wasi::ffi::OsStringExt;
150        std::ffi::OsString::from_vec(input.into()).into()
151    };
152    #[cfg(not(any(unix, target_os = "wasi")))]
153    let p = {
154        use bstr::ByteVec;
155        PathBuf::from(
156            {
157                let v: Vec<_> = input.into();
158                v
159            }
160            .into_string()
161            .map_err(|_| Utf8Error)?,
162        )
163    };
164    Ok(p)
165}
166
167/// Similar to [`try_from_bstring()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
168pub fn from_bstring(input: impl Into<BString>) -> PathBuf {
169    try_from_bstring(input).expect("well-formed UTF-8 on windows")
170}
171
172/// Similar to [`try_from_byte_slice()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
173pub fn from_byte_slice(input: &[u8]) -> &Path {
174    try_from_byte_slice(input).expect("well-formed UTF-8 on windows")
175}
176
177fn replace<'a>(path: impl Into<Cow<'a, BStr>>, find: u8, replace: u8) -> Cow<'a, BStr> {
178    let path = path.into();
179    match path {
180        Cow::Owned(mut path) => {
181            for b in path.iter_mut().filter(|b| **b == find) {
182                *b = replace;
183            }
184            path.into()
185        }
186        Cow::Borrowed(path) => {
187            if !path.contains(&find) {
188                return path.into();
189            }
190            let mut path = path.to_owned();
191            for b in path.iter_mut().filter(|b| **b == find) {
192                *b = replace;
193            }
194            path.into()
195        }
196    }
197}
198
199/// Assures the given bytes use the native path separator.
200pub fn to_native_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
201    #[cfg(not(windows))]
202    let p = to_unix_separators(path);
203    #[cfg(windows)]
204    let p = to_windows_separators(path);
205    p
206}
207
208/// Convert paths with slashes to backslashes on Windows and do nothing on Unix,
209/// but **panic** if unpaired surrogates are encountered on Windows.
210pub fn to_native_path_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, std::path::Path> {
211    #[cfg(not(windows))]
212    {
213        crate::from_bstr(path)
214    }
215    #[cfg(windows)]
216    {
217        crate::from_bstr(to_windows_separators(path))
218    }
219}
220
221/// Replace Windows path separators with slashes, but only do so on Windows.
222pub fn to_unix_separators_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
223    #[cfg(windows)]
224    {
225        to_unix_separators(path)
226    }
227    #[cfg(not(windows))]
228    {
229        path.into()
230    }
231}
232
233/// Replace Windows path separators with slashes, which typically resembles a Unix path, unconditionally.
234///
235/// **Note** Do not use these and prefer the conditional versions of this method.
236pub fn to_unix_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
237    replace(path, b'\\', b'/')
238}
239
240/// Find slashes and replace them with backslashes, unconditionally.
241///
242/// **Note** Do not use these and prefer the conditional versions of this method.
243pub fn to_windows_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
244    replace(path, b'/', b'\\')
245}
246
/// Resolve `..` components of `path` virtually, that is without accessing the filesystem.
///
/// For example, this turns `a/./b/c/.././..` into `a`, and turns `/a/../b/..` into `/`.
///
/// A `path` without any `..` component is returned as is, which leaves its `.` components
/// and duplicate separators untouched.
///
/// If the input path was relative and the result is empty or equal to `current_dir`,
/// `.` is returned instead of the full path to `current_dir`.
///
/// This is particularly useful when manipulating paths that are based on user input, and not
/// resolving intermediate symlinks keeps the path similar to what the user provided. If that's not
/// desirable, use [`realpath()`][crate::realpath()] instead.
///
/// Note that `current_dir` is used if we run out of path components to pop off. It is
/// expected to be absolute as typical return value of `std::env::current_dir()` or
/// `gix_fs::current_dir(…)` when `core.precomposeUnicode` is known. It is used at most once,
/// and as a `current_dir` like `/c` can be exhausted by paths like `../../r`, `None` is returned
/// to indicate the inability to produce a logically consistent path.
pub fn normalize<'a>(path: Cow<'a, Path>, current_dir: &Path) -> Option<Cow<'a, Path>> {
    let has_parent_dir = path.components().any(|c| c == Component::ParentDir);
    if !has_parent_dir {
        return Some(path);
    }

    let was_relative = path.is_relative();
    // Consumed when first needed, so a second request for it fails the normalization.
    let mut remaining_current_dir = Some(current_dir);
    let mut resolved = PathBuf::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                let nothing_to_pop = resolved.as_os_str().is_empty() || resolved == Path::new(".");
                if nothing_to_pop {
                    resolved.push(remaining_current_dir.take()?);
                }
                if !resolved.pop() {
                    return None;
                }
            }
            other => resolved.push(other),
        }
    }

    let is_current_dir = resolved.as_os_str().is_empty() || resolved == current_dir;
    Some(if is_current_dir && was_relative {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(resolved)
    })
}
296
/// Rewrite the worktree-relative `relative_path` so that it is relative to `prefix`, with `prefix`
/// being the worktree-relative path to the position of the user, or current working directory.
///
/// Leading components shared by both paths are dropped, and each remaining component of `prefix`
/// becomes a `..`. If nothing is left, `.` is returned.
///
/// This is a no-op if `prefix` is empty.
///
/// Note that both `relative_path` and `prefix` are assumed to be [normalized](normalize()), and
/// failure to do so will lead to incorrect results.
///
/// Note that both input paths are expected to be equal in terms of case too, as comparisons will
/// be case-sensitive.
pub fn relativize_with_prefix<'a>(relative_path: &'a Path, prefix: &Path) -> Cow<'a, Path> {
    if prefix.as_os_str().is_empty() {
        return Cow::Borrowed(relative_path);
    }
    debug_assert!(
        relative_path.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but relative_path was not"
    );
    debug_assert!(
        prefix.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but prefix was not"
    );

    let mut remaining = relative_path.components().peekable();
    let mut relative = PathBuf::new();
    let mut still_matching = true;
    for prefix_component in prefix.components() {
        // A prefix component is only skipped while all previous ones matched as well.
        let is_shared = still_matching
            && match (prefix_component, remaining.peek()) {
                (Component::Normal(expected), Some(Component::Normal(actual))) => {
                    still_matching = expected == *actual;
                    still_matching
                }
                _ => false,
            };
        if is_shared {
            remaining.next();
        } else {
            relative.push(Component::ParentDir);
        }
    }
    relative.extend(remaining);

    if relative.as_os_str().is_empty() {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(relative)
    }
}