gix_command/
lib.rs

1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2#![deny(rust_2018_idioms, missing_docs)]
3#![forbid(unsafe_code)]
4
5use std::io::Read;
6use std::{
7    ffi::OsString,
8    path::{Path, PathBuf},
9};
10
11use bstr::{BString, ByteSlice};
12
/// The settings to use when invoking a command via [`spawn()`][Prepare::spawn()], obtained by calling [`prepare()`].
///
/// All fields are public, but are typically adjusted with the builder methods of this type.
pub struct Prepare {
    /// The command to invoke (either with or without shell depending on `use_shell`).
    pub command: OsString,
    /// Additional information to be passed to the spawned command.
    pub context: Option<Context>,
    /// The way standard input is configured.
    pub stdin: std::process::Stdio,
    /// The way standard output is configured.
    pub stdout: std::process::Stdio,
    /// The way standard error is configured.
    pub stderr: std::process::Stdio,
    /// The arguments to pass to the spawned process.
    pub args: Vec<OsString>,
    /// Environment variables to set in the spawned process, as `(key, value)` pairs.
    pub env: Vec<(OsString, OsString)>,
    /// If `true`, we will use `shell_program` or `sh` to execute the `command`.
    pub use_shell: bool,
    /// The name or path to the shell program to use instead of `sh`.
    pub shell_program: Option<OsString>,
    /// If `true` (default `true` on windows and `false` everywhere else)
    /// we will see if it's safe to manually invoke `command` after splitting
    /// its arguments as a shell would do.
    /// Note that outside of windows, it's generally not advisable as this
    /// removes support for literal shell scripts with shell-builtins.
    ///
    /// This mimics the behaviour we see with `git` on windows, which also
    /// won't invoke the shell there at all.
    ///
    /// Only effective if `use_shell` is `true` as well, as the shell will
    /// be used as a fallback if it's not possible to split arguments as
    /// the command-line contains 'scripting'.
    pub allow_manual_arg_splitting: bool,
}
47
/// Additional information that is relevant to spawned processes, which typically receive
/// a wealth of contextual information when spawned from `git`.
///
/// Each field that is set is turned into an environment variable (or, for `stderr`, a stdio
/// configuration) of the spawned process; fields left at `None` change nothing.
///
/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
/// for details.
#[derive(Debug, Default, Clone)]
pub struct Context {
    /// The `.git` directory that contains the repository.
    ///
    /// If set, it will be used to set the `GIT_DIR` environment variable.
    pub git_dir: Option<PathBuf>,
    /// Set the `GIT_WORK_TREE` environment variable with the given path.
    pub worktree_dir: Option<PathBuf>,
    /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub no_replace_objects: Option<bool>,
    /// Set the `GIT_NAMESPACE` variable with the given value, effectively namespacing all
    /// operations on references.
    pub ref_namespace: Option<BString>,
    /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub literal_pathspecs: Option<bool>,
    /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
    /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
    /// If `None`, neither variable will be set.
    pub glob_pathspecs: Option<bool>,
    /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub icase_pathspecs: Option<bool>,
    /// If `true`, inherit `stderr` just like it's the default when spawning processes.
    /// If `false`, suppress all stderr output.
    /// If not `None`, this will override any value set with [`Prepare::stderr()`].
    pub stderr: Option<bool>,
}
82
83mod prepare {
84    use std::borrow::Cow;
85    use std::{
86        ffi::OsString,
87        process::{Command, Stdio},
88    };
89
90    use bstr::ByteSlice;
91
92    use crate::{extract_interpreter, win_path_lookup, Context, Prepare};
93
94    /// Builder
95    impl Prepare {
96        /// If called, the command will not be executed directly, but with `sh`, but only if the
97        /// command passed to [`prepare`](super::prepare()) requires this.
98        ///
99        /// This also allows to pass shell scripts as command, or use commands that contain arguments which are subsequently
100        /// parsed by `sh`.
101        pub fn with_shell(mut self) -> Self {
102            self.use_shell = self.command.to_str().map_or(true, |cmd| {
103                cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some()
104            });
105            self
106        }
107
108        /// Set the name or path to the shell `program` to use, to avoid using the default shell which is `sh`.
109        pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
110            self.shell_program = Some(program.into());
111            self
112        }
113
114        /// Unconditionally turn off using the shell when spawning the command.
115        /// Note that not using the shell is the default so an effective use of this method
116        /// is some time after [`with_shell()`][Prepare::with_shell()] was called.
117        pub fn without_shell(mut self) -> Self {
118            self.use_shell = false;
119            self
120        }
121
122        /// Set additional `ctx` to be used when spawning the process.
123        ///
124        /// Note that this is a must for most kind of commands that `git` usually spawns,
125        /// as at least they need to know the correct `git` repository to function.
126        pub fn with_context(mut self, ctx: Context) -> Self {
127            self.context = Some(ctx);
128            self
129        }
130
131        /// Use a shell, but try to split arguments by hand if this can be safely done without a shell.
132        ///
133        /// If that's not the case, use a shell instead.
134        pub fn with_shell_allow_manual_argument_splitting(mut self) -> Self {
135            self.allow_manual_arg_splitting = true;
136            self.with_shell()
137        }
138
139        /// Use a shell, but prohibit splitting arguments by hand even if this could be safely done without a shell.
140        pub fn with_shell_disallow_manual_argument_splitting(mut self) -> Self {
141            self.allow_manual_arg_splitting = false;
142            self.with_shell()
143        }
144
145        /// Configure the process to use `stdio` for _stdin.
146        pub fn stdin(mut self, stdio: Stdio) -> Self {
147            self.stdin = stdio;
148            self
149        }
150        /// Configure the process to use `stdio` for _stdout_.
151        pub fn stdout(mut self, stdio: Stdio) -> Self {
152            self.stdout = stdio;
153            self
154        }
155        /// Configure the process to use `stdio` for _stderr.
156        pub fn stderr(mut self, stdio: Stdio) -> Self {
157            self.stderr = stdio;
158            self
159        }
160
161        /// Add `arg` to the list of arguments to call the command with.
162        pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
163            self.args.push(arg.into());
164            self
165        }
166
167        /// Add `args` to the list of arguments to call the command with.
168        pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
169            self.args
170                .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
171            self
172        }
173
174        /// Add `key` with `value` to the environment of the spawned command.
175        pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
176            self.env.push((key.into(), value.into()));
177            self
178        }
179    }
180
181    /// Finalization
182    impl Prepare {
183        /// Spawn the command as configured.
184        pub fn spawn(self) -> std::io::Result<std::process::Child> {
185            let mut cmd = Command::from(self);
186            gix_trace::debug!(cmd = ?cmd);
187            cmd.spawn()
188        }
189    }
190
191    impl From<Prepare> for Command {
192        fn from(mut prep: Prepare) -> Command {
193            let mut cmd = if prep.use_shell {
194                let split_args = prep
195                    .allow_manual_arg_splitting
196                    .then(|| {
197                        if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
198                            .find_byteset(b"\\|&;<>()$`\n*?[#~%")
199                            .is_none()
200                        {
201                            prep.command
202                                .to_str()
203                                .and_then(|args| shell_words::split(args).ok().map(Vec::into_iter))
204                        } else {
205                            None
206                        }
207                    })
208                    .flatten();
209                match split_args {
210                    Some(mut args) => {
211                        let mut cmd = Command::new(args.next().expect("non-empty input"));
212                        cmd.args(args);
213                        cmd
214                    }
215                    None => {
216                        let mut cmd = Command::new(
217                            prep.shell_program
218                                .unwrap_or(if cfg!(windows) { "sh" } else { "/bin/sh" }.into()),
219                        );
220                        cmd.arg("-c");
221                        if !prep.args.is_empty() {
222                            if prep.command.to_str().map_or(true, |cmd| !cmd.contains("$@")) {
223                                prep.command.push(" \"$@\"");
224                            } else {
225                                gix_trace::debug!(
226                                    "Will not add '$@' to '{:?}' as it seems to contain it already",
227                                    prep.command
228                                );
229                            }
230                        }
231                        cmd.arg(prep.command);
232                        cmd.arg("--");
233                        cmd
234                    }
235                }
236            } else if cfg!(windows) {
237                let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
238                    .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
239                    .map(Cow::Owned)
240                    .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
241                if let Some(shebang) = extract_interpreter(program.as_ref()) {
242                    let mut cmd = Command::new(shebang.interpreter);
243                    // For relative paths, we may have picked up a file in the current repository
244                    // for which an attacker could control everything. Hence, strip options just like Git.
245                    // If the file was found in the PATH though, it should be trustworthy.
246                    if program.is_absolute() {
247                        cmd.args(shebang.args);
248                    }
249                    cmd.arg(prep.command);
250                    cmd
251                } else {
252                    Command::new(prep.command)
253                }
254            } else {
255                Command::new(prep.command)
256            };
257            // We never want to have terminals pop-up on Windows if this runs from a GUI application.
258            #[cfg(windows)]
259            {
260                use std::os::windows::process::CommandExt;
261                const CREATE_NO_WINDOW: u32 = 0x08000000;
262                cmd.creation_flags(CREATE_NO_WINDOW);
263            }
264            cmd.stdin(prep.stdin)
265                .stdout(prep.stdout)
266                .stderr(prep.stderr)
267                .envs(prep.env)
268                .args(prep.args);
269            if let Some(ctx) = prep.context {
270                if let Some(git_dir) = ctx.git_dir {
271                    cmd.env("GIT_DIR", &git_dir);
272                }
273                if let Some(worktree_dir) = ctx.worktree_dir {
274                    cmd.env("GIT_WORK_TREE", worktree_dir);
275                }
276                if let Some(value) = ctx.no_replace_objects {
277                    cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
278                }
279                if let Some(namespace) = ctx.ref_namespace {
280                    cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
281                }
282                if let Some(value) = ctx.literal_pathspecs {
283                    cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
284                }
285                if let Some(value) = ctx.glob_pathspecs {
286                    cmd.env(
287                        if value {
288                            "GIT_GLOB_PATHSPECS"
289                        } else {
290                            "GIT_NOGLOB_PATHSPECS"
291                        },
292                        "1",
293                    );
294                }
295                if let Some(value) = ctx.icase_pathspecs {
296                    cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
297                }
298                if let Some(stderr) = ctx.stderr {
299                    cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
300                }
301            }
302            cmd
303        }
304    }
305}
306
/// Return `true` if `executable` has the `exe` extension.
///
/// The comparison ignores ASCII case, so `tool.EXE` is recognized just like `tool.exe`.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .map_or(false, |ext| ext.eq_ignore_ascii_case("exe"))
}
310
311/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
312/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
313/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
314fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
315    fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
316        let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
317        if !is_exe {
318            path.set_extension("exe");
319        }
320        if path.is_file() {
321            return Some(path);
322        }
323        if is_exe {
324            return None;
325        }
326        path.set_extension("");
327        path.is_file().then_some(path)
328    }
329    if command.components().take(2).count() == 2 {
330        return None;
331    }
332    let path = gix_path::os_str_into_bstr(path_value).ok()?;
333    let is_exe = is_exe(command);
334
335    for root in path.split(|b| *b == b';') {
336        if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
337            return Some(executable);
338        }
339    }
340    None
341}
342
343/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
344/// data when available.
345pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
346    #[cfg(windows)]
347    if is_exe(executable) {
348        return None;
349    }
350    let mut buf = [0; 100]; // Note: just like Git
351    let mut file = std::fs::File::open(executable).ok()?;
352    let n = file.read(&mut buf).ok()?;
353    shebang::parse(buf[..n].as_bstr())
354}
355
/// Parsing of shebang lines, that is the `#!<interpreter> [args]` first line of a script.
357pub mod shebang {
358    use bstr::{BStr, ByteSlice};
359    use std::ffi::OsString;
360    use std::path::PathBuf;
361
362    /// Parse `buf` to extract all shebang information.
363    pub fn parse(buf: &BStr) -> Option<Data> {
364        let mut line = buf.lines().next()?;
365        line = line.strip_prefix(b"#!")?;
366
367        let slash_idx = line.rfind_byteset(b"/\\")?;
368        Some(match line[slash_idx..].find_byte(b' ') {
369            Some(space_idx) => {
370                let space = slash_idx + space_idx;
371                Data {
372                    interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
373                    args: line
374                        .get(space + 1..)
375                        .and_then(|mut r| {
376                            r = r.trim();
377                            if r.is_empty() {
378                                return None;
379                            }
380
381                            match r.as_bstr().to_str() {
382                                Ok(args) => shell_words::split(args)
383                                    .ok()
384                                    .map(|args| args.into_iter().map(Into::into).collect()),
385                                Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
386                            }
387                        })
388                        .unwrap_or_default(),
389                }
390            }
391            None => Data {
392                interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
393                args: Vec::new(),
394            },
395        })
396    }
397
    /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
    ///
    /// ### Deviation
    ///
    /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
    /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
    /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
    ///
    /// To make that work without the kernel, we perform the splitting ourselves, while Git just ignores options.
    /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
    #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
    pub struct Data {
        /// The interpreter to run, with surrounding whitespace removed.
        pub interpreter: PathBuf,
        /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
        /// as pre-split arguments just like a shell would do it.
        /// Note that we accept that ill-formed UTF-8 will prevent argument splitting.
        pub args: Vec<OsString>,
    }
417}
418
419/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
420///
421/// Note that the default IO is configured for typical API usage, that is
422///
423/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
424/// - `stdout` is captured for consumption by the caller
425/// - `stderr` is inherited to allow the command to provide context to the user
426///
427/// On Windows, terminal Windows will be suppressed automatically.
428///
429/// ### Warning
430///
431/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
432/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
433/// additional information.
434pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
435    Prepare {
436        command: cmd.into(),
437        shell_program: None,
438        context: None,
439        stdin: std::process::Stdio::null(),
440        stdout: std::process::Stdio::piped(),
441        stderr: std::process::Stdio::inherit(),
442        args: Vec::new(),
443        env: Vec::new(),
444        use_shell: false,
445        allow_manual_arg_splitting: cfg!(windows),
446    }
447}
448
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercise `win_path_lookup()` against the directories created by the `win_path_lookup.sh` fixture.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        // Every entry directly beneath the fixture root becomes one entry of the lookup path.
        let mut paths: Vec<_> = std::fs::read_dir(&root)?
            .filter_map(Result::ok)
            .map(|e| e.path().to_str().expect("no illformed UTF8").to_owned())
            .collect();
        // Sort for a deterministic search order, independent of the order of directory iteration.
        paths.sort();
        let lookup_path: OsString = paths.join(";").into();

        assert_eq!(
            win_path_lookup("a/b".as_ref(), &lookup_path),
            None,
            "any path with separator is considered ready to use"
        );
        assert_eq!(
            win_path_lookup("x".as_ref(), &lookup_path),
            Some(root.join("a").join("x.exe")),
            "exe will be preferred, and it searches left to right thus doesn't find c/x.exe"
        );
        assert_eq!(
            win_path_lookup("x.exe".as_ref(), &lookup_path),
            Some(root.join("a").join("x.exe")),
            "no matter what, a/x won't be found as it's shadowed by an exe file"
        );
        assert_eq!(
            win_path_lookup("exe".as_ref(), &lookup_path),
            Some(root.join("b").join("exe")),
            "it finds files further down the path as well"
        );
        Ok(())
    }
}