gix_command/lib.rs
1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2#![deny(rust_2018_idioms, missing_docs)]
3#![forbid(unsafe_code)]
4
5use std::io::Read;
6use std::{
7 ffi::OsString,
8 path::{Path, PathBuf},
9};
10
11use bstr::{BString, ByteSlice};
12
13/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()],
14/// after creating it with [`prepare()`].
15pub struct Prepare {
16 /// The command to invoke, either directly or with a shell depending on `use_shell`.
17 pub command: OsString,
18 /// Additional information to be passed to the spawned command.
19 pub context: Option<Context>,
20 /// The way standard input is configured.
21 pub stdin: std::process::Stdio,
22 /// The way standard output is configured.
23 pub stdout: std::process::Stdio,
24 /// The way standard error is configured.
25 pub stderr: std::process::Stdio,
26 /// The arguments to pass to the process being spawned.
27 pub args: Vec<OsString>,
28 /// Environment variables to set for the spawned process.
29 pub env: Vec<(OsString, OsString)>,
30 /// If `true`, we will use `shell_program` or `sh` to execute the `command`.
31 pub use_shell: bool,
32 /// If `true`, `command` is assumed to be a command or path to the program to execute, and it
33 /// will be shell-quoted to assure it will be executed as is and without splitting across
34 /// whitespace.
35 pub quote_command: bool,
36 /// The name or path to the shell program to use instead of `sh`.
37 pub shell_program: Option<OsString>,
38 /// If `true` (default `true` on Windows and `false` everywhere else) we will see if it's safe
39 /// to manually invoke `command` after splitting its arguments as a shell would do.
40 ///
41 /// Note that outside of Windows, it's generally not advisable as this removes support for
42 /// literal shell scripts with shell-builtins.
43 ///
44 /// This mimics the behaviour we see with `git` on Windows, which also won't invoke the shell
45 /// there at all.
46 ///
47 /// Only effective if `use_shell` is `true` as well, as the shell will be used as a fallback if
48 /// it's not possible to split arguments as the command-line contains 'scripting'.
49 pub allow_manual_arg_splitting: bool,
50}
51
52/// Additional information that is relevant to spawned processes, which typically receive
53/// a wealth of contextual information when spawned from `git`.
54///
55/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
56/// for details.
57#[derive(Debug, Default, Clone)]
58pub struct Context {
59 /// The `.git` directory that contains the repository.
60 ///
61 /// If set, it will be used to set the `GIT_DIR` environment variable.
62 pub git_dir: Option<PathBuf>,
63 /// Set the `GIT_WORK_TREE` environment variable with the given path.
64 pub worktree_dir: Option<PathBuf>,
65 /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
66 /// If `None`, the variable won't be set.
67 pub no_replace_objects: Option<bool>,
68 /// Set the `GIT_NAMESPACE` variable with the given value, effectively namespacing all
69 /// operations on references.
70 pub ref_namespace: Option<BString>,
71 /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
72 /// If `None`, the variable won't be set.
73 pub literal_pathspecs: Option<bool>,
74 /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
75 /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
76 /// If `None`, the variable won't be set.
77 pub glob_pathspecs: Option<bool>,
78 /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
79 /// If `None`, the variable won't be set.
80 pub icase_pathspecs: Option<bool>,
81 /// If `true`, inherit `stderr` just like it's the default when spawning processes.
82 /// If `false`, suppress all stderr output.
83 /// If not `None`, this will override any value set with [`Prepare::stderr()`].
84 pub stderr: Option<bool>,
85}
86
87mod prepare {
88 use std::borrow::Cow;
89 use std::{
90 ffi::OsString,
91 process::{Command, Stdio},
92 };
93
94 use bstr::ByteSlice;
95
96 use crate::{extract_interpreter, win_path_lookup, Context, Prepare};
97
98 /// Builder
99 impl Prepare {
100 /// If called, the command will be checked for characters that are typical for shell
101 /// scripts, and if found will use `sh` to execute it or whatever is set as
102 /// [`with_shell_program()`](Self::with_shell_program()).
103 ///
104 /// If the command isn't valid UTF-8, a shell will always be used.
105 ///
106 /// If a shell is used, then arguments given here with [arg()](Self::arg) or
107 /// [args()](Self::args) will be substituted via `"$@"` if it's not already present in the
108 /// command.
109 ///
110 ///
111 /// The [`command_may_be_shell_script_allow_manual_argument_splitting()`](Self::command_may_be_shell_script_allow_manual_argument_splitting())
112 /// and [`command_may_be_shell_script_disallow_manual_argument_splitting()`](Self::command_may_be_shell_script_disallow_manual_argument_splitting())
113 /// methods also call this method.
114 ///
115 /// If neither this method nor [`with_shell()`](Self::with_shell()) is called, commands are
116 /// always executed verbatim and directly, without the use of a shell.
117 pub fn command_may_be_shell_script(mut self) -> Self {
118 self.use_shell = self.command.to_str().map_or(true, |cmd| {
119 cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some()
120 });
121 self
122 }
123
124 /// If called, unconditionally use a shell to execute the command and its arguments.
125 ///
126 /// This uses `sh` to execute it, or whatever is set as
127 /// [`with_shell_program()`](Self::with_shell_program()).
128 ///
129 /// Arguments given here with [arg()](Self::arg) or [args()](Self::args) will be
130 /// substituted via `"$@"` if it's not already present in the command.
131 ///
132 /// If neither this method nor
133 /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) is called,
134 /// commands are always executed verbatim and directly, without the use of a shell. (But
135 /// see [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) on other
136 /// methods that call that method.)
137 pub fn with_shell(mut self) -> Self {
138 self.use_shell = true;
139 self
140 }
141
142 /// Quote the command if it is run in a shell, so its path is left intact.
143 ///
144 /// This is only meaningful if the command has been arranged to run in a shell, either
145 /// unconditionally with [`with_shell()`](Self::with_shell()), or conditionally with
146 /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()).
147 ///
148 /// Note that this should not be used if the command is a script - quoting is only the
149 /// right choice if it's known to be a program path.
150 ///
151 /// Note also that this does not affect arguments passed with [arg()](Self::arg) or
152 /// [args()](Self::args), which do not have to be quoted by the *caller* because they are
153 /// passed as `"$@"` positional parameters (`"$1"`, `"$2"`, and so on).
154 pub fn with_quoted_command(mut self) -> Self {
155 self.quote_command = true;
156 self
157 }
158
159 /// Set the name or path to the shell `program` to use if a shell is to be used, to avoid
160 /// using the default shell which is `sh`.
161 ///
162 /// Note that that shells that are not Bourne-style cannot be expected to work correctly,
163 /// because POSIX shell syntax is assumed when searching for and conditionally adding
164 /// `"$@"` to receive arguments, where applicable (and in the behaviour of
165 /// [`with_quoted_command()`](Self::with_quoted_command()), if called).
166 pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
167 self.shell_program = Some(program.into());
168 self
169 }
170
171 /// Unconditionally turn off using the shell when spawning the command.
172 ///
173 /// Note that not using the shell is the default. So an effective use of this method
174 /// is some time after [`command_may_be_shell_script()`](Self::command_may_be_shell_script())
175 /// or [`with_shell()`](Self::with_shell()) was called.
176 pub fn without_shell(mut self) -> Self {
177 self.use_shell = false;
178 self
179 }
180
181 /// Set additional `ctx` to be used when spawning the process.
182 ///
183 /// Note that this is a must for most kind of commands that `git` usually spawns, as at
184 /// least they need to know the correct Git repository to function.
185 pub fn with_context(mut self, ctx: Context) -> Self {
186 self.context = Some(ctx);
187 self
188 }
189
190 /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but try to
191 /// split arguments by hand if this can be safely done without a shell.
192 ///
193 /// This is useful on platforms where spawning processes is slow, or where many processes
194 /// have to be spawned in a row which should be sped up. Manual argument splitting is
195 /// enabled by default on Windows only.
196 ///
197 /// Note that this does *not* check for the use of possible shell builtins. Commands may
198 /// fail or behave differently if they are available as shell builtins and no corresponding
199 /// external command exists, or the external command behaves differently.
200 pub fn command_may_be_shell_script_allow_manual_argument_splitting(mut self) -> Self {
201 self.allow_manual_arg_splitting = true;
202 self.command_may_be_shell_script()
203 }
204
205 /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but don't
206 /// allow to bypass the shell even if manual argument splitting can be performed safely.
207 pub fn command_may_be_shell_script_disallow_manual_argument_splitting(mut self) -> Self {
208 self.allow_manual_arg_splitting = false;
209 self.command_may_be_shell_script()
210 }
211
212 /// Configure the process to use `stdio` for _stdin_.
213 pub fn stdin(mut self, stdio: Stdio) -> Self {
214 self.stdin = stdio;
215 self
216 }
217 /// Configure the process to use `stdio` for _stdout_.
218 pub fn stdout(mut self, stdio: Stdio) -> Self {
219 self.stdout = stdio;
220 self
221 }
222 /// Configure the process to use `stdio` for _stderr_.
223 pub fn stderr(mut self, stdio: Stdio) -> Self {
224 self.stderr = stdio;
225 self
226 }
227
228 /// Add `arg` to the list of arguments to call the command with.
229 pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
230 self.args.push(arg.into());
231 self
232 }
233
234 /// Add `args` to the list of arguments to call the command with.
235 pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
236 self.args
237 .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
238 self
239 }
240
241 /// Add `key` with `value` to the environment of the spawned command.
242 pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
243 self.env.push((key.into(), value.into()));
244 self
245 }
246 }
247
248 /// Finalization
249 impl Prepare {
250 /// Spawn the command as configured.
251 pub fn spawn(self) -> std::io::Result<std::process::Child> {
252 let mut cmd = Command::from(self);
253 gix_trace::debug!(cmd = ?cmd);
254 cmd.spawn()
255 }
256 }
257
258 impl From<Prepare> for Command {
259 fn from(mut prep: Prepare) -> Command {
260 let mut cmd = if prep.use_shell {
261 let split_args = prep
262 .allow_manual_arg_splitting
263 .then(|| {
264 if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
265 .find_byteset(b"\\|&;<>()$`\n*?[#~%")
266 .is_none()
267 {
268 prep.command
269 .to_str()
270 .and_then(|args| shell_words::split(args).ok().map(Vec::into_iter))
271 } else {
272 None
273 }
274 })
275 .flatten();
276 match split_args {
277 Some(mut args) => {
278 let mut cmd = Command::new(args.next().expect("non-empty input"));
279 cmd.args(args);
280 cmd
281 }
282 None => {
283 let shell = prep.shell_program.unwrap_or_else(|| gix_path::env::shell().into());
284 let mut cmd = Command::new(shell);
285 cmd.arg("-c");
286 if !prep.args.is_empty() {
287 if prep.command.to_str().map_or(true, |cmd| !cmd.contains("$@")) {
288 if prep.quote_command {
289 if let Ok(command) = gix_path::os_str_into_bstr(&prep.command) {
290 prep.command = gix_path::from_bstring(gix_quote::single(command)).into();
291 }
292 }
293 prep.command.push(r#" "$@""#);
294 } else {
295 gix_trace::debug!(
296 r#"Will not add '"$@"' to '{:?}' as it seems to contain '$@' already"#,
297 prep.command
298 );
299 }
300 }
301 cmd.arg(prep.command);
302 cmd.arg("--");
303 cmd
304 }
305 }
306 } else if cfg!(windows) {
307 let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
308 .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
309 .map(Cow::Owned)
310 .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
311 if let Some(shebang) = extract_interpreter(program.as_ref()) {
312 let mut cmd = Command::new(shebang.interpreter);
313 // For relative paths, we may have picked up a file in the current repository
314 // for which an attacker could control everything. Hence, strip options just like Git.
315 // If the file was found in the PATH though, it should be trustworthy.
316 if program.is_absolute() {
317 cmd.args(shebang.args);
318 }
319 cmd.arg(prep.command);
320 cmd
321 } else {
322 Command::new(prep.command)
323 }
324 } else {
325 Command::new(prep.command)
326 };
327 // We never want to have terminals pop-up on Windows if this runs from a GUI application.
328 #[cfg(windows)]
329 {
330 use std::os::windows::process::CommandExt;
331 const CREATE_NO_WINDOW: u32 = 0x08000000;
332 cmd.creation_flags(CREATE_NO_WINDOW);
333 }
334 cmd.stdin(prep.stdin)
335 .stdout(prep.stdout)
336 .stderr(prep.stderr)
337 .envs(prep.env)
338 .args(prep.args);
339 if let Some(ctx) = prep.context {
340 if let Some(git_dir) = ctx.git_dir {
341 cmd.env("GIT_DIR", &git_dir);
342 }
343 if let Some(worktree_dir) = ctx.worktree_dir {
344 cmd.env("GIT_WORK_TREE", worktree_dir);
345 }
346 if let Some(value) = ctx.no_replace_objects {
347 cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
348 }
349 if let Some(namespace) = ctx.ref_namespace {
350 cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
351 }
352 if let Some(value) = ctx.literal_pathspecs {
353 cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
354 }
355 if let Some(value) = ctx.glob_pathspecs {
356 cmd.env(
357 if value {
358 "GIT_GLOB_PATHSPECS"
359 } else {
360 "GIT_NOGLOB_PATHSPECS"
361 },
362 "1",
363 );
364 }
365 if let Some(value) = ctx.icase_pathspecs {
366 cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
367 }
368 if let Some(stderr) = ctx.stderr {
369 cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
370 }
371 }
372 cmd
373 }
374 }
375}
376
/// Return `true` if `executable` has the `exe` extension, compared case-insensitively as
/// file extensions on Windows don't distinguish case (`tool.exe` and `TOOL.EXE` are alike).
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .map_or(false, |extension| extension.eq_ignore_ascii_case("exe"))
}
380
381/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
382/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
383/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
384fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
385 fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
386 let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
387 if !is_exe {
388 path.set_extension("exe");
389 }
390 if path.is_file() {
391 return Some(path);
392 }
393 if is_exe {
394 return None;
395 }
396 path.set_extension("");
397 path.is_file().then_some(path)
398 }
399 if command.components().take(2).count() == 2 {
400 return None;
401 }
402 let path = gix_path::os_str_into_bstr(path_value).ok()?;
403 let is_exe = is_exe(command);
404
405 for root in path.split(|b| *b == b';') {
406 if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
407 return Some(executable);
408 }
409 }
410 None
411}
412
413/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
414/// data when available.
415pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
416 #[cfg(windows)]
417 if is_exe(executable) {
418 return None;
419 }
420 let mut buf = [0; 100]; // Note: just like Git
421 let mut file = std::fs::File::open(executable).ok()?;
422 let n = file.read(&mut buf).ok()?;
423 shebang::parse(buf[..n].as_bstr())
424}
425
/// Parsing of shebang (`#!`) lines to learn about the interpreter of a script and its arguments.
427pub mod shebang {
428 use bstr::{BStr, ByteSlice};
429 use std::ffi::OsString;
430 use std::path::PathBuf;
431
432 /// Parse `buf` to extract all shebang information.
433 pub fn parse(buf: &BStr) -> Option<Data> {
434 let mut line = buf.lines().next()?;
435 line = line.strip_prefix(b"#!")?;
436
437 let slash_idx = line.rfind_byteset(br"/\")?;
438 Some(match line[slash_idx..].find_byte(b' ') {
439 Some(space_idx) => {
440 let space = slash_idx + space_idx;
441 Data {
442 interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
443 args: line
444 .get(space + 1..)
445 .and_then(|mut r| {
446 r = r.trim();
447 if r.is_empty() {
448 return None;
449 }
450
451 match r.as_bstr().to_str() {
452 Ok(args) => shell_words::split(args)
453 .ok()
454 .map(|args| args.into_iter().map(Into::into).collect()),
455 Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
456 }
457 })
458 .unwrap_or_default(),
459 }
460 }
461 None => Data {
462 interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
463 args: Vec::new(),
464 },
465 })
466 }
467
468 /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
469 ///
470 /// ### Deviation
471 ///
472 /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
473 /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
474 /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
475 ///
476 /// To make that work without the kernel, we perform the splitting while Git just ignores options.
477 /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
478 #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
479 pub struct Data {
480 /// The interpreter to run.
481 pub interpreter: PathBuf,
482 /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
483 /// as pre-split arguments just like a shell would do it.
484 /// Note that we accept that illformed UTF-8 will prevent argument splitting.
485 pub args: Vec<OsString>,
486 }
487}
488
489/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
490///
491/// Note that the default IO is configured for typical API usage, that is
492///
493/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
494/// - `stdout` is captured for consumption by the caller
495/// - `stderr` is inherited to allow the command to provide context to the user
496///
497/// On Windows, terminal Windows will be suppressed automatically.
498///
499/// ### Warning
500///
501/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
502/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
503/// additional information.
504pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
505 Prepare {
506 command: cmd.into(),
507 shell_program: None,
508 context: None,
509 stdin: std::process::Stdio::null(),
510 stdout: std::process::Stdio::piped(),
511 stderr: std::process::Stdio::inherit(),
512 args: Vec::new(),
513 env: Vec::new(),
514 use_shell: false,
515 quote_command: false,
516 allow_manual_arg_splitting: cfg!(windows),
517 }
518}
519
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `win_path_lookup()` with a `PATH`-like value made of the sorted entries of the
    /// `win_path_lookup.sh` fixture directory, joined with `;`.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut paths = Vec::new();
        for entry in std::fs::read_dir(&root)?.filter_map(Result::ok) {
            paths.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        paths.sort();
        let lookup_path = OsString::from(paths.join(";"));
        let lookup = |command: &str| win_path_lookup(Path::new(command), &lookup_path);

        assert_eq!(
            lookup("a/b"),
            None,
            "any path with separator is considered ready to use"
        );
        assert_eq!(
            lookup("x"),
            Some(root.join("a").join("x.exe")),
            "exe will be preferred, and it searches left to right thus doesn't find c/x.exe"
        );
        assert_eq!(
            lookup("x.exe"),
            Some(root.join("a").join("x.exe")),
            "no matter what, a/x won't be found as it's shadowed by an exe file"
        );
        assert_eq!(
            lookup("exe"),
            Some(root.join("b").join("exe")),
            "it finds files further down the path as well"
        );
        Ok(())
    }
}