gix_command/lib.rs
1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2#![deny(rust_2018_idioms, missing_docs)]
3#![forbid(unsafe_code)]
4
5use std::io::Read;
6use std::{
7 ffi::OsString,
8 path::{Path, PathBuf},
9};
10
11use bstr::{BString, ByteSlice};
12
13/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()], after creating it with [`prepare()`].
14pub struct Prepare {
15 /// The command to invoke (either with or without shell depending on `use_shell`.
16 pub command: OsString,
17 /// Additional information to be passed to the spawned command.
18 pub context: Option<Context>,
19 /// The way standard input is configured.
20 pub stdin: std::process::Stdio,
21 /// The way standard output is configured.
22 pub stdout: std::process::Stdio,
23 /// The way standard error is configured.
24 pub stderr: std::process::Stdio,
25 /// The arguments to pass to the spawned process.
26 pub args: Vec<OsString>,
27 /// environment variables to set in the spawned process.
28 pub env: Vec<(OsString, OsString)>,
29 /// If `true`, we will use `shell_program` or `sh` to execute the `command`.
30 pub use_shell: bool,
31 /// The name or path to the shell program to use instead of `sh`.
32 pub shell_program: Option<OsString>,
33 /// If `true` (default `true` on windows and `false` everywhere else)
34 /// we will see if it's safe to manually invoke `command` after splitting
35 /// its arguments as a shell would do.
36 /// Note that outside of windows, it's generally not advisable as this
37 /// removes support for literal shell scripts with shell-builtins.
38 ///
39 /// This mimics the behaviour we see with `git` on windows, which also
40 /// won't invoke the shell there at all.
41 ///
42 /// Only effective if `use_shell` is `true` as well, as the shell will
43 /// be used as a fallback if it's not possible to split arguments as
44 /// the command-line contains 'scripting'.
45 pub allow_manual_arg_splitting: bool,
46}
47
48/// Additional information that is relevant to spawned processes, which typically receive
49/// a wealth of contextual information when spawned from `git`.
50///
51/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
52/// for details.
53#[derive(Debug, Default, Clone)]
54pub struct Context {
55 /// The `.git` directory that contains the repository.
56 ///
57 /// If set, it will be used to set the `GIT_DIR` environment variable.
58 pub git_dir: Option<PathBuf>,
59 /// Set the `GIT_WORK_TREE` environment variable with the given path.
60 pub worktree_dir: Option<PathBuf>,
61 /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
62 /// If `None`, the variable won't be set.
63 pub no_replace_objects: Option<bool>,
64 /// Set the `GIT_NAMESPACE` variable with the given value, effectively namespacing all
65 /// operations on references.
66 pub ref_namespace: Option<BString>,
67 /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
68 /// If `None`, the variable won't be set.
69 pub literal_pathspecs: Option<bool>,
70 /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
71 /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
72 /// If `None`, the variable won't be set.
73 pub glob_pathspecs: Option<bool>,
74 /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
75 /// If `None`, the variable won't be set.
76 pub icase_pathspecs: Option<bool>,
77 /// If `true`, inherit `stderr` just like it's the default when spawning processes.
78 /// If `false`, suppress all stderr output.
79 /// If not `None`, this will override any value set with [`Prepare::stderr()`].
80 pub stderr: Option<bool>,
81}
82
83mod prepare {
84 use std::borrow::Cow;
85 use std::{
86 ffi::OsString,
87 process::{Command, Stdio},
88 };
89
90 use bstr::ByteSlice;
91
92 use crate::{extract_interpreter, win_path_lookup, Context, Prepare};
93
94 /// Builder
95 impl Prepare {
96 /// If called, the command will not be executed directly, but with `sh`, but only if the
97 /// command passed to [`prepare`](super::prepare()) requires this.
98 ///
99 /// This also allows to pass shell scripts as command, or use commands that contain arguments which are subsequently
100 /// parsed by `sh`.
101 pub fn with_shell(mut self) -> Self {
102 self.use_shell = self.command.to_str().map_or(true, |cmd| {
103 cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some()
104 });
105 self
106 }
107
108 /// Set the name or path to the shell `program` to use, to avoid using the default shell which is `sh`.
109 pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
110 self.shell_program = Some(program.into());
111 self
112 }
113
114 /// Unconditionally turn off using the shell when spawning the command.
115 /// Note that not using the shell is the default so an effective use of this method
116 /// is some time after [`with_shell()`][Prepare::with_shell()] was called.
117 pub fn without_shell(mut self) -> Self {
118 self.use_shell = false;
119 self
120 }
121
122 /// Set additional `ctx` to be used when spawning the process.
123 ///
124 /// Note that this is a must for most kind of commands that `git` usually spawns,
125 /// as at least they need to know the correct `git` repository to function.
126 pub fn with_context(mut self, ctx: Context) -> Self {
127 self.context = Some(ctx);
128 self
129 }
130
131 /// Use a shell, but try to split arguments by hand if this can be safely done without a shell.
132 ///
133 /// If that's not the case, use a shell instead.
134 pub fn with_shell_allow_manual_argument_splitting(mut self) -> Self {
135 self.allow_manual_arg_splitting = true;
136 self.with_shell()
137 }
138
139 /// Use a shell, but prohibit splitting arguments by hand even if this could be safely done without a shell.
140 pub fn with_shell_disallow_manual_argument_splitting(mut self) -> Self {
141 self.allow_manual_arg_splitting = false;
142 self.with_shell()
143 }
144
145 /// Configure the process to use `stdio` for _stdin.
146 pub fn stdin(mut self, stdio: Stdio) -> Self {
147 self.stdin = stdio;
148 self
149 }
150 /// Configure the process to use `stdio` for _stdout_.
151 pub fn stdout(mut self, stdio: Stdio) -> Self {
152 self.stdout = stdio;
153 self
154 }
155 /// Configure the process to use `stdio` for _stderr.
156 pub fn stderr(mut self, stdio: Stdio) -> Self {
157 self.stderr = stdio;
158 self
159 }
160
161 /// Add `arg` to the list of arguments to call the command with.
162 pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
163 self.args.push(arg.into());
164 self
165 }
166
167 /// Add `args` to the list of arguments to call the command with.
168 pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
169 self.args
170 .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
171 self
172 }
173
174 /// Add `key` with `value` to the environment of the spawned command.
175 pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
176 self.env.push((key.into(), value.into()));
177 self
178 }
179 }
180
181 /// Finalization
182 impl Prepare {
183 /// Spawn the command as configured.
184 pub fn spawn(self) -> std::io::Result<std::process::Child> {
185 let mut cmd = Command::from(self);
186 gix_trace::debug!(cmd = ?cmd);
187 cmd.spawn()
188 }
189 }
190
191 impl From<Prepare> for Command {
192 fn from(mut prep: Prepare) -> Command {
193 let mut cmd = if prep.use_shell {
194 let split_args = prep
195 .allow_manual_arg_splitting
196 .then(|| {
197 if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
198 .find_byteset(b"\\|&;<>()$`\n*?[#~%")
199 .is_none()
200 {
201 prep.command
202 .to_str()
203 .and_then(|args| shell_words::split(args).ok().map(Vec::into_iter))
204 } else {
205 None
206 }
207 })
208 .flatten();
209 match split_args {
210 Some(mut args) => {
211 let mut cmd = Command::new(args.next().expect("non-empty input"));
212 cmd.args(args);
213 cmd
214 }
215 None => {
216 let mut cmd = Command::new(
217 prep.shell_program
218 .unwrap_or(if cfg!(windows) { "sh" } else { "/bin/sh" }.into()),
219 );
220 cmd.arg("-c");
221 if !prep.args.is_empty() {
222 if prep.command.to_str().map_or(true, |cmd| !cmd.contains("$@")) {
223 prep.command.push(" \"$@\"");
224 } else {
225 gix_trace::debug!(
226 "Will not add '$@' to '{:?}' as it seems to contain it already",
227 prep.command
228 );
229 }
230 }
231 cmd.arg(prep.command);
232 cmd.arg("--");
233 cmd
234 }
235 }
236 } else if cfg!(windows) {
237 let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
238 .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
239 .map(Cow::Owned)
240 .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
241 if let Some(shebang) = extract_interpreter(program.as_ref()) {
242 let mut cmd = Command::new(shebang.interpreter);
243 // For relative paths, we may have picked up a file in the current repository
244 // for which an attacker could control everything. Hence, strip options just like Git.
245 // If the file was found in the PATH though, it should be trustworthy.
246 if program.is_absolute() {
247 cmd.args(shebang.args);
248 }
249 cmd.arg(prep.command);
250 cmd
251 } else {
252 Command::new(prep.command)
253 }
254 } else {
255 Command::new(prep.command)
256 };
257 // We never want to have terminals pop-up on Windows if this runs from a GUI application.
258 #[cfg(windows)]
259 {
260 use std::os::windows::process::CommandExt;
261 const CREATE_NO_WINDOW: u32 = 0x08000000;
262 cmd.creation_flags(CREATE_NO_WINDOW);
263 }
264 cmd.stdin(prep.stdin)
265 .stdout(prep.stdout)
266 .stderr(prep.stderr)
267 .envs(prep.env)
268 .args(prep.args);
269 if let Some(ctx) = prep.context {
270 if let Some(git_dir) = ctx.git_dir {
271 cmd.env("GIT_DIR", &git_dir);
272 }
273 if let Some(worktree_dir) = ctx.worktree_dir {
274 cmd.env("GIT_WORK_TREE", worktree_dir);
275 }
276 if let Some(value) = ctx.no_replace_objects {
277 cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
278 }
279 if let Some(namespace) = ctx.ref_namespace {
280 cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
281 }
282 if let Some(value) = ctx.literal_pathspecs {
283 cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
284 }
285 if let Some(value) = ctx.glob_pathspecs {
286 cmd.env(
287 if value {
288 "GIT_GLOB_PATHSPECS"
289 } else {
290 "GIT_NOGLOB_PATHSPECS"
291 },
292 "1",
293 );
294 }
295 if let Some(value) = ctx.icase_pathspecs {
296 cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
297 }
298 if let Some(stderr) = ctx.stderr {
299 cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
300 }
301 }
302 cmd
303 }
304 }
305}
306
/// Return `true` if the extension of `executable` is `exe`, ignoring ASCII case so that
/// names like `TOOL.EXE` are recognized as well.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .map_or(false, |ext| ext.eq_ignore_ascii_case("exe"))
}
310
311/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
312/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
313/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
314fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
315 fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
316 let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
317 if !is_exe {
318 path.set_extension("exe");
319 }
320 if path.is_file() {
321 return Some(path);
322 }
323 if is_exe {
324 return None;
325 }
326 path.set_extension("");
327 path.is_file().then_some(path)
328 }
329 if command.components().take(2).count() == 2 {
330 return None;
331 }
332 let path = gix_path::os_str_into_bstr(path_value).ok()?;
333 let is_exe = is_exe(command);
334
335 for root in path.split(|b| *b == b';') {
336 if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
337 return Some(executable);
338 }
339 }
340 None
341}
342
343/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
344/// data when available.
345pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
346 #[cfg(windows)]
347 if is_exe(executable) {
348 return None;
349 }
350 let mut buf = [0; 100]; // Note: just like Git
351 let mut file = std::fs::File::open(executable).ok()?;
352 let n = file.read(&mut buf).ok()?;
353 shebang::parse(buf[..n].as_bstr())
354}
355
356///
357pub mod shebang {
358 use bstr::{BStr, ByteSlice};
359 use std::ffi::OsString;
360 use std::path::PathBuf;
361
362 /// Parse `buf` to extract all shebang information.
363 pub fn parse(buf: &BStr) -> Option<Data> {
364 let mut line = buf.lines().next()?;
365 line = line.strip_prefix(b"#!")?;
366
367 let slash_idx = line.rfind_byteset(b"/\\")?;
368 Some(match line[slash_idx..].find_byte(b' ') {
369 Some(space_idx) => {
370 let space = slash_idx + space_idx;
371 Data {
372 interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
373 args: line
374 .get(space + 1..)
375 .and_then(|mut r| {
376 r = r.trim();
377 if r.is_empty() {
378 return None;
379 }
380
381 match r.as_bstr().to_str() {
382 Ok(args) => shell_words::split(args)
383 .ok()
384 .map(|args| args.into_iter().map(Into::into).collect()),
385 Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
386 }
387 })
388 .unwrap_or_default(),
389 }
390 }
391 None => Data {
392 interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
393 args: Vec::new(),
394 },
395 })
396 }
397
398 /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
399 ///
400 /// ### Deviation
401 ///
402 /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
403 /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
404 /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
405 ///
406 /// To make that work without the kernel, we perform the splitting while Git just ignores options.
407 /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
408 #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
409 pub struct Data {
410 /// The interpreter to run.
411 pub interpreter: PathBuf,
412 /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
413 /// as pre-split arguments just like a shell would do it.
414 /// Note that we accept that illformed UTF-8 will prevent argument splitting.
415 pub args: Vec<OsString>,
416 }
417}
418
419/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
420///
421/// Note that the default IO is configured for typical API usage, that is
422///
423/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
424/// - `stdout` is captured for consumption by the caller
425/// - `stderr` is inherited to allow the command to provide context to the user
426///
427/// On Windows, terminal Windows will be suppressed automatically.
428///
429/// ### Warning
430///
431/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
432/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
433/// additional information.
434pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
435 Prepare {
436 command: cmd.into(),
437 shell_program: None,
438 context: None,
439 stdin: std::process::Stdio::null(),
440 stdout: std::process::Stdio::piped(),
441 stderr: std::process::Stdio::inherit(),
442 args: Vec::new(),
443 env: Vec::new(),
444 use_shell: false,
445 allow_manual_arg_splitting: cfg!(windows),
446 }
447}
448
#[cfg(test)]
mod tests {
    use super::*;

    /// Use each directory of the fixture as an entry of a `;`-separated search path
    /// and validate the lookup rules against it.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut search_dirs = Vec::new();
        for entry in std::fs::read_dir(&root)?.filter_map(Result::ok) {
            search_dirs.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        search_dirs.sort();
        let lookup_path = OsString::from(search_dirs.join(";"));

        let in_a = root.join("a").join("x.exe");
        let in_b = root.join("b").join("exe");

        assert_eq!(
            win_path_lookup(Path::new("a/b"), &lookup_path),
            None,
            "any path with separator is considered ready to use"
        );
        assert_eq!(
            win_path_lookup(Path::new("x"), &lookup_path),
            Some(in_a.clone()),
            "exe will be preferred, and it searches left to right thus doesn't find c/x.exe"
        );
        assert_eq!(
            win_path_lookup(Path::new("x.exe"), &lookup_path),
            Some(in_a),
            "no matter what, a/x won't be found as it's shadowed by an exe file"
        );
        assert_eq!(
            win_path_lookup(Path::new("exe"), &lookup_path),
            Some(in_b),
            "it finds files further down the path as well"
        );
        Ok(())
    }
}
485}