gix_command/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
#![deny(rust_2018_idioms, missing_docs)]
#![forbid(unsafe_code)]

use std::io::Read;
use std::{
    ffi::OsString,
    path::{Path, PathBuf},
};

use bstr::{BString, ByteSlice};

/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()], after creating it with [`prepare()`].
///
/// All fields are public, so they can also be adjusted directly instead of going through the builder methods.
pub struct Prepare {
    /// The command to invoke (either with or without shell depending on `use_shell`).
    pub command: OsString,
    /// Additional information to be passed to the spawned command.
    pub context: Option<Context>,
    /// The way standard input is configured.
    pub stdin: std::process::Stdio,
    /// The way standard output is configured.
    pub stdout: std::process::Stdio,
    /// The way standard error is configured.
    pub stderr: std::process::Stdio,
    /// The arguments to pass to the spawned process.
    pub args: Vec<OsString>,
    /// Environment variables to set in the spawned process, as pairs of name and value.
    pub env: Vec<(OsString, OsString)>,
    /// If `true`, we will use `sh` to execute the `command`.
    pub use_shell: bool,
    /// If `true` (default `true` on windows and `false` everywhere else)
    /// we will see if it's safe to manually invoke `command` after splitting
    /// its arguments as a shell would do.
    /// Note that outside of windows, it's generally not advisable as this
    /// removes support for literal shell scripts with shell-builtins.
    ///
    /// This mimics the behaviour we see with `git` on windows, which also
    /// won't invoke the shell there at all.
    ///
    /// Only effective if `use_shell` is `true` as well, as the shell will
    /// be used as a fallback if it's not possible to split arguments as
    /// the command-line contains 'scripting'.
    pub allow_manual_arg_splitting: bool,
}

/// Additional information that is relevant to spawned processes, which typically receive
/// a wealth of contextual information when spawned from `git`.
///
/// Each field that is set is translated into the environment variable named in its documentation
/// when the prepared command is converted into a `std::process::Command`.
///
/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
/// for details.
#[derive(Debug, Default, Clone)]
pub struct Context {
    /// The `.git` directory that contains the repository.
    ///
    /// If set, it will be used to set the `GIT_DIR` environment variable.
    pub git_dir: Option<PathBuf>,
    /// Set the `GIT_WORK_TREE` environment variable with the given path.
    pub worktree_dir: Option<PathBuf>,
    /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub no_replace_objects: Option<bool>,
    /// Set the `GIT_NAMESPACE` variable with the given value, effectively namespacing all
    /// operations on references.
    pub ref_namespace: Option<BString>,
    /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub literal_pathspecs: Option<bool>,
    /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
    /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
    /// If `None`, neither of the two variables will be set.
    pub glob_pathspecs: Option<bool>,
    /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub icase_pathspecs: Option<bool>,
    /// If `true`, inherit `stderr` just like it's the default when spawning processes.
    /// If `false`, suppress all stderr output.
    /// If not `None`, this will override any value set with [`Prepare::stderr()`].
    pub stderr: Option<bool>,
}

mod prepare {
    use std::borrow::Cow;
    use std::{
        ffi::OsString,
        process::{Command, Stdio},
    };

    use bstr::ByteSlice;

    use crate::{extract_interpreter, win_path_lookup, Context, Prepare};

    /// Builder
    impl Prepare {
        /// If called, the command will not be executed directly, but with `sh`, but only if the
        /// command passed to [`prepare`](super::prepare()) requires this.
        ///
        /// A shell is considered required if the command isn't valid unicode, or if it contains
        /// any character with a special meaning to a shell, like whitespace, quotes or redirections.
        ///
        /// This also allows to pass shell scripts as command, or use commands that contain arguments which are subsequently
        /// parsed by `sh`.
        pub fn with_shell(mut self) -> Self {
            self.use_shell = self.command.to_str().map_or(true, |cmd| {
                cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some()
            });
            self
        }

        /// Unconditionally turn off using the shell when spawning the command.
        /// Note that not using the shell is the default so an effective use of this method
        /// is some time after [`with_shell()`][Prepare::with_shell()] was called.
        pub fn without_shell(mut self) -> Self {
            self.use_shell = false;
            self
        }

        /// Set additional `ctx` to be used when spawning the process.
        ///
        /// Note that this is a must for most kind of commands that `git` usually spawns,
        /// as at least they need to know the correct `git` repository to function.
        pub fn with_context(mut self, ctx: Context) -> Self {
            self.context = Some(ctx);
            self
        }

        /// Use a shell, but try to split arguments by hand if this can be safely done without a shell.
        ///
        /// If that's not the case, use a shell instead.
        pub fn with_shell_allow_argument_splitting(mut self) -> Self {
            self.allow_manual_arg_splitting = true;
            self.with_shell()
        }

        /// Configure the process to use `stdio` for _stdin_.
        pub fn stdin(mut self, stdio: Stdio) -> Self {
            self.stdin = stdio;
            self
        }
        /// Configure the process to use `stdio` for _stdout_.
        pub fn stdout(mut self, stdio: Stdio) -> Self {
            self.stdout = stdio;
            self
        }
        /// Configure the process to use `stdio` for _stderr_.
        pub fn stderr(mut self, stdio: Stdio) -> Self {
            self.stderr = stdio;
            self
        }

        /// Add `arg` to the list of arguments to call the command with.
        pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
            self.args.push(arg.into());
            self
        }

        /// Add `args` to the list of arguments to call the command with, keeping their order.
        pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
            // Extend in place to avoid collecting into a temporary vector first.
            self.args.extend(args.into_iter().map(Into::into));
            self
        }

        /// Add `key` with `value` to the environment of the spawned command.
        pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
            self.env.push((key.into(), value.into()));
            self
        }
    }

    /// Finalization
    impl Prepare {
        /// Convert this configuration into a [`Command`] and spawn it.
        ///
        /// The resulting command is passed to `gix_trace::debug!` right before it is launched.
        /// Returns the handle to the child process, or the error that prevented it from being spawned.
        pub fn spawn(self) -> std::io::Result<std::process::Child> {
            let mut cmd: Command = self.into();
            gix_trace::debug!(cmd = ?cmd);
            cmd.spawn()
        }
    }

    /// Turn the prepared settings into a [`Command`] that is ready to be spawned.
    ///
    /// * With `use_shell`, the command is either split into a program and its arguments by hand
    ///   (if `allow_manual_arg_splitting` is set and the command is free of shell 'scripting'),
    ///   or it is passed to `sh -c`.
    /// * Without a shell on Windows, the program is looked up in `PATH` and its shebang line, if any,
    ///   is used to launch it through its interpreter.
    /// * Otherwise the command is executed directly.
    ///
    /// Stdio, environment variables, arguments and the [`Context`] are applied afterwards.
    impl From<Prepare> for Command {
        fn from(mut prep: Prepare) -> Command {
            let mut cmd = if prep.use_shell {
                // Try to obtain the program and its arguments without involving a shell.
                let split_command = prep
                    .allow_manual_arg_splitting
                    .then(|| {
                        if gix_path::into_bstr(Cow::Borrowed(prep.command.as_ref()))
                            .find_byteset(b"\\|&;<>()$`\n*?[#~%")
                            .is_none()
                        {
                            prep.command
                                .to_str()
                                .and_then(|line| shell_words::split(line).ok())
                                .and_then(|words| {
                                    // A command consisting only of whitespace splits into nothing.
                                    // There is no program to run then, so leave it to the shell just
                                    // like it happens when manual splitting is disabled.
                                    let mut words = words.into_iter();
                                    let program = words.next()?;
                                    Some((program, words))
                                })
                        } else {
                            None
                        }
                    })
                    .flatten();
                match split_command {
                    Some((program, args)) => {
                        let mut cmd = Command::new(program);
                        cmd.args(args);
                        cmd
                    }
                    None => {
                        let mut cmd = Command::new(if cfg!(windows) { "sh" } else { "/bin/sh" });
                        cmd.arg("-c");
                        // Make sure additional arguments are forwarded to the script.
                        if !prep.args.is_empty() {
                            if prep.command.to_str().map_or(true, |cmd| !cmd.contains("$@")) {
                                prep.command.push(" \"$@\"");
                            } else {
                                gix_trace::debug!(
                                    "Will not add '$@' to '{:?}' as it seems to contain it already",
                                    prep.command
                                );
                            }
                        }
                        cmd.arg(prep.command);
                        // The first argument after the script is `$0`, hence a placeholder is needed
                        // to have `prep.args` (added further below) show up in `$@`.
                        cmd.arg("--");
                        cmd
                    }
                }
            } else if cfg!(windows) {
                let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
                    .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
                    .map(Cow::Owned)
                    .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
                if let Some(shebang) = extract_interpreter(program.as_ref()) {
                    let mut cmd = Command::new(shebang.interpreter);
                    // For relative paths, we may have picked up a file in the current repository
                    // for which an attacker could control everything. Hence, strip options just like Git.
                    // If the file was found in the PATH though, it should be trustworthy.
                    if program.is_absolute() {
                        cmd.args(shebang.args);
                    }
                    // NOTE(review): the interpreter receives the command as given, not the `program`
                    // path resolved through `PATH` - confirm that this is intended.
                    cmd.arg(prep.command);
                    cmd
                } else {
                    Command::new(prep.command)
                }
            } else {
                Command::new(prep.command)
            };
            // We never want to have terminals pop-up on Windows if this runs from a GUI application.
            #[cfg(windows)]
            {
                use std::os::windows::process::CommandExt;
                const CREATE_NO_WINDOW: u32 = 0x08000000;
                cmd.creation_flags(CREATE_NO_WINDOW);
            }
            cmd.stdin(prep.stdin)
                .stdout(prep.stdout)
                .stderr(prep.stderr)
                .envs(prep.env)
                .args(prep.args);
            // The context is applied last so it overrides explicitly set variables and `stderr`.
            if let Some(ctx) = prep.context {
                if let Some(git_dir) = ctx.git_dir {
                    cmd.env("GIT_DIR", &git_dir);
                }
                if let Some(worktree_dir) = ctx.worktree_dir {
                    cmd.env("GIT_WORK_TREE", worktree_dir);
                }
                if let Some(value) = ctx.no_replace_objects {
                    cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
                }
                if let Some(namespace) = ctx.ref_namespace {
                    cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
                }
                if let Some(value) = ctx.literal_pathspecs {
                    cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
                }
                if let Some(value) = ctx.glob_pathspecs {
                    cmd.env(
                        if value {
                            "GIT_GLOB_PATHSPECS"
                        } else {
                            "GIT_NOGLOB_PATHSPECS"
                        },
                        "1",
                    );
                }
                if let Some(value) = ctx.icase_pathspecs {
                    cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
                }
                if let Some(stderr) = ctx.stderr {
                    cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
                }
            }
            cmd
        }
    }
}

/// Return `true` if `executable` has the `exe` extension, ignoring ASCII case so that
/// names like `TOOL.EXE` are recognized as well.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .map_or(false, |ext| ext.eq_ignore_ascii_case("exe"))
}

/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
///
/// For each directory, `<command>.exe` is tried first, followed by `command` itself. If `command`
/// already has an `exe` extension, only `command` is tried.
/// Empty entries in `path_value` are skipped, as they would otherwise refer to the current directory.
fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
    fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
        let plain = gix_path::try_from_bstr(root).ok()?.join(command);
        if is_exe {
            return plain.is_file().then_some(plain);
        }
        // Append `.exe` instead of using `set_extension()`, which would replace an existing
        // extension and turn `foo.bar` into `foo.exe`.
        let mut with_exe = plain.clone().into_os_string();
        with_exe.push(".exe");
        let with_exe = PathBuf::from(with_exe);
        if with_exe.is_file() {
            return Some(with_exe);
        }
        plain.is_file().then_some(plain)
    }
    // More than one component means that `command` contains a path separator.
    if command.components().take(2).count() == 2 {
        return None;
    }
    let path = gix_path::os_str_into_bstr(path_value).ok()?;
    let command_is_exe = is_exe(command);

    path.split(|b| *b == b';')
        .filter(|root| !root.is_empty())
        .find_map(|root| lookup(root.as_bstr(), command, command_is_exe))
}

/// Read the beginning of `executable` and parse the shebang (`#!<path>`) on its first line,
/// returning the shebang data when available.
///
/// `None` is returned if the file can't be opened or read, or if there is no shebang to parse.
/// On Windows, files detected by `is_exe()` are never read.
pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
    #[cfg(windows)]
    if is_exe(executable) {
        return None;
    }
    // Note: the size of the buffer is just like in Git.
    let mut head = [0u8; 100];
    let len = std::fs::File::open(executable)
        .and_then(|mut file| file.read(&mut head))
        .ok()?;
    shebang::parse(head[..len].as_bstr())
}

/// Utilities to parse the shebang line of scripts.
pub mod shebang {
    use bstr::{BStr, ByteSlice};
    use std::ffi::OsString;
    use std::path::PathBuf;

    /// Parse the first line of `buf` to extract all shebang information.
    ///
    /// Return `None` if `buf` is empty, if the first line doesn't start with `#!`, or if there is
    /// no slash or backslash after it.
    pub fn parse(buf: &BStr) -> Option<Data> {
        let line = buf.lines().next()?.strip_prefix(b"#!")?;
        // The interpreter ends at the first space past the last path separator,
        // which allows directories to contain spaces.
        let last_separator = line.rfind_byteset(b"/\\")?;

        let (interpreter, raw_args) = match line[last_separator..].find_byte(b' ') {
            Some(offset) => {
                let split_at = last_separator + offset;
                (&line[..split_at], line.get(split_at + 1..))
            }
            None => (line, None),
        };

        let args: Vec<OsString> = raw_args
            .map(|rest| rest.trim())
            .filter(|rest| !rest.is_empty())
            .and_then(|rest| match rest.to_str() {
                Ok(text) => shell_words::split(text)
                    .ok()
                    .map(|words| words.into_iter().map(Into::into).collect()),
                // Without valid UTF-8 there is no splitting, everything is a single argument.
                Err(_) => Some(vec![gix_path::from_byte_slice(rest).to_owned().into()]),
            })
            .unwrap_or_default();

        Some(Data {
            interpreter: gix_path::from_byte_slice(interpreter.trim()).to_owned(),
            args,
        })
    }

    /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
    ///
    /// ### Deviation
    ///
    /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
    /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
    /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
    ///
    /// To make that work without the kernel, we perform the splitting while Git just ignores options.
    /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
    #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
    pub struct Data {
        /// The path of the interpreter to run, without surrounding whitespace.
        pub interpreter: PathBuf,
        /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
        /// as pre-split arguments just like a shell would do it.
        /// Note that we accept that illformed UTF-8 will prevent argument splitting.
        pub args: Vec<OsString>,
    }
}

/// Create a [`Prepare`] instance for `cmd`, to be configured with its builder methods before
/// [spawning][std::process::Command::spawn()] it.
///
/// Note that the default IO is configured for typical API usage, that is
///
/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
/// - `stdout` is captured for consumption by the caller
/// - `stderr` is inherited to allow the command to provide context to the user
///
/// No shell is used by default, and there are neither arguments nor additional environment variables.
///
/// On Windows, terminal Windows will be suppressed automatically.
///
/// ### Warning
///
/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
/// additional information.
pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
    use std::process::Stdio;

    // Splitting arguments by hand is only the default on Windows.
    let allow_manual_arg_splitting = cfg!(windows);
    let command = cmd.into();
    Prepare {
        command,
        context: None,
        stdin: Stdio::null(),
        stdout: Stdio::piped(),
        stderr: Stdio::inherit(),
        args: Vec::new(),
        env: Vec::new(),
        use_shell: false,
        allow_manual_arg_splitting,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Use each directory of the `win_path_lookup.sh` fixture as a `PATH` entry, in sorted order,
    /// and validate which executable is picked for a couple of commands.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut paths = Vec::new();
        for entry in std::fs::read_dir(&root)?.filter_map(Result::ok) {
            paths.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        paths.sort();
        let lookup_path = OsString::from(paths.join(";"));

        // Each case is the command to look up, the expected result and the reason for it.
        let cases = [
            ("a/b", None, "any path with separator is considered ready to use"),
            (
                "x",
                Some(root.join("a").join("x.exe")),
                "exe will be preferred, and it searches left to right thus doesn't find c/x.exe",
            ),
            (
                "x.exe",
                Some(root.join("a").join("x.exe")),
                "no matter what, a/x won't be found as it's shadowed by an exe file",
            ),
            (
                "exe",
                Some(root.join("b").join("exe")),
                "it finds files further down the path as well",
            ),
        ];
        for (command, expected, message) in cases {
            assert_eq!(
                win_path_lookup(Path::new(command), &lookup_path),
                expected,
                "{}",
                message
            );
        }
        Ok(())
    }
}