gix_merge/blob/platform/
merge.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
use crate::blob::{builtin_driver, PlatformRef, Resolution};
use std::io::Read;
use std::path::PathBuf;

/// Options for use in the [`PlatformRef::merge()`] call.
#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
pub struct Options {
    /// If `true`, the resources being merged are contained in a virtual ancestor,
    /// which is the case when merge bases are merged into one.
    /// This flag affects the choice of merge drivers.
    pub is_virtual_ancestor: bool,
    /// Determine how to resolve conflicts when a binary merge is performed; the value is handed
    /// to the builtin binary driver as is.
    /// If `None`, no conflict resolution is possible, and it picks a side.
    pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
    /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
    ///
    /// The builtin `Union` driver uses them as well, overriding only their `conflict` field.
    pub text: builtin_driver::text::Options,
}

/// The error returned by [`PlatformRef::merge()`].
///
/// All variants relate to running an external merge driver.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The command or the temporary files for the external merge driver could not be prepared.
    #[error(transparent)]
    PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
    /// Running the external driver to obtain its exit status failed.
    /// `cmd` is the debug representation of the command.
    #[error("Failed to launch external merge driver: {cmd}")]
    SpawnExternalDriver { cmd: String, source: std::io::Error },
    /// The external driver ran, but its exit `status` didn't indicate success.
    /// `cmd` is the debug representation of the command.
    #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
    ExternalDriverFailure {
        status: std::process::ExitStatus,
        cmd: String,
    },
    /// The result file of the external driver could not be opened or read back.
    #[error("IO failed when dealing with merge-driver output")]
    ExternalDriverIO(#[from] std::io::Error),
}

/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
///
/// This type allows the creation of a [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*,
/// but `stdin` closed.
/// The command is expected to leave its result in the file substituted at `current`, which is then supposed to be
/// read back from there.
///
/// The tempfile handles are kept as fields so the files are still owned by this instance while the command runs.
// TODO: remove dead-code annotation
#[allow(dead_code)]
pub struct Command {
    /// The pre-configured command
    cmd: std::process::Command,
    /// A tempfile holding the *current* (ours) state of the resource.
    current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// The path at which `current` is located, for reading the result back from later.
    current_path: PathBuf,
    /// A tempfile holding the *ancestor* (base) state of the resource.
    ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// A tempfile holding the *other* (their) state of the resource.
    other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
}

// Just to keep things here but move them a level up later.
pub(super) mod inner {
    ///
    pub mod prepare_external_driver {
        use crate::blob::builtin_driver::text::Conflict;
        use crate::blob::platform::{merge, DriverChoice};
        use crate::blob::{builtin_driver, BuiltinDriver, Driver, PlatformRef, ResourceKind};
        use bstr::{BString, ByteVec};
        use gix_tempfile::{AutoRemove, ContainingDirectory};
        use std::io::Write;
        use std::ops::{Deref, DerefMut};
        use std::path::{Path, PathBuf};
        use std::process::Stdio;

        /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
        ///
        /// Each variant carries the `kind` of the resource that caused the failure.
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            /// The resource has no buffer that could be written to a tempfile.
            #[error("The resource of kind {kind:?} was too large to be processed")]
            ResourceTooLarge { kind: ResourceKind },
            /// The tempfile for the resource at `rela_path` could not be created.
            #[error(
                "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
            )]
            CreateTempfile {
                rela_path: BString,
                kind: ResourceKind,
                source: std::io::Error,
            },
            /// The content of the resource at `rela_path` could not be written to its tempfile.
            #[error(
                "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
            )]
            WriteTempfile {
                rela_path: BString,
                kind: ResourceKind,
                source: std::io::Error,
            },
        }

        /// Plumbing
        impl<'parent> PlatformRef<'parent> {
            /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
            /// prepare the invocation and temporary files needed to launch it according to protocol.
            /// See the documentation of [`Driver::command`] for possible substitutions.
            ///
            /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
            ///
            /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
            /// to read back the result into a suitable buffer.
            ///
            /// ### Deviation
            ///
            /// * We allow passing more context than Git would by taking a whole `context`,
            ///   it's up to the caller to decide how much is filled.
            /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
            pub fn prepare_external_driver(
                &self,
                merge_command: BString,
                builtin_driver::text::Labels {
                    ancestor,
                    current,
                    other,
                }: builtin_driver::text::Labels<'_>,
                context: gix_command::Context,
            ) -> Result<merge::Command, Error> {
                fn write_data(
                    data: &[u8],
                ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
                    let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?;
                    file.write_all(data)?;
                    let mut path = Default::default();
                    file.with_mut(|f| {
                        f.path().clone_into(&mut path);
                    })?;
                    let file = file.close()?;
                    Ok((file, path))
                }

                let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
                    kind: ResourceKind::CommonAncestorOrBase,
                })?;
                let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
                    kind: ResourceKind::CurrentOrOurs,
                })?;
                let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
                    kind: ResourceKind::OtherOrTheirs,
                })?;

                let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile {
                    rela_path: self.ancestor.rela_path.into(),
                    kind: ResourceKind::CommonAncestorOrBase,
                    source: err,
                })?;
                let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile {
                    rela_path: self.current.rela_path.into(),
                    kind: ResourceKind::CurrentOrOurs,
                    source: err,
                })?;
                let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile {
                    rela_path: self.other.rela_path.into(),
                    kind: ResourceKind::OtherOrTheirs,
                    source: err,
                })?;

                let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
                let mut count = 0;
                for token in merge_command.split(|b| *b == b'%') {
                    count += 1;
                    let token = if count > 1 {
                        match token.first() {
                            Some(&b'O') => {
                                cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
                                &token[1..]
                            }
                            Some(&b'A') => {
                                cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
                                &token[1..]
                            }
                            Some(&b'B') => {
                                cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
                                &token[1..]
                            }
                            Some(&b'L') => {
                                let marker_size = self
                                    .options
                                    .text
                                    .conflict
                                    .marker_size()
                                    .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
                                cmd.push_str(format!("{marker_size}"));
                                &token[1..]
                            }
                            Some(&b'P') => {
                                cmd.push_str(gix_quote::single(self.current.rela_path));
                                &token[1..]
                            }
                            Some(&b'S') => {
                                cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
                                &token[1..]
                            }
                            Some(&b'X') => {
                                cmd.push_str(gix_quote::single(current.unwrap_or_default()));
                                &token[1..]
                            }
                            Some(&b'Y') => {
                                cmd.push_str(gix_quote::single(other.unwrap_or_default()));
                                &token[1..]
                            }
                            Some(_other) => {
                                cmd.push(b'%');
                                token
                            }
                            None => b"%",
                        }
                    } else {
                        token
                    };
                    cmd.extend_from_slice(token);
                }

                Ok(merge::Command {
                    cmd: gix_command::prepare(gix_path::from_bstring(cmd))
                        .with_context(context)
                        .with_shell()
                        .stdin(Stdio::null())
                        .stdout(Stdio::inherit())
                        .stderr(Stdio::inherit())
                        .into(),
                    current: ours_tmp,
                    current_path: ours_path,
                    ancestor: base_tmp,
                    other: theirs_tmp,
                })
            }

            /// Return the externally configured driver, suitable for [`Self::prepare_external_driver()`].
            ///
            /// Otherwise `Err` is returned with the built-in driver to use instead, which is the default
            /// built-in driver if the configured driver index doesn't refer to a known driver.
            pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
                let idx = match self.driver {
                    DriverChoice::BuiltIn(builtin) => return Err(builtin),
                    DriverChoice::Index(idx) => idx,
                };
                let fallback = BuiltinDriver::default();
                match self.parent.drivers.get(idx) {
                    Some(driver) => Ok(driver),
                    None => Err(fallback),
                }
            }
        }

        impl std::fmt::Debug for merge::Command {
            /// Format like the wrapped [`std::process::Command`]; the tempfile fields are left out.
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                let Self { cmd, .. } = self;
                std::fmt::Debug::fmt(cmd, f)
            }
        }

        /// Provide read access to the pre-configured [`std::process::Command`].
        impl Deref for merge::Command {
            type Target = std::process::Command;

            fn deref(&self) -> &Self::Target {
                let Self { cmd, .. } = self;
                cmd
            }
        }

        /// Provide mutable access to the pre-configured [`std::process::Command`], for adjusting or running it.
        impl DerefMut for merge::Command {
            fn deref_mut(&mut self) -> &mut Self::Target {
                let Self { cmd, .. } = self;
                cmd
            }
        }

        impl merge::Command {
            /// Open the tempfile that initially held `ours` for reading, as this is where the merge command
            /// is expected to have left its result.
            /// Calling this makes sense only after the merge command has finished successfully.
            pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
                let mut options = std::fs::OpenOptions::new();
                options.read(true);
                options.open(&self.current_path)
            }
        }
    }

    ///
    pub mod builtin_merge {
        use crate::blob::platform::resource;
        use crate::blob::platform::resource::Data;
        use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution};

        /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
        ///
        /// It indicates where the result of the merge can be found: in one of the three resources, or in the output buffer.
        #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
        pub enum Pick {
            /// In a binary merge, chose the ancestor.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Ancestor,
            /// In a binary merge, chose our side.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Ours,
            /// In a binary merge, chose their side.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Theirs,
            /// New data was produced with the result of the merge, to be found in the buffer that was passed to
            /// [builtin_merge()](PlatformRef::builtin_merge).
            /// This happens for any merge that isn't a binary merge.
            Buffer,
        }

        /// Plumbing
        impl<'parent> PlatformRef<'parent> {
            /// Perform the merge using the given `driver`, possibly placing the output in `out`.
            /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
            /// Use `labels` to annotate conflict sections in case of a text-merge.
            /// Returns `None` if one of the buffers is too large, making a merge impossible.
            /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
            /// and one has to take the data from the respective resource.
            ///
            /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
            /// which effectively chooses our side by default.
            pub fn builtin_merge(
                &self,
                driver: BuiltinDriver,
                out: &mut Vec<u8>,
                input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
                labels: builtin_driver::text::Labels<'_>,
            ) -> (Pick, Resolution) {
                let base = self.ancestor.data.as_slice().unwrap_or_default();
                let ours = self.current.data.as_slice().unwrap_or_default();
                let theirs = self.other.data.as_slice().unwrap_or_default();
                let driver = if driver != BuiltinDriver::Binary
                    && (is_binary_buf(self.ancestor.data)
                        || is_binary_buf(self.other.data)
                        || is_binary_buf(self.current.data))
                {
                    BuiltinDriver::Binary
                } else {
                    driver
                };
                match driver {
                    BuiltinDriver::Text => {
                        let resolution =
                            builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
                        (Pick::Buffer, resolution)
                    }
                    BuiltinDriver::Binary => {
                        let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
                        let pick = match pick {
                            builtin_driver::binary::Pick::Ours => Pick::Ours,
                            builtin_driver::binary::Pick::Theirs => Pick::Theirs,
                            builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
                        };
                        (pick, resolution)
                    }
                    BuiltinDriver::Union => {
                        let resolution = builtin_driver::text(
                            out,
                            input,
                            labels,
                            ours,
                            base,
                            theirs,
                            builtin_driver::text::Options {
                                conflict: builtin_driver::text::Conflict::ResolveWithUnion,
                                ..self.options.text
                            },
                        );
                        (Pick::Buffer, resolution)
                    }
                }
            }
        }

        /// Return `true` if `data` should be treated as binary, which is the case if there is a null-byte
        /// within the first 8000 bytes of its buffer, or if it was too large to be loaded.
        /// Missing data is never considered binary.
        fn is_binary_buf(data: resource::Data<'_>) -> bool {
            const MAX_BYTES_TO_CHECK: usize = 8000;
            match data {
                Data::TooLarge { .. } => true,
                Data::Missing => false,
                Data::Buffer(buf) => buf.iter().take(MAX_BYTES_TO_CHECK).any(|byte| *byte == 0),
            }
        }
    }
}

/// Convenience
impl<'parent> PlatformRef<'parent> {
    /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
    /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
    /// to indicate it's `out`.
    /// Use `labels` to annotate conflict sections in case of a text-merge.
    /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
    ///
    /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
    /// Too-large resources will result in an error.
    ///
    /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
    /// in terms of buffer handling to make it more useful in the face of missing local files.
    pub fn merge(
        &self,
        out: &mut Vec<u8>,
        labels: builtin_driver::text::Labels<'_>,
        context: &gix_command::Context,
    ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
        match self.configured_driver() {
            Ok(driver) => {
                let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
                let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
                    cmd: format!("{:?}", cmd.cmd),
                    source: err,
                })?;
                if !status.success() {
                    return Err(Error::ExternalDriverFailure {
                        cmd: format!("{:?}", cmd.cmd),
                        status,
                    });
                }
                out.clear();
                cmd.open_result_file()?.read_to_end(out)?;
                Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
            }
            Err(builtin) => {
                let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
                out.clear();
                let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
                Ok((pick, resolution))
            }
        }
    }

    /// Map a `pick` obtained from [`merge()`](Self::merge) to the buffer of the resource it refers to,
    /// suitable for reading or copying.
    /// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately).
    /// Return `Err(())` if the buffer is *too large*, so it was never read.
    #[allow(clippy::result_unit_err)]
    pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
        let resource = match pick {
            inner::builtin_merge::Pick::Buffer => return Ok(None),
            inner::builtin_merge::Pick::Ancestor => &self.ancestor,
            inner::builtin_merge::Pick::Ours => &self.current,
            inner::builtin_merge::Pick::Theirs => &self.other,
        };
        match resource.data.as_slice() {
            Some(buffer) => Ok(Some(buffer)),
            None => Err(()),
        }
    }

    /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
    ///
    /// * If new content was produced due to a content merge, `buf` will be written out to the object database
    ///   using `write_blob`, and the resulting id is returned.
    /// * If the picked resource has a non-null id, that existing id is returned as is, which is the case for
    ///   binary or large files available in the object database.
    /// * If the picked resource has a null id but a buffer, the buffer is turned into an id using `write_blob`.
    /// * Otherwise `Ok(None)` is returned, which happens if the resource was loaded from the worktree *and* was too large
    ///   so it was never loaded from disk, or if the resource was missing.
    ///
    /// `write_blob()` is used to turn buffers into object ids, and its error is passed on unchanged.
    pub fn id_by_pick<E>(
        &self,
        pick: inner::builtin_merge::Pick,
        buf: &[u8],
        mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
    ) -> Result<Option<gix_hash::ObjectId>, E> {
        use crate::blob::platform::resource::Data;

        let resource = match pick {
            inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
            inner::builtin_merge::Pick::Ancestor => &self.ancestor,
            inner::builtin_merge::Pick::Ours => &self.current,
            inner::builtin_merge::Pick::Theirs => &self.other,
        };

        // A known id takes precedence, no matter if the data is available or not.
        if !resource.id.is_null() {
            return Ok(Some(resource.id.to_owned()));
        }
        match resource.data {
            Data::Buffer(content) => write_blob(content).map(Some),
            Data::TooLarge { .. } | Data::Missing => Ok(None),
        }
    }
}