gix_merge/blob/platform/
merge.rs

1use std::{io::Read, path::PathBuf};
2
3use crate::blob::{builtin_driver, PlatformRef, Resolution};
4
/// Options for use in the [`PlatformRef::merge()`] call.
#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
pub struct Options {
    /// If `true`, the resources being merged are contained in a virtual ancestor,
    /// which is the case when merge bases are merged into one.
    /// This flag affects the choice of merge drivers.
    pub is_virtual_ancestor: bool,
    /// Determine how to resolve conflicts of binary merges. It is passed as-is to the builtin binary driver.
    /// If `None`, no conflict resolution is possible, and it picks a side.
    pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
    /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
    /// The union driver uses them as well, with the conflict style overridden.
    pub text: builtin_driver::text::Options,
}
17
/// The error returned by [`PlatformRef::merge()`].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// Preparing the command and tempfiles for the external merge driver failed.
    #[error(transparent)]
    PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
    /// The prepared command `cmd` could not be run to obtain its exit status.
    #[error("Failed to launch external merge driver: {cmd}")]
    SpawnExternalDriver { cmd: String, source: std::io::Error },
    /// The command `cmd` ran, but its exit `status` did not indicate success.
    #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
    ExternalDriverFailure {
        status: std::process::ExitStatus,
        cmd: String,
    },
    /// Opening or reading the result file of the external merge driver failed.
    #[error("IO failed when dealing with merge-driver output")]
    ExternalDriverIO(#[from] std::io::Error),
}
34
/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
///
/// This type dereferences to a [`std::process::Command`] that is ready to run, with `stderr` and `stdout` set to *inherit*,
/// and `stdin` set to null.
/// The command is expected to leave its result in the file substituted for `current`, from where it is supposed
/// to be read back after the command has finished.
// TODO: remove dead-code annotation
#[allow(dead_code)]
pub struct Command {
    /// The pre-configured command
    cmd: std::process::Command,
    /// A tempfile holding the *current* (ours) state of the resource.
    current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// The path at which `current` is located, for reading the result back from later.
    current_path: PathBuf,
    /// A tempfile holding the *ancestor* (base) state of the resource.
    ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// A tempfile holding the *other* (their) state of the resource.
    other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57    ///
58    pub mod prepare_external_driver {
59        use std::{
60            io::Write,
61            ops::{Deref, DerefMut},
62            path::{Path, PathBuf},
63            process::Stdio,
64        };
65
66        use bstr::{BString, ByteVec};
67        use gix_tempfile::{AutoRemove, ContainingDirectory};
68
69        use crate::blob::{
70            builtin_driver,
71            builtin_driver::text::Conflict,
72            platform::{merge, DriverChoice},
73            BuiltinDriver, Driver, PlatformRef, ResourceKind,
74        };
75
        /// The error returned by [`PlatformRef::prepare_external_driver()`].
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            /// The data of the resource of the given `kind` could not be obtained as a slice.
            #[error("The resource of kind {kind:?} was too large to be processed")]
            ResourceTooLarge { kind: ResourceKind },
            /// The tempfile meant to hold the content of the resource at `rela_path` could not be created.
            #[error(
                "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
            )]
            CreateTempfile {
                rela_path: BString,
                kind: ResourceKind,
                source: std::io::Error,
            },
            /// The content of the resource at `rela_path` could not be written to its tempfile.
            #[error(
                "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
            )]
            WriteTempfile {
                rela_path: BString,
                kind: ResourceKind,
                source: std::io::Error,
            },
        }
99
100        /// Plumbing
101        impl<'parent> PlatformRef<'parent> {
102            /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
103            /// prepare the invocation and temporary files needed to launch it according to protocol.
104            /// See the documentation of [`Driver::command`] for possible substitutions.
105            ///
106            /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
107            ///
108            /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
109            /// to read back the result into a suitable buffer.
110            ///
111            /// ### Deviation
112            ///
113            /// * We allow passing more context than Git would by taking a whole `context`,
114            ///   it's up to the caller to decide how much is filled.
115            /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
116            pub fn prepare_external_driver(
117                &self,
118                merge_command: BString,
119                builtin_driver::text::Labels {
120                    ancestor,
121                    current,
122                    other,
123                }: builtin_driver::text::Labels<'_>,
124                context: gix_command::Context,
125            ) -> Result<merge::Command, Error> {
126                fn write_data(
127                    data: &[u8],
128                ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
129                    let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?;
130                    file.write_all(data)?;
131                    let mut path = Default::default();
132                    file.with_mut(|f| {
133                        f.path().clone_into(&mut path);
134                    })?;
135                    let file = file.close()?;
136                    Ok((file, path))
137                }
138
139                let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
140                    kind: ResourceKind::CommonAncestorOrBase,
141                })?;
142                let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
143                    kind: ResourceKind::CurrentOrOurs,
144                })?;
145                let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
146                    kind: ResourceKind::OtherOrTheirs,
147                })?;
148
149                let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile {
150                    rela_path: self.ancestor.rela_path.into(),
151                    kind: ResourceKind::CommonAncestorOrBase,
152                    source: err,
153                })?;
154                let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile {
155                    rela_path: self.current.rela_path.into(),
156                    kind: ResourceKind::CurrentOrOurs,
157                    source: err,
158                })?;
159                let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile {
160                    rela_path: self.other.rela_path.into(),
161                    kind: ResourceKind::OtherOrTheirs,
162                    source: err,
163                })?;
164
165                let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
166                let mut count = 0;
167                for token in merge_command.split(|b| *b == b'%') {
168                    count += 1;
169                    let token = if count > 1 {
170                        match token.first() {
171                            Some(&b'O') => {
172                                cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
173                                &token[1..]
174                            }
175                            Some(&b'A') => {
176                                cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
177                                &token[1..]
178                            }
179                            Some(&b'B') => {
180                                cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
181                                &token[1..]
182                            }
183                            Some(&b'L') => {
184                                let marker_size = self
185                                    .options
186                                    .text
187                                    .conflict
188                                    .marker_size()
189                                    .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
190                                cmd.push_str(format!("{marker_size}"));
191                                &token[1..]
192                            }
193                            Some(&b'P') => {
194                                cmd.push_str(gix_quote::single(self.current.rela_path));
195                                &token[1..]
196                            }
197                            Some(&b'S') => {
198                                cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
199                                &token[1..]
200                            }
201                            Some(&b'X') => {
202                                cmd.push_str(gix_quote::single(current.unwrap_or_default()));
203                                &token[1..]
204                            }
205                            Some(&b'Y') => {
206                                cmd.push_str(gix_quote::single(other.unwrap_or_default()));
207                                &token[1..]
208                            }
209                            Some(_other) => {
210                                cmd.push(b'%');
211                                token
212                            }
213                            None => b"%",
214                        }
215                    } else {
216                        token
217                    };
218                    cmd.extend_from_slice(token);
219                }
220
221                Ok(merge::Command {
222                    cmd: gix_command::prepare(gix_path::from_bstring(cmd))
223                        .with_context(context)
224                        .command_may_be_shell_script()
225                        .stdin(Stdio::null())
226                        .stdout(Stdio::inherit())
227                        .stderr(Stdio::inherit())
228                        .into(),
229                    current: ours_tmp,
230                    current_path: ours_path,
231                    ancestor: base_tmp,
232                    other: theirs_tmp,
233                })
234            }
235
236            /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
237            /// with the built-in driver to use instead.
238            pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
239                match self.driver {
240                    DriverChoice::BuiltIn(builtin) => Err(builtin),
241                    DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
242                }
243            }
244        }
245
246        impl std::fmt::Debug for merge::Command {
247            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
248                self.cmd.fmt(f)
249            }
250        }
251
252        impl Deref for merge::Command {
253            type Target = std::process::Command;
254
255            fn deref(&self) -> &Self::Target {
256                &self.cmd
257            }
258        }
259
260        impl DerefMut for merge::Command {
261            fn deref_mut(&mut self) -> &mut Self::Target {
262                &mut self.cmd
263            }
264        }
265
266        impl merge::Command {
267            /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
268            /// Calling this makes sense only after the merge command has finished successfully.
269            pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
270                std::fs::File::open(&self.current_path)
271            }
272        }
273    }
274
275    ///
276    pub mod builtin_merge {
277        use crate::blob::{
278            builtin_driver,
279            platform::{resource, resource::Data},
280            BuiltinDriver, PlatformRef, Resolution,
281        };
282
        /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge),
        /// and thus where the result of the merge can be found.
        #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
        pub enum Pick {
            /// In a binary merge, chose the ancestor.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Ancestor,
            /// In a binary merge, chose our side.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Ours,
            /// In a binary merge, chose their side.
            ///
            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
            Theirs,
            /// New data was produced with the result of the merge, to be found in the buffer that was passed to
            /// [builtin_merge()](PlatformRef::builtin_merge).
            /// This happens for any merge that isn't a binary merge.
            Buffer,
        }
303
304        /// Plumbing
305        impl<'parent> PlatformRef<'parent> {
306            /// Perform the merge using the given `driver`, possibly placing the output in `out`.
307            /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
308            /// Use `labels` to annotate conflict sections in case of a text-merge.
309            /// Returns `None` if one of the buffers is too large, making a merge impossible.
310            /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
311            /// and one has to take the data from the respective resource.
312            ///
313            /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
314            /// which effectively chooses our side by default.
315            pub fn builtin_merge(
316                &self,
317                driver: BuiltinDriver,
318                out: &mut Vec<u8>,
319                input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
320                labels: builtin_driver::text::Labels<'_>,
321            ) -> (Pick, Resolution) {
322                let base = self.ancestor.data.as_slice().unwrap_or_default();
323                let ours = self.current.data.as_slice().unwrap_or_default();
324                let theirs = self.other.data.as_slice().unwrap_or_default();
325                let driver = if driver != BuiltinDriver::Binary
326                    && (is_binary_buf(self.ancestor.data)
327                        || is_binary_buf(self.other.data)
328                        || is_binary_buf(self.current.data))
329                {
330                    BuiltinDriver::Binary
331                } else {
332                    driver
333                };
334                match driver {
335                    BuiltinDriver::Text => {
336                        let resolution =
337                            builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
338                        (Pick::Buffer, resolution)
339                    }
340                    BuiltinDriver::Binary => {
341                        // easier to reason about the 'split' compared to merging both conditions
342                        #[allow(clippy::if_same_then_else)]
343                        if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
344                            (Pick::Ours, Resolution::Complete)
345                        } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
346                            (Pick::Ours, Resolution::Complete)
347                        } else {
348                            let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
349                            let pick = match pick {
350                                builtin_driver::binary::Pick::Ours => Pick::Ours,
351                                builtin_driver::binary::Pick::Theirs => Pick::Theirs,
352                                builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
353                            };
354                            (pick, resolution)
355                        }
356                    }
357                    BuiltinDriver::Union => {
358                        let resolution = builtin_driver::text(
359                            out,
360                            input,
361                            labels,
362                            ours,
363                            base,
364                            theirs,
365                            builtin_driver::text::Options {
366                                conflict: builtin_driver::text::Conflict::ResolveWithUnion,
367                                ..self.options.text
368                            },
369                        );
370                        (Pick::Buffer, resolution)
371                    }
372                }
373            }
374        }
375
376        fn is_binary_buf(data: resource::Data<'_>) -> bool {
377            match data {
378                Data::Missing => false,
379                Data::Buffer(buf) => {
380                    let buf = &buf[..buf.len().min(8000)];
381                    buf.contains(&0)
382                }
383                Data::TooLarge { .. } => true,
384            }
385        }
386    }
387}
388
389/// Convenience
390impl<'parent> PlatformRef<'parent> {
391    /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
392    /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
393    /// to indicate it's `out`.
394    /// Use `labels` to annotate conflict sections in case of a text-merge.
395    /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
396    ///
397    /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
398    /// Too-large resources will result in an error.
399    ///
400    /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
401    /// in terms of buffer handling to make it more useful in the face of missing local files.
402    pub fn merge(
403        &self,
404        out: &mut Vec<u8>,
405        labels: builtin_driver::text::Labels<'_>,
406        context: &gix_command::Context,
407    ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
408        match self.configured_driver() {
409            Ok(driver) => {
410                let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
411                let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
412                    cmd: format!("{:?}", cmd.cmd),
413                    source: err,
414                })?;
415                if !status.success() {
416                    return Err(Error::ExternalDriverFailure {
417                        cmd: format!("{:?}", cmd.cmd),
418                        status,
419                    });
420                }
421                out.clear();
422                cmd.open_result_file()?.read_to_end(out)?;
423                Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
424            }
425            Err(builtin) => {
426                let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
427                out.clear();
428                let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
429                Ok((pick, resolution))
430            }
431        }
432    }
433
434    /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
435    /// Return `Ok(None)`  if the `pick` corresponds to a buffer (that was written separately).
436    /// Return `Err(())` if the buffer is *too large*, so it was never read.
437    #[allow(clippy::result_unit_err)]
438    pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
439        match pick {
440            inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
441            inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
442            inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
443            inner::builtin_merge::Pick::Buffer => Ok(None),
444        }
445    }
446
447    /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
448    /// In case of binary or large files, this will simply be the existing ID of the resource.
449    /// In case of resources available in the object DB for binary merges, the object ID will be returned.
450    /// If new content was produced due to a content merge, `buf` will be written out
451    /// to the object database using `write_blob`.
452    /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
453    /// from the worktree *and* was too large so it was never loaded from disk.
454    /// `Ok(None)` will also be returned if one of the resources was missing.
455    /// `write_blob()` is used to turn buffers.
456    pub fn id_by_pick<E>(
457        &self,
458        pick: inner::builtin_merge::Pick,
459        buf: &[u8],
460        mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
461    ) -> Result<Option<gix_hash::ObjectId>, E> {
462        let field = match pick {
463            inner::builtin_merge::Pick::Ancestor => &self.ancestor,
464            inner::builtin_merge::Pick::Ours => &self.current,
465            inner::builtin_merge::Pick::Theirs => &self.other,
466            inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
467        };
468        use crate::blob::platform::resource::Data;
469        match field.data {
470            Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
471            Data::TooLarge { .. } | Data::Missing => Ok(None),
472            Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
473            Data::Buffer(_) => Ok(Some(field.id.to_owned())),
474        }
475    }
476}