gix_merge/blob/platform/merge.rs
1use std::{io::Read, path::PathBuf};
2
3use crate::blob::{builtin_driver, PlatformRef, Resolution};
4
5/// Options for the use in the [`PlatformRef::merge()`] call.
6#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
7pub struct Options {
8 /// If `true`, the resources being merged are contained in a virtual ancestor,
9 /// which is the case when merge bases are merged into one.
10 /// This flag affects the choice of merge drivers.
11 pub is_virtual_ancestor: bool,
12 /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
13 pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
14 /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
15 pub text: builtin_driver::text::Options,
16}
17
18/// The error returned by [`PlatformRef::merge()`].
19#[derive(Debug, thiserror::Error)]
20#[allow(missing_docs)]
21pub enum Error {
22 #[error(transparent)]
23 PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
24 #[error("Failed to launch external merge driver: {cmd}")]
25 SpawnExternalDriver { cmd: String, source: std::io::Error },
26 #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
27 ExternalDriverFailure {
28 status: std::process::ExitStatus,
29 cmd: String,
30 },
31 #[error("IO failed when dealing with merge-driver output")]
32 ExternalDriverIO(#[from] std::io::Error),
33}
34
35/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
36///
37/// This type allows to creation of [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*,
38/// but `stdin` closed.
39/// It's expected to leave its result in the file substituted at `current` which is then supposed to be read back from there.
40// TODO: remove dead-code annotation
41#[allow(dead_code)]
42pub struct Command {
43 /// The pre-configured command
44 cmd: std::process::Command,
45 /// A tempfile holding the *current* (ours) state of the resource.
46 current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
47 /// The path at which `current` is located, for reading the result back from later.
48 current_path: PathBuf,
49 /// A tempfile holding the *ancestor* (base) state of the resource.
50 ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
51 /// A tempfile holding the *other* (their) state of the resource.
52 other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
53}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57 ///
58 pub mod prepare_external_driver {
59 use std::{
60 io::Write,
61 ops::{Deref, DerefMut},
62 path::{Path, PathBuf},
63 process::Stdio,
64 };
65
66 use bstr::{BString, ByteVec};
67 use gix_tempfile::{AutoRemove, ContainingDirectory};
68
69 use crate::blob::{
70 builtin_driver,
71 builtin_driver::text::Conflict,
72 platform::{merge, DriverChoice},
73 BuiltinDriver, Driver, PlatformRef, ResourceKind,
74 };
75
76 /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
77 #[derive(Debug, thiserror::Error)]
78 #[allow(missing_docs)]
79 pub enum Error {
80 #[error("The resource of kind {kind:?} was too large to be processed")]
81 ResourceTooLarge { kind: ResourceKind },
82 #[error(
83 "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
84 )]
85 CreateTempfile {
86 rela_path: BString,
87 kind: ResourceKind,
88 source: std::io::Error,
89 },
90 #[error(
91 "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
92 )]
93 WriteTempfile {
94 rela_path: BString,
95 kind: ResourceKind,
96 source: std::io::Error,
97 },
98 }
99
100 /// Plumbing
101 impl<'parent> PlatformRef<'parent> {
102 /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
103 /// prepare the invocation and temporary files needed to launch it according to protocol.
104 /// See the documentation of [`Driver::command`] for possible substitutions.
105 ///
106 /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
107 ///
108 /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
109 /// to read back the result into a suitable buffer.
110 ///
111 /// ### Deviation
112 ///
113 /// * We allow passing more context than Git would by taking a whole `context`,
114 /// it's up to the caller to decide how much is filled.
115 /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
116 pub fn prepare_external_driver(
117 &self,
118 merge_command: BString,
119 builtin_driver::text::Labels {
120 ancestor,
121 current,
122 other,
123 }: builtin_driver::text::Labels<'_>,
124 context: gix_command::Context,
125 ) -> Result<merge::Command, Error> {
126 fn write_data(
127 data: &[u8],
128 ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
129 let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?;
130 file.write_all(data)?;
131 let mut path = Default::default();
132 file.with_mut(|f| {
133 f.path().clone_into(&mut path);
134 })?;
135 let file = file.close()?;
136 Ok((file, path))
137 }
138
139 let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
140 kind: ResourceKind::CommonAncestorOrBase,
141 })?;
142 let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
143 kind: ResourceKind::CurrentOrOurs,
144 })?;
145 let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
146 kind: ResourceKind::OtherOrTheirs,
147 })?;
148
149 let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile {
150 rela_path: self.ancestor.rela_path.into(),
151 kind: ResourceKind::CommonAncestorOrBase,
152 source: err,
153 })?;
154 let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile {
155 rela_path: self.current.rela_path.into(),
156 kind: ResourceKind::CurrentOrOurs,
157 source: err,
158 })?;
159 let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile {
160 rela_path: self.other.rela_path.into(),
161 kind: ResourceKind::OtherOrTheirs,
162 source: err,
163 })?;
164
165 let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
166 let mut count = 0;
167 for token in merge_command.split(|b| *b == b'%') {
168 count += 1;
169 let token = if count > 1 {
170 match token.first() {
171 Some(&b'O') => {
172 cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
173 &token[1..]
174 }
175 Some(&b'A') => {
176 cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
177 &token[1..]
178 }
179 Some(&b'B') => {
180 cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
181 &token[1..]
182 }
183 Some(&b'L') => {
184 let marker_size = self
185 .options
186 .text
187 .conflict
188 .marker_size()
189 .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
190 cmd.push_str(format!("{marker_size}"));
191 &token[1..]
192 }
193 Some(&b'P') => {
194 cmd.push_str(gix_quote::single(self.current.rela_path));
195 &token[1..]
196 }
197 Some(&b'S') => {
198 cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
199 &token[1..]
200 }
201 Some(&b'X') => {
202 cmd.push_str(gix_quote::single(current.unwrap_or_default()));
203 &token[1..]
204 }
205 Some(&b'Y') => {
206 cmd.push_str(gix_quote::single(other.unwrap_or_default()));
207 &token[1..]
208 }
209 Some(_other) => {
210 cmd.push(b'%');
211 token
212 }
213 None => b"%",
214 }
215 } else {
216 token
217 };
218 cmd.extend_from_slice(token);
219 }
220
221 Ok(merge::Command {
222 cmd: gix_command::prepare(gix_path::from_bstring(cmd))
223 .with_context(context)
224 .command_may_be_shell_script()
225 .stdin(Stdio::null())
226 .stdout(Stdio::inherit())
227 .stderr(Stdio::inherit())
228 .into(),
229 current: ours_tmp,
230 current_path: ours_path,
231 ancestor: base_tmp,
232 other: theirs_tmp,
233 })
234 }
235
236 /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
237 /// with the built-in driver to use instead.
238 pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
239 match self.driver {
240 DriverChoice::BuiltIn(builtin) => Err(builtin),
241 DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
242 }
243 }
244 }
245
246 impl std::fmt::Debug for merge::Command {
247 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
248 self.cmd.fmt(f)
249 }
250 }
251
252 impl Deref for merge::Command {
253 type Target = std::process::Command;
254
255 fn deref(&self) -> &Self::Target {
256 &self.cmd
257 }
258 }
259
260 impl DerefMut for merge::Command {
261 fn deref_mut(&mut self) -> &mut Self::Target {
262 &mut self.cmd
263 }
264 }
265
266 impl merge::Command {
267 /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
268 /// Calling this makes sense only after the merge command has finished successfully.
269 pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
270 std::fs::File::open(&self.current_path)
271 }
272 }
273 }
274
275 ///
276 pub mod builtin_merge {
277 use crate::blob::{
278 builtin_driver,
279 platform::{resource, resource::Data},
280 BuiltinDriver, PlatformRef, Resolution,
281 };
282
/// Identifies where the result of a [builtin_merge](PlatformRef::builtin_merge) can be found,
/// which tells how a merge conflict was resolved.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Pick {
    /// The ancestor was chosen in a binary merge.
    ///
    /// Its data can be retrieved with [`PlatformRef::buffer_by_pick()`].
    Ancestor,
    /// Our side was chosen in a binary merge.
    ///
    /// Its data can be retrieved with [`PlatformRef::buffer_by_pick()`].
    Ours,
    /// Their side was chosen in a binary merge.
    ///
    /// Its data can be retrieved with [`PlatformRef::buffer_by_pick()`].
    Theirs,
    /// The merge produced new data, which is located in the buffer that was passed to
    /// [builtin_merge()](PlatformRef::builtin_merge).
    /// This is the case for any merge that isn't a binary merge.
    Buffer,
}
303
304 /// Plumbing
305 impl<'parent> PlatformRef<'parent> {
306 /// Perform the merge using the given `driver`, possibly placing the output in `out`.
307 /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
308 /// Use `labels` to annotate conflict sections in case of a text-merge.
309 /// Returns `None` if one of the buffers is too large, making a merge impossible.
310 /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
311 /// and one has to take the data from the respective resource.
312 ///
313 /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
314 /// which effectively chooses our side by default.
315 pub fn builtin_merge(
316 &self,
317 driver: BuiltinDriver,
318 out: &mut Vec<u8>,
319 input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
320 labels: builtin_driver::text::Labels<'_>,
321 ) -> (Pick, Resolution) {
322 let base = self.ancestor.data.as_slice().unwrap_or_default();
323 let ours = self.current.data.as_slice().unwrap_or_default();
324 let theirs = self.other.data.as_slice().unwrap_or_default();
325 let driver = if driver != BuiltinDriver::Binary
326 && (is_binary_buf(self.ancestor.data)
327 || is_binary_buf(self.other.data)
328 || is_binary_buf(self.current.data))
329 {
330 BuiltinDriver::Binary
331 } else {
332 driver
333 };
334 match driver {
335 BuiltinDriver::Text => {
336 let resolution =
337 builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
338 (Pick::Buffer, resolution)
339 }
340 BuiltinDriver::Binary => {
341 // easier to reason about the 'split' compared to merging both conditions
342 #[allow(clippy::if_same_then_else)]
343 if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
344 (Pick::Ours, Resolution::Complete)
345 } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
346 (Pick::Ours, Resolution::Complete)
347 } else {
348 let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
349 let pick = match pick {
350 builtin_driver::binary::Pick::Ours => Pick::Ours,
351 builtin_driver::binary::Pick::Theirs => Pick::Theirs,
352 builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
353 };
354 (pick, resolution)
355 }
356 }
357 BuiltinDriver::Union => {
358 let resolution = builtin_driver::text(
359 out,
360 input,
361 labels,
362 ours,
363 base,
364 theirs,
365 builtin_driver::text::Options {
366 conflict: builtin_driver::text::Conflict::ResolveWithUnion,
367 ..self.options.text
368 },
369 );
370 (Pick::Buffer, resolution)
371 }
372 }
373 }
374 }
375
376 fn is_binary_buf(data: resource::Data<'_>) -> bool {
377 match data {
378 Data::Missing => false,
379 Data::Buffer(buf) => {
380 let buf = &buf[..buf.len().min(8000)];
381 buf.contains(&0)
382 }
383 Data::TooLarge { .. } => true,
384 }
385 }
386 }
387}
388
389/// Convenience
390impl<'parent> PlatformRef<'parent> {
391 /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
392 /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
393 /// to indicate it's `out`.
394 /// Use `labels` to annotate conflict sections in case of a text-merge.
395 /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
396 ///
397 /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
398 /// Too-large resources will result in an error.
399 ///
400 /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
401 /// in terms of buffer handling to make it more useful in the face of missing local files.
402 pub fn merge(
403 &self,
404 out: &mut Vec<u8>,
405 labels: builtin_driver::text::Labels<'_>,
406 context: &gix_command::Context,
407 ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
408 match self.configured_driver() {
409 Ok(driver) => {
410 let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
411 let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
412 cmd: format!("{:?}", cmd.cmd),
413 source: err,
414 })?;
415 if !status.success() {
416 return Err(Error::ExternalDriverFailure {
417 cmd: format!("{:?}", cmd.cmd),
418 status,
419 });
420 }
421 out.clear();
422 cmd.open_result_file()?.read_to_end(out)?;
423 Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
424 }
425 Err(builtin) => {
426 let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
427 out.clear();
428 let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
429 Ok((pick, resolution))
430 }
431 }
432 }
433
434 /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
435 /// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately).
436 /// Return `Err(())` if the buffer is *too large*, so it was never read.
437 #[allow(clippy::result_unit_err)]
438 pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
439 match pick {
440 inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
441 inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
442 inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
443 inner::builtin_merge::Pick::Buffer => Ok(None),
444 }
445 }
446
447 /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
448 /// In case of binary or large files, this will simply be the existing ID of the resource.
449 /// In case of resources available in the object DB for binary merges, the object ID will be returned.
450 /// If new content was produced due to a content merge, `buf` will be written out
451 /// to the object database using `write_blob`.
452 /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
453 /// from the worktree *and* was too large so it was never loaded from disk.
454 /// `Ok(None)` will also be returned if one of the resources was missing.
455 /// `write_blob()` is used to turn buffers.
456 pub fn id_by_pick<E>(
457 &self,
458 pick: inner::builtin_merge::Pick,
459 buf: &[u8],
460 mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
461 ) -> Result<Option<gix_hash::ObjectId>, E> {
462 let field = match pick {
463 inner::builtin_merge::Pick::Ancestor => &self.ancestor,
464 inner::builtin_merge::Pick::Ours => &self.current,
465 inner::builtin_merge::Pick::Theirs => &self.other,
466 inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
467 };
468 use crate::blob::platform::resource::Data;
469 match field.data {
470 Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
471 Data::TooLarge { .. } | Data::Missing => Ok(None),
472 Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
473 Data::Buffer(_) => Ok(Some(field.id.to_owned())),
474 }
475 }
476}