gix_merge/blob/
pipeline.rs

1use super::{Pipeline, ResourceKind};
2use bstr::BStr;
3use gix_filter::driver::apply::{Delay, MaybeDelayed};
4use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome};
5use gix_object::tree::EntryKind;
6use std::io::Read;
7use std::path::{Path, PathBuf};
8
/// Configuration consumed by a [`Pipeline`].
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Options {
    /// The size in bytes that a resource has to exceed to be treated as too large to merge.
    /// Resources above this threshold are neither read nor filtered.
    /// A value of `0` disables the check, so nothing is ever rejected because of its size.
    ///
    /// For resources stored in `git`, the decompressed object size is what is compared.
    /// This means that `git-lfs` pointer files are usually below the threshold unless it
    /// is deliberately set very low.
    /// For resources read from a worktree, the size of the file on disk is compared instead,
    /// which may be large even if the corresponding `git-lfs` object in Git is small.
    pub large_file_threshold_bytes: u64,
}
23
/// Selects how a resource is converted before it is merged.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Mode {
    /// Produce resources in the form in which they are stored in `git`.
    ///
    /// Resources referred to by object-id are already in that form, whereas data
    /// read from a worktree has to be converted first.
    #[default]
    ToGit,
    /// Resources referred to by object-id are first converted to what *would* be placed
    /// in the worktree, and then back to what *would* be stored in Git.
    ///
    /// Resources read from a worktree are only converted to what *would* be stored in Git.
    ///
    /// This helps to avoid merge conflicts caused by inconsistent whitespace.
    Renormalize,
}
41
/// The worktree directories, one per kind of resource, from which resources can be read if they are present.
#[derive(Debug, Default, Clone)]
pub struct WorktreeRoots {
    /// The root of the worktree that holds the current (or our) version of the resource.
    pub current_root: Option<PathBuf>,
    /// The root of the worktree that holds the other (or their) version of the resource.
    pub other_root: Option<PathBuf>,
    /// The root of the worktree that holds the common ancestor of our and their version of the resource.
    pub common_ancestor_root: Option<PathBuf>,
}
52
53impl WorktreeRoots {
54    /// Return the root path for the given `kind`
55    pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
56        match kind {
57            ResourceKind::CurrentOrOurs => self.current_root.as_deref(),
58            ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(),
59            ResourceKind::OtherOrTheirs => self.other_root.as_deref(),
60        }
61    }
62
63    /// Return `true` if all worktree roots are unset.
64    pub fn is_unset(&self) -> bool {
65        self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none()
66    }
67}
68
69/// Lifecycle
70impl Pipeline {
71    /// Create a new instance of a pipeline which produces blobs suitable for merging.
72    ///
73    /// `roots` allow to read worktree files directly, and `worktree_filter` is used
74    /// to transform object database data directly.
75    /// `options` are used to further configure the way we act.
76    pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self {
77        Pipeline {
78            roots,
79            filter: worktree_filter,
80            options,
81            path: Default::default(),
82        }
83    }
84}
85
/// Access
///
/// This block currently defines no methods.
impl Pipeline {}
88
/// The outcome of a successful call to [`Pipeline::convert_to_mergeable()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Data {
    /// The mergeable data was placed into the buffer handed to [`Pipeline::convert_to_mergeable()`].
    Buffer,
    /// The file or blob exceeds the large-file threshold and was not processed.
    ///
    /// A resource in this state cannot be merged.
    TooLarge {
        /// The size of the object before any filter was applied, or the size of the file as found on disk.
        ///
        /// Note that this size does not always describe the same 'state' of the content: sometimes it is
        /// the size of the blob in git and sometimes the size of the file in the worktree, and filters
        /// can make these two differ considerably.
        size: u64,
    },
}
106
107///
108pub mod convert_to_mergeable {
109    use std::collections::TryReserveError;
110
111    use bstr::BString;
112    use gix_object::tree::EntryKind;
113
114    /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()).
115    #[derive(Debug, thiserror::Error)]
116    #[allow(missing_docs)]
117    pub enum Error {
118        #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")]
119        InvalidEntryKind { rela_path: BString, actual: EntryKind },
120        #[error("Entry at '{rela_path}' could not be read as symbolic link")]
121        ReadLink { rela_path: BString, source: std::io::Error },
122        #[error("Entry at '{rela_path}' could not be opened for reading or read from")]
123        OpenOrRead { rela_path: BString, source: std::io::Error },
124        #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")]
125        StreamCopy { rela_path: BString, source: std::io::Error },
126        #[error(transparent)]
127        FindObject(#[from] gix_object::find::existing_object::Error),
128        #[error(transparent)]
129        ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error),
130        #[error(transparent)]
131        ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
132        #[error("Memory allocation failed")]
133        OutOfMemory(#[from] TryReserveError),
134    }
135}
136
137/// Conversion
138impl Pipeline {
139    /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`.
140    /// The resulting merge-able data is written into `out`, if it's not too large.
141    /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating
142    /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist
143    /// in the object database.
144    ///
145    /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings,
146    /// and `objects` must be usable if `kind` is a resource in the object database,
147    /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`,
148    /// then a `rela_path` is used to access it on disk.
149    ///
150    /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case
151    /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`.
152    /// This is useful to simplify the calling code as empty buffers signal that nothing is there.
153    ///
154    /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode.
155    /// Only blobs are allowed.
156    ///
157    /// Use `convert` to control what kind of the resource will be produced.
158    #[allow(clippy::too_many_arguments)]
159    pub fn convert_to_mergeable(
160        &mut self,
161        id: &gix_hash::oid,
162        mode: EntryKind,
163        rela_path: &BStr,
164        kind: ResourceKind,
165        attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome),
166        objects: &dyn gix_object::FindObjectOrHeader,
167        convert: Mode,
168        out: &mut Vec<u8>,
169    ) -> Result<Option<Data>, convert_to_mergeable::Error> {
170        if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) {
171            return Err(convert_to_mergeable::Error::InvalidEntryKind {
172                rela_path: rela_path.to_owned(),
173                actual: mode,
174            });
175        }
176
177        out.clear();
178        match self.roots.by_kind(kind) {
179            Some(root) => {
180                self.path.clear();
181                self.path.push(root);
182                self.path.push(gix_path::from_bstr(rela_path));
183                let size_in_bytes = (self.options.large_file_threshold_bytes > 0)
184                    .then(|| {
185                        none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| {
186                            convert_to_mergeable::Error::OpenOrRead {
187                                rela_path: rela_path.to_owned(),
188                                source: err,
189                            }
190                        })
191                    })
192                    .transpose()?;
193                let data = match size_in_bytes {
194                    Some(None) => None, // missing as identified by the size check
195                    Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }),
196                    _ => {
197                        let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
198                            convert_to_mergeable::Error::OpenOrRead {
199                                rela_path: rela_path.to_owned(),
200                                source: err,
201                            }
202                        })?;
203
204                        if let Some(file) = file {
205                            match convert {
206                                Mode::ToGit | Mode::Renormalize => {
207                                    let res = self.filter.convert_to_git(
208                                        file,
209                                        gix_path::from_bstr(rela_path).as_ref(),
210                                        attributes,
211                                        &mut |buf| {
212                                            if convert == Mode::Renormalize {
213                                                Ok(None)
214                                            } else {
215                                                objects.try_find(id, buf).map(|obj| obj.map(|_| ()))
216                                            }
217                                        },
218                                    )?;
219
220                                    match res {
221                                        ToGitOutcome::Unchanged(mut file) => {
222                                            file.read_to_end(out).map_err(|err| {
223                                                convert_to_mergeable::Error::OpenOrRead {
224                                                    rela_path: rela_path.to_owned(),
225                                                    source: err,
226                                                }
227                                            })?;
228                                        }
229                                        ToGitOutcome::Process(mut stream) => {
230                                            stream.read_to_end(out).map_err(|err| {
231                                                convert_to_mergeable::Error::OpenOrRead {
232                                                    rela_path: rela_path.to_owned(),
233                                                    source: err,
234                                                }
235                                            })?;
236                                        }
237                                        ToGitOutcome::Buffer(buf) => {
238                                            out.clear();
239                                            out.try_reserve(buf.len())?;
240                                            out.extend_from_slice(buf);
241                                        }
242                                    }
243                                }
244                            }
245
246                            Some(Data::Buffer)
247                        } else {
248                            None
249                        }
250                    }
251                };
252                Ok(data)
253            }
254            None => {
255                let data = if id.is_null() {
256                    None
257                } else {
258                    let header = objects
259                        .try_header(id)
260                        .map_err(gix_object::find::existing_object::Error::Find)?
261                        .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
262                    let is_binary = self.options.large_file_threshold_bytes > 0
263                        && header.size > self.options.large_file_threshold_bytes;
264                    let data = if is_binary {
265                        Data::TooLarge { size: header.size }
266                    } else {
267                        objects
268                            .try_find(id, out)
269                            .map_err(gix_object::find::existing_object::Error::Find)?
270                            .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
271
272                        if convert == Mode::Renormalize {
273                            {
274                                let res = self
275                                    .filter
276                                    .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;
277
278                                match res {
279                                    ToWorktreeOutcome::Unchanged(_) => {}
280                                    ToWorktreeOutcome::Buffer(src) => {
281                                        out.clear();
282                                        out.try_reserve(src.len())?;
283                                        out.extend_from_slice(src);
284                                    }
285                                    ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
286                                        std::io::copy(&mut stream, out).map_err(|err| {
287                                            convert_to_mergeable::Error::StreamCopy {
288                                                rela_path: rela_path.to_owned(),
289                                                source: err,
290                                            }
291                                        })?;
292                                    }
293                                    ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
294                                        unreachable!("we prohibit this")
295                                    }
296                                };
297                            }
298
299                            let res = self.filter.convert_to_git(
300                                &**out,
301                                &gix_path::from_bstr(rela_path),
302                                attributes,
303                                &mut |_buf| Ok(None),
304                            )?;
305
306                            match res {
307                                ToGitOutcome::Unchanged(_) => {}
308                                ToGitOutcome::Process(mut stream) => {
309                                    stream
310                                        .read_to_end(out)
311                                        .map_err(|err| convert_to_mergeable::Error::OpenOrRead {
312                                            rela_path: rela_path.to_owned(),
313                                            source: err,
314                                        })?;
315                                }
316                                ToGitOutcome::Buffer(buf) => {
317                                    out.clear();
318                                    out.try_reserve(buf.len())?;
319                                    out.extend_from_slice(buf);
320                                }
321                            }
322                        }
323
324                        Data::Buffer
325                    };
326                    Some(data)
327                };
328                Ok(data)
329            }
330        }
331    }
332}
333
/// Turn a `NotFound` I/O error into `Ok(None)`, wrap a success into `Ok(Some(_))`
/// and pass every other error through unchanged.
fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> {
    res.map(Some).or_else(|err| {
        if err.kind() == std::io::ErrorKind::NotFound {
            Ok(None)
        } else {
            Err(err)
        }
    })
}