// gix_merge/blob/pipeline.rs
1use super::{Pipeline, ResourceKind};
2use bstr::BStr;
3use gix_filter::driver::apply::{Delay, MaybeDelayed};
4use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome};
5use gix_object::tree::EntryKind;
6use std::io::Read;
7use std::path::{Path, PathBuf};
8
/// Options for use in a [`Pipeline`].
#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
pub struct Options {
    /// The amount of bytes that an object has to reach before being treated as binary.
    /// These objects will not be queried, nor will their data be processed in any way.
    /// If `0`, no file is ever considered binary due to its size.
    ///
    /// Note that for files stored in `git`, what counts is their stored, decompressed size,
    /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets
    /// them.
    /// However, if they are to be retrieved from the worktree, the worktree size is what matters,
    /// even though that also might be a `git-lfs` file which is small in Git.
    pub large_file_threshold_bytes: u64,
}
23
/// The specific way to convert a resource.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Mode {
    /// Prepare resources as they are stored in `git`.
    ///
    /// This is naturally the case when object-ids are used, but a conversion is needed
    /// when data is read from a worktree.
    #[default]
    ToGit,
    /// For sources that are object-ids, convert them to what *would* be stored in the worktree,
    /// and back to what *would* be stored in Git.
    ///
    /// Sources that are located in a worktree are merely converted to what *would* be stored in Git.
    ///
    /// This is useful to prevent merge conflicts due to inconsistent whitespace.
    Renormalize,
}
41
/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree.
#[derive(Clone, Debug, Default)]
pub struct WorktreeRoots {
    /// The worktree root where the current (or our) version of the resource is present.
    pub current_root: Option<PathBuf>,
    /// The worktree root where the other (or their) version of the resource is present.
    pub other_root: Option<PathBuf>,
    /// The worktree root containing the resource of the common ancestor of our and their version.
    pub common_ancestor_root: Option<PathBuf>,
}
52
53impl WorktreeRoots {
54 /// Return the root path for the given `kind`
55 pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
56 match kind {
57 ResourceKind::CurrentOrOurs => self.current_root.as_deref(),
58 ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(),
59 ResourceKind::OtherOrTheirs => self.other_root.as_deref(),
60 }
61 }
62
63 /// Return `true` if all worktree roots are unset.
64 pub fn is_unset(&self) -> bool {
65 self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none()
66 }
67}
68
69/// Lifecycle
70impl Pipeline {
71 /// Create a new instance of a pipeline which produces blobs suitable for merging.
72 ///
73 /// `roots` allow to read worktree files directly, and `worktree_filter` is used
74 /// to transform object database data directly.
75 /// `options` are used to further configure the way we act.
76 pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self {
77 Pipeline {
78 roots,
79 filter: worktree_filter,
80 options,
81 path: Default::default(),
82 }
83 }
84}
85
/// Access
// NOTE(review): intentionally empty - reserved for future accessor methods.
impl Pipeline {}
88
/// Data as returned by [`Pipeline::convert_to_mergeable()`].
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Data {
    /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`].
    Buffer,
    /// The file or blob is above the big-file threshold and cannot be processed.
    ///
    /// In this state, the file cannot be merged.
    TooLarge {
        /// The size of the object prior to performing any filtering or as it was found on disk.
        ///
        /// Note that technically, the size isn't always representative of the same 'state' of the
        /// content, as at times it is the size of the blob in git, and at times it's the size of the file
        /// in the worktree - both can differ a lot depending on filters.
        size: u64,
    },
}
106
/// Types related to [`Pipeline::convert_to_mergeable()`](super::Pipeline::convert_to_mergeable()).
pub mod convert_to_mergeable {
    use std::collections::TryReserveError;

    use bstr::BString;
    use gix_object::tree::EntryKind;

    /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()).
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")]
        InvalidEntryKind { rela_path: BString, actual: EntryKind },
        #[error("Entry at '{rela_path}' could not be read as symbolic link")]
        ReadLink { rela_path: BString, source: std::io::Error },
        #[error("Entry at '{rela_path}' could not be opened for reading or read from")]
        OpenOrRead { rela_path: BString, source: std::io::Error },
        #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")]
        StreamCopy { rela_path: BString, source: std::io::Error },
        #[error(transparent)]
        FindObject(#[from] gix_object::find::existing_object::Error),
        #[error(transparent)]
        ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error),
        #[error(transparent)]
        ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
        #[error("Memory allocation failed")]
        OutOfMemory(#[from] TryReserveError),
    }
}
136
/// Conversion
impl Pipeline {
    /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`.
    /// The resulting merge-able data is written into `out`, if it's not too large.
    /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating
    /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist
    /// in the object database.
    ///
    /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings,
    /// and `objects` must be usable if `kind` is a resource in the object database,
    /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`,
    /// then a `rela_path` is used to access it on disk.
    ///
    /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case
    /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`.
    /// This is useful to simplify the calling code as empty buffers signal that nothing is there.
    ///
    /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode.
    /// Only blobs are allowed.
    ///
    /// Use `convert` to control what kind of the resource will be produced.
    #[allow(clippy::too_many_arguments)]
    pub fn convert_to_mergeable(
        &mut self,
        id: &gix_hash::oid,
        mode: EntryKind,
        rela_path: &BStr,
        kind: ResourceKind,
        attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome),
        objects: &dyn gix_object::FindObjectOrHeader,
        convert: Mode,
        out: &mut Vec<u8>,
    ) -> Result<Option<Data>, convert_to_mergeable::Error> {
        // Only (possibly executable) blobs can be merged - trees, commits and links are rejected up-front.
        if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) {
            return Err(convert_to_mergeable::Error::InvalidEntryKind {
                rela_path: rela_path.to_owned(),
                actual: mode,
            });
        }

        out.clear();
        match self.roots.by_kind(kind) {
            // A worktree root exists for this resource kind - read the file from disk.
            Some(root) => {
                // Reuse the scratch path buffer to build `<root>/<rela_path>`.
                self.path.clear();
                self.path.push(root);
                self.path.push(gix_path::from_bstr(rela_path));
                // Probe the on-disk size only if a large-file threshold is configured.
                // After `transpose()`: `None` = probe skipped, `Some(None)` = file missing,
                // `Some(Some(size))` = file exists with that size.
                let size_in_bytes = (self.options.large_file_threshold_bytes > 0)
                    .then(|| {
                        none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| {
                            convert_to_mergeable::Error::OpenOrRead {
                                rela_path: rela_path.to_owned(),
                                source: err,
                            }
                        })
                    })
                    .transpose()?;
                let data = match size_in_bytes {
                    Some(None) => None, // missing as identified by the size check
                    Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }),
                    _ => {
                        // A missing file at this point (no probe configured, or the file vanished
                        // since the probe) maps to `None` rather than an error.
                        let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
                            convert_to_mergeable::Error::OpenOrRead {
                                rela_path: rela_path.to_owned(),
                                source: err,
                            }
                        })?;

                        if let Some(file) = file {
                            match convert {
                                // Both modes run worktree-to-git filters on worktree files; they differ
                                // only in the object-lookup closure below.
                                Mode::ToGit | Mode::Renormalize => {
                                    let res = self.filter.convert_to_git(
                                        file,
                                        gix_path::from_bstr(rela_path).as_ref(),
                                        attributes,
                                        // When renormalizing, report 'no object' so the filter treats the
                                        // content as new; otherwise hand the filter the stored object
                                        // (used for its round-trip safety check, presumably - confirm
                                        // against gix-filter docs).
                                        &mut |buf| {
                                            if convert == Mode::Renormalize {
                                                Ok(None)
                                            } else {
                                                objects.try_find(id, buf).map(|obj| obj.map(|_| ()))
                                            }
                                        },
                                    )?;

                                    match res {
                                        // No filter applied - stream the raw file into `out`.
                                        ToGitOutcome::Unchanged(mut file) => {
                                            file.read_to_end(out).map_err(|err| {
                                                convert_to_mergeable::Error::OpenOrRead {
                                                    rela_path: rela_path.to_owned(),
                                                    source: err,
                                                }
                                            })?;
                                        }
                                        // A filter process produced a stream - drain it into `out`.
                                        ToGitOutcome::Process(mut stream) => {
                                            stream.read_to_end(out).map_err(|err| {
                                                convert_to_mergeable::Error::OpenOrRead {
                                                    rela_path: rela_path.to_owned(),
                                                    source: err,
                                                }
                                            })?;
                                        }
                                        // The filter produced an in-memory buffer - copy it, with a
                                        // fallible reservation so huge inputs surface as `OutOfMemory`.
                                        ToGitOutcome::Buffer(buf) => {
                                            out.clear();
                                            out.try_reserve(buf.len())?;
                                            out.extend_from_slice(buf);
                                        }
                                    }
                                }
                            }

                            Some(Data::Buffer)
                        } else {
                            None
                        }
                    }
                };
                Ok(data)
            }
            // No worktree root - the resource comes from the object database.
            None => {
                // A null id is the caller's way of saying 'not present'.
                let data = if id.is_null() {
                    None
                } else {
                    // Query only the header first so over-threshold blobs can be rejected
                    // without ever loading their data.
                    let header = objects
                        .try_header(id)
                        .map_err(gix_object::find::existing_object::Error::Find)?
                        .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
                    let is_binary = self.options.large_file_threshold_bytes > 0
                        && header.size > self.options.large_file_threshold_bytes;
                    let data = if is_binary {
                        Data::TooLarge { size: header.size }
                    } else {
                        objects
                            .try_find(id, out)
                            .map_err(gix_object::find::existing_object::Error::Find)?
                            .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;

                        if convert == Mode::Renormalize {
                            {
                                // Renormalize in two steps: first convert the stored blob to its
                                // worktree form (in place in `out`)…
                                let res = self
                                    .filter
                                    .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;

                                match res {
                                    ToWorktreeOutcome::Unchanged(_) => {}
                                    ToWorktreeOutcome::Buffer(src) => {
                                        out.clear();
                                        out.try_reserve(src.len())?;
                                        out.extend_from_slice(src);
                                    }
                                    ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
                                        std::io::copy(&mut stream, out).map_err(|err| {
                                            convert_to_mergeable::Error::StreamCopy {
                                                rela_path: rela_path.to_owned(),
                                                source: err,
                                            }
                                        })?;
                                    }
                                    // `Delay::Forbid` was passed above, so delayed output cannot occur.
                                    ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
                                        unreachable!("we prohibit this")
                                    }
                                };
                            }

                            // …then convert back to the git form, which applies the normalization.
                            // The object-lookup closure reports 'no object', as above for Renormalize.
                            let res = self.filter.convert_to_git(
                                &**out,
                                &gix_path::from_bstr(rela_path),
                                attributes,
                                &mut |_buf| Ok(None),
                            )?;

                            match res {
                                ToGitOutcome::Unchanged(_) => {}
                                ToGitOutcome::Process(mut stream) => {
                                    stream
                                        .read_to_end(out)
                                        .map_err(|err| convert_to_mergeable::Error::OpenOrRead {
                                            rela_path: rela_path.to_owned(),
                                            source: err,
                                        })?;
                                }
                                ToGitOutcome::Buffer(buf) => {
                                    out.clear();
                                    out.try_reserve(buf.len())?;
                                    out.extend_from_slice(buf);
                                }
                            }
                        }

                        Data::Buffer
                    };
                    Some(data)
                };
                Ok(data)
            }
        }
    }
}
333
/// Map a `NotFound` I/O error to `Ok(None)`, pass any success through as `Some`,
/// and propagate every other error unchanged.
fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> {
    res.map(Some).or_else(|err| {
        if err.kind() == std::io::ErrorKind::NotFound {
            Ok(None)
        } else {
            Err(err)
        }
    })
}