gix_object/tree/
mod.rs

1use std::{cell::RefCell, cmp::Ordering};
2
3use crate::{
4    bstr::{BStr, BString},
5    tree, Tree, TreeRef,
6};
7
8///
9pub mod editor;
10
11mod ref_iter;
12///
13pub mod write;
14
/// The state needed to apply edits instantly to in-memory trees.
///
/// It's made so that each tree is looked at in the object database at most once, and held in memory for
/// all edits until everything is flushed to write all changed trees.
///
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
/// with some penalties.
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
#[derive(Clone)]
pub struct Editor<'a> {
    /// A way to lookup trees.
    find: &'a dyn crate::FindExt,
    /// The kind of hashes to produce.
    object_hash: gix_hash::Kind,
    /// All trees we currently hold in memory. Each of these may change while adding and removing entries.
    /// Null-object-ids mark tree-entries whose value we don't know yet; they are placeholders that will be
    /// dropped when writing at the latest.
    // NOTE(review): the `BString` key is presumably the path of the tree relative to the root - confirm in `editor`.
    trees: std::collections::HashMap<BString, Tree>,
    /// A buffer to build up paths when finding the tree to edit.
    /// It is wrapped in a `RefCell` so it can be mutated through a shared reference.
    path_buf: RefCell<BString>,
    /// Our buffer for storing tree-data in, right before decoding it.
    tree_buf: Vec<u8>,
}
38
/// The mode of items storable in a tree, similar to the file mode on a unix file system.
///
/// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`].
///
/// Note that even though it can be created from any `u16`, it should be preferable to
/// create it by converting [`EntryKind`] into `EntryMode`.
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EntryMode {
    // Represents the value read from Git, except that "040000" is represented with 0o140000 but
    // "40000" is represented with 0o40000.
    // Keeping the two spellings apart lets the mode be written back with the same amount of digits
    // it was read with. Use `value()` to obtain the mode without this encoding detail.
    internal: u16,
}
52
53impl TryFrom<u32> for tree::EntryMode {
54    type Error = u32;
55    fn try_from(mode: u32) -> Result<Self, Self::Error> {
56        Ok(match mode {
57            0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
58            blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
59            _ => return Err(mode),
60        })
61    }
62}
63
64impl EntryMode {
65    /// Expose the value as u16 (lossy, unlike the internal representation that is hidden).
66    pub const fn value(self) -> u16 {
67        // Demangle the hack: In the case where the second leftmost octet is 4 (Tree), the leftmost bit is
68        // there to represent whether the bytes representation should have 5 or 6 octets.
69        if self.internal & IFMT == 0o140000 {
70            0o040000
71        } else {
72            self.internal
73        }
74    }
75
76    /// Return the representation as used in the git internal format, which is octal and written
77    /// to the `backing` buffer. The respective sub-slice that was written to is returned.
78    pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
79        if self.internal == 0 {
80            std::slice::from_ref(&b'0')
81        } else {
82            for (idx, backing_octet) in backing.iter_mut().enumerate() {
83                let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
84                let oct_mask = 0b111 << bit_pos;
85                let digit = (self.internal & oct_mask) >> bit_pos;
86                *backing_octet = b'0' + digit as u8;
87            }
88            // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
89            if backing[1] == b'4' {
90                if backing[0] == b'1' {
91                    backing[0] = b'0';
92                    &backing[0..6]
93                } else {
94                    &backing[1..6]
95                }
96            } else {
97                &backing[0..6]
98            }
99        }
100        .into()
101    }
102
103    /// Construct an EntryMode from bytes represented as in the git internal format
104    /// Return the mode and the remainder of the bytes.
105    pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
106        let mut mode = 0;
107        let mut idx = 0;
108        let mut space_pos = 0;
109        if i.is_empty() {
110            return None;
111        }
112        // const fn, this is why we can't have nice things (like `.iter().any()`).
113        while idx < i.len() {
114            let b = i[idx];
115            // Delimiter, return what we got
116            if b == b' ' {
117                space_pos = idx;
118                break;
119            }
120            // Not a pure octal input.
121            // Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
122            #[allow(clippy::manual_range_contains)]
123            if b < b'0' || b > b'7' {
124                return None;
125            }
126            // More than 6 octal digits we must have hit the delimiter or the input was malformed.
127            if idx > 6 {
128                return None;
129            }
130            mode = (mode << 3) + (b - b'0') as u16;
131            idx += 1;
132        }
133        // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
134        if mode == 0o040000 && i[0] == b'0' {
135            mode += 0o100000;
136        }
137        Some((Self { internal: mode }, &i[(space_pos + 1)..]))
138    }
139
140    /// Construct an EntryMode from bytes represented as in the git internal format.
141    pub fn from_bytes(i: &[u8]) -> Option<Self> {
142        Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
143    }
144}
145
146impl std::fmt::Debug for EntryMode {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
149    }
150}
151
152impl std::fmt::Octal for EntryMode {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        write!(f, "{}", self.as_bytes(&mut Default::default()))
155    }
156}
157
/// A discretized version of ideal and valid values for entry modes.
///
/// Note that even though it can represent every valid [mode](EntryMode), it might
/// lose information due to that as well.
///
/// Each variant's discriminant is the numeric mode it stands for, which is the value that ends up
/// in an [`EntryMode`] when converting a kind into a mode.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
#[repr(u16)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum EntryKind {
    /// A tree, or directory
    Tree = 0o040000u16,
    /// A file that is not executable
    Blob = 0o100644,
    /// A file that is executable
    BlobExecutable = 0o100755,
    /// A symbolic link
    Link = 0o120000,
    /// A commit of a git submodule
    Commit = 0o160000,
}
177
178impl From<EntryKind> for EntryMode {
179    fn from(value: EntryKind) -> Self {
180        EntryMode { internal: value as u16 }
181    }
182}
183
184impl From<EntryMode> for EntryKind {
185    fn from(value: EntryMode) -> Self {
186        value.kind()
187    }
188}
189
190/// Serialization
191impl EntryKind {
192    /// Return the representation as used in the git internal format.
193    pub fn as_octal_str(&self) -> &'static BStr {
194        use EntryKind::*;
195        let bytes: &[u8] = match self {
196            Tree => b"40000",
197            Blob => b"100644",
198            BlobExecutable => b"100755",
199            Link => b"120000",
200            Commit => b"160000",
201        };
202        bytes.into()
203    }
204}
205
/// The mask for the four most significant bits of a 16 bit mode, which are the ones that tell
/// trees, blobs, symbolic links and commits apart.
const IFMT: u16 = 0o170000;
207
208impl EntryMode {
209    /// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
210    pub const fn kind(&self) -> EntryKind {
211        let etype = self.value() & IFMT;
212        if etype == 0o100000 {
213            if self.value() & 0o000100 == 0o000100 {
214                EntryKind::BlobExecutable
215            } else {
216                EntryKind::Blob
217            }
218        } else if etype == EntryKind::Link as u16 {
219            EntryKind::Link
220        } else if etype == EntryKind::Tree as u16 {
221            EntryKind::Tree
222        } else {
223            EntryKind::Commit
224        }
225    }
226
227    /// Return true if this entry mode represents a Tree/directory
228    pub const fn is_tree(&self) -> bool {
229        self.value() & IFMT == EntryKind::Tree as u16
230    }
231
232    /// Return true if this entry mode represents the commit of a submodule.
233    pub const fn is_commit(&self) -> bool {
234        self.value() & IFMT == EntryKind::Commit as u16
235    }
236
237    /// Return true if this entry mode represents a symbolic link
238    pub const fn is_link(&self) -> bool {
239        self.value() & IFMT == EntryKind::Link as u16
240    }
241
242    /// Return true if this entry mode represents anything BUT Tree/directory
243    pub const fn is_no_tree(&self) -> bool {
244        self.value() & IFMT != EntryKind::Tree as u16
245    }
246
247    /// Return true if the entry is any kind of blob.
248    pub const fn is_blob(&self) -> bool {
249        self.value() & IFMT == 0o100000
250    }
251
252    /// Return true if the entry is an executable blob.
253    pub const fn is_executable(&self) -> bool {
254        matches!(self.kind(), EntryKind::BlobExecutable)
255    }
256
257    /// Return true if the entry is any kind of blob or symlink.
258    pub const fn is_blob_or_symlink(&self) -> bool {
259        matches!(
260            self.kind(),
261            EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link
262        )
263    }
264
265    /// Represent the mode as descriptive string.
266    pub const fn as_str(&self) -> &'static str {
267        use EntryKind::*;
268        match self.kind() {
269            Tree => "tree",
270            Blob => "blob",
271            BlobExecutable => "exe",
272            Link => "link",
273            Commit => "commit",
274        }
275    }
276}
277
278impl TreeRef<'_> {
279    /// Convert this instance into its own version, creating a copy of all data.
280    ///
281    /// This will temporarily allocate an extra copy in memory, so at worst three copies of the tree exist
282    /// at some intermediate point in time. Use [`Self::into_owned()`] to avoid this.
283    pub fn to_owned(&self) -> Tree {
284        self.clone().into()
285    }
286
287    /// Convert this instance into its own version, creating a copy of all data.
288    pub fn into_owned(self) -> Tree {
289        self.into()
290    }
291}
292
/// An element of a [`TreeRef`][crate::TreeRef::entries].
///
/// It borrows its filename and object id, which is why it is cheap to copy.
#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EntryRef<'a> {
    /// The kind of object to which `oid` is pointing.
    pub mode: tree::EntryMode,
    /// The name of the file in the parent tree.
    pub filename: &'a BStr,
    /// The id of the object representing the entry.
    // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase.
    //       Answer: make it 'id', as in `git2`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub oid: &'a gix_hash::oid,
}
307
308impl PartialOrd for EntryRef<'_> {
309    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
310        Some(self.cmp(other))
311    }
312}
313
314impl Ord for EntryRef<'_> {
315    fn cmp(&self, b: &Self) -> Ordering {
316        let a = self;
317        let common = a.filename.len().min(b.filename.len());
318        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
319            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
320            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
321            a.cmp(&b)
322        })
323    }
324}
325
/// An entry in a [`Tree`], similar to an entry in a directory.
///
/// It owns its filename and object id.
#[derive(PartialEq, Eq, Debug, Hash, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Entry {
    /// The kind of object to which `oid` is pointing.
    pub mode: EntryMode,
    /// The name of the file in the parent tree.
    pub filename: BString,
    /// The id of the object representing the entry.
    pub oid: gix_hash::ObjectId,
}
337
338impl PartialOrd for Entry {
339    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
340        Some(self.cmp(other))
341    }
342}
343
344impl Ord for Entry {
345    fn cmp(&self, b: &Self) -> Ordering {
346        let a = self;
347        let common = a.filename.len().min(b.filename.len());
348        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
349            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
350            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
351            a.cmp(&b)
352        })
353    }
354}