gix_object/tree/
ref_iter.rs

1use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};
2use bstr::BStr;
3use winnow::{error::ParserError, prelude::*};
4
5impl<'a> TreeRefIter<'a> {
6    /// Instantiate an iterator from the given tree data.
7    pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> {
8        TreeRefIter { data }
9    }
10
11    /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
12    /// until the last component is looked up and its tree entry is returned.
13    ///
14    /// # Performance Notes
15    ///
16    /// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible
17    /// to reuse a vector and use a binary search instead, which might be able to improve performance over all.
18    /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
19    pub fn lookup_entry<I, P>(
20        &self,
21        odb: impl crate::Find,
22        buffer: &'a mut Vec<u8>,
23        path: I,
24    ) -> Result<Option<tree::Entry>, crate::find::Error>
25    where
26        I: IntoIterator<Item = P>,
27        P: PartialEq<BStr>,
28    {
29        buffer.clear();
30
31        let mut path = path.into_iter().peekable();
32        buffer.extend_from_slice(self.data);
33        while let Some(component) = path.next() {
34            match TreeRefIter::from_bytes(buffer)
35                .filter_map(Result::ok)
36                .find(|entry| component.eq(entry.filename))
37            {
38                Some(entry) => {
39                    if path.peek().is_none() {
40                        return Ok(Some(entry.into()));
41                    } else {
42                        let next_id = entry.oid.to_owned();
43                        let obj = odb.try_find(&next_id, buffer)?;
44                        let Some(obj) = obj else { return Ok(None) };
45                        if !obj.kind.is_tree() {
46                            return Ok(None);
47                        }
48                    }
49                }
50                None => return Ok(None),
51            }
52        }
53        Ok(None)
54    }
55
56    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
57    /// a path relative to this tree.
58    /// `odb` and `buffer` are used to lookup intermediate trees.
59    ///
60    /// # Note
61    ///
62    /// If any path component contains illformed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
63    /// the returned component will be empty which makes the lookup fail.
64    pub fn lookup_entry_by_path(
65        &self,
66        odb: impl crate::Find,
67        buffer: &'a mut Vec<u8>,
68        relative_path: impl AsRef<std::path::Path>,
69    ) -> Result<Option<tree::Entry>, crate::find::Error> {
70        use crate::bstr::ByteSlice;
71        self.lookup_entry(
72            odb,
73            buffer,
74            relative_path.as_ref().components().map(|c: std::path::Component<'_>| {
75                gix_path::os_str_into_bstr(c.as_os_str())
76                    .unwrap_or_else(|_| "".into())
77                    .as_bytes()
78            }),
79        )
80    }
81}
82
83impl<'a> TreeRef<'a> {
84    /// Deserialize a Tree from `data`.
85    pub fn from_bytes(mut data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> {
86        let input = &mut data;
87        match decode::tree.parse_next(input) {
88            Ok(tag) => Ok(tag),
89            Err(err) => Err(crate::decode::Error::with_err(err, input)),
90        }
91    }
92
93    /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search.
94    ///
95    /// Note that it's impossible to binary search by name alone as the sort order is special.
96    pub fn bisect_entry(&self, name: &BStr, is_dir: bool) -> Option<EntryRef<'a>> {
97        static NULL_HASH: gix_hash::ObjectId = gix_hash::Kind::shortest().null();
98
99        let search = EntryRef {
100            mode: if is_dir {
101                tree::EntryKind::Tree
102            } else {
103                tree::EntryKind::Blob
104            }
105            .into(),
106            filename: name,
107            oid: &NULL_HASH,
108        };
109        self.entries
110            .binary_search_by(|e| e.cmp(&search))
111            .ok()
112            .map(|idx| self.entries[idx])
113    }
114
115    /// Create an instance of the empty tree.
116    ///
117    /// It's particularly useful as static part of a program.
118    pub const fn empty() -> TreeRef<'static> {
119        TreeRef { entries: Vec::new() }
120    }
121}
122
123impl<'a> TreeRefIter<'a> {
124    /// Consume self and return all parsed entries.
125    pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> {
126        self.collect()
127    }
128
129    /// Return the offset in bytes that our data advanced from `buf`, the original buffer
130    /// to the beginning of the data of the tree.
131    ///
132    /// Then the tree-iteration can be resumed at the entry that would otherwise be returned next.
133    pub fn offset_to_next_entry(&self, buf: &[u8]) -> usize {
134        let before = (*buf).as_ptr();
135        let after = (*self.data).as_ptr();
136
137        debug_assert!(
138            before <= after,
139            "`TreeRefIter::offset_to_next_entry(): {after:?} <= {before:?}) violated"
140        );
141        (after as usize - before as usize) / std::mem::size_of::<u8>()
142    }
143}
144
145impl<'a> Iterator for TreeRefIter<'a> {
146    type Item = Result<EntryRef<'a>, crate::decode::Error>;
147
148    fn next(&mut self) -> Option<Self::Item> {
149        if self.data.is_empty() {
150            return None;
151        }
152        match decode::fast_entry(self.data) {
153            Some((data_left, entry)) => {
154                self.data = data_left;
155                Some(Ok(entry))
156            }
157            None => {
158                let failing = self.data;
159                self.data = &[];
160                #[allow(clippy::unit_arg)]
161                Some(Err(crate::decode::Error::with_err(
162                    winnow::error::ErrMode::from_error_kind(&failing, winnow::error::ErrorKind::Verify),
163                    failing,
164                )))
165            }
166        }
167    }
168}
169
170impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
171    type Error = &'a [u8];
172
173    fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
174        mode_from_decimal(mode)
175            .map(|(mode, _rest)| tree::EntryMode(mode as u16))
176            .ok_or(mode)
177    }
178}
179
180fn mode_from_decimal(i: &[u8]) -> Option<(u32, &[u8])> {
181    let mut mode = 0u32;
182    let mut spacer_pos = 1;
183    for b in i.iter().take_while(|b| **b != b' ') {
184        if *b < b'0' || *b > b'7' {
185            return None;
186        }
187        mode = (mode << 3) + u32::from(b - b'0');
188        spacer_pos += 1;
189    }
190    if i.len() < spacer_pos {
191        return None;
192    }
193    let (_, i) = i.split_at(spacer_pos);
194    Some((mode, i))
195}
196
197impl TryFrom<u32> for tree::EntryMode {
198    type Error = u32;
199
200    fn try_from(mode: u32) -> Result<Self, Self::Error> {
201        Ok(match mode {
202            0o40000 | 0o120000 | 0o160000 => tree::EntryMode(mode as u16),
203            blob_mode if blob_mode & 0o100000 == 0o100000 => tree::EntryMode(mode as u16),
204            _ => return Err(mode),
205        })
206    }
207}
208
209mod decode {
210    use bstr::ByteSlice;
211    use winnow::{error::ParserError, prelude::*};
212
213    use crate::{
214        tree,
215        tree::{ref_iter::mode_from_decimal, EntryRef},
216        TreeRef,
217    };
218
219    pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
220        let (mode, i) = mode_from_decimal(i)?;
221        let mode = tree::EntryMode::try_from(mode).ok()?;
222        let (filename, i) = i.split_at(i.find_byte(0)?);
223        let i = &i[1..];
224        const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
225        let (oid, i) = match i.len() {
226            len if len < HASH_LEN_FIXME => return None,
227            _ => i.split_at(20),
228        };
229        Some((
230            i,
231            EntryRef {
232                mode,
233                filename: filename.as_bstr(),
234                oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"),
235            },
236        ))
237    }
238
239    pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<TreeRef<'a>, E> {
240        let mut out = Vec::new();
241        let mut i = &**i;
242        while !i.is_empty() {
243            let Some((rest, entry)) = fast_entry(i) else {
244                #[allow(clippy::unit_arg)]
245                return Err(winnow::error::ErrMode::from_error_kind(
246                    &i,
247                    winnow::error::ErrorKind::Verify,
248                ));
249            };
250            i = rest;
251            out.push(entry);
252        }
253        Ok(TreeRef { entries: out })
254    }
255}