gix_odb/store_impls/dynamic/
find.rs

1use std::ops::Deref;
2
3use gix_pack::cache::DecodeEntry;
4
5use crate::store::{handle, load_index};
6
7pub(crate) mod error {
8    use crate::{loose, pack};
9
10    /// Returned by [`Handle::try_find()`][gix_pack::Find::try_find()]
11    #[derive(thiserror::Error, Debug)]
12    #[allow(missing_docs)]
13    pub enum Error {
14        #[error("An error occurred while obtaining an object from the loose object store")]
15        Loose(#[from] loose::find::Error),
16        #[error("An error occurred while obtaining an object from the packed object store")]
17        Pack(#[from] pack::data::decode::Error),
18        #[error(transparent)]
19        LoadIndex(#[from] crate::store::load_index::Error),
20        #[error(transparent)]
21        LoadPack(#[from] std::io::Error),
22        #[error(transparent)]
23        EntryType(#[from] gix_pack::data::entry::decode::Error),
24        #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)]
25        DeltaBaseRecursionLimit {
26            /// the maximum recursion depth we encountered.
27            max_depth: usize,
28            /// The original object to lookup
29            id: gix_hash::ObjectId,
30        },
31        #[error("The base object {} could not be found but is required to decode {}", .base_id, .id)]
32        DeltaBaseMissing {
33            /// the id of the base object which failed to lookup
34            base_id: gix_hash::ObjectId,
35            /// The original object to lookup
36            id: gix_hash::ObjectId,
37        },
38        #[error("An error occurred when looking up a ref delta base object {} to decode {}", .base_id, .id)]
39        DeltaBaseLookup {
40            #[source]
41            err: Box<Self>,
42            /// the id of the base object which failed to lookup
43            base_id: gix_hash::ObjectId,
44            /// The original object to lookup
45            id: gix_hash::ObjectId,
46        },
47    }
48
49    #[derive(Copy, Clone)]
50    pub(crate) struct DeltaBaseRecursion<'a> {
51        pub depth: usize,
52        pub original_id: &'a gix_hash::oid,
53    }
54
55    impl<'a> DeltaBaseRecursion<'a> {
56        pub fn new(id: &'a gix_hash::oid) -> Self {
57            Self {
58                original_id: id,
59                depth: 0,
60            }
61        }
62        pub fn inc_depth(mut self) -> Self {
63            self.depth += 1;
64            self
65        }
66    }
67
68    #[cfg(test)]
69    mod tests {
70        use super::*;
71
72        #[test]
73        fn error_size() {
74            let actual = std::mem::size_of::<Error>();
75            assert!(actual <= 88, "{actual} <= 88: should not grow without us noticing");
76        }
77    }
78}
79pub use error::Error;
80use gix_features::zlib;
81
82use crate::store::types::PackId;
83
84impl<S> super::Handle<S>
85where
86    S: Deref<Target = super::Store> + Clone,
87{
88    fn try_find_cached_inner<'a, 'b>(
89        &'b self,
90        mut id: &'b gix_hash::oid,
91        buffer: &'a mut Vec<u8>,
92        inflate: &mut zlib::Inflate,
93        pack_cache: &mut dyn DecodeEntry,
94        snapshot: &mut load_index::Snapshot,
95        recursion: Option<error::DeltaBaseRecursion<'_>>,
96    ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, Error> {
97        if let Some(r) = recursion {
98            if r.depth >= self.max_recursion_depth {
99                return Err(Error::DeltaBaseRecursionLimit {
100                    max_depth: self.max_recursion_depth,
101                    id: r.original_id.to_owned(),
102                });
103            }
104        } else if !self.ignore_replacements {
105            if let Ok(pos) = self
106                .store
107                .replacements
108                .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id))
109            {
110                id = self.store.replacements[pos].1.as_ref();
111            }
112        }
113
114        'outer: loop {
115            {
116                let marker = snapshot.marker;
117                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
118                    if let Some(handle::index_lookup::Outcome {
119                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
120                        index_file,
121                        pack: possibly_pack,
122                    }) = index.lookup(id)
123                    {
124                        let pack = match possibly_pack {
125                            Some(pack) => pack,
126                            None => match self.store.load_pack(pack_id, marker)? {
127                                Some(pack) => {
128                                    *possibly_pack = Some(pack);
129                                    possibly_pack.as_deref().expect("just put it in")
130                                }
131                                None => {
132                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
133                                    match self.store.load_one_index(self.refresh, snapshot.marker)? {
134                                        Some(new_snapshot) => {
135                                            *snapshot = new_snapshot;
136                                            self.clear_cache();
137                                            continue 'outer;
138                                        }
139                                        None => {
140                                            // nothing new in the index, kind of unexpected to not have a pack but to also
141                                            // to have no new index yet. We set the new index before removing any slots, so
142                                            // this should be observable.
143                                            return Ok(None);
144                                        }
145                                    }
146                                }
147                            },
148                        };
149                        let entry = pack.entry(pack_offset)?;
150                        let header_size = entry.header_size();
151                        let res = pack.decode_entry(
152                            entry,
153                            buffer,
154                            inflate,
155                            &|id, _out| {
156                                let pack_offset = index_file.pack_offset_by_id(id)?;
157                                pack.entry(pack_offset)
158                                    .ok()
159                                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
160                            },
161                            pack_cache,
162                        );
163                        let res = match res {
164                            Ok(r) => Ok((
165                                gix_object::Data {
166                                    kind: r.kind,
167                                    data: buffer.as_slice(),
168                                },
169                                Some(gix_pack::data::entry::Location {
170                                    pack_id: pack.id,
171                                    pack_offset,
172                                    entry_size: r.compressed_size + header_size,
173                                }),
174                            )),
175                            Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => {
176                                // Only with multi-pack indices it's allowed to jump to refer to other packs within this
177                                // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit.
178                                // However, if we somehow end up with that, we will resolve it safely, even though we could
179                                // avoid handling this case and error instead.
180
181                                // Since this is a special case, we just allocate here to make it work. It's an actual delta-ref object
182                                // which is sent by some servers that points to an object outside of the pack we are looking
183                                // at right now. With the complexities of loading packs, we go into recursion here. Git itself
184                                // doesn't do a cycle check, and we won't either but limit the recursive depth.
185                                // The whole ordeal isn't as efficient as it could be due to memory allocation and
186                                // later mem-copying when trying again.
187                                let mut buf = Vec::new();
188                                let obj_kind = self
189                                    .try_find_cached_inner(
190                                        &base_id,
191                                        &mut buf,
192                                        inflate,
193                                        pack_cache,
194                                        snapshot,
195                                        recursion
196                                            .map(error::DeltaBaseRecursion::inc_depth)
197                                            .or_else(|| error::DeltaBaseRecursion::new(id).into()),
198                                    )
199                                    .map_err(|err| Error::DeltaBaseLookup {
200                                        err: Box::new(err),
201                                        base_id,
202                                        id: id.to_owned(),
203                                    })?
204                                    .ok_or_else(|| Error::DeltaBaseMissing {
205                                        base_id,
206                                        id: id.to_owned(),
207                                    })?
208                                    .0
209                                    .kind;
210                                let handle::index_lookup::Outcome {
211                                    object_index:
212                                        handle::IndexForObjectInPack {
213                                            pack_id: _,
214                                            pack_offset,
215                                        },
216                                    index_file,
217                                    pack: possibly_pack,
218                                } = match snapshot.indices[idx].lookup(id) {
219                                    Some(res) => res,
220                                    None => {
221                                        let mut out = None;
222                                        for index in &mut snapshot.indices {
223                                            out = index.lookup(id);
224                                            if out.is_some() {
225                                                break;
226                                            }
227                                        }
228
229                                        out.unwrap_or_else(|| {
230                                           panic!("could not find object {id} in any index after looking up one of its base objects {base_id}" )
231                                       })
232                                    }
233                                };
234                                let pack = possibly_pack
235                                    .as_ref()
236                                    .expect("pack to still be available like just now");
237                                let entry = pack.entry(pack_offset)?;
238                                let header_size = entry.header_size();
239                                pack.decode_entry(
240                                    entry,
241                                    buffer,
242                                    inflate,
243                                    &|id, out| {
244                                        index_file
245                                            .pack_offset_by_id(id)
246                                            .and_then(|pack_offset| {
247                                                pack.entry(pack_offset)
248                                                    .ok()
249                                                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
250                                            })
251                                            .or_else(|| {
252                                                (id == base_id).then(|| {
253                                                    out.resize(buf.len(), 0);
254                                                    out.copy_from_slice(buf.as_slice());
255                                                    gix_pack::data::decode::entry::ResolvedBase::OutOfPack {
256                                                        kind: obj_kind,
257                                                        end: out.len(),
258                                                    }
259                                                })
260                                            })
261                                    },
262                                    pack_cache,
263                                )
264                                .map(move |r| {
265                                    (
266                                        gix_object::Data {
267                                            kind: r.kind,
268                                            data: buffer.as_slice(),
269                                        },
270                                        Some(gix_pack::data::entry::Location {
271                                            pack_id: pack.id,
272                                            pack_offset,
273                                            entry_size: r.compressed_size + header_size,
274                                        }),
275                                    )
276                                })
277                            }
278                            Err(err) => Err(err),
279                        }?;
280
281                        if idx != 0 {
282                            snapshot.indices.swap(0, idx);
283                        }
284                        return Ok(Some(res));
285                    }
286                }
287            }
288
289            for lodb in snapshot.loose_dbs.iter() {
290                // TODO: remove this double-lookup once the borrow checker allows it.
291                if lodb.contains(id) {
292                    return lodb
293                        .try_find(id, buffer)
294                        .map(|obj| obj.map(|obj| (obj, None)))
295                        .map_err(Into::into);
296                }
297            }
298
299            match self.store.load_one_index(self.refresh, snapshot.marker)? {
300                Some(new_snapshot) => {
301                    *snapshot = new_snapshot;
302                    self.clear_cache();
303                }
304                None => return Ok(None),
305            }
306        }
307    }
308
309    pub(crate) fn clear_cache(&self) {
310        self.packed_object_count.borrow_mut().take();
311    }
312}
313
314impl<S> gix_pack::Find for super::Handle<S>
315where
316    S: Deref<Target = super::Store> + Clone,
317{
318    // TODO: probably make this method fallible, but that would mean its own error type.
319    fn contains(&self, id: &gix_hash::oid) -> bool {
320        let mut snapshot = self.snapshot.borrow_mut();
321        loop {
322            for (idx, index) in snapshot.indices.iter().enumerate() {
323                if index.contains(id) {
324                    if idx != 0 {
325                        snapshot.indices.swap(0, idx);
326                    }
327                    return true;
328                }
329            }
330
331            for lodb in snapshot.loose_dbs.iter() {
332                if lodb.contains(id) {
333                    return true;
334                }
335            }
336
337            match self.store.load_one_index(self.refresh, snapshot.marker) {
338                Ok(Some(new_snapshot)) => {
339                    *snapshot = new_snapshot;
340                    self.clear_cache();
341                }
342                Ok(None) => return false, // nothing more to load, or our refresh mode doesn't allow disk refreshes
343                Err(_) => return false, // something went wrong, nothing we can communicate here with this trait. TODO: Maybe that should change?
344            }
345        }
346    }
347
348    fn try_find_cached<'a>(
349        &self,
350        id: &gix_hash::oid,
351        buffer: &'a mut Vec<u8>,
352        pack_cache: &mut dyn DecodeEntry,
353    ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, gix_object::find::Error> {
354        let mut snapshot = self.snapshot.borrow_mut();
355        let mut inflate = self.inflate.borrow_mut();
356        self.try_find_cached_inner(id, buffer, &mut inflate, pack_cache, &mut snapshot, None)
357            .map_err(|err| Box::new(err) as _)
358    }
359
360    fn location_by_oid(&self, id: &gix_hash::oid, buf: &mut Vec<u8>) -> Option<gix_pack::data::entry::Location> {
361        assert!(
362            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
363            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
364        );
365
366        assert!(self.store_ref().replacements.is_empty() || self.ignore_replacements, "Everything related to packing must not use replacements. These are not used here, but it should be turned off for good measure.");
367
368        let mut snapshot = self.snapshot.borrow_mut();
369        let mut inflate = self.inflate.borrow_mut();
370        'outer: loop {
371            {
372                let marker = snapshot.marker;
373                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
374                    if let Some(handle::index_lookup::Outcome {
375                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
376                        index_file: _,
377                        pack: possibly_pack,
378                    }) = index.lookup(id)
379                    {
380                        let pack = match possibly_pack {
381                            Some(pack) => pack,
382                            None => match self.store.load_pack(pack_id, marker).ok()? {
383                                Some(pack) => {
384                                    *possibly_pack = Some(pack);
385                                    possibly_pack.as_deref().expect("just put it in")
386                                }
387                                None => {
388                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
389                                    match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
390                                        Some(new_snapshot) => {
391                                            *snapshot = new_snapshot;
392                                            self.clear_cache();
393                                            continue 'outer;
394                                        }
395                                        None => {
396                                            // nothing new in the index, kind of unexpected to not have a pack but to also
397                                            // to have no new index yet. We set the new index before removing any slots, so
398                                            // this should be observable.
399                                            return None;
400                                        }
401                                    }
402                                }
403                            },
404                        };
405                        let entry = pack.entry(pack_offset).ok()?;
406
407                        buf.resize(entry.decompressed_size.try_into().expect("representable size"), 0);
408                        assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match");
409
410                        let res = pack
411                            .decompress_entry(&entry, &mut inflate, buf)
412                            .ok()
413                            .map(|entry_size_past_header| gix_pack::data::entry::Location {
414                                pack_id: pack.id,
415                                pack_offset,
416                                entry_size: entry.header_size() + entry_size_past_header,
417                            });
418
419                        if idx != 0 {
420                            snapshot.indices.swap(0, idx);
421                        }
422                        return res;
423                    }
424                }
425            }
426
427            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
428                Some(new_snapshot) => {
429                    *snapshot = new_snapshot;
430                    self.clear_cache();
431                }
432                None => return None,
433            }
434        }
435    }
436
437    fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(u64, gix_hash::ObjectId)>> {
438        assert!(
439            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
440            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
441        );
442        let pack_id = PackId::from_intrinsic_pack_id(pack_id);
443        loop {
444            let snapshot = self.snapshot.borrow();
445            {
446                for index in &snapshot.indices {
447                    if let Some(iter) = index.iter(pack_id) {
448                        return Some(iter.map(|e| (e.pack_offset, e.oid)).collect());
449                    }
450                }
451            }
452
453            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
454                Some(new_snapshot) => {
455                    drop(snapshot);
456                    *self.snapshot.borrow_mut() = new_snapshot;
457                }
458                None => return None,
459            }
460        }
461    }
462
463    fn entry_by_location(&self, location: &gix_pack::data::entry::Location) -> Option<gix_pack::find::Entry> {
464        assert!(
465            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
466            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
467        );
468        let pack_id = PackId::from_intrinsic_pack_id(location.pack_id);
469        let mut snapshot = self.snapshot.borrow_mut();
470        let marker = snapshot.marker;
471        loop {
472            {
473                for index in &mut snapshot.indices {
474                    if let Some(possibly_pack) = index.pack(pack_id) {
475                        let pack = match possibly_pack {
476                            Some(pack) => pack,
477                            None => {
478                                let pack = self.store.load_pack(pack_id, marker).ok()?.expect(
479                                "BUG: pack must exist from previous call to location_by_oid() and must not be unloaded",
480                            );
481                                *possibly_pack = Some(pack);
482                                possibly_pack.as_deref().expect("just put it in")
483                            }
484                        };
485                        return pack
486                            .entry_slice(location.entry_range(location.pack_offset))
487                            .map(|data| gix_pack::find::Entry {
488                                data: data.to_owned(),
489                                version: pack.version(),
490                            });
491                    }
492                }
493            }
494
495            snapshot.indices.insert(
496                0,
497                self.store
498                    .index_by_id(pack_id, marker)
499                    .expect("BUG: index must always be present, must not be unloaded or overwritten"),
500            );
501        }
502    }
503}
504
505impl<S> gix_object::Find for super::Handle<S>
506where
507    S: Deref<Target = super::Store> + Clone,
508    Self: gix_pack::Find,
509{
510    fn try_find<'a>(
511        &self,
512        id: &gix_hash::oid,
513        buffer: &'a mut Vec<u8>,
514    ) -> Result<Option<gix_object::Data<'a>>, gix_object::find::Error> {
515        gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0))
516    }
517}
518
519impl<S> gix_object::FindHeader for super::Handle<S>
520where
521    S: Deref<Target = super::Store> + Clone,
522{
523    fn try_header(&self, id: &gix_hash::oid) -> Result<Option<gix_object::Header>, gix_object::find::Error> {
524        let mut snapshot = self.snapshot.borrow_mut();
525        let mut inflate = self.inflate.borrow_mut();
526        self.try_header_inner(id, &mut inflate, &mut snapshot, None)
527            .map(|maybe_header| {
528                maybe_header.map(|hdr| gix_object::Header {
529                    kind: hdr.kind(),
530                    size: hdr.size(),
531                })
532            })
533            .map_err(|err| Box::new(err) as _)
534    }
535}
536
537impl<S> gix_object::Exists for super::Handle<S>
538where
539    S: Deref<Target = super::Store> + Clone,
540    Self: gix_pack::Find,
541{
542    fn exists(&self, id: &gix_hash::oid) -> bool {
543        gix_pack::Find::contains(self, id)
544    }
545}