gix_pack/data/input/
lookup_ref_delta_objects.rs

1use gix_hash::ObjectId;
2
3use crate::data::{entry::Header, input};
4
5/// An iterator to resolve thin packs on the fly.
6pub struct LookupRefDeltaObjectsIter<I, Find> {
7    /// The inner iterator whose entries we will resolve.
8    pub inner: I,
9    lookup: Find,
10    /// The cached delta to provide next time we are called, it's the delta to go with the base we just resolved in its place.
11    next_delta: Option<input::Entry>,
12    /// Fuse to stop iteration after first missing object.
13    error: bool,
14    /// The overall pack-offset we accumulated thus far. Each inserted entry offsets all following
15    /// objects by its length. We need to determine exactly where the object was inserted to see if its affected at all.
16    inserted_entry_length_at_offset: Vec<Change>,
17    /// The sum of all entries added so far, as a cache to avoid recomputation
18    inserted_entries_length_in_bytes: i64,
19    buf: Vec<u8>,
20}
21
22impl<I, Find> LookupRefDeltaObjectsIter<I, Find>
23where
24    I: Iterator<Item = Result<input::Entry, input::Error>>,
25    Find: gix_object::Find,
26{
27    /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
28    /// for ref deltas seen while traversing `iter`.
29    pub fn new(iter: I, lookup: Find) -> Self {
30        LookupRefDeltaObjectsIter {
31            inner: iter,
32            lookup,
33            error: false,
34            inserted_entry_length_at_offset: Vec::new(),
35            inserted_entries_length_in_bytes: 0,
36            next_delta: None,
37            buf: Vec::new(),
38        }
39    }
40
41    fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
42        let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
43        new_ofs.try_into().expect("offset value is never becomes negative")
44    }
45
46    /// positive `size_change` values mean an object grew or was more commonly, was inserted. Negative values
47    /// mean the object shrunk, usually because there header changed from ref-deltas to ofs deltas.
48    fn track_change(&mut self, shifted_pack_offset: u64, pack_offset: u64, size_change: i64, oid: Option<ObjectId>) {
49        if size_change == 0 {
50            return;
51        }
52        self.inserted_entry_length_at_offset.push(Change {
53            shifted_pack_offset,
54            pack_offset,
55            size_change_in_bytes: size_change,
56            oid: oid.unwrap_or_else(||
57                // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
58                gix_hash::Kind::Sha1.null()),
59        });
60        self.inserted_entries_length_in_bytes += size_change;
61    }
62
63    fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
64        let pack_offset = entry.pack_offset;
65        entry.pack_offset = self.shifted_pack_offset(pack_offset);
66        entry.header = Header::OfsDelta { base_distance };
67        let previous_header_size = entry.header_size;
68        entry.header_size = entry.header.size(entry.decompressed_size) as u16;
69
70        let change = i64::from(entry.header_size) - i64::from(previous_header_size);
71        entry.crc32 = Some(entry.compute_crc32());
72        self.track_change(entry.pack_offset, pack_offset, change, None);
73    }
74}
75
76impl<I, Find> Iterator for LookupRefDeltaObjectsIter<I, Find>
77where
78    I: Iterator<Item = Result<input::Entry, input::Error>>,
79    Find: gix_object::Find,
80{
81    type Item = Result<input::Entry, input::Error>;
82
83    fn next(&mut self) -> Option<Self::Item> {
84        if self.error {
85            return None;
86        }
87        if let Some(delta) = self.next_delta.take() {
88            return Some(Ok(delta));
89        }
90        match self.inner.next() {
91            Some(Ok(mut entry)) => match entry.header {
92                Header::RefDelta { base_id } => {
93                    match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
94                        None => {
95                            let base_entry = match self.lookup.try_find(&base_id, &mut self.buf).ok()? {
96                                Some(obj) => {
97                                    let current_pack_offset = entry.pack_offset;
98                                    let mut entry = match input::Entry::from_data_obj(&obj, 0) {
99                                        Ok(e) => e,
100                                        Err(err) => return Some(Err(err)),
101                                    };
102                                    entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
103                                    self.track_change(
104                                        entry.pack_offset,
105                                        current_pack_offset,
106                                        entry.bytes_in_pack() as i64,
107                                        Some(base_id),
108                                    );
109                                    entry
110                                }
111                                None => {
112                                    self.error = true;
113                                    return Some(Err(input::Error::NotFound { object_id: base_id }));
114                                }
115                            };
116
117                            {
118                                self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
119                                self.next_delta = Some(entry);
120                            }
121                            Some(Ok(base_entry))
122                        }
123                        Some(base_entry) => {
124                            let base_distance =
125                                self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
126                            self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
127                            Some(Ok(entry))
128                        }
129                    }
130                }
131                _ => {
132                    if self.inserted_entries_length_in_bytes != 0 {
133                        if let Header::OfsDelta { base_distance } = entry.header {
134                            // We have to find the new distance based on the previous distance to the base, using the absolute
135                            // pack offset computed from it as stored in `base_pack_offset`.
136                            let base_pack_offset = entry
137                                .pack_offset
138                                .checked_sub(base_distance)
139                                .expect("distance to be in range of pack");
140                            match self
141                                .inserted_entry_length_at_offset
142                                .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
143                            {
144                                Ok(index) => {
145                                    let index = {
146                                        let maybe_index_of_actual_entry = index + 1;
147                                        self.inserted_entry_length_at_offset
148                                            .get(maybe_index_of_actual_entry)
149                                            .and_then(|c| {
150                                                (c.pack_offset == base_pack_offset)
151                                                    .then_some(maybe_index_of_actual_entry)
152                                            })
153                                            .unwrap_or(index)
154                                    };
155                                    let new_distance = self
156                                        .shifted_pack_offset(entry.pack_offset)
157                                        .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
158                                        .expect("a base that is behind us in the pack");
159                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
160                                }
161                                Err(index) => {
162                                    let change_since_offset = self.inserted_entry_length_at_offset[index..]
163                                        .iter()
164                                        .map(|c| c.size_change_in_bytes)
165                                        .sum::<i64>();
166                                    let new_distance: u64 = {
167                                        (base_distance as i64 + change_since_offset)
168                                            .try_into()
169                                            .expect("it still points behind us")
170                                    };
171                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
172                                }
173                            }
174                        } else {
175                            // Offset this entry by all changes (positive or negative) that we saw thus far.
176                            entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
177                        }
178                    }
179                    Some(Ok(entry))
180                }
181            },
182            other => other,
183        }
184    }
185
186    fn size_hint(&self) -> (usize, Option<usize>) {
187        let (min, max) = self.inner.size_hint();
188        max.map_or_else(|| (min * 2, None), |max| (min, Some(max * 2)))
189    }
190}
191
192#[derive(Debug)]
193struct Change {
194    /// The original pack offset as mentioned in the entry we saw. This is used to find this as base object if deltas refer to it by
195    /// old offset.
196    pack_offset: u64,
197    /// The new pack offset that is the shifted location of the pack entry in the pack.
198    shifted_pack_offset: u64,
199    /// The size change of the entry header, negative values denote shrinking, positive denote growing.
200    size_change_in_bytes: i64,
201    /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion.
202    oid: ObjectId,
203}