1use std::ops::Range;
2
3use gix_features::zlib;
4use smallvec::SmallVec;
5
6use crate::{
7 cache, data,
8 data::{delta, file::decode::Error, File},
9};
10
11#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
13#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
14pub enum ResolvedBase {
15 InPack(data::Entry),
17 #[allow(missing_docs)]
20 OutOfPack { kind: gix_object::Kind, end: usize },
21}
22
23#[derive(Debug)]
24struct Delta {
25 data: Range<usize>,
26 base_size: usize,
27 result_size: usize,
28
29 decompressed_size: usize,
30 data_offset: data::Offset,
31}
32
33#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
37#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38pub struct Outcome {
39 pub kind: gix_object::Kind,
41 pub num_deltas: u32,
46 pub decompressed_size: u64,
48 pub compressed_size: usize,
50 pub object_size: u64,
52}
53
54impl Outcome {
55 pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self {
56 Self {
57 kind,
58 num_deltas: 0,
59 decompressed_size: 0,
60 compressed_size: 0,
61 object_size: 0,
62 }
63 }
64 fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
65 Self {
66 kind,
67 num_deltas: 0,
68 decompressed_size: entry.decompressed_size,
69 compressed_size,
70 object_size: entry.decompressed_size,
71 }
72 }
73}
74
75impl File {
77 pub fn decompress_entry(
87 &self,
88 entry: &data::Entry,
89 inflate: &mut zlib::Inflate,
90 out: &mut [u8],
91 ) -> Result<usize, Error> {
92 assert!(
93 out.len() as u64 >= entry.decompressed_size,
94 "output buffer isn't large enough to hold decompressed result, want {}, have {}",
95 entry.decompressed_size,
96 out.len()
97 );
98
99 self.decompress_entry_from_data_offset(entry.data_offset, inflate, out)
100 .map_err(Into::into)
101 }
102
103 pub fn entry(&self, offset: data::Offset) -> Result<data::Entry, data::entry::decode::Error> {
107 let pack_offset: usize = offset.try_into().expect("offset representable by machine");
108 assert!(pack_offset <= self.data.len(), "offset out of bounds");
109
110 let object_data = &self.data[pack_offset..];
111 data::Entry::from_bytes(object_data, offset, self.hash_len)
112 }
113
114 pub(crate) fn decompress_entry_from_data_offset(
120 &self,
121 data_offset: data::Offset,
122 inflate: &mut zlib::Inflate,
123 out: &mut [u8],
124 ) -> Result<usize, zlib::inflate::Error> {
125 let offset: usize = data_offset.try_into().expect("offset representable by machine");
126 assert!(offset < self.data.len(), "entry offset out of bounds");
127
128 inflate.reset();
129 inflate
130 .once(&self.data[offset..], out)
131 .map(|(_status, consumed_in, _consumed_out)| consumed_in)
132 }
133
134 pub(crate) fn decompress_entry_from_data_offset_2(
136 &self,
137 data_offset: data::Offset,
138 inflate: &mut zlib::Inflate,
139 out: &mut [u8],
140 ) -> Result<(usize, usize), zlib::inflate::Error> {
141 let offset: usize = data_offset.try_into().expect("offset representable by machine");
142 assert!(offset < self.data.len(), "entry offset out of bounds");
143
144 inflate.reset();
145 inflate
146 .once(&self.data[offset..], out)
147 .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
148 }
149
150 pub fn decode_entry(
162 &self,
163 entry: data::Entry,
164 out: &mut Vec<u8>,
165 inflate: &mut zlib::Inflate,
166 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
167 delta_cache: &mut dyn cache::DecodeEntry,
168 ) -> Result<Outcome, Error> {
169 use crate::data::entry::Header::*;
170 match entry.header {
171 Tree | Blob | Commit | Tag => {
172 let size: usize = entry.decompressed_size.try_into().map_err(|_| Error::OutOfMemory)?;
173 if let Some(additional) = size.checked_sub(out.len()) {
174 out.try_reserve(additional)?;
175 }
176 out.resize(size, 0);
177 self.decompress_entry(&entry, inflate, out.as_mut_slice())
178 .map(|consumed_input| {
179 Outcome::from_object_entry(
180 entry.header.as_kind().expect("a non-delta entry"),
181 &entry,
182 consumed_input,
183 )
184 })
185 }
186 OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, inflate, out, delta_cache),
187 }
188 }
189
190 fn resolve_deltas(
194 &self,
195 last: data::Entry,
196 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
197 inflate: &mut zlib::Inflate,
198 out: &mut Vec<u8>,
199 cache: &mut dyn cache::DecodeEntry,
200 ) -> Result<Outcome, Error> {
201 let mut chain = SmallVec::<[Delta; 10]>::default();
203 let first_entry = last.clone();
204 let mut cursor = last;
205 let mut base_buffer_size: Option<usize> = None;
206 let mut object_kind: Option<gix_object::Kind> = None;
207 let mut consumed_input: Option<usize> = None;
208
209 let mut total_delta_data_size: u64 = 0;
211 while cursor.header.is_delta() {
212 if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
213 base_buffer_size = Some(out.len());
214 object_kind = Some(kind);
215 if total_delta_data_size == 0 {
218 consumed_input = Some(packed_size);
219 }
220 break;
221 }
222 total_delta_data_size += cursor.decompressed_size;
225 let decompressed_size = cursor
226 .decompressed_size
227 .try_into()
228 .expect("a single delta size small enough to fit a usize");
229 chain.push(Delta {
230 data: Range {
231 start: 0,
232 end: decompressed_size,
233 },
234 base_size: 0,
235 result_size: 0,
236 decompressed_size,
237 data_offset: cursor.data_offset,
238 });
239 use crate::data::entry::Header;
240 cursor = match cursor.header {
241 Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance))?,
242 Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
243 Some(ResolvedBase::InPack(entry)) => entry,
244 Some(ResolvedBase::OutOfPack { end, kind }) => {
245 base_buffer_size = Some(end);
246 object_kind = Some(kind);
247 break;
248 }
249 None => return Err(Error::DeltaBaseUnresolved(base_id)),
250 },
251 _ => unreachable!("cursor.is_delta() only allows deltas here"),
252 };
253 }
254
255 if chain.is_empty() {
258 return Ok(Outcome::from_object_entry(
259 object_kind.expect("object kind as set by cache"),
260 &first_entry,
261 consumed_input.expect("consumed bytes as set by cache"),
262 ));
263 };
264
265 let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");
269
270 let chain_len = chain.len();
271 let (first_buffer_end, second_buffer_end) = {
272 let delta_start = base_buffer_size.unwrap_or(0);
273
274 let delta_range = Range {
275 start: delta_start,
276 end: delta_start + total_delta_data_size,
277 };
278 out.try_reserve(delta_range.end.saturating_sub(out.len()))?;
279 out.resize(delta_range.end, 0);
280
281 let mut instructions = &mut out[delta_range.clone()];
282 let mut relative_delta_start = 0;
283 let mut biggest_result_size = 0;
284 for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
285 let consumed_from_data_offset = self.decompress_entry_from_data_offset(
286 delta.data_offset,
287 inflate,
288 &mut instructions[..delta.decompressed_size],
289 )?;
290 let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
291 if is_last_delta_to_be_applied {
292 consumed_input = Some(consumed_from_data_offset);
293 }
294
295 let (base_size, offset) = delta::decode_header_size(instructions);
296 let mut bytes_consumed_by_header = offset;
297 biggest_result_size = biggest_result_size.max(base_size);
298 delta.base_size = base_size.try_into().expect("base size fits into usize");
299
300 let (result_size, offset) = delta::decode_header_size(&instructions[offset..]);
301 bytes_consumed_by_header += offset;
302 biggest_result_size = biggest_result_size.max(result_size);
303 delta.result_size = result_size.try_into().expect("result size fits into usize");
304
305 delta.data.start = relative_delta_start + bytes_consumed_by_header;
307 relative_delta_start += delta.decompressed_size;
308 delta.data.end = relative_delta_start;
309
310 instructions = &mut instructions[delta.decompressed_size..];
311 }
312
313 let biggest_result_size: usize = biggest_result_size.try_into().map_err(|_| Error::OutOfMemory)?;
317 let first_buffer_size = biggest_result_size;
318 let second_buffer_size = first_buffer_size;
319 let out_size = first_buffer_size + second_buffer_size + total_delta_data_size;
320 out.try_reserve(out_size.saturating_sub(out.len()))?;
321 out.resize(out_size, 0);
322
323 let second_buffer_end = {
326 let end = first_buffer_size + second_buffer_size;
327 if delta_range.start < end {
328 out.copy_within(delta_range, end);
335 } else {
336 let (buffers, instructions) = out.split_at_mut(end);
337 instructions.copy_from_slice(&buffers[delta_range]);
338 }
339 end
340 };
341
342 if base_buffer_size.is_none() {
345 let base_entry = cursor;
346 debug_assert!(!base_entry.header.is_delta());
347 object_kind = base_entry.header.as_kind();
348 self.decompress_entry_from_data_offset(base_entry.data_offset, inflate, out)?;
349 }
350
351 (first_buffer_size, second_buffer_end)
352 };
353
354 let (buffers, instructions) = out.split_at_mut(second_buffer_end);
360 let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
361
362 let mut last_result_size = None;
363 for (
364 delta_idx,
365 Delta {
366 data,
367 base_size,
368 result_size,
369 ..
370 },
371 ) in chain.into_iter().rev().enumerate()
372 {
373 let data = &mut instructions[data];
374 if delta_idx + 1 == chain_len {
375 last_result_size = Some(result_size);
376 }
377 delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
378 std::mem::swap(&mut source_buf, &mut target_buf);
380 }
381
382 let last_result_size = last_result_size.expect("at least one delta chain item");
383 if chain_len % 2 == 1 {
392 target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
394 }
395 debug_assert!(out.len() >= last_result_size);
396 out.truncate(last_result_size);
397
398 let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
399 let consumed_input = consumed_input.expect("at least one decompressed delta object");
400 cache.put(
401 self.id,
402 first_entry.data_offset,
403 out.as_slice(),
404 object_kind,
405 consumed_input,
406 );
407 Ok(Outcome {
408 kind: object_kind,
409 num_deltas: chain_len as u32,
413 decompressed_size: first_entry.decompressed_size,
414 compressed_size: consumed_input,
415 object_size: last_result_size as u64,
416 })
417 }
418}
419
#[cfg(test)]
mod tests {
    use super::*;
    use gix_testtools::size_ok;

    /// `Outcome` is returned for every decoded entry, so growth of its size should be a deliberate choice.
    #[test]
    fn size_of_decode_entry_outcome() {
        let expected = 32;
        let actual = std::mem::size_of::<Outcome>();
        let is_within_limit = size_ok(actual, expected);
        assert!(
            is_within_limit,
            "this shouldn't change without use noticing as it's returned a lot: {actual} <~ {expected}"
        );
    }
}