gix_pack/data/entry/
header.rs

1use std::io;
2
3use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
4use crate::data;
5
6/// The header portion of a pack data entry, identifying the kind of stored object.
7#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
8#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
9#[allow(missing_docs)]
10pub enum Header {
11    /// The object is a commit
12    Commit,
13    /// The object is a tree
14    Tree,
15    /// The object is a blob
16    Blob,
17    /// The object is a tag
18    Tag,
19    /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field
20    /// which is found within the parent repository.
21    /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not
22    /// part of the pack but expected to be present in the receivers repository.
23    ///
24    /// # Note
25    /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to
26    /// happen.
27    ///
28    /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**.
29    RefDelta { base_id: gix_hash::ObjectId },
30    /// Describes a delta-object present in this pack which acts as base for this object.
31    /// The base object is measured as a distance from this objects
32    /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance`
33    ///
34    /// # Note
35    ///
36    /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**.
37    OfsDelta { base_distance: u64 },
38}
39
40impl Header {
41    /// Subtract `distance` from `pack_offset` safely without the chance for overflow or no-ops if `distance` is 0.
42    pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> {
43        if distance == 0 {
44            return None;
45        }
46        pack_offset.checked_sub(distance)
47    }
48    /// Convert the header's object kind into [`gix_object::Kind`] if possible
49    pub fn as_kind(&self) -> Option<gix_object::Kind> {
50        use gix_object::Kind::*;
51        Some(match self {
52            Header::Tree => Tree,
53            Header::Blob => Blob,
54            Header::Commit => Commit,
55            Header::Tag => Tag,
56            Header::RefDelta { .. } | Header::OfsDelta { .. } => return None,
57        })
58    }
59    /// Convert this header's object kind into the packs internal representation
60    pub fn as_type_id(&self) -> u8 {
61        use Header::*;
62        match self {
63            Blob => BLOB,
64            Tree => TREE,
65            Commit => COMMIT,
66            Tag => TAG,
67            OfsDelta { .. } => OFS_DELTA,
68            RefDelta { .. } => REF_DELTA,
69        }
70    }
71    /// Return's true if this is a delta object, i.e. not a full object.
72    pub fn is_delta(&self) -> bool {
73        matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. })
74    }
75    /// Return's true if this is a base object, i.e. not a delta object.
76    pub fn is_base(&self) -> bool {
77        !self.is_delta()
78    }
79}
80
81impl Header {
82    /// Encode this header along the given `decompressed_size_in_bytes` into the `out` write stream for use within a data pack.
83    ///
84    /// Returns the amount of bytes written to `out`.
85    /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents
86    pub fn write_to(&self, decompressed_size_in_bytes: u64, out: &mut dyn io::Write) -> io::Result<usize> {
87        let mut size = decompressed_size_in_bytes;
88        let mut written = 1;
89        let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111);
90        size >>= 4;
91        while size != 0 {
92            out.write_all(&[c | 0b1000_0000])?;
93            written += 1;
94            c = size as u8 & 0b0111_1111;
95            size >>= 7;
96        }
97        out.write_all(&[c])?;
98
99        use Header::*;
100        match self {
101            RefDelta { base_id: oid } => {
102                out.write_all(oid.as_slice())?;
103                written += oid.as_slice().len();
104            }
105            OfsDelta { base_distance } => {
106                let mut buf = [0u8; 10];
107                let buf = leb64_encode(*base_distance, &mut buf);
108                out.write_all(buf)?;
109                written += buf.len();
110            }
111            Blob | Tree | Commit | Tag => {}
112        }
113        Ok(written)
114    }
115
116    /// The size of the header in bytes when serialized
117    pub fn size(&self, decompressed_size: u64) -> usize {
118        self.write_to(decompressed_size, &mut io::sink())
119            .expect("io::sink() to never fail")
120    }
121}
122
/// Encode `n` into the tail of `buf` and return the sub-slice holding the encoded bytes.
///
/// The last byte holds the lowest seven bits of `n` without the high bit set. Each byte in front of it
/// has the high bit set and holds seven bits of what remains after shifting right by seven and subtracting one.
#[inline]
fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
    // `cursor` is the index of the first byte that is part of the encoding, moving towards the front.
    let mut cursor = buf.len() - 1;
    buf[cursor] = n as u8 & 0b0111_1111;
    while cursor > 0 {
        n >>= 7;
        if n == 0 {
            break;
        }
        n -= 1;
        cursor -= 1;
        buf[cursor] = 0b1000_0000 | (n as u8 & 0b0111_1111);
    }
    debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
    &buf[cursor..]
}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// The largest 64 bit value must occupy the entire 10 byte buffer.
    #[test]
    fn leb64_encode_max_int() {
        let mut storage = [0u8; 10];
        let encoded_len = leb64_encode(u64::MAX, &mut storage).len();
        assert_eq!(encoded_len, 10, "10 bytes should be used when 64bits are encoded");
    }
}