1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
use std::io::Write;

use gix_hash::ObjectId;

use crate::{data, data::output, find};

///
#[allow(clippy::empty_docs)]
pub mod iter_from_counts;
pub use iter_from_counts::function::iter_from_counts;

/// The kind of pack entry to be written.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Kind {
    /// A complete base object, including its kind.
    Base(gix_object::Kind),
    /// A delta against the object with the given index. The index always belongs to an object that was already
    /// encountered, so that the delta only refers to an object we have written already.
    DeltaRef {
        /// The absolute index to the object to serve as base. It's up to the writer to maintain enough state to allow producing
        /// a packed delta object from it.
        object_index: usize,
    },
    /// A delta against the given object as identified by its `ObjectId`.
    /// This is the case for thin packs only, i.e. those that are sent over the wire.
    /// Note that an `ObjectId` could also be used to refer to an object within the same pack,
    /// but that is a discontinued practice which won't be encountered here.
    DeltaOid {
        /// The object serving as base for this delta.
        id: ObjectId,
    },
}

/// The error returned by [`output::Entry::from_data()`] and [`output::Entry::from_pack_entry()`].
#[allow(missing_docs)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// An I/O error occurred while compressing object data with the zlib deflate stream,
    /// as done by [`output::Entry::from_data()`].
    #[error("{0}")]
    ZlibDeflate(#[from] std::io::Error),
    /// The pack entry could not be decoded from its raw bytes,
    /// as attempted by [`output::Entry::from_pack_entry()`].
    #[error(transparent)]
    EntryType(#[from] crate::data::entry::decode::Error),
}

impl output::Entry {
    /// Create a sentinel entry that is easily identified as invalid, see [`Self::is_invalid()`].
    ///
    /// It stands in for objects that didn't exist even though they were referred to.
    pub fn invalid() -> output::Entry {
        output::Entry {
            // NOTE: the actual object hash used in the repo doesn't matter here, this is a sentinel value.
            id: gix_hash::Kind::Sha1.null(),
            kind: Kind::Base(gix_object::Kind::Blob),
            decompressed_size: 0,
            compressed_data: vec![],
        }
    }

    /// Returns true if this object doesn't really exist but still has to be handled responsibly.
    ///
    /// Note that this is true for tree entries that are commits/git submodules, or for objects which aren't present in our local clone
    /// due to shallow clones.
    pub fn is_invalid(&self) -> bool {
        self.id.is_null()
    }

    /// Create an Entry from a previously counted object which is located in a pack. Its `entry` is provided here.
    /// The `target_version` specifies what kind of target `Entry` version the caller desires.
    ///
    /// * `count` provides the object id and, for offset deltas, the location of `entry` within its pack.
    /// * `potential_bases` is searched by pack offset using a binary search, so it has to be sorted by pack offset.
    ///   If the base of an offset delta is found there, the result refers to it by its index plus `bases_index_offset`.
    /// * `pack_offset_to_oid(pack_id, base_offset)` is consulted only if the base of an offset delta wasn't found in
    ///   `potential_bases`. If it yields an object id, the result is a delta referring to its base by that id.
    ///
    /// Returns `None` if the entry can't be reused as is, which is the case if
    ///
    /// * the version of `entry` differs from `target_version`,
    /// * `entry` is a ref-delta,
    /// * `entry` is an offset delta whose base is neither part of `potential_bases` nor resolved by `pack_offset_to_oid`.
    ///
    /// Returns `Some(Err(_))` if the pack entry couldn't be decoded from the data of `entry`.
    ///
    /// # Panics
    ///
    /// * If `entry` is an offset delta and `count`, or a probed item of `potential_bases`, has no pack location.
    /// * If the base distance of an offset delta is larger than the pack offset of `count`.
    /// * If the offset at which the compressed data starts isn't representable as `usize` or lies beyond
    ///   the end of the data of `entry`.
    pub fn from_pack_entry(
        mut entry: find::Entry,
        count: &output::Count,
        potential_bases: &[output::Count],
        bases_index_offset: usize,
        pack_offset_to_oid: Option<impl FnMut(u32, u64) -> Option<ObjectId>>,
        target_version: data::Version,
    ) -> Option<Result<Self, Error>> {
        if entry.version != target_version {
            return None;
        }

        let pack_offset_must_be_zero = 0;
        let pack_entry = match data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len())
        {
            Ok(e) => e,
            Err(err) => return Some(Err(err.into())),
        };

        use crate::data::entry::Header::*;
        match pack_entry.header {
            Commit => Some(output::entry::Kind::Base(gix_object::Kind::Commit)),
            Tree => Some(output::entry::Kind::Base(gix_object::Kind::Tree)),
            Blob => Some(output::entry::Kind::Base(gix_object::Kind::Blob)),
            Tag => Some(output::entry::Kind::Base(gix_object::Kind::Tag)),
            OfsDelta { base_distance } => {
                let pack_location = count.entry_pack_location.as_ref().expect("packed");
                let base_offset = pack_location
                    .pack_offset
                    .checked_sub(base_distance)
                    .expect("pack-offset - distance is firmly within the pack");
                // Prefer referring to a base by index, and fall back to referring to it by object id.
                potential_bases
                    .binary_search_by(|e| {
                        e.entry_pack_location
                            .as_ref()
                            .expect("packed")
                            .pack_offset
                            .cmp(&base_offset)
                    })
                    .ok()
                    .map(|idx| output::entry::Kind::DeltaRef {
                        object_index: idx + bases_index_offset,
                    })
                    .or_else(|| {
                        pack_offset_to_oid
                            .and_then(|mut f| f(pack_location.pack_id, base_offset))
                            .map(|id| output::entry::Kind::DeltaOid { id })
                    })
            }
            RefDelta { base_id: _ } => None, // ref deltas are for thin packs or legacy, repack them as base objects
        }
        .map(|kind| {
            Ok(output::Entry {
                id: count.id.to_owned(),
                kind,
                decompressed_size: pack_entry.decompressed_size as usize,
                compressed_data: {
                    // Strip the entry header in place so that only the compressed data remains,
                    // reusing the allocation of `entry.data`.
                    let header_len =
                        usize::try_from(pack_entry.data_offset).expect("offset representable as usize");
                    entry.data.drain(..header_len);
                    entry.data
                },
            })
        })
    }

    /// Create a new instance from the id of `count` and its corresponding git object data `obj`,
    /// which is compressed with zlib to serve as a base object.
    ///
    /// # Errors
    ///
    /// Any I/O error produced while writing to or flushing the deflate stream is returned
    /// as [`Error::ZlibDeflate`].
    pub fn from_data(count: &output::Count, obj: &gix_object::Data<'_>) -> Result<Self, Error> {
        let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
        // Propagate write errors exactly like flush errors instead of panicking on unexpected error kinds.
        out.write_all(obj.data)?;
        out.flush()?;
        Ok(output::Entry {
            id: count.id.to_owned(),
            kind: Kind::Base(obj.kind),
            decompressed_size: obj.data.len(),
            compressed_data: out.into_inner(),
        })
    }

    /// Transform ourselves into pack entry header of `version` which can be written into a pack.
    ///
    /// `index_to_base_distance(object_index) -> base_distance` is a function to convert the base object's index into
    /// the distance that is stored as `base_distance` in the resulting offset-delta header.
    /// It is only called if this entry is a [`Kind::DeltaRef`], and the information it needs
    /// is known to the one calling the method.
    ///
    /// # Panics
    ///
    /// If `version` is not [`data::Version::V2`].
    pub fn to_entry_header(
        &self,
        version: data::Version,
        index_to_base_distance: impl FnOnce(usize) -> u64,
    ) -> data::entry::Header {
        assert!(
            matches!(version, data::Version::V2),
            "we can only write V2 pack entries for now"
        );

        use Kind::*;
        match self.kind {
            Base(kind) => {
                use gix_object::Kind::*;
                match kind {
                    Tree => data::entry::Header::Tree,
                    Blob => data::entry::Header::Blob,
                    Commit => data::entry::Header::Commit,
                    Tag => data::entry::Header::Tag,
                }
            }
            DeltaOid { id } => data::entry::Header::RefDelta { base_id: id },
            DeltaRef { object_index } => data::entry::Header::OfsDelta {
                base_distance: index_to_base_distance(object_index),
            },
        }
    }
}