1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
//! a pack data file
use std::{convert::TryInto, path::Path};

/// The offset to an entry into the pack data file, relative to its beginning.
pub type Offset = u64;

/// An identifier to uniquely identify all packs loaded within a known context or namespace.
pub type Id = u32;

use memmap2::Mmap;

/// An representing an full- or delta-object within a pack
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Entry {
    /// The entry's header
    pub header: entry::Header,
    /// The decompressed size of the entry in bytes.
    ///
    /// Note that for non-delta entries this will be the size of the object itself.
    pub decompressed_size: u64,
    /// absolute offset to compressed object data in the pack, just behind the entry's header
    pub data_offset: Offset,
}

mod file;
pub use file::{decode, verify, Header};
///
pub mod header;

///
pub mod init {
    pub use super::header::decode::Error;
}

///
pub mod entry;

///
#[cfg(feature = "streaming-input")]
pub mod input;

/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
#[cfg(feature = "generate")]
pub mod output;

/// A slice into a pack file denoting a pack entry.
///
/// An entry can be decoded into an object.
pub type EntryRange = std::ops::Range<Offset>;

/// Supported versions of a pack data file
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(missing_docs)]
pub enum Version {
    #[default]
    V2,
    V3,
}

/// A pack data file
pub struct File {
    data: Mmap,
    path: std::path::PathBuf,
    /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
    /// The same location on disk should yield the same id.
    ///
    /// These must be unique per pack and must be stable, that is they don't change if the pack doesn't change.
    /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways.
    ///
    /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met.
    pub id: Id,
    version: Version,
    num_objects: u32,
    /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use
    /// based on their configuration.
    hash_len: usize,
    object_hash: gix_hash::Kind,
}

/// Information about the pack data file itself
impl File {
    /// The pack data version of this file
    pub fn version(&self) -> Version {
        self.version
    }
    /// The number of objects stored in this pack data file
    pub fn num_objects(&self) -> u32 {
        self.num_objects
    }
    /// The length of all mapped data, including the pack header and the pack trailer
    pub fn data_len(&self) -> usize {
        self.data.len()
    }
    /// The kind of hash we use internally.
    pub fn object_hash(&self) -> gix_hash::Kind {
        self.object_hash
    }
    /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash.
    pub fn pack_end(&self) -> usize {
        self.data.len() - self.hash_len
    }

    /// The path to the pack data file on disk
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Returns the pack data at the given slice if its range is contained in the mapped pack data
    pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> {
        let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize");
        let entry_start = slice.start as usize;
        self.data.get(entry_start..entry_end)
    }

    /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data.
    ///
    /// _Note:_ finding the right size is only possible by decompressing
    /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file.
    ///
    /// # Panics
    ///
    /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data.
    pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 {
        let pack_offset: usize = pack_offset.try_into().expect("pack_size fits into usize");
        gix_features::hash::crc32(&self.data[pack_offset..pack_offset + size])
    }
}

pub(crate) mod delta;