// gix_pack/multi_index/init.rs

1use std::path::Path;
2
3use crate::multi_index::{chunk, File, Version};
4
5mod error {
6    use crate::multi_index::chunk;
7
8    /// The error returned by [File::at()][super::File::at()].
9    #[derive(Debug, thiserror::Error)]
10    #[allow(missing_docs)]
11    pub enum Error {
12        #[error("Could not open multi-index file at '{path}'")]
13        Io {
14            source: std::io::Error,
15            path: std::path::PathBuf,
16        },
17        #[error("{message}")]
18        Corrupt { message: &'static str },
19        #[error("Unsupported multi-index version: {version})")]
20        UnsupportedVersion { version: u8 },
21        #[error("Unsupported hash kind: {kind})")]
22        UnsupportedObjectHash { kind: u8 },
23        #[error(transparent)]
24        ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
25        #[error(transparent)]
26        MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
27        #[error(transparent)]
28        FileTooLarge(#[from] gix_chunk::file::index::data_by_kind::Error),
29        #[error("The multi-pack fan doesn't have the correct size of 256 * 4 bytes")]
30        MultiPackFanSize,
31        #[error(transparent)]
32        PackNames(#[from] chunk::index_names::decode::Error),
33        #[error("multi-index chunk {:?} has invalid size: {message}", String::from_utf8_lossy(.id))]
34        InvalidChunkSize { id: gix_chunk::Id, message: &'static str },
35    }
36}
37
38pub use error::Error;
39
40/// Initialization
41impl File {
42    /// Open the multi-index file at the given `path`.
43    pub fn at(path: impl AsRef<Path>) -> Result<Self, Error> {
44        Self::try_from(path.as_ref())
45    }
46}
47
impl TryFrom<&Path> for File {
    type Error = Error;

    /// Parse a multi-pack index by memory-mapping the file at `path` read-only.
    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        // Map the whole file; all subsequent parsing reads from this mmap.
        let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
            source,
            path: path.to_owned(),
        })?;

        const TRAILER_LEN: usize = gix_hash::Kind::shortest().len_in_bytes(); /* trailing hash */
        // Up-front size check: header + chunk table for the 4 mandatory chunks
        // + fanout chunk + trailing checksum. Guards the slicing done below.
        if data.len()
            < Self::HEADER_LEN
                + gix_chunk::file::Index::size_for_entries(4 /*index names, fan, offsets, oids*/)
                + chunk::fanout::SIZE
                + TRAILER_LEN
        {
            return Err(Error::Corrupt {
                message: "multi-index file is truncated and too short",
            });
        }

        // Parse the fixed-size header field by field; each `split_at` shadows
        // `data` with the not-yet-consumed remainder.
        let (version, object_hash, num_chunks, num_indices) = {
            // 4-byte magic signature.
            let (signature, data) = data.split_at(4);
            if signature != Self::SIGNATURE {
                return Err(Error::Corrupt {
                    message: "Invalid signature",
                });
            }
            // 1-byte version; only V1 is understood.
            let (version, data) = data.split_at(1);
            let version = match version[0] {
                1 => Version::V1,
                version => return Err(Error::UnsupportedVersion { version }),
            };

            // 1-byte object-hash kind, validated via gix_hash.
            let (object_hash, data) = data.split_at(1);
            let object_hash = gix_hash::Kind::try_from(object_hash[0])
                .map_err(|unknown| Error::UnsupportedObjectHash { kind: unknown })?;
            // 1-byte count of chunks in the chunk table.
            let (num_chunks, data) = data.split_at(1);
            let num_chunks = num_chunks[0];

            let (_num_base_files, data) = data.split_at(1); // TODO: handle base files once it's clear what this does

            // 4-byte big-endian count of pack indices covered by this file.
            let (num_indices, _) = data.split_at(4);
            let num_indices = crate::read_u32(num_indices);

            (version, object_hash, num_chunks, num_indices)
        };

        // Decode the chunk table right after the header; it locates every chunk below.
        let chunks = gix_chunk::file::Index::from_bytes(&data, Self::HEADER_LEN, u32::from(num_chunks))?;

        // Mandatory chunk: the names of the pack indices this multi-index covers.
        let index_names = chunks.data_by_id(&data, chunk::index_names::ID)?;
        let index_names = chunk::index_names::from_bytes(index_names, num_indices)?;

        // Mandatory chunk: 256-entry fanout; its last entry is the total object count.
        let fan = chunks.data_by_id(&data, chunk::fanout::ID)?;
        let fan = chunk::fanout::from_bytes(fan).ok_or(Error::MultiPackFanSize)?;
        let num_objects = fan[255];

        // Mandatory chunks: object-id lookup table and pack offsets. Only their
        // byte ranges are validated here; data is sliced lazily on access.
        // The outer `?` is for a missing chunk, the inner for our size validation.
        let lookup = chunks.validated_usize_offset_by_id(chunk::lookup::ID, |offset| {
            chunk::lookup::is_valid(&offset, object_hash, num_objects)
                .then_some(offset)
                .ok_or(Error::InvalidChunkSize {
                    id: chunk::lookup::ID,
                    message: "The chunk with alphabetically ordered object ids doesn't have the correct size",
                })
        })??;
        let offsets = chunks.validated_usize_offset_by_id(chunk::offsets::ID, |offset| {
            chunk::offsets::is_valid(&offset, num_objects)
                .then_some(offset)
                .ok_or(Error::InvalidChunkSize {
                    id: chunk::offsets::ID,
                    message: "The chunk with offsets into the pack doesn't have the correct size",
                })
        })??;
        // Optional chunk: large offsets (presumably 64-bit offsets for oversized
        // packs — confirm in chunk::large_offsets). `.ok().transpose()` turns
        // "chunk missing" into `None` while still propagating a bad size.
        let large_offsets = chunks
            .validated_usize_offset_by_id(chunk::large_offsets::ID, |offset| {
                chunk::large_offsets::is_valid(&offset)
                    .then_some(offset)
                    .ok_or(Error::InvalidChunkSize {
                        id: chunk::large_offsets::ID,
                        message: "The chunk with large offsets into the pack doesn't have the correct size",
                    })
            })
            .ok()
            .transpose()?;

        // Everything past the highest chunk offset must be exactly one checksum
        // of the configured hash kind — no more, no less.
        let checksum_offset = chunks.highest_offset() as usize;
        let trailer = &data[checksum_offset..];
        if trailer.len() != object_hash.len_in_bytes() {
            return Err(Error::Corrupt {
                message:
                    "Trailing checksum didn't have the expected size or there were unknown bytes after the checksum.",
            });
        }

        Ok(File {
            data,
            path: path.to_owned(),
            version,
            hash_len: object_hash.len_in_bytes(),
            object_hash,
            fan,
            index_names,
            lookup_ofs: lookup.start,
            offsets_ofs: offsets.start,
            large_offsets_ofs: large_offsets.map(|r| r.start),
            num_objects,
            num_indices,
        })
    }
}
157}