gix_pack/multi_index/chunk.rs

/// Information for the chunk storing the names of the pack indices that the multi-pack index refers to.
pub mod index_names {
    use std::path::{Path, PathBuf};

    use gix_object::bstr::{BString, ByteSlice};

    /// The ID of the pack-names chunk.
    pub const ID: gix_chunk::Id = *b"PNAM";

    /// Decoding of the pack-names chunk.
    pub mod decode {
        use gix_object::bstr::BString;

        /// The error returned when decoding the pack-names chunk.
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            #[error("The pack names were not ordered alphabetically.")]
            NotOrderedAlphabetically,
            #[error("Each pack path name must be terminated with a null byte")]
            MissingNullByte,
            #[error("Couldn't turn path '{path}' into OS path due to encoding issues")]
            PathEncoding { path: BString },
            #[error("non-padding bytes found after all paths were read.")]
            UnknownTrailerBytes,
        }
    }

    /// Parse `num_packs` null-terminated index names from `chunk`, expecting any remaining bytes to be zero padding.
    pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> {
        let mut out = Vec::new();
        for _ in 0..num_packs {
            let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?;

            let path = &chunk[..null_byte_pos];
            let path = gix_path::try_from_byte_slice(path)
                .map_err(|_| decode::Error::PathEncoding {
                    path: BString::from(path),
                })?
                .to_owned();

            if let Some(previous) = out.last() {
                if previous >= &path {
                    return Err(decode::Error::NotOrderedAlphabetically);
                }
            }
            out.push(path);

            chunk = &chunk[null_byte_pos + 1..];
        }

        // Everything after the last name must be zero padding up to the chunk alignment.
        if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) {
            return Err(decode::Error::UnknownTrailerBytes);
        }
        Ok(out)
    }

    /// Return the amount of bytes needed to store the given `paths`, including padding to the chunk alignment.
    pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 {
        let mut count = 0u64;
        for path in paths {
            let path = path.as_ref();
            let ascii_path = path.to_str().expect("UTF-8 compatible paths");
            assert!(
                ascii_path.is_ascii(),
                "must use ascii bytes for correct size computation"
            );
            count += (ascii_path.len() + 1) as u64;
        }

        // Pad to the next multiple of CHUNK_ALIGNMENT, unless the count is already aligned.
        let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT);
        if needed_alignment < CHUNK_ALIGNMENT {
            count += needed_alignment;
        }
        count
    }

    /// Write the null-terminated `paths` to `out`, followed by zero padding to the chunk alignment.
    pub fn write(
        paths: impl IntoIterator<Item = impl AsRef<Path>>,
        out: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        let mut written_bytes = 0;
        for path in paths {
            let path = path.as_ref().to_str().expect("UTF-8 path");
            out.write_all(path.as_bytes())?;
            out.write_all(&[0])?;
            written_bytes += path.len() as u64 + 1;
        }

        let needed_alignment = CHUNK_ALIGNMENT - (written_bytes % CHUNK_ALIGNMENT);
        if needed_alignment < CHUNK_ALIGNMENT {
            let padding = [0u8; CHUNK_ALIGNMENT as usize];
            out.write_all(&padding[..needed_alignment as usize])?;
        }
        Ok(())
    }

    const CHUNK_ALIGNMENT: u64 = 4;
}
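// A minimal round-trip sketch (test-only; the file names below are made up for illustration):
// names written with `index_names::write()` should occupy `index_names::storage_size()` bytes
// and parse back unchanged with `index_names::from_bytes()`.
#[cfg(test)]
mod index_names_round_trip {
    use std::path::PathBuf;

    #[test]
    fn write_then_parse() -> Result<(), Box<dyn std::error::Error>> {
        let paths = [PathBuf::from("pack-a.idx"), PathBuf::from("pack-b.idx")];
        let mut buf = Vec::<u8>::new();
        super::index_names::write(paths.iter(), &mut buf)?;
        assert_eq!(buf.len() as u64, super::index_names::storage_size(paths.iter()));

        let parsed = super::index_names::from_bytes(&buf, paths.len() as u32)?;
        assert_eq!(parsed, paths);
        Ok(())
    }
}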

/// Information for the chunk with the fanout table over object id first bytes.
pub mod fanout {
    use crate::multi_index;

    /// The size of the fanout table in bytes.
    pub const SIZE: usize = 4 * 256;

    /// The ID of the fanout-table chunk.
    pub const ID: gix_chunk::Id = *b"OIDF";

    /// Decode the fanout table from `chunk`, or return `None` if the chunk has an unexpected size.
    pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> {
        if chunk.len() != SIZE {
            return None;
        }
        let mut out = [0; 256];
        for (c, f) in chunk.chunks_exact(4).zip(out.iter_mut()) {
            *f = u32::from_be_bytes(c.try_into().unwrap());
        }
        out.into()
    }

    pub(crate) fn write(
        sorted_entries: &[multi_index::write::Entry],
        out: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        let fanout = crate::index::encode::fanout(&mut sorted_entries.iter().map(|e| e.id.first_byte()));

        for value in fanout.iter() {
            out.write_all(&value.to_be_bytes())?;
        }
        Ok(())
    }
}
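// A small sketch (test-only, with made-up numbers) of how the fanout table narrows an object
// lookup: entry N holds the number of objects whose first byte is <= N, so objects starting
// with byte B live in the index range fanout[B - 1]..fanout[B] (or 0..fanout[0] for B == 0).
#[cfg(test)]
mod fanout_usage {
    #[test]
    fn decode_and_narrow() {
        // A fanout table for three objects which all share the first byte 0x01.
        let mut chunk = Vec::with_capacity(super::fanout::SIZE);
        for first_byte in 0u32..256 {
            let cumulative_count = if first_byte >= 1 { 3u32 } else { 0 };
            chunk.extend_from_slice(&cumulative_count.to_be_bytes());
        }

        let table = super::fanout::from_bytes(&chunk).expect("chunk has the expected size");
        let first_byte = 0x01usize;
        let start = if first_byte == 0 { 0 } else { table[first_byte - 1] };
        let end = table[first_byte];
        assert_eq!(start..end, 0..3);
    }
}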

/// Information for the chunk with the sorted object id lookup table.
pub mod lookup {
    use std::ops::Range;

    use crate::multi_index;

    /// The ID of the object-id lookup chunk.
    pub const ID: gix_chunk::Id = *b"OIDL";

    /// Return the amount of bytes needed to store `entries` object ids of the given `object_hash` kind.
    pub fn storage_size(entries: usize, object_hash: gix_hash::Kind) -> u64 {
        (entries * object_hash.len_in_bytes()) as u64
    }

    pub(crate) fn write(
        sorted_entries: &[multi_index::write::Entry],
        out: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        for entry in sorted_entries {
            out.write_all(entry.id.as_slice())?;
        }
        Ok(())
    }

    /// Return true if the chunk at `offset` has the size expected for `num_objects` ids of the given `hash` kind.
    pub fn is_valid(offset: &Range<usize>, hash: gix_hash::Kind, num_objects: u32) -> bool {
        (offset.end - offset.start) / hash.len_in_bytes() == num_objects as usize
    }
}

/// Information for the chunk with the pack ids and pack offsets.
pub mod offsets {
    use std::ops::Range;

    use crate::multi_index;

    /// The ID of the pack-offsets chunk.
    pub const ID: gix_chunk::Id = *b"OOFF";

    /// Return the amount of bytes needed to store the pack ids and offsets of `entries` objects.
    pub fn storage_size(entries: usize) -> u64 {
        (entries * (4 /* pack-id */ + 4 /* pack offset */)) as u64
    }

    pub(crate) fn write(
        sorted_entries: &[multi_index::write::Entry],
        large_offsets_needed: bool,
        out: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        use crate::index::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};
        let mut num_large_offsets = 0u32;

        for entry in sorted_entries {
            out.write_all(&entry.pack_index.to_be_bytes())?;

            let offset: u32 = if large_offsets_needed {
                if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
                    // Store an index into the large-offsets chunk instead, marked by the high bit.
                    let res = num_large_offsets | HIGH_BIT;
                    num_large_offsets += 1;
                    res
                } else {
                    entry.pack_offset as u32
                }
            } else {
                entry
                    .pack_offset
                    .try_into()
                    .expect("without large offsets, pack-offset fits u32")
            };
            out.write_all(&offset.to_be_bytes())?;
        }
        Ok(())
    }

    /// Return true if the chunk at `offset` has the size expected for `num_objects` entries.
    pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
        let entry_size = 4 /* pack-id */ + 4 /* pack offset */;
        ((offset.end - offset.start) / num_objects as usize) == entry_size
    }
}

/// Information for the chunk with the large pack offsets.
pub mod large_offsets {
    use std::ops::Range;

    use crate::{index::encode::LARGE_OFFSET_THRESHOLD, multi_index};

    /// The ID of the large-offsets chunk.
    pub const ID: gix_chunk::Id = *b"LOFF";

    /// Return the amount of large offsets in `entries` if a large-offsets chunk is needed at all, or `None` otherwise.
    pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> {
        let mut num_large_offsets = 0;
        let mut needs_large_offsets = false;
        for entry in entries {
            if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
                num_large_offsets += 1;
            }
            if entry.pack_offset > crate::data::Offset::from(u32::MAX) {
                needs_large_offsets = true;
            }
        }

        needs_large_offsets.then_some(num_large_offsets)
    }

    /// Return true if the chunk at `offset` can hold a whole number of 8-byte large offsets.
    pub fn is_valid(offset: &Range<usize>) -> bool {
        (offset.end - offset.start) % 8 == 0
    }

    pub(crate) fn write(
        sorted_entries: &[multi_index::write::Entry],
        mut num_large_offsets: usize,
        out: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        for offset in sorted_entries
            .iter()
            .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then_some(e.pack_offset))
        {
            out.write_all(&offset.to_be_bytes())?;
            num_large_offsets = num_large_offsets
                .checked_sub(1)
                .expect("BUG: wrote more offsets than previously found");
        }
        assert_eq!(num_large_offsets, 0, "BUG: wrote fewer offsets than initially counted");
        Ok(())
    }

    /// Return the amount of bytes needed to store `large_offsets` 8-byte offsets.
    pub(crate) fn storage_size(large_offsets: usize) -> u64 {
        8 * large_offsets as u64
    }
}
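// A minimal consistency sketch (test-only): the sizes reported by `storage_size()` for the
// offsets and large-offsets chunks should pass their respective `is_valid()` checks, since
// every offsets entry is 4 + 4 bytes and every large offset is 8 bytes.
#[cfg(test)]
mod offset_chunk_sizes {
    #[test]
    fn storage_sizes_pass_validation() {
        let num_objects = 5u32;
        let offsets_size = super::offsets::storage_size(num_objects as usize) as usize;
        assert!(super::offsets::is_valid(&(0..offsets_size), num_objects));

        let num_large = 3usize;
        let large_size = super::large_offsets::storage_size(num_large) as usize;
        assert!(super::large_offsets::is_valid(&(0..large_size)));
    }
}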