1use filetime::FileTime;
2
3use crate::{entry, extension, Entry, State, Version};
4
5mod entries;
6pub mod header;
8
mod error {
    use crate::{decode, extension};

    /// The error returned by [`State::from_bytes()`][crate::State::from_bytes()].
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        /// The fixed-size header at the start of the index could not be parsed.
        #[error(transparent)]
        Header(#[from] decode::header::Error),
        /// An entry could not be decoded; `index` is its position in the entry list.
        #[error("Could not parse entry at index {index}")]
        Entry { index: u32 },
        /// An extension marked as mandatory could not be decoded.
        #[error("Mandatory extension wasn't implemented or malformed.")]
        Extension(#[from] extension::decode::Error),
        /// After entries and extensions, the remaining bytes did not have the
        /// length of a trailing checksum for the configured hash kind.
        #[error("Index trailer should have been {expected} bytes long, but was {actual}")]
        UnexpectedTrailerLength { expected: usize, actual: usize },
        /// The checksum stored in the index did not match the one the caller expected
        /// (see `Options::expected_checksum`).
        #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")]
        ChecksumMismatch {
            actual_checksum: gix_hash::ObjectId,
            expected_checksum: gix_hash::ObjectId,
        },
    }
}
32pub use error::Error;
33use gix_features::parallel::InOrderIter;
34
35use crate::util::read_u32;
36
/// Options to control how an index [`State`] is decoded [from bytes][State::from_bytes()].
#[derive(Debug, Default, Clone, Copy)]
pub struct Options {
    /// The maximum number of threads to use for decoding; `None` lets
    /// `gix_features::parallel::num_threads()` pick a value.
    pub thread_limit: Option<usize>,
    /// The minimum size of the extension block, in bytes, before it is decoded on
    /// its own thread (only relevant when more than one thread is available).
    pub min_extension_block_in_bytes_for_threading: usize,
    /// If set, the checksum stored at the end of the index must match this value,
    /// otherwise decoding fails with `Error::ChecksumMismatch`.
    pub expected_checksum: Option<gix_hash::ObjectId>,
}
54
impl State {
    /// Decode an index state from `data`, storing `timestamp` in the resulting instance for
    /// pass-through, with `object_hash` denoting the hash kind used throughout the file.
    ///
    /// Also returns the checksum stored at the end of `data`, or `None` if that trailing
    /// hash is all-zero (i.e. no checksum was written).
    ///
    /// # Errors
    ///
    /// Fails if the header, any entry, a mandatory extension or the trailer is malformed,
    /// or if the stored checksum does not match `Options::expected_checksum`.
    pub fn from_bytes(
        data: &[u8],
        timestamp: FileTime,
        object_hash: gix_hash::Kind,
        _options @ Options {
            thread_limit,
            min_extension_block_in_bytes_for_threading,
            expected_checksum,
        }: Options,
    ) -> Result<(Self, Option<gix_hash::ObjectId>), Error> {
        let _span = gix_features::trace::detail!("gix_index::State::from_bytes()", options = ?_options);
        let (version, num_entries, post_header_data) = header::decode(data, object_hash)?;
        // The 'end of index entry' extension, if present, tells us where extensions start,
        // which is what enables parallel decoding below.
        let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash);

        let mut num_threads = gix_features::parallel::num_threads(thread_limit);
        let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes(
            num_entries,
            data.len(),
            start_of_extensions,
            object_hash,
            version,
        );

        let (entries, ext, data) = match start_of_extensions {
            // Only with a known extension offset and spare threads can entries and
            // extensions be decoded concurrently.
            Some(offset) if num_threads > 1 => {
                let extensions_data = &data[offset..];
                let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash);
                let (entries_res, ext_res) = gix_features::parallel::threads(|scope| {
                    // Spawn a dedicated thread for extension decoding if the extension block
                    // is large enough to be worth it.
                    // NOTE(review): the block passed to `then(..)` is evaluated eagerly, so
                    // `num_threads -= 1` happens even when the threshold isn't met and no
                    // extension thread is spawned — confirm this is intentional.
                    let extension_loading =
                        (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({
                            num_threads -= 1;
                            || {
                                gix_features::parallel::build_thread()
                                    .name("gix-index.from_bytes.load-extensions".into())
                                    .spawn_scoped(scope, || extension::decode::all(extensions_data, object_hash))
                                    .expect("valid name")
                            }
                        });
                    let entries_res = match index_offsets_table {
                        // The 'index entry offset table' extension provides chunk boundaries,
                        // letting us fan entry decoding out over the remaining threads.
                        Some(entry_offsets) => {
                            let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize;
                            let entry_offsets_chunked = entry_offsets.chunks(chunk_size);
                            let num_chunks = entry_offsets_chunked.len();
                            let mut threads = Vec::with_capacity(num_chunks);
                            for (id, chunks) in entry_offsets_chunked.enumerate() {
                                let chunks = chunks.to_vec();
                                threads.push(
                                    gix_features::parallel::build_thread()
                                        .name(format!("gix-index.from_bytes.read-entries.{id}"))
                                        .spawn_scoped(scope, move || {
                                            let num_entries_for_chunks =
                                                chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
                                            let mut entries = Vec::with_capacity(num_entries_for_chunks);
                                            // Scale the path-storage estimate down to this
                                            // thread's share of the data.
                                            let path_backing_buffer_size_for_chunks =
                                                entries::estimate_path_storage_requirements_in_bytes(
                                                    num_entries_for_chunks as u32,
                                                    data.len() / num_chunks,
                                                    start_of_extensions.map(|ofs| ofs / num_chunks),
                                                    object_hash,
                                                    version,
                                                );
                                            let mut path_backing =
                                                Vec::with_capacity(path_backing_buffer_size_for_chunks);
                                            let mut is_sparse = false;
                                            for offset in chunks {
                                                let (
                                                    entries::Outcome {
                                                        is_sparse: chunk_is_sparse,
                                                    },
                                                    _data,
                                                ) = entries::chunk(
                                                    &data[offset.from_beginning_of_file as usize..],
                                                    &mut entries,
                                                    &mut path_backing,
                                                    offset.num_entries,
                                                    object_hash,
                                                    version,
                                                )?;
                                                is_sparse |= chunk_is_sparse;
                                            }
                                            // Tag the result with the chunk id so results can
                                            // be re-assembled in file order below.
                                            Ok::<_, Error>((
                                                id,
                                                EntriesOutcome {
                                                    entries,
                                                    path_backing,
                                                    is_sparse,
                                                },
                                            ))
                                        })
                                        .expect("valid name"),
                                );
                            }
                            // `InOrderIter` yields thread results sorted by chunk id, so the
                            // merge below concatenates entries in their original file order.
                            let mut results =
                                InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap()));
                            let mut acc = results.next().expect("have at least two results, one per thread");
                            // Fold all remaining chunk results into the first one. The loop
                            // stops early once `acc` holds an error; the scoped-thread guard
                            // still joins any remaining threads on scope exit.
                            // No extra capacity is reserved in `acc` — the vectors grow as
                            // needed to keep peak memory low.
                            while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) {
                                match res {
                                    Ok(mut rhs) => {
                                        lhs.is_sparse |= rhs.is_sparse;
                                        let ofs = lhs.path_backing.len();
                                        // Appended paths land at offset `ofs`, so every entry
                                        // from `rhs` must have its path range rebased.
                                        lhs.path_backing.append(&mut rhs.path_backing);
                                        lhs.entries.extend(rhs.entries.into_iter().map(|mut e| {
                                            e.path.start += ofs;
                                            e.path.end += ofs;
                                            e
                                        }));
                                    }
                                    Err(err) => {
                                        acc = Err(err);
                                    }
                                }
                            }
                            // Pair the merged entries with the trailing checksum bytes, mirroring
                            // what `entries()` returns in the sequential paths.
                            acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..]))
                        }
                        // No offset table: decode all entries sequentially on this thread while
                        // the extension thread (if any) runs concurrently.
                        None => entries(
                            post_header_data,
                            path_backing_buffer_size,
                            num_entries,
                            object_hash,
                            version,
                        ),
                    };
                    let ext_res = extension_loading.map_or_else(
                        || extension::decode::all(extensions_data, object_hash),
                        |thread| thread.join().unwrap(),
                    );
                    (entries_res, ext_res)
                });
                let (ext, data) = ext_res?;
                (entries_res?.0, ext, data)
            }
            // Fully sequential fallback: entries first, then extensions from the remainder.
            None | Some(_) => {
                let (entries, data) = entries(
                    post_header_data,
                    path_backing_buffer_size,
                    num_entries,
                    object_hash,
                    version,
                )?;
                let (ext, data) = extension::decode::all(data, object_hash)?;
                (entries, ext, data)
            }
        };

        // Whatever is left must be exactly the trailing checksum.
        if data.len() != object_hash.len_in_bytes() {
            return Err(Error::UnexpectedTrailerLength {
                expected: object_hash.len_in_bytes(),
                actual: data.len(),
            });
        }

        let checksum = gix_hash::ObjectId::from_bytes_or_panic(data);
        // An all-zero trailer means no checksum was stored.
        let checksum = (!checksum.is_null()).then_some(checksum);
        if let Some((expected_checksum, actual_checksum)) = expected_checksum.zip(checksum) {
            if actual_checksum != expected_checksum {
                return Err(Error::ChecksumMismatch {
                    actual_checksum,
                    expected_checksum,
                });
            }
        }
        let EntriesOutcome {
            entries,
            path_backing,
            mut is_sparse,
        } = entries;
        let extension::decode::Outcome {
            tree,
            link,
            resolve_undo,
            untracked,
            fs_monitor,
            is_sparse: is_sparse_from_ext,
            end_of_index,
            offset_table,
        } = ext;
        // The index is sparse if either the entries or an extension say so.
        is_sparse |= is_sparse_from_ext;

        Ok((
            State {
                object_hash,
                timestamp,
                version,
                entries,
                path_backing,
                is_sparse,

                end_of_index_at_decode_time: end_of_index,
                offset_table_at_decode_time: offset_table,
                tree,
                link,
                resolve_undo,
                untracked,
                fs_monitor,
            },
            checksum,
        ))
    }
}
265
/// The result of decoding one run of index entries: the entries themselves plus the
/// shared byte buffer their path ranges point into.
struct EntriesOutcome {
    // Decoded entries; their `path` fields are ranges into `path_backing`.
    pub entries: Vec<Entry>,
    // Backing storage for all entry paths.
    pub path_backing: Vec<u8>,
    // True if at least one decoded entry was a sparse directory entry.
    pub is_sparse: bool,
}
271
272fn entries(
273 post_header_data: &[u8],
274 path_backing_buffer_size: usize,
275 num_entries: u32,
276 object_hash: gix_hash::Kind,
277 version: Version,
278) -> Result<(EntriesOutcome, &[u8]), Error> {
279 let mut entries = Vec::with_capacity(num_entries as usize);
280 let mut path_backing = Vec::with_capacity(path_backing_buffer_size);
281 entries::chunk(
282 post_header_data,
283 &mut entries,
284 &mut path_backing,
285 num_entries,
286 object_hash,
287 version,
288 )
289 .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| {
290 (
291 EntriesOutcome {
292 entries,
293 path_backing,
294 is_sparse,
295 },
296 data,
297 )
298 })
299}
300
301pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> {
302 let (ctime_secs, data) = read_u32(data)?;
303 let (ctime_nsecs, data) = read_u32(data)?;
304 let (mtime_secs, data) = read_u32(data)?;
305 let (mtime_nsecs, data) = read_u32(data)?;
306 let (dev, data) = read_u32(data)?;
307 let (ino, data) = read_u32(data)?;
308 let (uid, data) = read_u32(data)?;
309 let (gid, data) = read_u32(data)?;
310 let (size, data) = read_u32(data)?;
311 Some((
312 entry::Stat {
313 mtime: entry::stat::Time {
314 secs: ctime_secs,
315 nsecs: ctime_nsecs,
316 },
317 ctime: entry::stat::Time {
318 secs: mtime_secs,
319 nsecs: mtime_nsecs,
320 },
321 dev,
322 ino,
323 uid,
324 gid,
325 size,
326 },
327 data,
328 ))
329}