gix_pack/multi_index/
verify.rs

1use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant};
2
3use gix_features::progress::{Count, DynNestedProgress, Progress};
4
5use crate::{exact_vec, index, multi_index::File};
6
7///
8pub mod integrity {
9    use crate::multi_index::EntryIndex;
10
11    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
12    #[derive(thiserror::Error, Debug)]
13    #[allow(missing_docs)]
14    pub enum Error {
15        #[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")]
16        PackOffsetMismatch {
17            id: gix_hash::ObjectId,
18            expected_pack_offset: u64,
19            actual_pack_offset: u64,
20        },
21        #[error(transparent)]
22        MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error),
23        #[error(transparent)]
24        IndexIntegrity(#[from] crate::index::verify::integrity::Error),
25        #[error(transparent)]
26        BundleInit(#[from] crate::bundle::init::Error),
27        #[error("Counted {actual} objects, but expected {expected} as per multi-index")]
28        UnexpectedObjectCount { actual: usize, expected: usize },
29        #[error("{id} wasn't found in the index referenced in the multi-pack index")]
30        OidNotFound { id: gix_hash::ObjectId },
31        #[error("The object id at multi-index entry {index} wasn't in order")]
32        OutOfOrder { index: EntryIndex },
33        #[error("The fan at index {index} is out of order as it's larger then the following value.")]
34        Fan { index: usize },
35        #[error("The multi-index claims to have no objects")]
36        Empty,
37        #[error("Interrupted")]
38        Interrupted,
39    }
40
41    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
42    pub struct Outcome {
43        /// The computed checksum of the multi-index which matched the stored one.
44        pub actual_index_checksum: gix_hash::ObjectId,
45        /// The for each entry in [`index_names()`][super::File::index_names()] provide the corresponding pack traversal outcome.
46        pub pack_traverse_statistics: Vec<crate::index::traverse::Statistics>,
47    }
48
49    /// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
50    ///
51    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
52    #[derive(Debug, Copy, Clone)]
53    pub enum ProgressId {
54        /// The amount of bytes read to verify the multi-index checksum.
55        ChecksumBytes,
56        /// The amount of objects whose offset has been checked.
57        ObjectOffsets,
58    }
59
60    impl From<ProgressId> for gix_features::progress::Id {
61        fn from(v: ProgressId) -> Self {
62            match v {
63                ProgressId::ChecksumBytes => *b"MVCK",
64                ProgressId::ObjectOffsets => *b"MVOF",
65            }
66        }
67    }
68}
69
70///
71pub mod checksum {
72    /// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()].
73    pub type Error = crate::verify::checksum::Error;
74}
75
76impl File {
77    /// Validate that our [`checksum()`][File::checksum()] matches the actual contents
78    /// of this index file, and return it if it does.
79    pub fn verify_checksum(
80        &self,
81        progress: &mut dyn Progress,
82        should_interrupt: &AtomicBool,
83    ) -> Result<gix_hash::ObjectId, checksum::Error> {
84        crate::verify::checksum_on_disk_or_mmap(
85            self.path(),
86            &self.data,
87            self.checksum(),
88            self.object_hash,
89            progress,
90            should_interrupt,
91        )
92    }
93
94    /// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects.
95    ///
96    /// Instead we only validate the contents of the multi-index itself.
97    pub fn verify_integrity_fast(
98        &self,
99        progress: &mut dyn DynNestedProgress,
100        should_interrupt: &AtomicBool,
101    ) -> Result<gix_hash::ObjectId, integrity::Error> {
102        self.verify_integrity_inner(
103            progress,
104            should_interrupt,
105            false,
106            index::verify::integrity::Options::default(),
107        )
108        .map_err(|err| match err {
109            index::traverse::Error::Processor(err) => err,
110            _ => unreachable!("BUG: no other error type is possible"),
111        })
112        .map(|o| o.actual_index_checksum)
113    }
114
115    /// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs.
116    ///
117    /// Note that it's considered a failure if an index doesn't have a corresponding pack.
118    pub fn verify_integrity<C, F>(
119        &self,
120        progress: &mut dyn DynNestedProgress,
121        should_interrupt: &AtomicBool,
122        options: index::verify::integrity::Options<F>,
123    ) -> Result<integrity::Outcome, index::traverse::Error<integrity::Error>>
124    where
125        C: crate::cache::DecodeEntry,
126        F: Fn() -> C + Send + Clone,
127    {
128        self.verify_integrity_inner(progress, should_interrupt, true, options)
129    }
130
131    fn verify_integrity_inner<C, F>(
132        &self,
133        progress: &mut dyn DynNestedProgress,
134        should_interrupt: &AtomicBool,
135        deep_check: bool,
136        options: index::verify::integrity::Options<F>,
137    ) -> Result<integrity::Outcome, index::traverse::Error<integrity::Error>>
138    where
139        C: crate::cache::DecodeEntry,
140        F: Fn() -> C + Send + Clone,
141    {
142        let parent = self.path.parent().expect("must be in a directory");
143
144        let actual_index_checksum = self
145            .verify_checksum(
146                &mut progress.add_child_with_id(
147                    format!("{}: checksum", self.path.display()),
148                    integrity::ProgressId::ChecksumBytes.into(),
149                ),
150                should_interrupt,
151            )
152            .map_err(integrity::Error::from)
153            .map_err(index::traverse::Error::Processor)?;
154
155        if let Some(first_invalid) = crate::verify::fan(&self.fan) {
156            return Err(index::traverse::Error::Processor(integrity::Error::Fan {
157                index: first_invalid,
158            }));
159        }
160
161        if self.num_objects == 0 {
162            return Err(index::traverse::Error::Processor(integrity::Error::Empty));
163        }
164
165        let mut pack_traverse_statistics = Vec::new();
166
167        let operation_start = Instant::now();
168        let mut total_objects_checked = 0;
169        let mut pack_ids_and_offsets = exact_vec(self.num_objects as usize);
170        {
171            let order_start = Instant::now();
172            let mut progress = progress.add_child_with_id("checking oid order".into(), gix_features::progress::UNKNOWN);
173            progress.init(
174                Some(self.num_objects as usize),
175                gix_features::progress::count("objects"),
176            );
177
178            for entry_index in 0..(self.num_objects - 1) {
179                let lhs = self.oid_at_index(entry_index);
180                let rhs = self.oid_at_index(entry_index + 1);
181
182                if rhs.cmp(lhs) != Ordering::Greater {
183                    return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder {
184                        index: entry_index,
185                    }));
186                }
187                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
188                pack_ids_and_offsets.push((pack_id, entry_index));
189                progress.inc();
190            }
191            {
192                let entry_index = self.num_objects - 1;
193                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
194                pack_ids_and_offsets.push((pack_id, entry_index));
195            }
196            // sort by pack-id to allow handling all indices matching a pack while its open.
197            pack_ids_and_offsets.sort_by(|l, r| l.0.cmp(&r.0));
198            progress.show_throughput(order_start);
199        };
200
201        progress.init(
202            Some(self.num_indices as usize),
203            gix_features::progress::count("indices"),
204        );
205
206        let mut pack_ids_slice = pack_ids_and_offsets.as_slice();
207
208        for (pack_id, index_file_name) in self.index_names.iter().enumerate() {
209            progress.set_name(index_file_name.display().to_string());
210            progress.inc();
211
212            let mut bundle = None;
213            let index;
214            let index_path = parent.join(index_file_name);
215            let index = if deep_check {
216                bundle = crate::Bundle::at(index_path, self.object_hash)
217                    .map_err(integrity::Error::from)
218                    .map_err(index::traverse::Error::Processor)?
219                    .into();
220                bundle.as_ref().map(|b| &b.index).expect("just set")
221            } else {
222                index = Some(
223                    index::File::at(index_path, self.object_hash)
224                        .map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err)))
225                        .map_err(index::traverse::Error::Processor)?,
226                );
227                index.as_ref().expect("just set")
228            };
229
230            let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id);
231            let multi_index_entries_to_check = &pack_ids_slice[..slice_end];
232            {
233                let offset_start = Instant::now();
234                let mut offsets_progress = progress.add_child_with_id(
235                    "verify object offsets".into(),
236                    integrity::ProgressId::ObjectOffsets.into(),
237                );
238                offsets_progress.init(
239                    Some(pack_ids_and_offsets.len()),
240                    gix_features::progress::count("objects"),
241                );
242                pack_ids_slice = &pack_ids_slice[slice_end..];
243
244                for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) {
245                    let oid = self.oid_at_index(entry_id);
246                    let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id);
247                    let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| {
248                        index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() })
249                    })?;
250                    let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index);
251                    if actual_pack_offset != expected_pack_offset {
252                        return Err(index::traverse::Error::Processor(
253                            integrity::Error::PackOffsetMismatch {
254                                id: oid.to_owned(),
255                                expected_pack_offset,
256                                actual_pack_offset,
257                            },
258                        ));
259                    }
260                    offsets_progress.inc();
261                }
262
263                if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) {
264                    return Err(index::traverse::Error::Processor(integrity::Error::Interrupted));
265                }
266                offsets_progress.show_throughput(offset_start);
267            }
268
269            total_objects_checked += multi_index_entries_to_check.len();
270
271            if let Some(bundle) = bundle {
272                progress.set_name(format!("Validating {}", index_file_name.display()));
273                let crate::bundle::verify::integrity::Outcome {
274                    actual_index_checksum: _,
275                    pack_traverse_outcome,
276                } = bundle
277                    .verify_integrity(progress, should_interrupt, options.clone())
278                    .map_err(|err| {
279                        use index::traverse::Error::*;
280                        match err {
281                            Processor(err) => Processor(integrity::Error::IndexIntegrity(err)),
282                            VerifyChecksum(err) => VerifyChecksum(err),
283                            Tree(err) => Tree(err),
284                            TreeTraversal(err) => TreeTraversal(err),
285                            PackDecode { id, offset, source } => PackDecode { id, offset, source },
286                            PackMismatch { expected, actual } => PackMismatch { expected, actual },
287                            EntryType(err) => EntryType(err),
288                            PackObjectMismatch {
289                                expected,
290                                actual,
291                                offset,
292                                kind,
293                            } => PackObjectMismatch {
294                                expected,
295                                actual,
296                                offset,
297                                kind,
298                            },
299                            Crc32Mismatch {
300                                expected,
301                                actual,
302                                offset,
303                                kind,
304                            } => Crc32Mismatch {
305                                expected,
306                                actual,
307                                offset,
308                                kind,
309                            },
310                            Interrupted => Interrupted,
311                        }
312                    })?;
313                pack_traverse_statistics.push(pack_traverse_outcome);
314            }
315        }
316
317        assert_eq!(
318            self.num_objects as usize, total_objects_checked,
319            "BUG: our slicing should allow to visit all objects"
320        );
321
322        progress.set_name("Validating multi-pack".into());
323        progress.show_throughput(operation_start);
324
325        Ok(integrity::Outcome {
326            actual_index_checksum,
327            pack_traverse_statistics,
328        })
329    }
330}