gix_odb/store_impls/dynamic/
verify.rs

1use std::{
2    ops::Deref,
3    sync::atomic::{AtomicBool, Ordering},
4    time::Instant,
5};
6
7use gix_features::progress::{DynNestedProgress, MessageLevel, Progress};
8
9use crate::{
10    pack,
11    store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics},
12    types::IndexAndPacks,
13};
14
15///
16pub mod integrity {
17    use std::{marker::PhantomData, path::PathBuf};
18
19    use crate::pack;
20
21    /// Options for use in [`Store::verify_integrity()`][crate::Store::verify_integrity()].
22    pub type Options<F> = pack::index::verify::integrity::Options<F>;
23
24    /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
25    #[derive(Debug, thiserror::Error)]
26    #[allow(missing_docs)]
27    pub enum Error {
28        #[error(transparent)]
29        MultiIndexIntegrity(#[from] pack::index::traverse::Error<pack::multi_index::verify::integrity::Error>),
30        #[error(transparent)]
31        IndexIntegrity(#[from] pack::index::traverse::Error<pack::index::verify::integrity::Error>),
32        #[error(transparent)]
33        IndexOpen(#[from] pack::index::init::Error),
34        #[error(transparent)]
35        LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error),
36        #[error(transparent)]
37        MultiIndexOpen(#[from] pack::multi_index::init::Error),
38        #[error(transparent)]
39        PackOpen(#[from] pack::data::init::Error),
40        #[error(transparent)]
41        InitializeODB(#[from] crate::store::load_index::Error),
42        #[error("The disk on state changed while performing the operation, and we observed the change.")]
43        NeedsRetryDueToChangeOnDisk,
44    }
45
46    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
47    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
48    /// Integrity information about loose object databases
49    pub struct LooseObjectStatistics {
50        /// The path to the root directory of the loose objects database
51        pub path: PathBuf,
52        /// The statistics created after verifying the loose object database.
53        pub statistics: crate::loose::verify::integrity::Statistics,
54    }
55
56    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
57    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
58    /// Traversal statistics of packs governed by single indices or multi-pack indices.
59    #[allow(missing_docs)]
60    pub enum SingleOrMultiStatistics {
61        Single(pack::index::traverse::Statistics),
62        Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>),
63    }
64
65    /// Statistics gathered when traversing packs of various kinds of indices.
66    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
67    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68    pub struct IndexStatistics {
69        /// The path to the index or multi-pack index for which statistics were gathered.
70        pub path: PathBuf,
71        /// The actual statistics for the index at `path`.
72        pub statistics: SingleOrMultiStatistics,
73    }
74
75    /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
76    pub struct Outcome {
77        /// Statistics for validated loose object stores.
78        pub loose_object_stores: Vec<LooseObjectStatistics>,
79        /// Pack traversal statistics for each index and their pack(s)
80        pub index_statistics: Vec<IndexStatistics>,
81    }
82
83    /// The progress ids used in [`Store::verify_integrity()`][crate::Store::verify_integrity()].
84    ///
85    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
86    #[derive(Debug, Copy, Clone)]
87    pub enum ProgressId {
88        /// Contains the path of the currently validated loose object database.
89        VerifyLooseObjectDbPath,
90        /// The root progress for all verification of an index. It doesn't contain any useful information itself.
91        VerifyIndex(PhantomData<gix_pack::index::verify::integrity::ProgressId>),
92        /// The root progress for all verification of a multi-index. It doesn't contain any useful information itself.
93        VerifyMultiIndex(PhantomData<gix_pack::multi_index::verify::integrity::ProgressId>),
94    }
95
96    impl From<ProgressId> for gix_features::progress::Id {
97        fn from(v: ProgressId) -> Self {
98            match v {
99                ProgressId::VerifyLooseObjectDbPath => *b"VISP",
100                ProgressId::VerifyMultiIndex(_) => *b"VIMI",
101                ProgressId::VerifyIndex(_) => *b"VISI",
102            }
103        }
104    }
105}
106
107impl super::Store {
108    /// Check the integrity of all objects as per the given `options`.
109    ///
110    /// Note that this will not force loading all indices or packs permanently, as we will only use the momentarily loaded disk state.
111    /// This does, however, include all alternates.
112    pub fn verify_integrity<C, F>(
113        &self,
114        progress: &mut dyn DynNestedProgress,
115        should_interrupt: &AtomicBool,
116        options: integrity::Options<F>,
117    ) -> Result<integrity::Outcome, integrity::Error>
118    where
119        C: pack::cache::DecodeEntry,
120        F: Fn() -> C + Send + Clone,
121    {
122        let _span = gix_features::trace::coarse!("gix_odb:Store::verify_integrity()");
123        let mut index = self.index.load();
124        if !index.is_initialized() {
125            self.consolidate_with_disk_state(true, false)?;
126            index = self.index.load();
127            assert!(
128                index.is_initialized(),
129                "BUG: after consolidating successfully, we have an initialized index"
130            );
131        }
132
133        progress.init(
134            Some(index.slot_indices.len()),
135            gix_features::progress::count("pack indices"),
136        );
137        let mut statistics = Vec::new();
138        let index_check_message = |path: &std::path::Path| {
139            format!(
140                "Checking integrity: {}",
141                path.file_name()
142                    .map_or_else(Default::default, std::ffi::OsStr::to_string_lossy)
143            )
144        };
145        gix_features::trace::detail!("verify indices").into_scope(|| {
146            for slot_index in &index.slot_indices {
147                let slot = &self.files[*slot_index];
148                if slot.generation.load(Ordering::SeqCst) != index.generation {
149                    return Err(integrity::Error::NeedsRetryDueToChangeOnDisk);
150                }
151                let files = slot.files.load();
152                let files = Option::as_ref(&files).ok_or(integrity::Error::NeedsRetryDueToChangeOnDisk)?;
153
154                let start = Instant::now();
155                let (mut child_progress, num_objects, index_path) = match files {
156                    IndexAndPacks::Index(bundle) => {
157                        let index;
158                        let index = match bundle.index.loaded() {
159                            Some(index) => index.deref(),
160                            None => {
161                                index = pack::index::File::at(bundle.index.path(), self.object_hash)?;
162                                &index
163                            }
164                        };
165                        let pack;
166                        let data = match bundle.data.loaded() {
167                            Some(pack) => pack.deref(),
168                            None => {
169                                pack = pack::data::File::at(bundle.data.path(), self.object_hash)?;
170                                &pack
171                            }
172                        };
173                        let mut child_progress = progress.add_child_with_id(
174                            "verify index".into(),
175                            integrity::ProgressId::VerifyIndex(Default::default()).into(),
176                        );
177                        let outcome = index.verify_integrity(
178                            Some(pack::index::verify::PackContext {
179                                data,
180                                options: options.clone(),
181                            }),
182                            &mut child_progress,
183                            should_interrupt,
184                        )?;
185                        statistics.push(IndexStatistics {
186                            path: bundle.index.path().to_owned(),
187                            statistics: SingleOrMultiStatistics::Single(
188                                outcome
189                                    .pack_traverse_statistics
190                                    .expect("pack provided so there are stats"),
191                            ),
192                        });
193                        (child_progress, index.num_objects(), index.path().to_owned())
194                    }
195                    IndexAndPacks::MultiIndex(bundle) => {
196                        let index;
197                        let index = match bundle.multi_index.loaded() {
198                            Some(index) => index.deref(),
199                            None => {
200                                index = pack::multi_index::File::at(bundle.multi_index.path())?;
201                                &index
202                            }
203                        };
204                        let mut child_progress = progress.add_child_with_id(
205                            "verify multi-index".into(),
206                            integrity::ProgressId::VerifyMultiIndex(Default::default()).into(),
207                        );
208                        let outcome = index.verify_integrity(&mut child_progress, should_interrupt, options.clone())?;
209
210                        let index_dir = bundle.multi_index.path().parent().expect("file in a directory");
211                        statistics.push(IndexStatistics {
212                            path: Default::default(),
213                            statistics: SingleOrMultiStatistics::Multi(
214                                outcome
215                                    .pack_traverse_statistics
216                                    .into_iter()
217                                    .zip(index.index_names())
218                                    .map(|(statistics, index_name)| (index_dir.join(index_name), statistics))
219                                    .collect(),
220                            ),
221                        });
222                        (child_progress, index.num_objects(), index.path().to_owned())
223                    }
224                };
225
226                child_progress.set_name(index_check_message(&index_path));
227                child_progress.show_throughput_with(
228                    start,
229                    num_objects as usize,
230                    gix_features::progress::count("objects").expect("set"),
231                    MessageLevel::Success,
232                );
233                progress.inc();
234            }
235            Ok(())
236        })?;
237
238        progress.init(
239            Some(index.loose_dbs.len()),
240            gix_features::progress::count("loose object stores"),
241        );
242        let mut loose_object_stores = Vec::new();
243        gix_features::trace::detail!("verify loose ODBs").into_scope(
244            || -> Result<_, crate::loose::verify::integrity::Error> {
245                for loose_db in &*index.loose_dbs {
246                    let out = loose_db
247                        .verify_integrity(
248                            &mut progress.add_child_with_id(
249                                loose_db.path().display().to_string(),
250                                integrity::ProgressId::VerifyLooseObjectDbPath.into(),
251                            ),
252                            should_interrupt,
253                        )
254                        .map(|statistics| integrity::LooseObjectStatistics {
255                            path: loose_db.path().to_owned(),
256                            statistics,
257                        })?;
258                    loose_object_stores.push(out);
259                }
260                Ok(())
261            },
262        )?;
263
264        Ok(integrity::Outcome {
265            loose_object_stores,
266            index_statistics: statistics,
267        })
268    }
269}