gix_odb/
lib.rs

1//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Thus it's an
2//! object store indexed by the signature of data itself with inherent deduplication: the same data will have the same hash,
3//! and thus occupy the same space within the store.
4//!
5//! There is only one all-round object store, also known as the [`Store`], as it supports ~~everything~~ most of what git has to offer.
6//!
7//! * loose object reading and writing
8//! * access to packed objects
9//! * multiple loose objects and pack locations as gathered from `alternates` files.
10//! ## Feature Flags
11#![cfg_attr(
12    all(doc, feature = "document-features"),
13    doc = ::document_features::document_features!()
14)]
15#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
16#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
17
18use std::{
19    cell::RefCell,
20    path::PathBuf,
21    sync::{atomic::AtomicUsize, Arc},
22};
23
24use arc_swap::ArcSwap;
25use gix_features::{threading::OwnShared, zlib::stream::deflate};
26pub use gix_pack as pack;
27
28mod store_impls;
29pub use store_impls::{dynamic as store, loose};
30
31pub mod alternate;
32
33/// A way to access objects along with pre-configured thread-local caches for packed base objects as well as objects themselves.
34///
35/// By default, no cache will be used.
36pub struct Cache<S> {
37    /// The inner provider of trait implementations we use in conjunction with our caches.
38    ///
39    /// For calling methods on `inner`, prefer to make use of auto-dereferencing, i.e. `cache.inner_method()` instead of `cache.inner.inner_method()`.
40    inner: S,
41    // TODO: have single-threaded code-paths also for pack-creation (entries from counts) so that we can use OwnShared here
42    //       instead of Arc. However, it's probably not that important as these aren't called often.
43    new_pack_cache: Option<Arc<cache::NewPackCacheFn>>,
44    new_object_cache: Option<Arc<cache::NewObjectCacheFn>>,
45    pack_cache: Option<RefCell<Box<cache::PackCache>>>,
46    object_cache: Option<RefCell<Box<cache::ObjectCache>>>,
47}
48
49///
50pub mod cache;
51
52///
53/// It can optionally compress the content, similarly to what would happen when using a [`loose::Store`].
54///
55#[derive(Clone)]
56pub struct Sink {
57    compressor: Option<RefCell<deflate::Write<std::io::Sink>>>,
58    object_hash: gix_hash::Kind,
59}
60
61/// Create a new [`Sink`] with compression disabled.
62pub fn sink(object_hash: gix_hash::Kind) -> Sink {
63    Sink {
64        compressor: None,
65        object_hash,
66    }
67}
68
69///
70pub mod memory;
71
72mod sink;
73
74///
75pub mod find;
76
77/// An object database equivalent to `/dev/null`, dropping all objects stored into it.
78mod traits;
79
80pub use traits::{Header, HeaderExt};
81
82/// A thread-local handle to access any object.
83pub type Handle = Cache<store::Handle<OwnShared<Store>>>;
84/// A thread-local handle to access any object, but thread-safe and independent of the actual type of `OwnShared` or feature toggles in `gix-features`.
85pub type HandleArc = Cache<store::Handle<Arc<Store>>>;
86
87use store::types;
88
89/// The object store for use in any applications with support for auto-updates in the light of changes to the object database.
90///
91/// ### Features
92///
93/// - entirely lazy, creating an instance does no disk IO at all if [`Slots::Given`][store::init::Slots::Given] is used.
94/// - multi-threaded lazy-loading of indices and packs
95/// - per-thread pack and object caching avoiding cache trashing.
96/// - most-recently-used packs are always first for speedups if objects are stored in the same pack, typical for packs organized by
97///   commit graph and object age.
98/// - lock-free reading for perfect scaling across all cores, and changes to it don't affect readers as long as these don't want to
99///   enter the same branch.
100/// - sync with the state on disk if objects aren't found to catch up with changes if an object seems to be missing.
101///    - turn off the behaviour above for all handles if objects are expected to be missing due to spare checkouts.
102pub struct Store {
103    /// The central write lock without which the slotmap index can't be changed.
104    write: parking_lot::Mutex<()>,
105
106    /// The source directory from which all content is loaded, and the central write lock for use when a directory refresh is needed.
107    pub(crate) path: PathBuf,
108
109    /// The current working directory at the time this store was instantiated. It becomes relevant when resolving alternate paths
110    /// when re-reading the store configuration on updates when an object was missed.
111    /// Keeping it here helps to assure consistency even while a process changes its CWD.
112    pub(crate) current_dir: PathBuf,
113
114    /// A set of replacements that given a source OID return a destination OID. The vector is sorted.
115    pub(crate) replacements: Vec<(gix_hash::ObjectId, gix_hash::ObjectId)>,
116
117    /// A list of indices keeping track of which slots are filled with data. These are usually, but not always, consecutive.
118    pub(crate) index: ArcSwap<types::SlotMapIndex>,
119
120    /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it.
121    /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices.
122    /// The map is static and cannot change.
123    /// It's read often and changed rarely.
124    pub(crate) files: Vec<types::MutableIndexAndPack>,
125
126    /// The amount of handles that would prevent us from unloading packs or indices
127    pub(crate) num_handles_stable: AtomicUsize,
128    /// The amount of handles that don't affect our ability to compact our internal data structures or unload packs or indices.
129    pub(crate) num_handles_unstable: AtomicUsize,
130
131    /// The amount of times we re-read the disk state to consolidate our in-memory representation.
132    pub(crate) num_disk_state_consolidation: AtomicUsize,
133    /// If true, we are allowed to use multi-pack indices and they must have the `object_hash` or be ignored.
134    use_multi_pack_index: bool,
135    /// The hash kind to use for some operations
136    object_hash: gix_hash::Kind,
137}
138
139/// Create a new cached handle to the object store with support for additional options.
140///
141/// `replacements` is an iterator over pairs of old and new object ids for replacement support.
142/// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`.
143pub fn at_opts(
144    objects_dir: impl Into<PathBuf>,
145    replacements: impl IntoIterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>,
146    options: store::init::Options,
147) -> std::io::Result<Handle> {
148    let handle = OwnShared::new(Store::at_opts(
149        objects_dir.into(),
150        &mut replacements.into_iter(),
151        options,
152    )?)
153    .to_handle();
154    Ok(Cache::from(handle))
155}
156
157/// Create a new cached handle to the object store.
158pub fn at(objects_dir: impl Into<PathBuf>) -> std::io::Result<Handle> {
159    at_opts(objects_dir, Vec::new(), Default::default())
160}