gix_odb/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Thus it's an
//! object store indexed by the signature of data itself with inherent deduplication: the same data will have the same hash,
//! and thus occupy the same space within the store.
//!
//! There is only one all-round object store, also known as the [`Store`], as it supports ~~everything~~ most of what git has to offer.
//!
//! * loose object reading and writing
//! * access to packed objects
//! * multiple loose objects and pack locations as gathered from `alternates` files.
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]

use std::{
    cell::RefCell,
    path::PathBuf,
    sync::{atomic::AtomicUsize, Arc},
};

use arc_swap::ArcSwap;
use gix_features::{threading::OwnShared, zlib::stream::deflate};
pub use gix_pack as pack;

mod store_impls;
pub use store_impls::{dynamic as store, loose};

pub mod alternate;

/// A way to access objects along with pre-configured thread-local caches for packed base objects as well as objects themselves.
///
/// By default, no cache will be used.
pub struct Cache<S> {
    /// The inner provider of trait implementations we use in conjunction with our caches.
    ///
    /// For calling methods on `inner`, prefer to make use of auto-dereferencing, i.e. `cache.inner_method()` instead of `cache.inner.inner_method()`.
    inner: S,
    // TODO: have single-threaded code-paths also for pack-creation (entries from counts) so that we can use OwnShared here
    //       instead of Arc. However, it's probably not that important as these aren't called often.
    new_pack_cache: Option<Arc<cache::NewPackCacheFn>>,
    new_object_cache: Option<Arc<cache::NewObjectCacheFn>>,
    pack_cache: Option<RefCell<Box<cache::PackCache>>>,
    object_cache: Option<RefCell<Box<cache::ObjectCache>>>,
}

///
pub mod cache;

///
/// It can optionally compress the content, similarly to what would happen when using a [`loose::Store`].
///
#[derive(Clone)]
pub struct Sink {
    compressor: Option<RefCell<deflate::Write<std::io::Sink>>>,
    object_hash: gix_hash::Kind,
}

/// Create a new [`Sink`] with compression disabled.
pub fn sink(object_hash: gix_hash::Kind) -> Sink {
    Sink {
        compressor: None,
        object_hash,
    }
}

///
pub mod memory;

mod sink;

///
pub mod find;

/// An object database equivalent to `/dev/null`, dropping all objects stored into it.
mod traits;

pub use traits::{Header, HeaderExt, Write};

///
pub mod write {
    /// The error type returned by the [`Write`](crate::Write) trait.
    pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
}

/// A thread-local handle to access any object.
pub type Handle = Cache<store::Handle<OwnShared<Store>>>;
/// A thread-local handle to access any object, but thread-safe and independent of the actual type of `OwnShared` or feature toggles in `gix-features`.
pub type HandleArc = Cache<store::Handle<Arc<Store>>>;

use store::types;

/// The object store for use in any applications with support for auto-updates in the light of changes to the object database.
///
/// ### Features
///
/// - entirely lazy, creating an instance does no disk IO at all if [`Slots::Given`][store::init::Slots::Given] is used.
/// - multi-threaded lazy-loading of indices and packs
/// - per-thread pack and object caching avoiding cache trashing.
/// - most-recently-used packs are always first for speedups if objects are stored in the same pack, typical for packs organized by
///   commit graph and object age.
/// - lock-free reading for perfect scaling across all cores, and changes to it don't affect readers as long as these don't want to
///   enter the same branch.
/// - sync with the state on disk if objects aren't found to catch up with changes if an object seems to be missing.
///    - turn off the behaviour above for all handles if objects are expected to be missing due to spare checkouts.
pub struct Store {
    /// The central write lock without which the slotmap index can't be changed.
    write: parking_lot::Mutex<()>,

    /// The source directory from which all content is loaded, and the central write lock for use when a directory refresh is needed.
    pub(crate) path: PathBuf,

    /// The current working directory at the time this store was instantiated. It becomes relevant when resolving alternate paths
    /// when re-reading the store configuration on updates when an object was missed.
    /// Keeping it here helps to assure consistency even while a process changes its CWD.
    pub(crate) current_dir: PathBuf,

    /// A set of replacements that given a source OID return a destination OID. The vector is sorted.
    pub(crate) replacements: Vec<(gix_hash::ObjectId, gix_hash::ObjectId)>,

    /// A list of indices keeping track of which slots are filled with data. These are usually, but not always, consecutive.
    pub(crate) index: ArcSwap<types::SlotMapIndex>,

    /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it.
    /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices.
    /// The map is static and cannot typically change.
    /// It's read often and changed rarely.
    pub(crate) files: Vec<types::MutableIndexAndPack>,

    /// The amount of handles that would prevent us from unloading packs or indices
    pub(crate) num_handles_stable: AtomicUsize,
    /// The amount of handles that don't affect our ability to compact our internal data structures or unload packs or indices.
    pub(crate) num_handles_unstable: AtomicUsize,

    /// The amount of times we re-read the disk state to consolidate our in-memory representation.
    pub(crate) num_disk_state_consolidation: AtomicUsize,
    /// If true, we are allowed to use multi-pack indices and they must have the `object_hash` or be ignored.
    use_multi_pack_index: bool,
    /// The hash kind to use for some operations
    object_hash: gix_hash::Kind,
}

/// Create a new cached handle to the object store with support for additional options.
///
/// `replacements` is an iterator over pairs of old and new object ids for replacement support.
/// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`.
pub fn at_opts(
    objects_dir: impl Into<PathBuf>,
    replacements: impl IntoIterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>,
    options: store::init::Options,
) -> std::io::Result<Handle> {
    let handle = OwnShared::new(Store::at_opts(
        objects_dir.into(),
        &mut replacements.into_iter(),
        options,
    )?)
    .to_handle();
    Ok(Cache::from(handle))
}

/// Create a new cached handle to the object store.
pub fn at(objects_dir: impl Into<PathBuf>) -> std::io::Result<Handle> {
    at_opts(objects_dir, Vec::new(), Default::default())
}