solana_accounts_db/accounts_index_storage.rs

use {
    crate::{
        accounts_index::{
            self, in_mem_accounts_index::InMemAccountsIndex, AccountsIndexConfig, DiskIndexValue,
            IndexValue,
        },
        bucket_map_holder::BucketMapHolder,
        waitable_condvar::WaitableCondvar,
    },
    std::{
        fmt::Debug,
        num::NonZeroUsize,
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Mutex,
        },
        thread::{Builder, JoinHandle},
    },
};

/// Manages the lifetime of the background processing threads.
pub struct AccountsIndexStorage<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> {
    _bg_threads: BgThreads,

    pub storage: Arc<BucketMapHolder<T, U>>,
    pub in_mem: Vec<Arc<InMemAccountsIndex<T, U>>>,
    exit: Arc<AtomicBool>,

    /// set_startup(true) creates bg threads which are kept alive until set_startup(false)
    startup_worker_threads: Mutex<Option<BgThreads>>,
}

impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> Debug for AccountsIndexStorage<T, U> {
    fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // intentionally empty: there is nothing useful to format here
        Ok(())
    }
}

/// low-level management of the bg threads
struct BgThreads {
    /// batch-local stop signal; set on drop to shut down only these threads
    exit: Arc<AtomicBool>,
    handles: Option<Vec<JoinHandle<()>>>,
    /// condvar used to wake threads that are sleeping between flush passes
    wait: Arc<WaitableCondvar>,
}

impl Drop for BgThreads {
    fn drop(&mut self) {
        // signal this batch of threads to stop, then wake any that are
        // sleeping on the condvar so they observe the flag promptly
        self.exit.store(true, Ordering::Relaxed);
        self.wait.notify_all();
        if let Some(handles) = self.handles.take() {
            // block until every thread in this batch has exited
            handles
                .into_iter()
                .for_each(|handle| handle.join().unwrap());
        }
    }
}

impl BgThreads {
    fn new<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>>(
        storage: &Arc<BucketMapHolder<T, U>>,
        in_mem: &[Arc<InMemAccountsIndex<T, U>>],
        threads: NonZeroUsize,
        can_advance_age: bool,
        exit: Arc<AtomicBool>,
    ) -> Self {
        // stop signal used for THIS batch of bg threads; keeping it separate
        // from the process-wide `exit` lets one batch (eg. the extra startup
        // threads) be shut down on its own without stopping the others
        let local_exit = Arc::new(AtomicBool::default());
        let handles = Some(
            (0..threads.get())
                .map(|idx| {
                    // the first thread we start is special: only it is allowed to advance the age
                    let can_advance_age = can_advance_age && idx == 0;
                    let storage_ = Arc::clone(storage);
                    let local_exit = local_exit.clone();
                    let system_exit = exit.clone();
                    let in_mem_ = in_mem.to_vec();
                    // note that using rayon here causes us to exhaust # rayon threads and many tests running in parallel deadlock
                    Builder::new()
                        .name(format!("solIdxFlusher{idx:02}"))
                        .spawn(move || {
                            storage_.background(
                                vec![local_exit, system_exit],
                                in_mem_,
                                can_advance_age,
                            );
                        })
                        .unwrap()
                })
                .collect(),
        );

        BgThreads {
            exit: local_exit,
            handles,
            wait: Arc::clone(&storage.wait_dirty_or_aged),
        }
    }
}

/// modes the system can be in
pub enum Startup {
    /// not startup, but steady state execution
    Normal,

    /// startup (not steady state execution)
    /// requesting 'startup'-like behavior where in-mem acct idx items are flushed asap
    Startup,

    /// startup (not steady state execution)
    /// but also requesting additional threads to be running to flush the acct idx to disk asap
    /// The idea is that the best perf to ssds comes from multiple threads,
    /// but during steady state, we can't allocate as many threads because we'd starve the rest of the system.
    StartupWithExtraThreads,
}
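
// A usage sketch (illustrative only; the polling loop below is an assumption
// about how a caller drives the startup lifecycle, not code from this crate):
//
//     index_storage.set_startup(Startup::StartupWithExtraThreads);
//     // ... populate the index ...
//     while index_storage.get_startup_remaining_items_to_flush_estimate() > 0 {
//         std::thread::sleep(std::time::Duration::from_millis(100));
//     }
//     index_storage.set_startup(Startup::Normal); // joins the extra bg threads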

impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndexStorage<T, U> {
    /// startup=true causes:
    ///  in mem to act in a way that flushes to disk asap
    ///  also creates some additional bg threads to facilitate flushing to disk asap
    /// startup=false is 'normal' operation
    pub fn set_startup(&self, startup: Startup) {
        let value = !matches!(startup, Startup::Normal);
        if matches!(startup, Startup::StartupWithExtraThreads) {
            // create some additional bg threads to help get things to the disk index asap
            *self.startup_worker_threads.lock().unwrap() = Some(BgThreads::new(
                &self.storage,
                &self.in_mem,
                accounts_index::default_num_flush_threads(),
                false, // cannot advance age from any of these threads
                self.exit.clone(),
            ));
        }
        self.storage.set_startup(value);
        if !value {
            // transitioning from startup to !startup (ie. steady state)
            // shutdown the bg threads
            *self.startup_worker_threads.lock().unwrap() = None;
            // maybe shrink hashmaps
            self.shrink_to_fit();
        }
    }

    /// estimate how many items still need to be flushed to the disk cache
    pub fn get_startup_remaining_items_to_flush_estimate(&self) -> usize {
        self.storage
            .disk
            .as_ref()
            .map(|_| self.storage.stats.get_remaining_items_to_flush_estimate())
            .unwrap_or_default()
    }

    fn shrink_to_fit(&self) {
        self.in_mem.iter().for_each(|mem| mem.shrink_to_fit())
    }

    /// allocate BucketMapHolder and InMemAccountsIndex[]
    pub fn new(bins: usize, config: &Option<AccountsIndexConfig>, exit: Arc<AtomicBool>) -> Self {
        let num_flush_threads = config
            .as_ref()
            .and_then(|config| config.num_flush_threads)
            .unwrap_or_else(accounts_index::default_num_flush_threads);
        let storage = Arc::new(BucketMapHolder::new(bins, config, num_flush_threads.get()));
        let in_mem = (0..bins)
            .map(|bin| Arc::new(InMemAccountsIndex::new(&storage, bin)))
            .collect::<Vec<_>>();

        Self {
            _bg_threads: BgThreads::new(&storage, &in_mem, num_flush_threads, true, exit.clone()),
            storage,
            in_mem,
            startup_worker_threads: Mutex::default(),
            exit,
        }
    }
}
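
// A minimal construction sketch (illustrative only; using `u64` for the index
// value types and a bin count of 8192 are assumptions, not code from this file):
//
//     use std::sync::{atomic::AtomicBool, Arc};
//
//     let exit = Arc::new(AtomicBool::new(false));
//     let index_storage =
//         AccountsIndexStorage::<u64, u64>::new(8192, &None, exit.clone());
//     // no disk index configured and nothing written yet, so nothing to flush
//     assert_eq!(index_storage.get_startup_remaining_items_to_flush_estimate(), 0);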