solana_accounts_db/
tiered_storage.rs

1#![allow(dead_code)]
2
3pub mod byte_block;
4pub mod error;
5pub mod file;
6pub mod footer;
7pub mod hot;
8pub mod index;
9pub mod meta;
10pub mod mmap_utils;
11pub mod owners;
12pub mod readable;
13mod test_utils;
14
15use {
16    crate::{accounts_file::StoredAccountsInfo, storable_accounts::StorableAccounts},
17    error::TieredStorageError,
18    footer::{AccountBlockFormat, AccountMetaFormat},
19    hot::{HotStorageWriter, HOT_FORMAT},
20    index::IndexBlockFormat,
21    owners::OwnersBlockFormat,
22    readable::TieredStorageReader,
23    std::{
24        fs, io,
25        path::{Path, PathBuf},
26        sync::{
27            atomic::{AtomicBool, Ordering},
28            OnceLock,
29        },
30    },
31};
32
33pub type TieredStorageResult<T> = Result<T, TieredStorageError>;
34
/// Upper bound used for a tiered storage file size: 16 GiB (16 * 2^30 bytes).
const MAX_TIERED_STORAGE_FILE_SIZE: u64 = 16 << 30;
36
37/// The struct that defines the formats of all building blocks of a
38/// TieredStorage.
39#[derive(Clone, Debug, PartialEq)]
40pub struct TieredStorageFormat {
41    pub meta_entry_size: usize,
42    pub account_meta_format: AccountMetaFormat,
43    pub owners_block_format: OwnersBlockFormat,
44    pub index_block_format: IndexBlockFormat,
45    pub account_block_format: AccountBlockFormat,
46}
47
48/// The implementation of AccountsFile for tiered-storage.
49#[derive(Debug)]
50pub struct TieredStorage {
51    /// The internal reader instance for its accounts file.
52    reader: OnceLock<TieredStorageReader>,
53    /// A status flag indicating whether its file has been already written.
54    already_written: AtomicBool,
55    /// The path to the file that stores accounts.
56    path: PathBuf,
57}
58
59impl Drop for TieredStorage {
60    fn drop(&mut self) {
61        if let Err(err) = fs::remove_file(&self.path) {
62            // Here we bypass NotFound error as the focus of the panic is to
63            // detect any leakage of storage resource.
64            if err.kind() != io::ErrorKind::NotFound {
65                panic!(
66                    "TieredStorage failed to remove backing storage file '{}': {err}",
67                    self.path.display(),
68                );
69            }
70        }
71    }
72}
73
74impl TieredStorage {
75    /// Creates a new writable instance of TieredStorage based on the
76    /// specified path and TieredStorageFormat.
77    ///
78    /// Note that the actual file will not be created until write_accounts
79    /// is called.
80    pub fn new_writable(path: impl Into<PathBuf>) -> Self {
81        Self {
82            reader: OnceLock::<TieredStorageReader>::new(),
83            already_written: false.into(),
84            path: path.into(),
85        }
86    }
87
88    /// Creates a new read-only instance of TieredStorage from the
89    /// specified path.
90    pub fn new_readonly(path: impl Into<PathBuf>) -> TieredStorageResult<Self> {
91        let path = path.into();
92        Ok(Self {
93            reader: TieredStorageReader::new_from_path(&path).map(OnceLock::from)?,
94            already_written: true.into(),
95            path,
96        })
97    }
98
99    /// Returns the path to this TieredStorage.
100    pub fn path(&self) -> &Path {
101        self.path.as_path()
102    }
103
104    /// Writes the specified accounts into this TieredStorage.
105    ///
106    /// Note that this function can only be called once per a TieredStorage
107    /// instance.  Otherwise, it will trigger panic.
108    pub fn write_accounts<'a>(
109        &self,
110        accounts: &impl StorableAccounts<'a>,
111        skip: usize,
112        format: &TieredStorageFormat,
113    ) -> TieredStorageResult<StoredAccountsInfo> {
114        let was_written = self.already_written.swap(true, Ordering::AcqRel);
115
116        if was_written {
117            panic!("cannot write same tiered storage file more than once");
118        }
119
120        if format == &HOT_FORMAT {
121            let stored_accounts_info = {
122                let mut writer = HotStorageWriter::new(&self.path)?;
123                let stored_accounts_info = writer.write_accounts(accounts, skip)?;
124                writer.flush()?;
125                stored_accounts_info
126            };
127
128            // panic here if self.reader.get() is not None as self.reader can only be
129            // None since a false-value `was_written` indicates the accounts file has
130            // not been written previously, implying is_read_only() was also false.
131            debug_assert!(!self.is_read_only());
132            self.reader
133                .set(TieredStorageReader::new_from_path(&self.path)?)
134                .unwrap();
135
136            Ok(stored_accounts_info)
137        } else {
138            Err(TieredStorageError::UnknownFormat(self.path.to_path_buf()))
139        }
140    }
141
142    /// Returns the underlying reader of the TieredStorage.  None will be
143    /// returned if it's is_read_only() returns false.
144    pub fn reader(&self) -> Option<&TieredStorageReader> {
145        self.reader.get()
146    }
147
148    /// Returns true if the TieredStorage instance is read-only.
149    pub fn is_read_only(&self) -> bool {
150        self.reader.get().is_some()
151    }
152
153    /// Returns the size of the underlying accounts file.
154    pub fn len(&self) -> usize {
155        self.reader().map_or(0, |reader| reader.len())
156    }
157
158    /// Returns whether the underlying storage is empty.
159    pub fn is_empty(&self) -> bool {
160        self.len() == 0
161    }
162
163    pub fn capacity(&self) -> u64 {
164        self.reader()
165            .map_or(MAX_TIERED_STORAGE_FILE_SIZE, |reader| reader.capacity())
166    }
167
168    pub fn dead_bytes_due_to_zero_lamport_single_ref(&self, count: usize) -> usize {
169        const ZERO_LAMPORT_ACCOUNT_SIZE: usize = 42; // approximately 42 bytes per zero lamport account
170        count * ZERO_LAMPORT_ACCOUNT_SIZE
171    }
172}
173
174#[cfg(test)]
175mod tests {
176    use {
177        super::*,
178        file::TieredStorageMagicNumber,
179        footer::TieredStorageFooter,
180        hot::HOT_FORMAT,
181        solana_pubkey::Pubkey,
182        solana_sdk::{
183            account::{AccountSharedData, ReadableAccount},
184            clock::Slot,
185            system_instruction::MAX_PERMITTED_DATA_LENGTH,
186        },
187        std::{
188            collections::{HashMap, HashSet},
189            mem::ManuallyDrop,
190        },
191        tempfile::tempdir,
192        test_utils::{create_test_account, verify_test_account_with_footer},
193    };
194
195    impl TieredStorage {
196        fn footer(&self) -> Option<&TieredStorageFooter> {
197            self.reader.get().map(|r| r.footer())
198        }
199    }
200
201    /// Simply invoke write_accounts with empty vector to allow the tiered storage
202    /// to persist non-account blocks such as footer, index block, etc.
203    fn write_zero_accounts(
204        tiered_storage: &TieredStorage,
205        expected_result: TieredStorageResult<StoredAccountsInfo>,
206    ) {
207        let slot_ignored = Slot::MAX;
208        let account_refs = Vec::<(&Pubkey, &AccountSharedData)>::new();
209        let storable_accounts = (slot_ignored, account_refs.as_slice());
210
211        let result = tiered_storage.write_accounts(&storable_accounts, 0, &HOT_FORMAT);
212
213        match (&result, &expected_result) {
214            (
215                Err(TieredStorageError::AttemptToUpdateReadOnly(_)),
216                Err(TieredStorageError::AttemptToUpdateReadOnly(_)),
217            ) => {}
218            (Err(TieredStorageError::Unsupported()), Err(TieredStorageError::Unsupported())) => {}
219            (Ok(_), Ok(_)) => {}
220            // we don't expect error type mis-match or other error types here
221            _ => {
222                panic!("actual: {result:?}, expected: {expected_result:?}");
223            }
224        };
225
226        assert!(tiered_storage.is_read_only());
227        assert_eq!(
228            tiered_storage.len(),
229            std::mem::size_of::<TieredStorageFooter>()
230                + std::mem::size_of::<TieredStorageMagicNumber>()
231        );
232    }
233
234    #[test]
235    fn test_new_meta_file_only() {
236        // Generate a new temp path that is guaranteed to NOT already have a file.
237        let temp_dir = tempdir().unwrap();
238        let tiered_storage_path = temp_dir.path().join("test_new_meta_file_only");
239
240        {
241            let tiered_storage =
242                ManuallyDrop::new(TieredStorage::new_writable(&tiered_storage_path));
243
244            assert!(!tiered_storage.is_read_only());
245            assert_eq!(tiered_storage.path(), tiered_storage_path);
246            assert_eq!(tiered_storage.len(), 0);
247
248            write_zero_accounts(
249                &tiered_storage,
250                Ok(StoredAccountsInfo {
251                    offsets: vec![],
252                    size: 0,
253                }),
254            );
255        }
256
257        let tiered_storage_readonly = TieredStorage::new_readonly(&tiered_storage_path).unwrap();
258        let footer = tiered_storage_readonly.footer().unwrap();
259        assert!(tiered_storage_readonly.is_read_only());
260        assert_eq!(tiered_storage_readonly.reader().unwrap().num_accounts(), 0);
261        assert_eq!(footer.account_meta_format, HOT_FORMAT.account_meta_format);
262        assert_eq!(footer.owners_block_format, HOT_FORMAT.owners_block_format);
263        assert_eq!(footer.index_block_format, HOT_FORMAT.index_block_format);
264        assert_eq!(footer.account_block_format, HOT_FORMAT.account_block_format);
265        assert_eq!(
266            tiered_storage_readonly.len(),
267            std::mem::size_of::<TieredStorageFooter>()
268                + std::mem::size_of::<TieredStorageMagicNumber>()
269        );
270    }
271
272    #[test]
273    #[should_panic(expected = "cannot write same tiered storage file more than once")]
274    fn test_write_accounts_twice() {
275        // Generate a new temp path that is guaranteed to NOT already have a file.
276        let temp_dir = tempdir().unwrap();
277        let tiered_storage_path = temp_dir.path().join("test_write_accounts_twice");
278
279        let tiered_storage = TieredStorage::new_writable(&tiered_storage_path);
280        write_zero_accounts(
281            &tiered_storage,
282            Ok(StoredAccountsInfo {
283                offsets: vec![],
284                size: 0,
285            }),
286        );
287        // Expect AttemptToUpdateReadOnly error as write_accounts can only
288        // be invoked once.
289        write_zero_accounts(
290            &tiered_storage,
291            Err(TieredStorageError::AttemptToUpdateReadOnly(
292                tiered_storage_path,
293            )),
294        );
295    }
296
297    #[test]
298    fn test_remove_on_drop() {
299        // Generate a new temp path that is guaranteed to NOT already have a file.
300        let temp_dir = tempdir().unwrap();
301        let tiered_storage_path = temp_dir.path().join("test_remove_on_drop");
302        {
303            let tiered_storage = TieredStorage::new_writable(&tiered_storage_path);
304            write_zero_accounts(
305                &tiered_storage,
306                Ok(StoredAccountsInfo {
307                    offsets: vec![],
308                    size: 0,
309                }),
310            );
311        }
312        // expect the file does not exists as it has been removed on drop
313        assert!(!tiered_storage_path.try_exists().unwrap());
314
315        {
316            let tiered_storage =
317                ManuallyDrop::new(TieredStorage::new_writable(&tiered_storage_path));
318            write_zero_accounts(
319                &tiered_storage,
320                Ok(StoredAccountsInfo {
321                    offsets: vec![],
322                    size: 0,
323                }),
324            );
325        }
326        // expect the file exists as we have ManuallyDrop this time.
327        assert!(tiered_storage_path.try_exists().unwrap());
328
329        {
330            // open again in read-only mode with ManuallyDrop.
331            _ = ManuallyDrop::new(TieredStorage::new_readonly(&tiered_storage_path).unwrap());
332        }
333        // again expect the file exists as we have ManuallyDrop.
334        assert!(tiered_storage_path.try_exists().unwrap());
335
336        {
337            // open again without ManuallyDrop in read-only mode
338            _ = TieredStorage::new_readonly(&tiered_storage_path).unwrap();
339        }
340        // expect the file does not exist as the file has been removed on drop
341        assert!(!tiered_storage_path.try_exists().unwrap());
342    }
343
344    /// The helper function for all write_accounts tests.
345    /// Currently only supports hot accounts.
346    fn do_test_write_accounts(
347        path_suffix: &str,
348        account_data_sizes: &[u64],
349        format: TieredStorageFormat,
350    ) {
351        let accounts: Vec<_> = account_data_sizes
352            .iter()
353            .map(|size| create_test_account(*size))
354            .collect();
355
356        let account_refs: Vec<_> = accounts
357            .iter()
358            .map(|account| (&account.0.pubkey, &account.1))
359            .collect();
360
361        // Slot information is not used here
362        let storable_accounts = (Slot::MAX, &account_refs[..]);
363
364        let temp_dir = tempdir().unwrap();
365        let tiered_storage_path = temp_dir.path().join(path_suffix);
366        let tiered_storage = TieredStorage::new_writable(tiered_storage_path);
367        _ = tiered_storage.write_accounts(&storable_accounts, 0, &format);
368
369        let reader = tiered_storage.reader().unwrap();
370        let num_accounts = storable_accounts.len();
371        assert_eq!(reader.num_accounts(), num_accounts);
372
373        let mut expected_accounts_map = HashMap::new();
374        for i in 0..num_accounts {
375            storable_accounts.account_default_if_zero_lamport(i, |account| {
376                expected_accounts_map.insert(*account.pubkey(), account.to_account_shared_data());
377            });
378        }
379
380        let mut verified_accounts = HashSet::new();
381        let footer = reader.footer();
382
383        const MIN_PUBKEY: Pubkey = Pubkey::new_from_array([0x00u8; 32]);
384        const MAX_PUBKEY: Pubkey = Pubkey::new_from_array([0xFFu8; 32]);
385        let mut min_pubkey = MAX_PUBKEY;
386        let mut max_pubkey = MIN_PUBKEY;
387
388        reader
389            .scan_accounts(|stored_account_meta| {
390                if let Some(account) = expected_accounts_map.get(stored_account_meta.pubkey()) {
391                    verify_test_account_with_footer(
392                        &stored_account_meta,
393                        account,
394                        stored_account_meta.pubkey(),
395                        footer,
396                    );
397                    verified_accounts.insert(*stored_account_meta.pubkey());
398                    if min_pubkey > *stored_account_meta.pubkey() {
399                        min_pubkey = *stored_account_meta.pubkey();
400                    }
401                    if max_pubkey < *stored_account_meta.pubkey() {
402                        max_pubkey = *stored_account_meta.pubkey();
403                    }
404                }
405            })
406            .unwrap();
407
408        assert_eq!(footer.min_account_address, min_pubkey);
409        assert_eq!(footer.max_account_address, max_pubkey);
410        assert!(!verified_accounts.is_empty());
411        assert_eq!(verified_accounts.len(), expected_accounts_map.len());
412    }
413
414    #[test]
415    fn test_write_accounts_small_accounts() {
416        do_test_write_accounts(
417            "test_write_accounts_small_accounts",
418            &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
419            HOT_FORMAT.clone(),
420        );
421    }
422
423    #[test]
424    fn test_write_accounts_one_max_len() {
425        do_test_write_accounts(
426            "test_write_accounts_one_max_len",
427            &[MAX_PERMITTED_DATA_LENGTH],
428            HOT_FORMAT.clone(),
429        );
430    }
431
432    #[test]
433    fn test_write_accounts_mixed_size() {
434        do_test_write_accounts(
435            "test_write_accounts_mixed_size",
436            &[
437                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1000, 2000, 3000, 4000, 9, 8, 7, 6, 5, 4, 3, 2, 1,
438            ],
439            HOT_FORMAT.clone(),
440        );
441    }
442}