solana_accounts_db/accounts_hash.rs

1use {
2    crate::{
3        accounts_db::AccountStorageEntry,
4        active_stats::{ActiveStatItem, ActiveStats},
5        ancestors::Ancestors,
6        pubkey_bins::PubkeyBinCalculator24,
7    },
8    bytemuck_derive::{Pod, Zeroable},
9    log::*,
10    rayon::prelude::*,
11    solana_lattice_hash::lt_hash::LtHash,
12    solana_measure::{measure::Measure, measure_us},
13    solana_pubkey::Pubkey,
14    solana_sdk::{
15        hash::{Hash, Hasher, HASH_BYTES},
16        rent_collector::RentCollector,
17        slot_history::Slot,
18        sysvar::epoch_schedule::EpochSchedule,
19    },
20    std::{
21        clone,
22        convert::TryInto,
23        fs::File,
24        io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write},
25        path::{Path, PathBuf},
26        sync::{
27            atomic::{AtomicU64, AtomicUsize, Ordering},
28            Arc, Mutex,
29        },
30    },
31    tempfile::tempfile_in,
32};
33pub const MERKLE_FANOUT: usize = 16;
34
35/// 1 file containing account hashes sorted by pubkey
36struct AccountHashesFile {
37    /// Writer for hash files created in the temp directory, which will be deleted on drop.
38    writer: Option<BufWriter<File>>,
39
40    /// Number of hashes in this file
41    count: usize,
42}
43
44impl AccountHashesFile {
45    /// Create a new AccountHashesFile with a writer to it.
46    fn new(dir_for_temp_cache_files: impl AsRef<Path>) -> Self {
47        let file = tempfile_in(dir_for_temp_cache_files.as_ref()).unwrap_or_else(|err| {
48            panic!(
49                "Unable to create file within {}: {err}",
50                dir_for_temp_cache_files.as_ref().display()
51            )
52        });
53        // Now that we have a file, create a writer.
54        let writer = Some(BufWriter::new(file));
55        Self { writer, count: 0 }
56    }
57
58    /// Return a file reader for the underlying file.
59    /// This function should only be called once after all writes are done.
60    /// After calling this function, the writer will be None. No more writes are allowed.
61    fn get_reader(&mut self) -> Option<Mutex<BufReader<File>>> {
62        let writer = self.writer.take();
63        if self.count == 0 {
64            // If there are no hashes, then the file is empty and we should not return a reader.
65            return None;
66        }
67
68        writer.map(|writer| {
69            let reader = BufReader::new(writer.into_inner().unwrap());
70            Mutex::new(reader)
71        })
72    }
73
74    /// # hashes stored in this file
75    fn count(&self) -> usize {
76        self.count
77    }
78
79    /// write 'hash' to the file
80    fn write(&mut self, hash: &Hash) {
81        debug_assert!(self.writer.is_some());
82        self.writer
83            .as_mut()
84            .unwrap()
85            .write_all(hash.as_ref())
86            .expect("write hash success");
87        self.count += 1;
88    }
89}
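
// A minimal usage sketch of `AccountHashesFile` (test-only, illustrative): hashes are
// appended with `write()`, and `get_reader()` is taken once after all writes are done.
// The real consumer, `CumulativeHashesFromFiles::get_data()`, seeks before reading,
// so this sketch does the same.
#[cfg(test)]
mod account_hashes_file_usage_example {
    use {
        super::*,
        std::io::{Read, Seek, SeekFrom},
        tempfile::tempdir,
    };

    #[test]
    fn write_then_read_roundtrip() {
        let dir = tempdir().unwrap();
        let mut hashes_file = AccountHashesFile::new(dir.path());
        let hash = Hash::new_from_array([42; HASH_BYTES]);
        hashes_file.write(&hash);
        hashes_file.write(&hash);
        assert_eq!(hashes_file.count(), 2);

        // After the final write, take the single reader, rewind, and read the raw bytes back.
        let reader = hashes_file.get_reader().unwrap();
        let mut guard = reader.lock().unwrap();
        guard.seek(SeekFrom::Start(0)).unwrap();
        let mut bytes = [0u8; 2 * HASH_BYTES];
        guard.read_exact(&mut bytes).unwrap();
        assert_eq!(&bytes[..HASH_BYTES], hash.as_ref());
    }
}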
90
91/// parameters to calculate accounts hash
92#[derive(Debug)]
93pub struct CalcAccountsHashConfig<'a> {
94    /// true to use a thread pool dedicated to bg operations
95    pub use_bg_thread_pool: bool,
96    /// 'ancestors' is used to get storages and, if the hash calculation needs to
97    /// consider account data that exists in the write cache, 'ancestors' is used
98    /// for that purpose as well.
99    pub ancestors: Option<&'a Ancestors>,
100    pub epoch_schedule: &'a EpochSchedule,
101    pub rent_collector: &'a RentCollector,
102    /// used for tracking down hash mismatches after the fact
103    pub store_detailed_debug_info_on_failure: bool,
104}
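
// A minimal construction sketch for `CalcAccountsHashConfig` (test-only, illustrative;
// the field values below are assumptions, not recommended settings): a foreground hash
// calculation that does not walk ancestors and keeps no extra debug state on failure.
#[cfg(test)]
#[allow(dead_code)]
fn example_calc_accounts_hash_config<'a>(
    epoch_schedule: &'a EpochSchedule,
    rent_collector: &'a RentCollector,
) -> CalcAccountsHashConfig<'a> {
    CalcAccountsHashConfig {
        use_bg_thread_pool: false,
        ancestors: None,
        epoch_schedule,
        rent_collector,
        store_detailed_debug_info_on_failure: false,
    }
}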
105
106// smallest, 3 quartiles, largest, average
107pub type StorageSizeQuartileStats = [usize; 6];
108
109#[derive(Debug, Default)]
110pub struct HashStats {
111    pub total_us: u64,
112    pub mark_time_us: u64,
113    pub cache_hash_data_us: u64,
114    pub scan_time_total_us: u64,
115    pub zeros_time_total_us: u64,
116    pub hash_time_total_us: u64,
117    pub drop_hash_files_us: u64,
118    pub sort_time_total_us: u64,
119    pub hash_total: usize,
120    pub num_snapshot_storage: usize,
121    pub scan_chunks: usize,
122    pub num_slots: usize,
123    pub num_dirty_slots: usize,
124    pub collect_snapshots_us: u64,
125    pub storage_sort_us: u64,
126    pub storage_size_quartiles: StorageSizeQuartileStats,
127    pub oldest_root: Slot,
128    pub roots_older_than_epoch: AtomicUsize,
129    pub accounts_in_roots_older_than_epoch: AtomicUsize,
130    pub append_vec_sizes_older_than_epoch: AtomicUsize,
131    pub longest_ancient_scan_us: AtomicU64,
132    pub sum_ancient_scans_us: AtomicU64,
133    pub count_ancient_scans: AtomicU64,
134    pub pubkey_bin_search_us: AtomicU64,
135    pub num_zero_lamport_accounts: AtomicU64,
136    pub num_zero_lamport_accounts_ancient: Arc<AtomicU64>,
137}
138impl HashStats {
139    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
140        let mut sum = 0;
141        let mut sizes = storages
142            .iter()
143            .map(|storage| {
144                let cap = storage.accounts.capacity() as usize;
145                sum += cap;
146                cap
147            })
148            .collect::<Vec<_>>();
149        sizes.sort_unstable();
150        let len = sizes.len();
151        self.storage_size_quartiles = if len == 0 {
152            StorageSizeQuartileStats::default()
153        } else {
154            [
155                *sizes.first().unwrap(),
156                sizes[len / 4],
157                sizes[len * 2 / 4],
158                sizes[len * 3 / 4],
159                *sizes.last().unwrap(),
160                sum / len,
161            ]
162        };
163    }
164
165    pub fn log(&self) {
166        datapoint_info!(
167            "calculate_accounts_hash_from_storages",
168            ("total_us", self.total_us, i64),
169            ("mark_time_us", self.mark_time_us, i64),
170            ("cache_hash_data_us", self.cache_hash_data_us, i64),
171            ("accounts_scan_us", self.scan_time_total_us, i64),
172            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
173            ("hash_us", self.hash_time_total_us, i64),
174            ("drop_hash_files_us", self.drop_hash_files_us, i64),
175            ("sort_us", self.sort_time_total_us, i64),
176            ("hash_total", self.hash_total, i64),
177            ("storage_sort_us", self.storage_sort_us, i64),
178            ("collect_snapshots_us", self.collect_snapshots_us, i64),
179            ("num_snapshot_storage", self.num_snapshot_storage, i64),
180            ("scan_chunks", self.scan_chunks, i64),
181            ("num_slots", self.num_slots, i64),
182            ("num_dirty_slots", self.num_dirty_slots, i64),
183            ("storage_size_min", self.storage_size_quartiles[0], i64),
184            (
185                "storage_size_quartile_1",
186                self.storage_size_quartiles[1],
187                i64
188            ),
189            (
190                "storage_size_quartile_2",
191                self.storage_size_quartiles[2],
192                i64
193            ),
194            (
195                "storage_size_quartile_3",
196                self.storage_size_quartiles[3],
197                i64
198            ),
199            ("storage_size_max", self.storage_size_quartiles[4], i64),
200            ("storage_size_avg", self.storage_size_quartiles[5], i64),
201            (
202                "roots_older_than_epoch",
203                self.roots_older_than_epoch.load(Ordering::Relaxed),
204                i64
205            ),
206            ("oldest_root", self.oldest_root, i64),
207            (
208                "longest_ancient_scan_us",
209                self.longest_ancient_scan_us.load(Ordering::Relaxed),
210                i64
211            ),
212            (
213                "sum_ancient_scans_us",
214                self.sum_ancient_scans_us.load(Ordering::Relaxed),
215                i64
216            ),
217            (
218                "count_ancient_scans",
219                self.count_ancient_scans.load(Ordering::Relaxed),
220                i64
221            ),
222            (
223                "append_vec_sizes_older_than_epoch",
224                self.append_vec_sizes_older_than_epoch
225                    .load(Ordering::Relaxed),
226                i64
227            ),
228            (
229                "accounts_in_roots_older_than_epoch",
230                self.accounts_in_roots_older_than_epoch
231                    .load(Ordering::Relaxed),
232                i64
233            ),
234            (
235                "pubkey_bin_search_us",
236                self.pubkey_bin_search_us.load(Ordering::Relaxed),
237                i64
238            ),
239            (
240                "num_zero_lamport_accounts",
241                self.num_zero_lamport_accounts.load(Ordering::Relaxed),
242                i64
243            ),
244            (
245                "num_zero_lamport_accounts_ancient",
246                self.num_zero_lamport_accounts_ancient
247                    .load(Ordering::Relaxed),
248                i64
249            ),
250        );
251    }
252}
253
254/// While scanning append vecs, this is the info that needs to be extracted, de-duped, and sorted from what is stored in each append vec.
255/// Note this can be saved/loaded during hash calculation to a memory mapped file whose contents are
256/// [CalculateHashIntermediate]
257#[repr(C)]
258#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)]
259pub struct CalculateHashIntermediate {
260    pub hash: AccountHash,
261    pub lamports: u64,
262    pub pubkey: Pubkey,
263}
264
265// In order to safely guarantee CalculateHashIntermediate is Pod, it cannot have any padding
266const _: () = assert!(
267    std::mem::size_of::<CalculateHashIntermediate>()
268        == std::mem::size_of::<AccountHash>()
269            + std::mem::size_of::<u64>()
270            + std::mem::size_of::<Pubkey>(),
271    "CalculateHashIntermediate cannot have any padding"
272);
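
// A small illustration (test-only) of why the Pod guarantee above matters: a slice of
// `CalculateHashIntermediate` can be reinterpreted as raw bytes and back without copying,
// which is how the saved/loaded intermediate data mentioned above can be read in place.
#[cfg(test)]
mod calculate_hash_intermediate_pod_example {
    use super::*;

    #[test]
    fn cast_slice_to_bytes_and_back() {
        let entries = [CalculateHashIntermediate {
            hash: AccountHash(Hash::new_from_array([1; HASH_BYTES])),
            lamports: 42,
            pubkey: Pubkey::new_from_array([2; 32]),
        }];
        let bytes: &[u8] = bytemuck::cast_slice(&entries);
        assert_eq!(bytes.len(), std::mem::size_of::<CalculateHashIntermediate>());
        let round_trip: &[CalculateHashIntermediate] = bytemuck::cast_slice(bytes);
        assert_eq!(round_trip, entries.as_slice());
    }
}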
273
274#[derive(Debug, PartialEq, Eq)]
275struct CumulativeOffset {
276    /// Since the source data is at most 2D, two indexes are enough.
277    index: [usize; 2],
278    start_offset: usize,
279}
280
281trait ExtractSliceFromRawData<'b, T: 'b> {
282    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
283}
284
285impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
286    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
287        &self[offset.index[0]][start..]
288    }
289}
290
291impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
292    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
293        &self[offset.index[0]][offset.index[1]][start..]
294    }
295}
296
297// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>)
298// This model prevents callers from having to flatten, which saves both working memory and time.
299#[derive(Default, Debug)]
300struct CumulativeOffsets {
301    cumulative_offsets: Vec<CumulativeOffset>,
302    total_count: usize,
303}
304
305/// used by merkle tree calculation to look up account hashes by overall index
306#[derive(Default)]
307struct CumulativeHashesFromFiles {
308    /// source of hashes in order
309    readers: Vec<Mutex<BufReader<File>>>,
310    /// look up reader index and offset by overall index
311    cumulative: CumulativeOffsets,
312}
313
314impl CumulativeHashesFromFiles {
315    /// Build the mapping from an overall index to (file, offset within that file), based on the length of each hash file.
316    /// Also collect readers to access the data.
317    fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
318        let mut readers = Vec::with_capacity(hashes.len());
319        let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
320            // ignores all hashfiles that have zero entries
321            let count = hash_file.count();
322            hash_file.get_reader().map(|reader| {
323                readers.push(reader);
324                count
325            })
326        }));
327        Self {
328            cumulative,
329            readers,
330        }
331    }
332
333    /// total # of items referenced
334    fn total_count(&self) -> usize {
335        self.cumulative.total_count
336    }
337
338    // return as many hashes as possible starting at the overall index 'start', up to the max buffer size.
339    // 'start' is an index into the overall set of hashes
340    fn get_data(&self, start: usize) -> Box<[Hash]> {
341        let (start, offset, num_hashes) = self.cumulative.find(start);
342        let data_source_index = offset.index[0];
343        let mut data = self.readers[data_source_index].lock().unwrap();
344
345        // unwrap here because we should never ask for data that doesn't exist.
346        // If we do, then 'cumulative' was calculated incorrectly.
347        let file_offset_in_bytes = std::mem::size_of::<Hash>() * start;
348        data.seek(SeekFrom::Start(file_offset_in_bytes.try_into().unwrap()))
349            .unwrap();
350
351        #[cfg(test)]
352        const MAX_BUFFER_SIZE_IN_HASH: usize = 4; // 4 hashes (total 128 bytes) for testing
353        #[cfg(not(test))]
354        const MAX_BUFFER_SIZE_IN_HASH: usize = 2 * 1024 * 1024; // 2M hashes (64 MiB total)
355
356        let remaining_num_hashes = num_hashes - start;
357        let num_hashes_to_read = remaining_num_hashes.min(MAX_BUFFER_SIZE_IN_HASH);
358        let mut hashes = vec![Hash::default(); num_hashes_to_read].into_boxed_slice();
359
360        // Expect the read to succeed: the slice we are reading into is sized from
361        // the hash counts recorded by the AccountHashesFile writer, so the file is
362        // guaranteed to contain at least this many hashes. Otherwise, there is a bug
363        // in generating the accumulated hash files.
364        data.read_exact(bytemuck::must_cast_slice_mut(hashes.as_mut()))
365            .expect("read hash success");
366
367        hashes
368    }
369}
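
// An illustrative sketch (test-only) of how `CumulativeHashesFromFiles` is meant to be
// consumed: per-bin hash files are turned into readers, and `get_data()` is called
// repeatedly with an overall index until `total_count()` hashes have been returned.
#[cfg(test)]
mod cumulative_hashes_from_files_example {
    use {super::*, tempfile::tempdir};

    #[test]
    fn iterate_all_hashes_across_files() {
        let dir = tempdir().unwrap();
        let mut files = Vec::new();
        // Two non-empty files (3 hashes and 2 hashes) and one empty file that is skipped.
        for count in [3usize, 0, 2] {
            let mut file = AccountHashesFile::new(dir.path());
            for i in 0..count {
                file.write(&Hash::new_from_array([i as u8; HASH_BYTES]));
            }
            files.push(file);
        }

        let cumulative = CumulativeHashesFromFiles::from_files(files);
        assert_eq!(cumulative.total_count(), 5);

        // Walk the logical, concatenated list of hashes one chunk at a time.
        let mut start = 0;
        while start < cumulative.total_count() {
            let chunk = cumulative.get_data(start);
            assert!(!chunk.is_empty());
            start += chunk.len();
        }
        assert_eq!(start, 5);
    }
}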
370
371trait AsHashSlice: std::marker::Sync + std::marker::Send + clone::Clone {
372    fn num_hashes(&self) -> usize;
373    fn get(&self, i: usize) -> &Hash;
374}
375
376impl AsHashSlice for &[Hash] {
377    fn num_hashes(&self) -> usize {
378        self.len()
379    }
380    fn get(&self, i: usize) -> &Hash {
381        &self[i]
382    }
383}
384
385impl AsHashSlice for Arc<Box<[Hash]>> {
386    fn num_hashes(&self) -> usize {
387        self.len()
388    }
389    fn get(&self, i: usize) -> &Hash {
390        &self[i]
391    }
392}
393
394impl CumulativeOffsets {
395    fn new<I>(iter: I) -> Self
396    where
397        I: Iterator<Item = usize>,
398    {
399        let mut total_count: usize = 0;
400        let cumulative_offsets: Vec<_> = iter
401            .enumerate()
402            .filter_map(|(i, len)| {
403                if len > 0 {
404                    let result = CumulativeOffset {
405                        index: [i, i],
406                        start_offset: total_count,
407                    };
408                    total_count += len;
409                    Some(result)
410                } else {
411                    None
412                }
413            })
414            .collect();
415
416        Self {
417            cumulative_offsets,
418            total_count,
419        }
420    }
421
422    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
423        Self::new(raw.iter().map(|v| v.len()))
424    }
425
426    /// find the index of the data source that contains 'start'
427    fn find_index(&self, start: usize) -> usize {
428        assert!(!self.cumulative_offsets.is_empty());
429        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
430            Ok(index) => index,
431            Err(index) => index - 1, // we would insert at index so we are before the item at index
432        }
433    }
434
435    /// given overall start index 'start',
436    /// return ('start', which is the offset into the data source at 'index',
437    ///     'index', which is the data source to use,
438    ///     and the number of hashes stored in that data source)
439    fn find(&self, start: usize) -> (usize, &CumulativeOffset, usize) {
440        let i = self.find_index(start);
441        let index = &self.cumulative_offsets[i];
442        let start = start - index.start_offset;
443
444        let i_next = i + 1;
445        let next_start_offset = if i_next == self.cumulative_offsets.len() {
446            self.total_count
447        } else {
448            let next = &self.cumulative_offsets[i_next];
449            next.start_offset
450        };
451        let num_hashes = next_start_offset - index.start_offset;
452        (start, index, num_hashes)
453    }
454
455    // return the biggest slice possible that starts at 'start'
456    fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
457    where
458        U: ExtractSliceFromRawData<'b, T> + 'b,
459    {
460        let (start, index, _) = self.find(start);
461        raw.extract(index, start)
462    }
463}
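
// A worked example (test-only) of the offset bookkeeping above: three logical sources of
// lengths 3, 0, and 2 produce two cumulative entries, and an overall index is translated
// to (offset within source, source entry, number of hashes in that source).
#[cfg(test)]
mod cumulative_offsets_example {
    use super::*;

    #[test]
    fn find_maps_overall_index_to_source() {
        let cumulative = CumulativeOffsets::new([3usize, 0, 2].into_iter());
        assert_eq!(cumulative.total_count, 5);
        // Overall index 4 lands in the third source (enumerated index 2), at offset 1 within it.
        let (offset_within_source, entry, num_in_source) = cumulative.find(4);
        assert_eq!(offset_within_source, 1);
        assert_eq!(entry.index[0], 2);
        assert_eq!(num_in_source, 2);
    }
}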
464
465#[derive(Debug)]
466pub struct AccountsHasher<'a> {
467    pub zero_lamport_accounts: ZeroLamportAccounts,
468    /// The directory where temporary cache files are put
469    pub dir_for_temp_cache_files: PathBuf,
470    pub(crate) active_stats: &'a ActiveStats,
471}
472
473/// Pointer to a specific item in chunked accounts hash slices.
474#[derive(Debug, Clone, Copy)]
475struct SlotGroupPointer {
476    /// slot group index
477    slot_group_index: usize,
478    /// offset within a slot group
479    offset: usize,
480}
481
482/// A struct for the location of an account hash item inside chunked accounts hash slices.
483#[derive(Debug)]
484struct ItemLocation<'a> {
485    /// account's pubkey
486    key: &'a Pubkey,
487    /// pointer to the item in slot group slices
488    pointer: SlotGroupPointer,
489}
490
491impl AccountsHasher<'_> {
492    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
493        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
494
495        let hash_total = cumulative_offsets.total_count;
496        let result = AccountsHasher::compute_merkle_root_from_slices(
497            hash_total,
498            MERKLE_FANOUT,
499            None,
500            |start: usize| cumulative_offsets.get_slice(&hashes, start),
501            None,
502        );
503        (result.0, hash_total)
504    }
505
506    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
507        Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
508    }
509
510    // this function avoids an infinite recursion compiler error
511    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
512        Self::compute_merkle_root_loop(hashes, fanout, |t| t)
513    }
514
515    pub fn div_ceil(x: usize, y: usize) -> usize {
516        let mut result = x / y;
517        if x % y != 0 {
518            result += 1;
519        }
520        result
521    }
522
523    // For the first iteration, there could be more items in the tuple than just hash and lamports.
524    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
525    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
526    where
527        F: Fn(&T) -> &Hash + std::marker::Sync,
528        T: std::marker::Sync,
529    {
530        if hashes.is_empty() {
531            return Hasher::default().result();
532        }
533
534        let mut time = Measure::start("time");
535
536        let total_hashes = hashes.len();
537        let chunks = Self::div_ceil(total_hashes, fanout);
538
539        let result: Vec<_> = (0..chunks)
540            .into_par_iter()
541            .map(|i| {
542                let start_index = i * fanout;
543                let end_index = std::cmp::min(start_index + fanout, total_hashes);
544
545                let mut hasher = Hasher::default();
546                for item in hashes.iter().take(end_index).skip(start_index) {
547                    let h = extractor(item);
548                    hasher.hash(h.as_ref());
549                }
550
551                hasher.result()
552            })
553            .collect();
554        time.stop();
555        debug!("hashing {} {}", total_hashes, time);
556
557        if result.len() == 1 {
558            result[0]
559        } else {
560            Self::compute_merkle_root_recurse(result, fanout)
561        }
562    }
563
564    fn calculate_three_level_chunks(
565        total_hashes: usize,
566        fanout: usize,
567        max_levels_per_pass: Option<usize>,
568        specific_level_count: Option<usize>,
569    ) -> (usize, usize, bool) {
570        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
571        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
572
573        // Only use the 3 level optimization if we have at least 4 levels of data.
574        // Otherwise, we'll be serializing a parallel operation.
575        let threshold = target * fanout;
576        let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
577            && total_hashes >= threshold;
578        if three_level {
579            if let Some(specific_level_count_value) = specific_level_count {
580                three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
581            }
582        }
583        let (num_hashes_per_chunk, levels_hashed) = if three_level {
584            (target, THREE_LEVEL_OPTIMIZATION)
585        } else {
586            (fanout, 1)
587        };
588        (num_hashes_per_chunk, levels_hashed, three_level)
589    }
590
591    // This function is designed to allow hashes to be located across multiple, possibly nested, vecs.
592    // The caller provides a function to return a slice from the source data.
593    fn compute_merkle_root_from_slices<'b, F, U>(
594        total_hashes: usize,
595        fanout: usize,
596        max_levels_per_pass: Option<usize>,
597        get_hash_slice_starting_at_index: F,
598        specific_level_count: Option<usize>,
599    ) -> (Hash, Vec<Hash>)
600    where
601        // returns a slice of hashes starting at the given overall index
602        F: Fn(usize) -> U + std::marker::Sync,
603        U: AsHashSlice + 'b,
604    {
605        if total_hashes == 0 {
606            return (Hasher::default().result(), vec![]);
607        }
608
609        let mut time = Measure::start("time");
610
611        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
612            total_hashes,
613            fanout,
614            max_levels_per_pass,
615            specific_level_count,
616        );
617
618        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
619
620        // initial fetch - could return entire slice
621        let data = get_hash_slice_starting_at_index(0);
622        let data_len = data.num_hashes();
623
624        let result: Vec<_> = (0..chunks)
625            .into_par_iter()
626            .map(|i| {
627                // summary:
628                // this closure computes 1 or 3 levels of merkle tree (all chunks will be 1 or all will be 3)
629                // for a subset (our chunk) of the input data [start_index..end_index]
630
631                // index into get_hash_slice_starting_at_index where this chunk's range begins
632                let start_index = i * num_hashes_per_chunk;
633                // index into get_hash_slice_starting_at_index where this chunk's range ends
634                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);
635
636                // will compute the final result for this closure
637                let mut hasher = Hasher::default();
638
639                // index into 'data' where we are currently pulling data
640                // if we exhaust our data, then we will request a new slice, and data_index resets to 0, the beginning of the new slice
641                let mut data_index = start_index;
642                // source data, which we may refresh when we exhaust
643                let mut data = data.clone();
644                // len of the source data
645                let mut data_len = data_len;
646
647                if !three_level {
648                    // 1 group of fanout
649                    // The result of this loop is a single hash value from fanout input hashes.
650                    for i in start_index..end_index {
651                        if data_index >= data_len {
652                            // we exhausted our data, fetch next slice starting at i
653                            data = get_hash_slice_starting_at_index(i);
654                            data_len = data.num_hashes();
655                            data_index = 0;
656                        }
657                        hasher.hash(data.get(data_index).as_ref());
658                        data_index += 1;
659                    }
660                } else {
661                    // hash 3 levels of fanout simultaneously.
662                    // This codepath produces 1 hash value for between 1..=fanout^3 input hashes.
663                    // It is equivalent to running the normal merkle tree calculation 3 iterations on the input.
664                    //
665                    // big idea:
666                    //  merkle trees usually reduce the input vector by a factor of fanout with each iteration
667                    //  example with fanout 2:
668                    //   start:     [0,1,2,3,4,5,6,7]      in our case: [...16M...] or really, 1B
669                    //   iteration0 [.5, 2.5, 4.5, 6.5]                 [... 1M...]
670                    //   iteration1 [1.5, 5.5]                          [...65k...]
671                    //   iteration2 3.5                                 [...4k... ]
672                    //  So iteration 0 consumes N elements, hashes them in groups of 'fanout' and produces a vector of N/fanout elements
673                    //   and the process repeats until there is only 1 hash left.
674                    //
675                    //  With the three_level code path, we make each chunk we iterate of size fanout^3 (4096)
676                    //  So, the input could be 16M hashes and the output will be 4k hashes, or N/fanout^3
677                    //  The goal is to reduce the amount of data that has to be constructed and held in memory.
678                    //  When we know we have enough hashes, then, in 1 pass, we hash 3 levels simultaneously, storing far fewer intermediate hashes.
679                    //
680                    // Now, some details:
681                    // The result of this loop is a single hash value from fanout^3 input hashes.
682                    // concepts:
683                    //  what we're conceptually hashing: "raw_hashes"[start_index..end_index]
684                    //   example: [a,b,c,d,e,f]
685                    //   but... hashes[] may really be multiple vectors that are pieced together.
686                    //   example: [[a,b],[c],[d,e,f]]
687                    //   get_hash_slice_starting_at_index(any_index) abstracts that and returns a slice starting at raw_hashes[any_index..]
688                    //   such that the end of get_hash_slice_starting_at_index may be <, >, or = end_index
689                    //   example: get_hash_slice_starting_at_index(1) returns [b]
690                    //            get_hash_slice_starting_at_index(3) returns [d,e,f]
691                    // This code is basically 3 iterations of merkle tree hashing occurring simultaneously.
692                    // The first fanout raw hashes are hashed in hasher_k. This is iteration0
693                    // Once hasher_k has hashed fanout hashes, hasher_k's result hash is hashed in hasher_j and then discarded
694                    // hasher_k then starts over fresh and hashes the next fanout raw hashes. This is iteration0 again for a new set of data.
695                    // Once hasher_j has hashed fanout hashes (from k), hasher_j's result hash is hashed in hasher and then discarded
696                    // Once hasher has hashed fanout hashes (from j), then the result of hasher is the hash for fanout^3 raw hashes.
697                    // If there are < fanout^3 hashes, then this code stops when it runs out of raw hashes and returns whatever it hashed.
698                    // This is always how the very last elements work in a merkle tree.
699                    let mut i = start_index;
700                    while i < end_index {
701                        let mut hasher_j = Hasher::default();
702                        for _j in 0..fanout {
703                            let mut hasher_k = Hasher::default();
704                            let end = std::cmp::min(end_index - i, fanout);
705                            for _k in 0..end {
706                                if data_index >= data_len {
707                                    // we exhausted our data, fetch next slice starting at i
708                                    data = get_hash_slice_starting_at_index(i);
709                                    data_len = data.num_hashes();
710                                    data_index = 0;
711                                }
712                                hasher_k.hash(data.get(data_index).as_ref());
713                                data_index += 1;
714                                i += 1;
715                            }
716                            hasher_j.hash(hasher_k.result().as_ref());
717                            if i >= end_index {
718                                break;
719                            }
720                        }
721                        hasher.hash(hasher_j.result().as_ref());
722                    }
723                }
724
725                hasher.result()
726            })
727            .collect();
728        time.stop();
729        debug!("hashing {} {}", total_hashes, time);
730
731        if let Some(mut specific_level_count_value) = specific_level_count {
732            specific_level_count_value -= levels_hashed;
733            if specific_level_count_value == 0 {
734                (Hash::default(), result)
735            } else {
736                assert!(specific_level_count_value > 0);
737                // We did not hash the number of levels required by 'specific_level_count', so repeat
738                Self::compute_merkle_root_from_slices_recurse(
739                    result,
740                    fanout,
741                    max_levels_per_pass,
742                    Some(specific_level_count_value),
743                )
744            }
745        } else {
746            (
747                if result.len() == 1 {
748                    result[0]
749                } else {
750                    Self::compute_merkle_root_recurse(result, fanout)
751                },
752                vec![], // no intermediate results needed by caller
753            )
754        }
755    }
756
757    fn compute_merkle_root_from_slices_recurse(
758        hashes: Vec<Hash>,
759        fanout: usize,
760        max_levels_per_pass: Option<usize>,
761        specific_level_count: Option<usize>,
762    ) -> (Hash, Vec<Hash>) {
763        Self::compute_merkle_root_from_slices(
764            hashes.len(),
765            fanout,
766            max_levels_per_pass,
767            |start| &hashes[start..],
768            specific_level_count,
769        )
770    }
771
772    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, AccountHash)>) -> Hash {
773        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
774        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1 .0)
775    }
776
777    pub fn compare_two_hash_entries(
778        a: &CalculateHashIntermediate,
779        b: &CalculateHashIntermediate,
780    ) -> std::cmp::Ordering {
781        // note partial_cmp only returns None with floating point comparisons
782        a.pubkey.partial_cmp(&b.pubkey).unwrap()
783    }
784
785    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
786        balance.try_into().unwrap_or_else(|_| {
787            panic!("overflow is detected while summing capitalization: {balance}")
788        })
789    }
790
791    /// returns:
792    /// Vec, with one entry per bin
793    ///  each entry is an AccountHashesFile containing that bin's hashes in pubkey order
794    /// If the returned Vec<AccountHashesFile> were flattened, it would contain all hashes, in pubkey order.
795    fn de_dup_accounts(
796        &self,
797        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
798        stats: &mut HashStats,
799        max_bin: usize,
800    ) -> (Vec<AccountHashesFile>, u64) {
801        // 1. eliminate zero lamport accounts
802        // 2. pick the highest slot (or, when slots are equal, the highest write version) of each pubkey
803        // 3. produce this output:
804        // a. vec: one AccountHashesFile per pubkey bin, in pubkey order
805        //      each file: individual hashes in pubkey order, 1 hash per account
806        // b. lamport sum
807        let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);
808
809        #[derive(Default)]
810        struct DedupResult {
811            hashes_files: Vec<AccountHashesFile>,
812            hashes_count: usize,
813            lamports_sum: u64,
814        }
815
816        let mut zeros = Measure::start("eliminate zeros");
817        let DedupResult {
818            hashes_files: hashes,
819            hashes_count: hash_total,
820            lamports_sum: lamports_total,
821        } = (0..max_bin)
822            .into_par_iter()
823            .fold(DedupResult::default, |mut accum, bin| {
824                let (hashes_file, lamports_bin) =
825                    self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin, max_bin, stats);
826
827                accum.lamports_sum = accum
828                    .lamports_sum
829                    .checked_add(lamports_bin)
830                    .expect("summing capitalization cannot overflow");
831                accum.hashes_count += hashes_file.count();
832                accum.hashes_files.push(hashes_file);
833                accum
834            })
835            .reduce(
836                || {
837                    DedupResult {
838                        // Allocate with Vec::new() so that no allocation actually happens. See
839                        // https://github.com/anza-xyz/agave/pull/1308.
840                        hashes_files: Vec::new(),
841                        ..Default::default()
842                    }
843                },
844                |mut a, mut b| {
845                    a.lamports_sum = a
846                        .lamports_sum
847                        .checked_add(b.lamports_sum)
848                        .expect("summing capitalization cannot overflow");
849                    a.hashes_count += b.hashes_count;
850                    a.hashes_files.append(&mut b.hashes_files);
851                    a
852                },
853            );
854        zeros.stop();
855        stats.zeros_time_total_us += zeros.as_us();
856        stats.hash_total += hash_total;
857        (hashes, lamports_total)
858    }
859
860    /// Given the item location, return the item in the `CalculateHashIntermediate` slices and the next item location in the same bin.
861    /// If the end of the `CalculateHashIntermediate` slice is reached or all the accounts in the current bin have been exhausted, return `None` for the next item location.
862    fn get_item<'b>(
863        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
864        bin: usize,
865        binner: &PubkeyBinCalculator24,
866        item_loc: &ItemLocation<'b>,
867    ) -> (&'b CalculateHashIntermediate, Option<ItemLocation<'b>>) {
868        let division_data = &sorted_data_by_pubkey[item_loc.pointer.slot_group_index];
869        let mut index = item_loc.pointer.offset;
870        index += 1;
871        let mut next = None;
872
873        while index < division_data.len() {
874            // still more items where we found the previous key, so just increment the index for that slot group, skipping all pubkeys that are equal
875            let next_key = &division_data[index].pubkey;
876            if next_key == item_loc.key {
877                index += 1;
878                continue; // duplicate entries of same pubkey, so keep skipping
879            }
880
881            if binner.bin_from_pubkey(next_key) > bin {
882                // the next pubkey is not in our bin
883                break;
884            }
885
886            // point to the next pubkey > key
887            next = Some(ItemLocation {
888                key: next_key,
889                pointer: SlotGroupPointer {
890                    slot_group_index: item_loc.pointer.slot_group_index,
891                    offset: index,
892                },
893            });
894            break;
895        }
896
897        // return the item that was originally requested (the one just before 'index')
898        (&division_data[index - 1], next)
899    }
900
901    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
902    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
903    fn binary_search_for_first_pubkey_in_bin(
904        hash_data: &[CalculateHashIntermediate],
905        bin: usize,
906        binner: &PubkeyBinCalculator24,
907    ) -> Option<usize> {
908        let potential_index = if bin == 0 {
909            // `bin` == 0 is special because there cannot be `bin`-1
910            // so either element[0] is in bin 0 or there is nothing in bin 0.
911            0
912        } else {
913            // search for the first pubkey that is in `bin`
914            // There could be many keys in a row with the same `bin`.
915            // So, for each pubkey, use calculated_bin * 2 + 1 as the bin of a given pubkey for binary search.
916            // And compare the bin of each pubkey with `bin` * 2.
917            // So all keys that are in `bin` will compare as `bin` * 2 + 1
918            // all keys that are in `bin`-1 will compare as ((`bin` - 1) * 2 + 1), which is (`bin` * 2 - 1)
919            // NO keys will compare as `bin` * 2 because we add 1.
920            // So, the binary search will NEVER return Ok(found_index), but will always return Err(index of first key in `bin`).
921            // Note that if NO key is in `bin`, then the key at the found index will be in a bin > `bin`, so return None.
922            let just_prior_to_desired_bin = bin * 2;
923            let search = hash_data.binary_search_by(|data| {
924                (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
925            });
926            // returns Err(index where item should be) since the desired item will never exist
927            search.expect_err("it is impossible to find a matching bin")
928        };
929        // note that `potential_index` could be == hash_data.len(). This indicates the first key in `bin` would be
930        // after the data we have. Thus, no key is in `bin`.
931        // This also handles the case where `hash_data` is empty, since len() will be 0 and `get` will return None.
932        hash_data.get(potential_index).and_then(|potential_data| {
933            (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
934        })
935    }
936
937    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
938    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
939    fn find_first_pubkey_in_bin(
940        hash_data: &[CalculateHashIntermediate],
941        bin: usize,
942        bins: usize,
943        binner: &PubkeyBinCalculator24,
944        stats: &HashStats,
945    ) -> Option<usize> {
946        if hash_data.is_empty() {
947            return None;
948        }
949        let (result, us) = measure_us!({
950            // assume uniform distribution of pubkeys and choose first guess based on bin we're looking for
951            let i = hash_data.len() * bin / bins;
952            let estimate = &hash_data[i];
953
954            let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
955            let range = if pubkey_bin >= bin {
956                // the pubkey at i matches or is too large, so look at indexes <= i for the first pubkey in the right bin
957                // (the range end is exclusive, so i + 1 ensures index i itself is included)
958                0..(i + 1)
959            } else {
960                // i pubkey is too small, so look after i
961                (i + 1)..hash_data.len()
962            };
963            Some(
964                range.start +
965                // binary search the subset
966                Self::binary_search_for_first_pubkey_in_bin(
967                    &hash_data[range],
968                    bin,
969                    binner,
970                )?,
971            )
972        });
973        stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
974        result
975    }
976
977    /// Return the working_set and max number of pubkeys for hash dedup.
978    /// `working_set` holds one SlotGroupPointer {slot_group_index, offset} per slot group, sorted by account pubkey in descending order.
979    fn initialize_dedup_working_set(
980        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
981        pubkey_bin: usize,
982        bins: usize,
983        binner: &PubkeyBinCalculator24,
984        stats: &HashStats,
985    ) -> (
986        Vec<SlotGroupPointer>, /* working_set */
987        usize,                 /* max_inclusive_num_pubkeys */
988    ) {
989        // working_set holds the lowest items for each slot_group sorted by pubkey descending (min_key is the last)
990        let mut working_set: Vec<SlotGroupPointer> = Vec::default();
991
992        // Initialize 'working_set', which holds the current lowest item in each slot group.
993        // `working_set` should be initialized in reverse order of slot_groups. Later slot_groups are
994        // processed first. For each slot_group, if the lowest item for the current slot group is
995        // already in working_set (i.e. inserted by a later slot group), the next lowest item
996        // in this slot group is searched and checked, until either one that is `not` in the
997        // working_set is found, which will then be inserted, or no next lowest item is found.
998        // Iterating in reverse order of slot_group will guarantee that each slot group will be
999        // scanned only once and scanned continuously. Therefore, it can achieve better data
1000        // locality during the scan.
1001        let max_inclusive_num_pubkeys = sorted_data_by_pubkey
1002            .iter()
1003            .enumerate()
1004            .rev()
1005            .map(|(i, hash_data)| {
1006                let first_pubkey_in_bin =
1007                    Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, binner, stats);
1008
1009                if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
1010                    let mut next = Some(ItemLocation {
1011                        key: &hash_data[first_pubkey_in_bin].pubkey,
1012                        pointer: SlotGroupPointer {
1013                            slot_group_index: i,
1014                            offset: first_pubkey_in_bin,
1015                        },
1016                    });
1017
1018                    Self::add_next_item(
1019                        &mut next,
1020                        &mut working_set,
1021                        sorted_data_by_pubkey,
1022                        pubkey_bin,
1023                        binner,
1024                    );
1025
1026                    let mut first_pubkey_in_next_bin = first_pubkey_in_bin + 1;
1027                    while first_pubkey_in_next_bin < hash_data.len() {
1028                        if binner.bin_from_pubkey(&hash_data[first_pubkey_in_next_bin].pubkey)
1029                            != pubkey_bin
1030                        {
1031                            break;
1032                        }
1033                        first_pubkey_in_next_bin += 1;
1034                    }
1035                    first_pubkey_in_next_bin - first_pubkey_in_bin
1036                } else {
1037                    0
1038                }
1039            })
1040            .sum::<usize>();
1041
1042        (working_set, max_inclusive_num_pubkeys)
1043    }
1044
1045    /// Add next item into hash dedup working set
1046    fn add_next_item<'b>(
1047        next: &mut Option<ItemLocation<'b>>,
1048        working_set: &mut Vec<SlotGroupPointer>,
1049        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
1050        pubkey_bin: usize,
1051        binner: &PubkeyBinCalculator24,
1052    ) {
1053        // looping to add next item to working set
1054        while let Some(ItemLocation { key, pointer }) = std::mem::take(next) {
1055            // if the new key is less than the min key in the working set, skip the binary search and
1056            // push the item directly onto the end of the vec
1057            if let Some(SlotGroupPointer {
1058                slot_group_index: current_min_slot_group_index,
1059                offset: current_min_offset,
1060            }) = working_set.last()
1061            {
1062                let current_min_key = &sorted_data_by_pubkey[*current_min_slot_group_index]
1063                    [*current_min_offset]
1064                    .pubkey;
1065                if key < current_min_key {
1066                    working_set.push(pointer);
1067                    break;
1068                }
1069            }
1070
1071            let found = working_set.binary_search_by(|pointer| {
1072                let prob = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1073                (*key).cmp(prob)
1074            });
1075
1076            match found {
1077                Err(index) => {
1078                    // found a new key, insert it into the working_set. This is O(n/2) on
1079                    // average. Theoretically, this operation could be expensive and may be further
1080                    // optimized in the future.
1081                    working_set.insert(index, pointer);
1082                    break;
1083                }
1084                Ok(index) => {
1085                    let found = &mut working_set[index];
1086                    if found.slot_group_index > pointer.slot_group_index {
1087                        // The working_set already contains this key from a later slot group, which takes
1088                        // precedence, so advance to the next key in the current slot group.
1089                        let (_item, new_next) = Self::get_item(
1090                            sorted_data_by_pubkey,
1091                            pubkey_bin,
1092                            binner,
1093                            &ItemLocation { key, pointer },
1094                        );
1095                        *next = new_next;
1096                    } else {
1097                        // A previous slot group contains this key; replace it, and look for the next item in that previous slot group.
1098                        let (_item, new_next) = Self::get_item(
1099                            sorted_data_by_pubkey,
1100                            pubkey_bin,
1101                            binner,
1102                            &ItemLocation {
1103                                key,
1104                                pointer: *found,
1105                            },
1106                        );
1107                        *found = pointer;
1108                        *next = new_next;
1109                    }
1110                }
1111            }
1112        }
1113    }
1114
1115    // go through: [..][pubkey_bin][..] and return hashes and lamport sum
1116    //   slot groups^                ^accounts found in a slot group, sorted by pubkey, higher slot, write_version
1117    // 1. handle zero lamport accounts
1118    // 2. pick the highest slot (or, when slots are equal, the highest write version) of each pubkey
1119    // 3. produce this output:
1120    //   a. AccountHashesFile: individual account hashes in pubkey order
1121    //   b. lamport sum
1122    fn de_dup_accounts_in_parallel(
1123        &self,
1124        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1125        pubkey_bin: usize,
1126        bins: usize,
1127        stats: &HashStats,
1128    ) -> (AccountHashesFile, u64) {
1129        let binner = PubkeyBinCalculator24::new(bins);
1130
1131        // working_set holds the lowest items for each slot_group, sorted by pubkey descending (min_key is the last)
1132        let (mut working_set, _max_inclusive_num_pubkeys) = Self::initialize_dedup_working_set(
1133            sorted_data_by_pubkey,
1134            pubkey_bin,
1135            bins,
1136            &binner,
1137            stats,
1138        );
1139
1140        let mut hashes = AccountHashesFile::new(&self.dir_for_temp_cache_files);
1141
1142        let mut overall_sum: u64 = 0;
1143
1144        while let Some(pointer) = working_set.pop() {
1145            let key = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1146
1147            // get the min item, add lamports, get hash
1148            let (item, mut next) = Self::get_item(
1149                sorted_data_by_pubkey,
1150                pubkey_bin,
1151                &binner,
1152                &ItemLocation { key, pointer },
1153            );
1154
1155            // add lamports and get hash
1156            if item.lamports != 0 {
1157                overall_sum = overall_sum
1158                    .checked_add(item.lamports)
1159                    .expect("summing lamports cannot overflow");
1160                hashes.write(&item.hash.0);
1161            } else {
1162                stats
1163                    .num_zero_lamport_accounts
1164                    .fetch_add(1, Ordering::Relaxed);
1165                // if lamports == 0, check if they should be included
1166                if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
1167                    // For incremental accounts hash, the hash of a zero lamport account is
1168                    // the hash of its pubkey
1169                    let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
1170                    let hash = Hash::new_from_array(hash.into());
1171                    hashes.write(&hash);
1172                }
1173            }
1174
1175            Self::add_next_item(
1176                &mut next,
1177                &mut working_set,
1178                sorted_data_by_pubkey,
1179                pubkey_bin,
1180                &binner,
1181            );
1182        }
1183
1184        (hashes, overall_sum)
1185    }
1186
1187    /// input:
1188    /// vec: groups of slot data, ordered by Slot (low to high)
1189    ///   vec: [..] - items found in that slot range, sorted by Pubkey, then higher Slot, then higher Write version (when pubkeys are equal)
1190    pub fn rest_of_hash_calculation(
1191        &self,
1192        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1193        bins: usize,
1194        stats: &mut HashStats,
1195    ) -> (Hash, u64) {
1196        let (hashes, total_lamports) = self.de_dup_accounts(sorted_data_by_pubkey, stats, bins);
1197
1198        let cumulative = CumulativeHashesFromFiles::from_files(hashes);
1199
1200        let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
1201        let mut hash_time = Measure::start("hash");
1202        let (hash, _) = Self::compute_merkle_root_from_slices(
1203            cumulative.total_count(),
1204            MERKLE_FANOUT,
1205            None,
1206            |start| Arc::new(cumulative.get_data(start)),
1207            None,
1208        );
1209        hash_time.stop();
1210        stats.hash_time_total_us += hash_time.as_us();
1211
1212        let (_, drop_us) = measure_us!(drop(cumulative));
1213        stats.drop_hash_files_us += drop_us;
1214        (hash, total_lamports)
1215    }
1216}
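
// Illustrative sketches (test-only) of the merkle-root helpers above. These are drawn only
// from the code in this file and make no claims beyond it.
#[cfg(test)]
mod merkle_root_examples {
    use super::*;

    // `calculate_hash` over nested vecs should agree with hashing the flattened data
    // directly, since `CumulativeOffsets` only changes how slices are fetched.
    #[test]
    fn calculate_hash_matches_flattened_input() {
        let hashes: Vec<Hash> = (0..20u8)
            .map(|i| Hash::new_from_array([i; HASH_BYTES]))
            .collect();
        let nested = vec![hashes[..7].to_vec(), vec![], hashes[7..].to_vec()];

        let (root_from_nested, count) = AccountsHasher::calculate_hash(nested);
        let root_from_flat = AccountsHasher::compute_merkle_root_recurse(hashes, MERKLE_FANOUT);

        assert_eq!(count, 20);
        assert_eq!(root_from_nested, root_from_flat);
    }

    // With MERKLE_FANOUT = 16, the three-level optimization hashes chunks of
    // 16^3 = 4096 inputs at a time, and only kicks in once there are at least
    // 16^4 = 65536 hashes (i.e. at least 4 levels of tree).
    #[test]
    fn three_level_chunk_parameters() {
        assert_eq!(
            AccountsHasher::calculate_three_level_chunks(1_000_000, MERKLE_FANOUT, None, None),
            (4096, 3, true)
        );
        assert_eq!(
            AccountsHasher::calculate_three_level_chunks(1_000, MERKLE_FANOUT, None, None),
            (MERKLE_FANOUT, 1, false)
        );
    }
}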
1217
1218/// How should zero-lamport accounts be treated by the accounts hasher?
1219#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1220pub enum ZeroLamportAccounts {
1221    Excluded,
1222    Included,
1223}
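
// An illustrative sketch (test-only) of how this setting changes `de_dup_accounts_in_parallel`:
// `Excluded` (full accounts hash) skips a zero-lamport account entirely, while `Included`
// (incremental accounts hash) writes blake3(pubkey) in its place.
#[cfg(test)]
mod zero_lamport_accounts_example {
    use {super::*, tempfile::tempdir};

    #[test]
    fn included_writes_a_hash_for_zero_lamport_accounts() {
        let dir = tempdir().unwrap();
        let active_stats = ActiveStats::default();
        let zero_lamport_account = vec![CalculateHashIntermediate {
            hash: AccountHash(Hash::default()),
            lamports: 0,
            pubkey: Pubkey::new_from_array([7; 32]),
        }];
        let slices: Vec<&[CalculateHashIntermediate]> = vec![&zero_lamport_account];

        for (zero_lamport_accounts, expected_hash_count) in [
            (ZeroLamportAccounts::Excluded, 0usize),
            (ZeroLamportAccounts::Included, 1usize),
        ] {
            let hasher = AccountsHasher {
                zero_lamport_accounts,
                dir_for_temp_cache_files: dir.path().to_path_buf(),
                active_stats: &active_stats,
            };
            let (hashes_file, lamports) =
                hasher.de_dup_accounts_in_parallel(&slices, 0, 1, &HashStats::default());
            assert_eq!(lamports, 0);
            assert_eq!(hashes_file.count(), expected_hash_count);
        }
    }
}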
1224
1225/// Hash of an account
1226#[repr(transparent)]
1227#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1228#[derive(Debug, Copy, Clone, Eq, PartialEq, Pod, Zeroable)]
1229pub struct AccountHash(pub Hash);
1230
1231// Ensure the newtype wrapper never changes size from the underlying Hash
1232// This also ensures there are no padding bytes, which is required to safely implement Pod
1233const _: () = assert!(std::mem::size_of::<AccountHash>() == std::mem::size_of::<Hash>());
1234
1235/// The AccountHash for a zero-lamport account
1236pub const ZERO_LAMPORT_ACCOUNT_HASH: AccountHash =
1237    AccountHash(Hash::new_from_array([0; HASH_BYTES]));
1238
1239/// Lattice hash of an account
1240#[derive(Debug, Clone, Eq, PartialEq)]
1241pub struct AccountLtHash(pub LtHash);
1242
1243/// The AccountLtHash for a zero-lamport account
1244pub const ZERO_LAMPORT_ACCOUNT_LT_HASH: AccountLtHash = AccountLtHash(LtHash::identity());
1245
1246/// Lattice hash of all accounts
1247#[derive(Debug, Clone, Eq, PartialEq)]
1248pub struct AccountsLtHash(pub LtHash);
1249
1250/// Hash of accounts
1251#[derive(Debug, Clone, Eq, PartialEq)]
1252pub enum MerkleOrLatticeAccountsHash {
1253    /// Merkle-based hash of accounts
1254    Merkle(AccountsHashKind),
1255    /// Lattice-based hash of accounts
1256    Lattice,
1257}
1258
1259/// Hash of accounts
1260#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1261pub enum AccountsHashKind {
1262    Full(AccountsHash),
1263    Incremental(IncrementalAccountsHash),
1264}
1265impl AccountsHashKind {
1266    pub fn as_hash(&self) -> &Hash {
1267        match self {
1268            AccountsHashKind::Full(AccountsHash(hash))
1269            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
1270        }
1271    }
1272}
1273impl From<AccountsHash> for AccountsHashKind {
1274    fn from(accounts_hash: AccountsHash) -> Self {
1275        AccountsHashKind::Full(accounts_hash)
1276    }
1277}
1278impl From<IncrementalAccountsHash> for AccountsHashKind {
1279    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
1280        AccountsHashKind::Incremental(incremental_accounts_hash)
1281    }
1282}
1283
1284/// Hash of accounts
1285#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1286pub struct AccountsHash(pub Hash);
1287/// Hash of accounts that includes zero-lamport accounts
1288/// Used with incremental snapshots
1289#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1290pub struct IncrementalAccountsHash(pub Hash);
1291
1292/// Hash of accounts written in a single slot
1293#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1294pub struct AccountsDeltaHash(pub Hash);
1295
1296/// Snapshot serde-safe accounts delta hash
1297#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1298#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1299pub struct SerdeAccountsDeltaHash(pub Hash);
1300
1301impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
1302    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
1303        Self(accounts_delta_hash.0)
1304    }
1305}
1306impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
1307    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
1308        Self(accounts_delta_hash.0)
1309    }
1310}
1311
1312/// Snapshot serde-safe accounts hash
1313#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1314#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1315pub struct SerdeAccountsHash(pub Hash);
1316
1317impl From<SerdeAccountsHash> for AccountsHash {
1318    fn from(accounts_hash: SerdeAccountsHash) -> Self {
1319        Self(accounts_hash.0)
1320    }
1321}
1322impl From<AccountsHash> for SerdeAccountsHash {
1323    fn from(accounts_hash: AccountsHash) -> Self {
1324        Self(accounts_hash.0)
1325    }
1326}
1327
1328/// Snapshot serde-safe incremental accounts hash
1329#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1330#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1331pub struct SerdeIncrementalAccountsHash(pub Hash);
1332
1333impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
1334    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
1335        Self(incremental_accounts_hash.0)
1336    }
1337}
1338impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
1339    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
1340        Self(incremental_accounts_hash.0)
1341    }
1342}
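
// Illustrative sketch (added here, not in the original source): the serde-safe wrappers
// are what snapshots serialize; converting to and from the in-memory types is lossless:
//
//     let accounts_hash = AccountsHash(Hash::new_unique());
//     let serde_hash = SerdeAccountsHash::from(accounts_hash);
//     assert_eq!(AccountsHash::from(serde_hash), accounts_hash);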
1343
1344#[cfg(test)]
1345mod tests {
1346    use {
1347        super::*, crate::accounts_db::DEFAULT_HASH_CALCULATION_PUBKEY_BINS, itertools::Itertools,
1348        std::str::FromStr, tempfile::tempdir,
1349    };
1350
1351    lazy_static! {
1352        static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
1353    }
1354
1355    impl AccountsHasher<'_> {
1356        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1357            Self {
1358                zero_lamport_accounts: ZeroLamportAccounts::Excluded,
1359                dir_for_temp_cache_files,
1360                active_stats: &ACTIVE_STATS,
1361            }
1362        }
1363    }
1364
1365    impl CumulativeOffsets {
1366        fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
1367            let mut total_count: usize = 0;
1368            let mut cumulative_offsets = Vec::with_capacity(0);
1369            for (i, v_outer) in raw.iter().enumerate() {
1370                for (j, v) in v_outer.iter().enumerate() {
1371                    let len = v.len();
1372                    if len > 0 {
1373                        if cumulative_offsets.is_empty() {
1374                            // the first non-empty inner vector we find approximates the overall rectangular shape, so use it to reserve capacity
1375                            cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
1376                        }
1377                        cumulative_offsets.push(CumulativeOffset {
1378                            index: [i, j],
1379                            start_offset: total_count,
1380                        });
1381                        total_count += len;
1382                    }
1383                }
1384            }
1385
1386            Self {
1387                cumulative_offsets,
1388                total_count,
1389            }
1390        }
1391    }
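
    // Worked example (added for clarity, derived from the helper above): for
    // raw = [[ [a, b], [] ], [ [c] ]], only non-empty inner vectors get an entry, so
    // from_raw_2d produces
    //     cumulative_offsets = [ { index: [0, 0], start_offset: 0 },
    //                            { index: [1, 0], start_offset: 2 } ]
    //     total_count = 3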
1392
1393    #[test]
1394    fn test_find_first_pubkey_in_bin() {
1395        let stats = HashStats::default();
1396        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
1397            let bins: usize = bins;
1398            let binner = PubkeyBinCalculator24::new(bins);
1399
1400            let mut count = 0usize;
1401            // # pubkeys in each bin are permutations of these
1402            // 0 means none in this bin
1403            // a large count (20) means the found key will land well before or after the index expected under a uniform-distribution assumption
1404            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
1405                count += 1;
1406                let hash_data = counts
1407                    .iter()
1408                    .enumerate()
1409                    .flat_map(|(bin, count)| {
1410                        (0..*count).map(move |_| {
1411                            let binner = PubkeyBinCalculator24::new(bins);
1412                            CalculateHashIntermediate {
1413                                hash: AccountHash(Hash::default()),
1414                                lamports: 0,
1415                                pubkey: binner.lowest_pubkey_from_bin(bin, bins),
1416                            }
1417                        })
1418                    })
1419                    .collect::<Vec<_>>();
1420                // look for the first pubkey in each bin
1421                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
1422                    let first = AccountsHasher::find_first_pubkey_in_bin(
1423                        &hash_data, bin, bins, &binner, &stats,
1424                    );
1425                    // test both functions
1426                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
1427                        &hash_data, bin, &binner,
1428                    );
1429                    assert_eq!(first, first_again);
1430                    assert_eq!(first.is_none(), count_in_bin == &0);
1431                    if let Some(first) = first {
1432                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
1433                        if first > 0 {
1434                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
1435                        }
1436                    }
1437                }
1438            }
1439            assert_eq!(
1440                count, expected_count,
1441                "too few iterations in test. bins: {bins}"
1442            );
1443        }
1444    }
1445
1446    #[test]
1447    fn test_account_hashes_file() {
1448        let dir_for_temp_cache_files = tempdir().unwrap();
1449        // 0 hashes
1450        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
1451        assert!(file.get_reader().is_none());
1452        let hashes = (0..2)
1453            .map(|i| Hash::new_from_array([i; 32]))
1454            .collect::<Vec<_>>();
1455
1456        // 1 hash
1457        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
1458        file.write(&hashes[0]);
1459        let reader = file.get_reader();
1460        assert_eq!(&[hashes[0]][..], read(&reader, 0).unwrap());
1461        assert!(read(&reader, 1).unwrap().is_empty());
1462
1463        // multiple hashes
1464        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
1465        hashes.iter().for_each(|hash| file.write(hash));
1466        let reader = file.get_reader();
1467        (0..2).for_each(|i| assert_eq!(&hashes[i..], read(&reader, i).unwrap()));
1468        assert!(read(&reader, 2).unwrap().is_empty());
1469    }
1470
1471    #[test]
1472    fn test_cumulative_hashes_from_files() {
1473        let dir_for_temp_cache_files = tempdir().unwrap();
1474        (0..4).for_each(|permutation| {
1475            let hashes = (0..2)
1476                .map(|i| Hash::new_from_array([i + 1; 32]))
1477                .collect::<Vec<_>>();
1478
1479            let mut combined = Vec::default();
1480
1481            // 0 hashes
1482            let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path());
1483
1484            // 1 hash
1485            let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path());
1486            file1.write(&hashes[0]);
1487            combined.push(hashes[0]);
1488
1489            // multiple hashes
1490            let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path());
1491            hashes.iter().for_each(|hash| {
1492                file2.write(hash);
1493                combined.push(*hash);
1494            });
1495
1496            let hashes = if permutation == 0 {
1497                vec![file0, file1, file2]
1498            } else if permutation == 1 {
1499                // include more empty files
1500                vec![
1501                    file0,
1502                    file1,
1503                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
1504                    file2,
1505                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
1506                ]
1507            } else if permutation == 2 {
1508                vec![file1, file2]
1509            } else {
1510                // swap file2 and file1
1511                let one = combined.remove(0);
1512                combined.push(one);
1513                vec![
1514                    file2,
1515                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
1516                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
1517                    file1,
1518                ]
1519            };
1520
1521            let cumulative = CumulativeHashesFromFiles::from_files(hashes);
1522            let len = combined.len();
1523            assert_eq!(cumulative.total_count(), len);
1524            (0..combined.len()).for_each(|start| {
1525                let mut retrieved = Vec::default();
1526                let mut cumulative_start = start;
1527                // read all data
1528                while retrieved.len() < (len - start) {
1529                    let this_one_bytes = cumulative.get_data(cumulative_start);
1530                    let this_one = bytemuck::cast_slice(&this_one_bytes);
1531                    retrieved.extend(this_one.iter());
1532                    cumulative_start += this_one.len();
1533                    assert_ne!(0, this_one.len());
1534                }
1535                assert_eq!(
1536                    &combined[start..],
1537                    &retrieved[..],
1538                    "permutation: {permutation}"
1539                );
1540            });
1541        });
1542    }
1543
1544    #[test]
1545    fn test_accountsdb_div_ceil() {
1546        assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
1547        assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
1548        assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
1549        assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
1550        assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
1551    }
1552
1553    #[test]
1554    #[should_panic(expected = "attempt to divide by zero")]
1555    fn test_accountsdb_div_ceil_fail() {
1556        assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
1557    }
1558
1559    fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
1560        vec![original]
1561    }
1562
1563    #[test]
1564    fn test_accountsdb_rest_of_hash_calculation() {
1565        solana_logger::setup();
1566
1567        let mut account_maps = Vec::new();
1568
1569        let pubkey = Pubkey::from([11u8; 32]);
1570        let hash = AccountHash(Hash::new_from_array([1u8; 32]));
1571        let val = CalculateHashIntermediate {
1572            hash,
1573            lamports: 88,
1574            pubkey,
1575        };
1576        account_maps.push(val);
1577
1578        // 2nd key - zero lamports, so will be removed
1579        let pubkey = Pubkey::from([12u8; 32]);
1580        let hash = AccountHash(Hash::new_from_array([2u8; 32]));
1581        let val = CalculateHashIntermediate {
1582            hash,
1583            lamports: 0,
1584            pubkey,
1585        };
1586        account_maps.push(val);
1587
1588        let dir_for_temp_cache_files = tempdir().unwrap();
1589        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1590        let result = accounts_hash.rest_of_hash_calculation(
1591            &for_rest(&account_maps),
1592            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
1593            &mut HashStats::default(),
1594        );
1595        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
1596        assert_eq!((result.0, result.1), (expected_hash, 88));
1597
1598        // 3rd key - with pubkey value before 1st key so it will be sorted first
1599        let pubkey = Pubkey::from([10u8; 32]);
1600        let hash = AccountHash(Hash::new_from_array([2u8; 32]));
1601        let val = CalculateHashIntermediate {
1602            hash,
1603            lamports: 20,
1604            pubkey,
1605        };
1606        account_maps.insert(0, val);
1607
1608        let result = accounts_hash.rest_of_hash_calculation(
1609            &for_rest(&account_maps),
1610            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
1611            &mut HashStats::default(),
1612        );
1613        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
1614        assert_eq!((result.0, result.1), (expected_hash, 108));
1615
1616        // 3rd key again - same pubkey from a later slot, so it supersedes the earlier entry
1617        let pubkey = Pubkey::from([10u8; 32]);
1618        let hash = AccountHash(Hash::new_from_array([99u8; 32]));
1619        let val = CalculateHashIntermediate {
1620            hash,
1621            lamports: 30,
1622            pubkey,
1623        };
1624        account_maps.insert(1, val);
1625
1626        let result = accounts_hash.rest_of_hash_calculation(
1627            &for_rest(&account_maps),
1628            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
1629            &mut HashStats::default(),
1630        );
1631        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
1632        assert_eq!((result.0, result.1), (expected_hash, 118));
1633    }
1634
1635    fn one_range() -> usize {
1636        1
1637    }
1638
1639    fn zero_range() -> usize {
1640        0
1641    }
1642
1643    #[test]
1644    fn test_accountsdb_de_dup_accounts_zero_chunks() {
1645        let vec = [vec![CalculateHashIntermediate {
1646            lamports: 1,
1647            hash: AccountHash(Hash::default()),
1648            pubkey: Pubkey::default(),
1649        }]];
1650        let temp_vec = vec.to_vec();
1651        let slice = convert_to_slice(&temp_vec);
1652        let dir_for_temp_cache_files = tempdir().unwrap();
1653        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1654        let (mut hashes, lamports) =
1655            accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
1656        let reader = hashes.get_reader();
1657        assert_eq!(&[Hash::default()], &read(&reader, 0).unwrap()[..]);
1658        assert_eq!(lamports, 1);
1659    }
1660
1661    fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
1662        hashes.into_iter().map(get_vec).collect()
1663    }
1664    fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
1665        let reader = hashes.get_reader();
1666        read(&reader, 0).unwrap_or_default()
1667    }
1668    fn read(reader: &Option<Mutex<BufReader<File>>>, index: usize) -> std::io::Result<Vec<Hash>> {
1669        let file_offset_in_bytes = std::mem::size_of::<Hash>() * index;
1670        if reader.is_none() {
1671            return Ok(vec![]);
1672        }
1673        let mut reader = reader.as_ref().unwrap().lock().unwrap();
1674        reader.seek(SeekFrom::Start(file_offset_in_bytes.try_into().unwrap()))?;
1675        let mut result_bytes: Vec<u8> = vec![];
1676        reader.read_to_end(&mut result_bytes)?;
1677        let result: Vec<Hash> = bytemuck::cast_slice(&result_bytes).to_vec();
1678        Ok(result)
1679    }
1680
1681    #[test]
1682    fn test_accountsdb_de_dup_accounts_empty() {
1683        solana_logger::setup();
1684        let dir_for_temp_cache_files = tempdir().unwrap();
1685        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1686
1687        let empty = [];
1688        let vec = &empty;
1689        let (hashes, lamports) =
1690            accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
1691        assert_eq!(
1692            Vec::<Hash>::new(),
1693            get_vec_vec(hashes)
1694                .into_iter()
1695                .flatten()
1696                .collect::<Vec<_>>(),
1697        );
1698        assert_eq!(lamports, 0);
1699        let vec = vec![];
1700        let (hashes, lamports) =
1701            accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
1702        let empty: Vec<Vec<Hash>> = Vec::default();
1703        assert_eq!(empty, get_vec_vec(hashes));
1704        assert_eq!(lamports, 0);
1705
1706        let (hashes, lamports) =
1707            accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
1708        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1709        assert_eq!(lamports, 0);
1710
1711        let (hashes, lamports) =
1712            accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
1713        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1714        assert_eq!(lamports, 0);
1715    }
1716
1717    #[test]
1718    fn test_accountsdb_de_dup_accounts_from_stores() {
1719        solana_logger::setup();
1720
1721        let key_a = Pubkey::from([1u8; 32]);
1722        let key_b = Pubkey::from([2u8; 32]);
1723        let key_c = Pubkey::from([3u8; 32]);
1724        const COUNT: usize = 6;
1725        let hashes = (0..COUNT).map(|i| AccountHash(Hash::new_from_array([i as u8; 32])));
1726        // create this vector
1727        // abbbcc
1728        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];
1729
1730        let accounts: Vec<_> = hashes
1731            .zip(keys.iter())
1732            .enumerate()
1733            .map(|(i, (hash, &pubkey))| CalculateHashIntermediate {
1734                hash,
1735                lamports: (i + 1) as u64,
1736                pubkey,
1737            })
1738            .collect();
1739
1740        type ExpectedType = (String, bool, u64, String);
1741        let expected: Vec<ExpectedType> = vec![
1742            // ("key/lamports key2/lamports ...",
1743            // is_last_slice
1744            // result lamports
1745            // result hashes)
1746            // "a5" = key_a, 5 lamports
1747            ("a1", false, 1, "[11111111111111111111111111111111]"),
1748            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1749            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1750            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1751            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1752            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1753            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1754            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1755            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1756            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1757            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1758            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1759            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1760            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1761            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1762            ("a1", true, 1, "[11111111111111111111111111111111]"),
1763            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1764            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1765            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1766            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1767            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1768            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1769            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1770            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1771            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1772            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1773            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1774            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1775            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1776            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1777            ].into_iter().map(|item| {
1778                let result: ExpectedType = (
1779                    item.0.to_string(),
1780                    item.1,
1781                    item.2,
1782                    item.3.to_string(),
1783                );
1784                result
1785            }).collect();
1786
1787        let dir_for_temp_cache_files = tempdir().unwrap();
1788        let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1789        let mut expected_index = 0;
1790        for last_slice in 0..2 {
1791            for start in 0..COUNT {
1792                for end in start + 1..COUNT {
1793                    let is_last_slice = last_slice == 1;
1794                    let accounts = accounts.clone();
1795                    let slice = &accounts[start..end];
1796
1797                    let slice2 = vec![slice.to_vec()];
1798                    let slice = &slice2[..];
1799                    let slice_temp = convert_to_slice(&slice2);
1800                    let (hashes2, lamports2) =
1801                        hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
1802                    let slice3 = convert_to_slice(&slice2);
1803                    let (hashes3, lamports3) =
1804                        hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
1805                    let vec = slice.to_vec();
1806                    let slice4 = convert_to_slice(&vec);
1807                    let mut max_bin = end - start;
1808                    if !max_bin.is_power_of_two() {
1809                        max_bin = 1;
1810                    }
1811
1812                    let (hashes4, lamports4) =
1813                        hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
1814                    let vec = slice.to_vec();
1815                    let slice5 = convert_to_slice(&vec);
1816                    let (hashes5, lamports5) =
1817                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
1818                    let vec = slice.to_vec();
1819                    let slice5 = convert_to_slice(&vec);
1820                    let (hashes6, lamports6) =
1821                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
1822
1823                    let hashes2 = get_vec(hashes2);
1824                    let hashes3 = get_vec(hashes3);
1825                    let hashes4 = get_vec_vec(hashes4);
1826                    let hashes5 = get_vec_vec(hashes5);
1827                    let hashes6 = get_vec_vec(hashes6);
1828
1829                    assert_eq!(hashes2, hashes3);
1830                    let expected2 = hashes2.clone();
1831                    assert_eq!(
1832                        expected2,
1833                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
1834                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
1835                    );
1836                    assert_eq!(
1837                        expected2.clone(),
1838                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
1839                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
1840                    );
1841                    assert_eq!(
1842                        expected2.clone(),
1843                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
1844                    );
1845                    assert_eq!(lamports2, lamports3);
1846                    assert_eq!(lamports2, lamports4);
1847                    assert_eq!(lamports2, lamports5);
1848                    assert_eq!(lamports2, lamports6);
1849
1850                    let human_readable = slice[0]
1851                        .iter()
1852                        .map(|v| {
1853                            let mut s = (if v.pubkey == key_a {
1854                                "a"
1855                            } else if v.pubkey == key_b {
1856                                "b"
1857                            } else {
1858                                "c"
1859                            })
1860                            .to_string();
1861
1862                            s.push_str(&v.lamports.to_string());
1863                            s
1864                        })
1865                        .collect::<String>();
1866
1867                    let hash_result_as_string = format!("{hashes2:?}");
1868
1869                    let packaged_result: ExpectedType = (
1870                        human_readable,
1871                        is_last_slice,
1872                        lamports2,
1873                        hash_result_as_string,
1874                    );
1875                    assert_eq!(expected[expected_index], packaged_result);
1876
1877                    // for generating expected results
1878                    // error!("{:?},", packaged_result);
1879                    expected_index += 1;
1880                }
1881            }
1882        }
1883    }
1884
1885    #[test]
1886    fn test_accountsdb_compare_two_hash_entries() {
1887        solana_logger::setup();
1888        let pubkey = Pubkey::new_unique();
1889        let hash = AccountHash(Hash::new_unique());
1890        let val = CalculateHashIntermediate {
1891            hash,
1892            lamports: 1,
1893            pubkey,
1894        };
1895
1896        // slot same, version <
1897        let hash2 = AccountHash(Hash::new_unique());
1898        let val2 = CalculateHashIntermediate {
1899            hash: hash2,
1900            lamports: 4,
1901            pubkey,
1902        };
1903        assert_eq!(
1904            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1905            AccountsHasher::compare_two_hash_entries(&val, &val2)
1906        );
1907
1908        // slot same, version =
1909        let hash3 = AccountHash(Hash::new_unique());
1910        let val3 = CalculateHashIntermediate {
1911            hash: hash3,
1912            lamports: 2,
1913            pubkey,
1914        };
1915        assert_eq!(
1916            std::cmp::Ordering::Equal,
1917            AccountsHasher::compare_two_hash_entries(&val, &val3)
1918        );
1919
1920        // slot same, vers >
1921        let hash4 = AccountHash(Hash::new_unique());
1922        let val4 = CalculateHashIntermediate {
1923            hash: hash4,
1924            lamports: 6,
1925            pubkey,
1926        };
1927        assert_eq!(
1928            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1929            AccountsHasher::compare_two_hash_entries(&val, &val4)
1930        );
1931
1932        // slot >, version <
1933        let hash5 = AccountHash(Hash::new_unique());
1934        let val5 = CalculateHashIntermediate {
1935            hash: hash5,
1936            lamports: 8,
1937            pubkey,
1938        };
1939        assert_eq!(
1940            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1941            AccountsHasher::compare_two_hash_entries(&val, &val5)
1942        );
1943    }
1944
1945    fn test_de_dup_accounts_in_parallel<'a>(
1946        account_maps: &'a [&'a [CalculateHashIntermediate]],
1947    ) -> (AccountHashesFile, u64) {
1948        let dir_for_temp_cache_files = tempdir().unwrap();
1949        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1950        accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
1951    }
1952
1953    #[test]
1954    fn test_accountsdb_remove_zero_balance_accounts() {
1955        solana_logger::setup();
1956
1957        let pubkey = Pubkey::new_unique();
1958        let hash = AccountHash(Hash::new_unique());
1959        let mut account_maps = Vec::new();
1960        let val = CalculateHashIntermediate {
1961            hash,
1962            lamports: 1,
1963            pubkey,
1964        };
1965        account_maps.push(val);
1966
1967        let vecs = vec![account_maps.to_vec()];
1968        let slice = convert_to_slice(&vecs);
1969        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1970        assert_eq!(
1971            (get_vec(hashfile), lamports),
1972            (vec![val.hash.0], val.lamports)
1973        );
1974
1975        // same key again with zero lamports in a higher version, so the account drops out entirely
1976        let val = CalculateHashIntermediate {
1977            hash,
1978            lamports: 0,
1979            pubkey,
1980        };
1981        account_maps.push(val); // has to be after previous entry since account_maps are in slot order
1982
1983        let vecs = vec![account_maps.to_vec()];
1984        let slice = convert_to_slice(&vecs);
1985        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1986        assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
1987    }
1988
1989    #[test]
1990    fn test_accountsdb_dup_pubkey_2_chunks() {
1991        // 2 chunks, a dup pubkey in each chunk
1992        for reverse in [false, true] {
1993            let key = Pubkey::new_from_array([1; 32]); // key is BEFORE key2
1994            let key2 = Pubkey::new_from_array([2; 32]);
1995            let hash = AccountHash(Hash::new_unique());
1996            let mut account_maps = Vec::new();
1997            let mut account_maps2 = Vec::new();
1998            let val = CalculateHashIntermediate {
1999                hash,
2000                lamports: 1,
2001                pubkey: key,
2002            };
2003            account_maps.push(val);
2004            let val2 = CalculateHashIntermediate {
2005                hash,
2006                lamports: 2,
2007                pubkey: key2,
2008            };
2009            account_maps.push(val2);
2010            let val3 = CalculateHashIntermediate {
2011                hash,
2012                lamports: 3,
2013                pubkey: key2,
2014            };
2015            account_maps2.push(val3);
2016
2017            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
2018            if reverse {
2019                vecs = vecs.into_iter().rev().collect();
2020            }
2021            let slice = convert_to_slice(&vecs);
2022            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
2023            assert_eq!(
2024                (get_vec(hashfile), lamports),
2025                (
2026                    vec![val.hash.0, if reverse { val2.hash.0 } else { val3.hash.0 }],
2027                    val.lamports
2028                        + if reverse {
2029                            val2.lamports
2030                        } else {
2031                            val3.lamports
2032                        }
2033                ),
2034                "reverse: {reverse}"
2035            );
2036        }
2037    }
2038
2039    #[test]
2040    fn test_accountsdb_dup_pubkey_2_chunks_backwards() {
2041        // 2 chunks, a dup pubkey in each chunk
2042        for reverse in [false, true] {
2043            let key = Pubkey::new_from_array([3; 32]); // key is AFTER key2
2044            let key2 = Pubkey::new_from_array([2; 32]);
2045            let hash = AccountHash(Hash::new_unique());
2046            let mut account_maps = Vec::new();
2047            let mut account_maps2 = Vec::new();
2048            let val2 = CalculateHashIntermediate {
2049                hash,
2050                lamports: 2,
2051                pubkey: key2,
2052            };
2053            account_maps.push(val2);
2054            let val = CalculateHashIntermediate {
2055                hash,
2056                lamports: 1,
2057                pubkey: key,
2058            };
2059            account_maps.push(val);
2060            let val3 = CalculateHashIntermediate {
2061                hash,
2062                lamports: 3,
2063                pubkey: key2,
2064            };
2065            account_maps2.push(val3);
2066
2067            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
2068            if reverse {
2069                vecs = vecs.into_iter().rev().collect();
2070            }
2071            let slice = convert_to_slice(&vecs);
2072            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
2073            assert_eq!(
2074                (get_vec(hashfile), lamports),
2075                (
2076                    vec![if reverse { val2.hash.0 } else { val3.hash.0 }, val.hash.0],
2077                    val.lamports
2078                        + if reverse {
2079                            val2.lamports
2080                        } else {
2081                            val3.lamports
2082                        }
2083                ),
2084                "reverse: {reverse}"
2085            );
2086        }
2087    }
2088
2089    #[test]
2090    fn test_accountsdb_cumulative_offsets1_d() {
2091        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
2092        let cumulative = CumulativeOffsets::from_raw(&input);
2093
2094        let src: Vec<_> = input.clone().into_iter().flatten().collect();
2095        let len = src.len();
2096        assert_eq!(cumulative.total_count, len);
2097        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2098
2099        const DIMENSION: usize = 0;
2100        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
2101        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);
2102
2103        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2104        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2105
2106        for start in 0..len {
2107            let slice = cumulative.get_slice(&input, start);
2108            let len = slice.len();
2109            assert!(len > 0);
2110            assert_eq!(&src[start..(start + len)], slice);
2111        }
2112
2113        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
2114        let cumulative = CumulativeOffsets::from_raw(&input);
2115
2116        let src: Vec<_> = input.clone().into_iter().flatten().collect();
2117        let len = src.len();
2118        assert_eq!(cumulative.total_count, len);
2119        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2120
2121        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
2122        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);
2123
2124        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2125        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2126
2127        for start in 0..len {
2128            let slice = cumulative.get_slice(&input, start);
2129            let len = slice.len();
2130            assert!(len > 0);
2131            assert_eq!(&src[start..(start + len)], slice);
2132        }
2133
2134        let input: Vec<Vec<u32>> = vec![vec![]];
2135        let cumulative = CumulativeOffsets::from_raw(&input);
2136
2137        let len = input.into_iter().flatten().count();
2138        assert_eq!(cumulative.total_count, len);
2139        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
2140    }
2141
2142    #[should_panic(expected = "is_empty")]
2143    #[test]
2144    fn test_accountsdb_cumulative_find_empty() {
2145        let input = CumulativeOffsets {
2146            cumulative_offsets: vec![],
2147            total_count: 0,
2148        };
2149        input.find(0);
2150    }
2151
2152    #[test]
2153    fn test_accountsdb_cumulative_find() {
2154        let input = CumulativeOffsets {
2155            cumulative_offsets: vec![CumulativeOffset {
2156                index: [0; 2],
2157                start_offset: 0,
2158            }],
2159            total_count: 0,
2160        };
2161        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0], 0));
2162
2163        let input = CumulativeOffsets {
2164            cumulative_offsets: vec![
2165                CumulativeOffset {
2166                    index: [0; 2],
2167                    start_offset: 0,
2168                },
2169                CumulativeOffset {
2170                    index: [1; 2],
2171                    start_offset: 2,
2172                },
2173            ],
2174            total_count: 2,
2175        };
2176        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0], 2)); // = first start_offset
2177        assert_eq!(input.find(1), (1, &input.cumulative_offsets[0], 2)); // > first start_offset
2178        assert_eq!(input.find(2), (0, &input.cumulative_offsets[1], 0)); // = last start_offset
2179        assert_eq!(input.find(3), (1, &input.cumulative_offsets[1], 0)); // > last start_offset
2180    }
2181
2182    #[test]
2183    fn test_accountsdb_cumulative_offsets2_d() {
2184        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
2185        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2186
2187        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2188        let len = src.len();
2189        assert_eq!(cumulative.total_count, len);
2190        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2191
2192        const DIMENSION_0: usize = 0;
2193        const DIMENSION_1: usize = 1;
2194        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2195        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
2196        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
2197        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);
2198
2199        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2200        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2201
2202        for start in 0..len {
2203            let slice: &[u64] = cumulative.get_slice(&input, start);
2204            let len = slice.len();
2205            assert!(len > 0);
2206            assert_eq!(&src[start..(start + len)], slice);
2207        }
2208
2209        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
2210        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2211
2212        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2213        let len = src.len();
2214        assert_eq!(cumulative.total_count, len);
2215        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2216
2217        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2218        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
2219        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
2220        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);
2221
2222        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2223        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2224
2225        for start in 0..len {
2226            let slice: &[u64] = cumulative.get_slice(&input, start);
2227            let len = slice.len();
2228            assert!(len > 0);
2229            assert_eq!(&src[start..(start + len)], slice);
2230        }
2231
2232        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
2233        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2234
2235        let len = input.into_iter().flatten().count();
2236        assert_eq!(cumulative.total_count, len);
2237        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
2238
2239        let input = vec![
2240            vec![vec![0, 1]],
2241            vec![vec![]],
2242            vec![vec![], vec![2, 3, 4], vec![]],
2243        ];
2244        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2245
2246        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2247        let len = src.len();
2248        assert_eq!(cumulative.total_count, len);
2249        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2250
2251        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2252        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
2253        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
2254        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);
2255
2256        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2257        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2258
2259        for start in 0..len {
2260            let slice: &[u64] = cumulative.get_slice(&input, start);
2261            let len = slice.len();
2262            assert!(len > 0);
2263            assert_eq!(&src[start..(start + len)], slice);
2264        }
2265    }
2266
2267    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
2268        let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
2269        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
2270        let result2 = test_hashing(reduced, fanout);
2271        assert_eq!(result, result2, "len: {}", hashes.len());
2272        result
2273    }
2274
2275    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
2276        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
2277        let result = AccountsHasher::compute_merkle_root(temp, fanout);
2278        let reduced: Vec<_> = hashes.clone();
2279        let result2 = AccountsHasher::compute_merkle_root_from_slices(
2280            hashes.len(),
2281            fanout,
2282            None,
2283            |start| &reduced[start..],
2284            None,
2285        );
2286        assert_eq!(result, result2.0, "len: {}", hashes.len());
2287
2288        let result2 = AccountsHasher::compute_merkle_root_from_slices(
2289            hashes.len(),
2290            fanout,
2291            Some(1),
2292            |start| &reduced[start..],
2293            None,
2294        );
2295        assert_eq!(result, result2.0, "len: {}", hashes.len());
2296
2297        let max = std::cmp::min(reduced.len(), fanout * 2);
2298        for left in 0..max {
2299            for right in left + 1..max {
2300                let src = vec![
2301                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
2302                    vec![reduced[right..].to_vec()],
2303                ];
2304                let offsets = CumulativeOffsets::from_raw_2d(&src);
2305
2306                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
2307                let result2 = AccountsHasher::compute_merkle_root_from_slices(
2308                    offsets.total_count,
2309                    fanout,
2310                    None,
2311                    get_slice,
2312                    None,
2313                );
2314                assert_eq!(result, result2.0);
2315            }
2316        }
2317        result
2318    }
2319
2320    #[test]
2321    fn test_accountsdb_compute_merkle_root_large() {
2322        solana_logger::setup();
2323
2324        // handle fanout^x -1, +0, +1 for a few 'x's
2325        const FANOUT: usize = 3;
2326        let mut hash_counts: Vec<_> = (1..6)
2327            .flat_map(|x| {
2328                let mark = FANOUT.pow(x);
2329                vec![mark - 1, mark, mark + 1]
2330            })
2331            .collect();
2332
2333        // saturate the test space from threshold - 1 to threshold + target
2334        // this hits right before the 3-deep optimization is used, then walks through all possible partial last chunks
2335        let target = FANOUT.pow(3);
2336        let threshold = target * FANOUT;
2337        hash_counts.extend(threshold - 1..=threshold + target);
2338
2339        for hash_count in hash_counts {
2340            let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();
2341
2342            test_hashing(hashes, FANOUT);
2343        }
2344    }
2345
2346    #[test]
2347    fn test_accountsdb_compute_merkle_root() {
2348        solana_logger::setup();
2349
2350        let expected_results = vec![
2351            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
2352            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
2353            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
2354            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
2355            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
2356            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
2357            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
2358            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
2359            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
2360            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
2361            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
2362            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
2363            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
2364            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
2365            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
2366            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
2367            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
2368            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
2369            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
2370            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
2371        ];
2372
2373        let mut expected_index = 0;
2374        let start = 0;
2375        let default_fanout = 2;
2376        // test 0..3 recursions (at fanout = 2) plus a 1-item remainder. The internals have one special-case first loop; subsequent loops are all of the same type.
2377        let iterations = default_fanout * default_fanout * default_fanout + 2;
2378        for pass in 0..2 {
2379            let fanout = if pass == 0 {
2380                default_fanout
2381            } else {
2382                MERKLE_FANOUT
2383            };
2384            for count in start..iterations {
2385                let mut input: Vec<_> = (0..count)
2386                    .map(|i| {
2387                        let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
2388                        let hash =
2389                            Hash::new_from_array([(pass * iterations + count + i + 1) as u8; 32]);
2390                        (key, hash)
2391                    })
2392                    .collect();
2393
2394                let result = if pass == 0 {
2395                    test_hashing_larger(input, fanout)
2396                } else {
2397                    // accumulate_account_hashes sorts internally
2398                    let early_result = AccountsHasher::accumulate_account_hashes(
2399                        input
2400                            .iter()
2401                            .map(|i| (i.0, AccountHash(i.1)))
2402                            .collect::<Vec<_>>(),
2403                    );
2404
2405                    input.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
2406                    let result = AccountsHasher::compute_merkle_root(input, fanout);
2407                    assert_eq!(early_result, result);
2408                    result
2409                };
2410                // compare against captured, expected results for hash (and lamports)
2411                assert_eq!(
2412                    (
2413                        pass,
2414                        count,
2415                        &*(result.to_string()),
2416                        expected_results[expected_index].3
2417                    ), // we no longer calculate lamports
2418                    expected_results[expected_index]
2419                );
2420                expected_index += 1;
2421            }
2422        }
2423    }
2424
2425    #[test]
2426    #[should_panic(expected = "summing lamports cannot overflow")]
2427    fn test_accountsdb_lamport_overflow() {
2428        solana_logger::setup();
2429
2430        let offset = 2;
2431        let input = vec![
2432            CalculateHashIntermediate {
2433                hash: AccountHash(Hash::new_from_array([1u8; 32])),
2434                lamports: u64::MAX - offset,
2435                pubkey: Pubkey::new_unique(),
2436            },
2437            CalculateHashIntermediate {
2438                hash: AccountHash(Hash::new_from_array([2u8; 32])),
2439                lamports: offset + 1,
2440                pubkey: Pubkey::new_unique(),
2441            },
2442        ];
2443        let dir_for_temp_cache_files = tempdir().unwrap();
2444        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2445        accounts_hasher.de_dup_accounts_in_parallel(
2446            &convert_to_slice(&[input]),
2447            0,
2448            1,
2449            &HashStats::default(),
2450        );
2451    }
2452
2453    fn convert_to_slice(
2454        input: &[Vec<CalculateHashIntermediate>],
2455    ) -> Vec<&[CalculateHashIntermediate]> {
2456        input.iter().map(|v| &v[..]).collect::<Vec<_>>()
2457    }
2458
2459    #[test]
2460    #[should_panic(expected = "summing lamports cannot overflow")]
2461    fn test_accountsdb_lamport_overflow2() {
2462        solana_logger::setup();
2463
2464        let offset = 2;
2465        let input = vec![
2466            vec![CalculateHashIntermediate {
2467                hash: AccountHash(Hash::new_from_array([1u8; 32])),
2468                lamports: u64::MAX - offset,
2469                pubkey: Pubkey::new_unique(),
2470            }],
2471            vec![CalculateHashIntermediate {
2472                hash: AccountHash(Hash::new_from_array([2u8; 32])),
2473                lamports: offset + 1,
2474                pubkey: Pubkey::new_unique(),
2475            }],
2476        ];
2477        let dir_for_temp_cache_files = tempdir().unwrap();
2478        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2479        accounts_hasher.de_dup_accounts(
2480            &convert_to_slice(&input),
2481            &mut HashStats::default(),
2482            2, // accounts above are in 2 groups
2483        );
2484    }
2485
2486    #[test]
2487    fn test_get_data() {
2488        // Create a temporary directory for test files
2489        let temp_dir = tempdir().unwrap();
2490
2491        const MAX_BUFFER_SIZE_IN_BYTES: usize = 128;
2492        let extra_size = 64;
2493
2494        // Create a test file and write some data to it
2495        let file_path = temp_dir.path().join("test_file");
2496        let mut file = File::create(&file_path).unwrap();
2497        let test_data: Vec<u8> = (0..(MAX_BUFFER_SIZE_IN_BYTES + extra_size) as u8).collect(); // 128 + 64 bytes of test data
2498        file.write_all(&test_data).unwrap();
2499        file.seek(SeekFrom::Start(0)).unwrap();
2500        drop(file);
2501
2502        let test_data: &[Hash] = bytemuck::cast_slice(&test_data);
2503
2504        // Create a BufReader for the test file
2505        let file = File::open(&file_path).unwrap();
2506        let reader = BufReader::new(file);
2507        let readers = vec![Mutex::new(reader)];
2508
2509        // Create a CumulativeOffsets instance
2510        let cumulative_offsets = CumulativeOffsets {
2511            cumulative_offsets: vec![CumulativeOffset {
2512                index: [0, 0],
2513                start_offset: 0,
2514            }],
2515            total_count: test_data.len(),
2516        };
2517
2518        // Create a CumulativeHashesFromFiles instance
2519        let cumulative_hashes = CumulativeHashesFromFiles {
2520            readers,
2521            cumulative: cumulative_offsets,
2522        };
2523
2524        // Test get_data function
2525        // First read: MAX_BUFFER_SIZE_IN_BYTES (128 bytes, i.e. 4 hashes)
2526        let start_index = 0;
2527        let result = cumulative_hashes.get_data(start_index);
2528        assert_eq!(
2529            result.len(),
2530            MAX_BUFFER_SIZE_IN_BYTES / std::mem::size_of::<Hash>()
2531        );
2532        assert_eq!(&test_data[..result.len()], &result[..]);
2533
2534        // Second read: the remaining 64 bytes (2 hashes)
2535        let start_index = result.len();
2536        let result = cumulative_hashes.get_data(start_index);
2537        assert_eq!(result.len(), extra_size / std::mem::size_of::<Hash>());
2538        assert_eq!(
2539            &test_data[start_index..start_index + result.len()],
2540            &result[..]
2541        );
2542    }
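
    // Illustrative test added in this edit (not part of the original suite): a minimal
    // sketch checking that the zero-lamport constants defined at module scope are the
    // lattice-hash identity and the all-zero hash, respectively.
    #[test]
    fn test_zero_lamport_hash_constants_illustrative() {
        assert_eq!(
            ZERO_LAMPORT_ACCOUNT_LT_HASH,
            AccountLtHash(LtHash::identity())
        );
        assert_eq!(
            ZERO_LAMPORT_ACCOUNT_HASH.0,
            Hash::new_from_array([0; HASH_BYTES])
        );
    }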
2543}