solana_accounts_db/
accounts_hash.rs

1use {
2    crate::{
3        accounts_db::{AccountStorageEntry, PUBKEY_BINS_FOR_CALCULATING_HASHES},
4        active_stats::{ActiveStatItem, ActiveStats},
5        ancestors::Ancestors,
6        pubkey_bins::PubkeyBinCalculator24,
7    },
8    bytemuck_derive::{Pod, Zeroable},
9    log::*,
10    memmap2::MmapMut,
11    rayon::prelude::*,
12    solana_lattice_hash::lt_hash::LtHash,
13    solana_measure::{measure::Measure, measure_us},
14    solana_sdk::{
15        hash::{Hash, Hasher, HASH_BYTES},
16        pubkey::Pubkey,
17        rent_collector::RentCollector,
18        slot_history::Slot,
19        sysvar::epoch_schedule::EpochSchedule,
20    },
21    std::{
22        borrow::Borrow,
23        convert::TryInto,
24        io::{Seek, SeekFrom, Write},
25        path::PathBuf,
26        sync::{
27            atomic::{AtomicU64, AtomicUsize, Ordering},
28            Arc,
29        },
30        thread, time,
31    },
32    tempfile::tempfile_in,
33};
34pub const MERKLE_FANOUT: usize = 16;
35
36/// 1 file containing account hashes sorted by pubkey, mapped into memory
37struct MmapAccountHashesFile {
38    /// raw slice of `Hash` values. Can be a larger slice than `count`
39    mmap: MmapMut,
40    /// # of valid Hash entries in `mmap`
41    count: usize,
42}
43
44impl MmapAccountHashesFile {
45    /// return a slice of account hashes starting at 'index'
46    fn read(&self, index: usize) -> &[Hash] {
47        let start = std::mem::size_of::<Hash>() * index;
48        let end = std::mem::size_of::<Hash>() * self.count;
49        let bytes = &self.mmap[start..end];
50        bytemuck::cast_slice(bytes)
51    }
52
53    /// write a hash to the end of mmap file.
54    fn write(&mut self, hash: &Hash) {
55        let start = self.count * std::mem::size_of::<Hash>();
56        let end = start + std::mem::size_of::<Hash>();
57        self.mmap[start..end].copy_from_slice(hash.as_ref());
58        self.count += 1;
59    }
60}
61
62/// 1 file containing account hashes sorted by pubkey
63struct AccountHashesFile {
64    /// # hashes and an open file that will be deleted on drop. None if there are zero hashes to represent, and thus, no file.
65    writer: Option<MmapAccountHashesFile>,
66    /// The directory where temporary cache files are put
67    dir_for_temp_cache_files: PathBuf,
68    /// # bytes allocated
69    capacity: usize,
70}
71
72impl AccountHashesFile {
73    /// return a mmap reader that can be accessed  by slice
74    fn get_reader(&mut self) -> Option<MmapAccountHashesFile> {
75        std::mem::take(&mut self.writer)
76    }
77
78    /// # hashes stored in this file
79    fn count(&self) -> usize {
80        self.writer
81            .as_ref()
82            .map(|writer| writer.count)
83            .unwrap_or_default()
84    }
85
86    /// write 'hash' to the file
87    /// If the file isn't open, create it first.
88    fn write(&mut self, hash: &Hash) {
89        if self.writer.is_none() {
90            // we have hashes to write but no file yet, so create a file that will auto-delete on drop
91
92            let get_file = || -> Result<_, std::io::Error> {
93                let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
94                    panic!(
95                        "Unable to create file within {}: {err}",
96                        self.dir_for_temp_cache_files.display()
97                    )
98                });
99
100                // Theoretical performance optimization: write a zero to the end of
101                // the file so that we won't have to resize it later, which may be
102                // expensive.
103                assert!(self.capacity > 0);
104                data.seek(SeekFrom::Start((self.capacity - 1) as u64))?;
105                data.write_all(&[0])?;
106                data.rewind()?;
107                data.flush()?;
108                Ok(data)
109            };
110
111            // Retry 5 times to allocate the AccountHashesFile. The memory might be fragmented and
112            // causes memory allocation failure. Therefore, let's retry after failure. Hoping that the
113            // kernel has the chance to defrag the memory between the retries, and retries succeed.
114            let mut num_retries = 0;
115            let data = loop {
116                num_retries += 1;
117
118                match get_file() {
119                    Ok(data) => {
120                        break data;
121                    }
122                    Err(err) => {
123                        info!(
124                            "Unable to create account hashes file within {}: {}, retry counter {}",
125                            self.dir_for_temp_cache_files.display(),
126                            err,
127                            num_retries
128                        );
129
130                        if num_retries > 5 {
131                            panic!(
132                                "Unable to create account hashes file within {}: after {} retries",
133                                self.dir_for_temp_cache_files.display(),
134                                num_retries
135                            );
136                        }
137                        datapoint_info!(
138                            "retry_account_hashes_file_allocation",
139                            ("retry", num_retries, i64)
140                        );
141                        thread::sleep(time::Duration::from_millis(num_retries * 100));
142                    }
143                }
144            };
145
146            //UNSAFE: Required to create a Mmap
147            let map = unsafe { MmapMut::map_mut(&data) };
148            let map = map.unwrap_or_else(|e| {
149                error!(
150                    "Failed to map the data file (size: {}): {}.\n
151                        Please increase sysctl vm.max_map_count or equivalent for your platform.",
152                    self.capacity, e
153                );
154                std::process::exit(1);
155            });
156
157            self.writer = Some(MmapAccountHashesFile {
158                mmap: map,
159                count: 0,
160            });
161        }
162        self.writer.as_mut().unwrap().write(hash);
163    }
164}
165
166/// parameters to calculate accounts hash
167#[derive(Debug)]
168pub struct CalcAccountsHashConfig<'a> {
169    /// true to use a thread pool dedicated to bg operations
170    pub use_bg_thread_pool: bool,
171    /// 'ancestors' is used to get storages
172    pub ancestors: Option<&'a Ancestors>,
173    /// does hash calc need to consider account data that exists in the write cache?
174    /// if so, 'ancestors' will be used for this purpose as well as storages.
175    pub epoch_schedule: &'a EpochSchedule,
176    pub rent_collector: &'a RentCollector,
177    /// used for tracking down hash mismatches after the fact
178    pub store_detailed_debug_info_on_failure: bool,
179}
180
181// smallest, 3 quartiles, largest, average
182pub type StorageSizeQuartileStats = [usize; 6];
183
184#[derive(Debug, Default)]
185pub struct HashStats {
186    pub total_us: u64,
187    pub mark_time_us: u64,
188    pub cache_hash_data_us: u64,
189    pub scan_time_total_us: u64,
190    pub zeros_time_total_us: u64,
191    pub hash_time_total_us: u64,
192    pub sort_time_total_us: u64,
193    pub hash_total: usize,
194    pub num_snapshot_storage: usize,
195    pub scan_chunks: usize,
196    pub num_slots: usize,
197    pub num_dirty_slots: usize,
198    pub collect_snapshots_us: u64,
199    pub storage_sort_us: u64,
200    pub storage_size_quartiles: StorageSizeQuartileStats,
201    pub oldest_root: Slot,
202    pub roots_older_than_epoch: AtomicUsize,
203    pub accounts_in_roots_older_than_epoch: AtomicUsize,
204    pub append_vec_sizes_older_than_epoch: AtomicUsize,
205    pub longest_ancient_scan_us: AtomicU64,
206    pub sum_ancient_scans_us: AtomicU64,
207    pub count_ancient_scans: AtomicU64,
208    pub pubkey_bin_search_us: AtomicU64,
209    pub num_zero_lamport_accounts: AtomicU64,
210    pub num_zero_lamport_accounts_ancient: Arc<AtomicU64>,
211}
212impl HashStats {
213    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
214        let mut sum = 0;
215        let mut sizes = storages
216            .iter()
217            .map(|storage| {
218                let cap = storage.accounts.capacity() as usize;
219                sum += cap;
220                cap
221            })
222            .collect::<Vec<_>>();
223        sizes.sort_unstable();
224        let len = sizes.len();
225        self.storage_size_quartiles = if len == 0 {
226            StorageSizeQuartileStats::default()
227        } else {
228            [
229                *sizes.first().unwrap(),
230                sizes[len / 4],
231                sizes[len * 2 / 4],
232                sizes[len * 3 / 4],
233                *sizes.last().unwrap(),
234                sum / len,
235            ]
236        };
237    }
238
239    pub fn log(&self) {
240        datapoint_info!(
241            "calculate_accounts_hash_from_storages",
242            ("total_us", self.total_us, i64),
243            ("mark_time_us", self.mark_time_us, i64),
244            ("cache_hash_data_us", self.cache_hash_data_us, i64),
245            ("accounts_scan_us", self.scan_time_total_us, i64),
246            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
247            ("hash_us", self.hash_time_total_us, i64),
248            ("sort_us", self.sort_time_total_us, i64),
249            ("hash_total", self.hash_total, i64),
250            ("storage_sort_us", self.storage_sort_us, i64),
251            ("collect_snapshots_us", self.collect_snapshots_us, i64),
252            ("num_snapshot_storage", self.num_snapshot_storage, i64),
253            ("scan_chunks", self.scan_chunks, i64),
254            ("num_slots", self.num_slots, i64),
255            ("num_dirty_slots", self.num_dirty_slots, i64),
256            ("storage_size_min", self.storage_size_quartiles[0], i64),
257            (
258                "storage_size_quartile_1",
259                self.storage_size_quartiles[1],
260                i64
261            ),
262            (
263                "storage_size_quartile_2",
264                self.storage_size_quartiles[2],
265                i64
266            ),
267            (
268                "storage_size_quartile_3",
269                self.storage_size_quartiles[3],
270                i64
271            ),
272            ("storage_size_max", self.storage_size_quartiles[4], i64),
273            ("storage_size_avg", self.storage_size_quartiles[5], i64),
274            (
275                "roots_older_than_epoch",
276                self.roots_older_than_epoch.load(Ordering::Relaxed),
277                i64
278            ),
279            ("oldest_root", self.oldest_root, i64),
280            (
281                "longest_ancient_scan_us",
282                self.longest_ancient_scan_us.load(Ordering::Relaxed),
283                i64
284            ),
285            (
286                "sum_ancient_scans_us",
287                self.sum_ancient_scans_us.load(Ordering::Relaxed),
288                i64
289            ),
290            (
291                "count_ancient_scans",
292                self.count_ancient_scans.load(Ordering::Relaxed),
293                i64
294            ),
295            (
296                "append_vec_sizes_older_than_epoch",
297                self.append_vec_sizes_older_than_epoch
298                    .load(Ordering::Relaxed),
299                i64
300            ),
301            (
302                "accounts_in_roots_older_than_epoch",
303                self.accounts_in_roots_older_than_epoch
304                    .load(Ordering::Relaxed),
305                i64
306            ),
307            (
308                "pubkey_bin_search_us",
309                self.pubkey_bin_search_us.load(Ordering::Relaxed),
310                i64
311            ),
312            (
313                "num_zero_lamport_accounts",
314                self.num_zero_lamport_accounts.load(Ordering::Relaxed),
315                i64
316            ),
317            (
318                "num_zero_lamport_accounts_ancient",
319                self.num_zero_lamport_accounts_ancient
320                    .load(Ordering::Relaxed),
321                i64
322            ),
323        );
324    }
325}
326
327/// While scanning appendvecs, this is the info that needs to be extracted, de-duped, and sorted from what is stored in an append vec.
328/// Note this can be saved/loaded during hash calculation to a memory mapped file whose contents are
329/// [CalculateHashIntermediate]
330#[repr(C)]
331#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)]
332pub struct CalculateHashIntermediate {
333    pub hash: AccountHash,
334    pub lamports: u64,
335    pub pubkey: Pubkey,
336}
337
338// In order to safely guarantee CalculateHashIntermediate is Pod, it cannot have any padding
339const _: () = assert!(
340    std::mem::size_of::<CalculateHashIntermediate>()
341        == std::mem::size_of::<AccountHash>()
342            + std::mem::size_of::<u64>()
343            + std::mem::size_of::<Pubkey>(),
344    "CalculateHashIntermediate cannot have any padding"
345);
346
347#[derive(Debug, PartialEq, Eq)]
348struct CumulativeOffset {
349    /// Since the source data is at most 2D, two indexes are enough.
350    index: [usize; 2],
351    start_offset: usize,
352}
353
354trait ExtractSliceFromRawData<'b, T: 'b> {
355    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
356}
357
358impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
359    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
360        &self[offset.index[0]][start..]
361    }
362}
363
364impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
365    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
366        &self[offset.index[0]][offset.index[1]][start..]
367    }
368}
369
370// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>)
371// This model prevents callers from having to flatten which saves both working memory and time.
372#[derive(Default, Debug)]
373struct CumulativeOffsets {
374    cumulative_offsets: Vec<CumulativeOffset>,
375    total_count: usize,
376}
377
378/// used by merkle tree calculation to lookup account hashes by overall index
379#[derive(Default)]
380struct CumulativeHashesFromFiles {
381    /// source of hashes in order
382    readers: Vec<MmapAccountHashesFile>,
383    /// look up reader index and offset by overall index
384    cumulative: CumulativeOffsets,
385}
386
387impl CumulativeHashesFromFiles {
388    /// Calculate offset from overall index to which file and offset within that file based on the length of each hash file.
389    /// Also collect readers to access the data.
390    fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
391        let mut readers = Vec::with_capacity(hashes.len());
392        let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
393            // ignores all hashfiles that have zero entries
394            hash_file.get_reader().map(|reader| {
395                let count = reader.count;
396                readers.push(reader);
397                count
398            })
399        }));
400        Self {
401            cumulative,
402            readers,
403        }
404    }
405
406    /// total # of items referenced
407    fn total_count(&self) -> usize {
408        self.cumulative.total_count
409    }
410
411    // return the biggest slice possible that starts at the overall index 'start'
412    fn get_slice(&self, start: usize) -> &[Hash] {
413        let (start, offset) = self.cumulative.find(start);
414        let data_source_index = offset.index[0];
415        let data = &self.readers[data_source_index];
416        // unwrap here because we should never ask for data that doesn't exist. If we do, then cumulative calculated incorrectly.
417        data.read(start)
418    }
419}
420
421impl CumulativeOffsets {
422    fn new<I>(iter: I) -> Self
423    where
424        I: Iterator<Item = usize>,
425    {
426        let mut total_count: usize = 0;
427        let cumulative_offsets: Vec<_> = iter
428            .enumerate()
429            .filter_map(|(i, len)| {
430                if len > 0 {
431                    let result = CumulativeOffset {
432                        index: [i, i],
433                        start_offset: total_count,
434                    };
435                    total_count += len;
436                    Some(result)
437                } else {
438                    None
439                }
440            })
441            .collect();
442
443        Self {
444            cumulative_offsets,
445            total_count,
446        }
447    }
448
449    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
450        Self::new(raw.iter().map(|v| v.len()))
451    }
452
453    /// find the index of the data source that contains 'start'
454    fn find_index(&self, start: usize) -> usize {
455        assert!(!self.cumulative_offsets.is_empty());
456        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
457            Ok(index) => index,
458            Err(index) => index - 1, // we would insert at index so we are before the item at index
459        }
460    }
461
462    /// given overall start index 'start'
463    /// return ('start', which is the offset into the data source at 'index',
464    ///     and 'index', which is the data source to use)
465    fn find(&self, start: usize) -> (usize, &CumulativeOffset) {
466        let index = self.find_index(start);
467        let index = &self.cumulative_offsets[index];
468        let start = start - index.start_offset;
469        (start, index)
470    }
471
472    // return the biggest slice possible that starts at 'start'
473    fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
474    where
475        U: ExtractSliceFromRawData<'b, T> + 'b,
476    {
477        let (start, index) = self.find(start);
478        raw.extract(index, start)
479    }
480}
481
482#[derive(Debug)]
483pub struct AccountsHasher<'a> {
484    pub zero_lamport_accounts: ZeroLamportAccounts,
485    /// The directory where temporary cache files are put
486    pub dir_for_temp_cache_files: PathBuf,
487    pub(crate) active_stats: &'a ActiveStats,
488}
489
490/// Pointer to a specific item in chunked accounts hash slices.
491#[derive(Debug, Clone, Copy)]
492struct SlotGroupPointer {
493    /// slot group index
494    slot_group_index: usize,
495    /// offset within a slot group
496    offset: usize,
497}
498
499/// A struct for the location of an account hash item inside chunked accounts hash slices.
500#[derive(Debug)]
501struct ItemLocation<'a> {
502    /// account's pubkey
503    key: &'a Pubkey,
504    /// pointer to the item in slot group slices
505    pointer: SlotGroupPointer,
506}
507
508impl<'a> AccountsHasher<'a> {
509    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
510        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
511
512        let hash_total = cumulative_offsets.total_count;
513        let result = AccountsHasher::compute_merkle_root_from_slices(
514            hash_total,
515            MERKLE_FANOUT,
516            None,
517            |start: usize| cumulative_offsets.get_slice(&hashes, start),
518            None,
519        );
520        (result.0, hash_total)
521    }
522
523    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
524        Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
525    }
526
527    // this function avoids an infinite recursion compiler error
528    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
529        Self::compute_merkle_root_loop(hashes, fanout, |t| t)
530    }
531
532    pub fn div_ceil(x: usize, y: usize) -> usize {
533        let mut result = x / y;
534        if x % y != 0 {
535            result += 1;
536        }
537        result
538    }
539
540    // For the first iteration, there could be more items in the tuple than just hash and lamports.
541    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
542    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
543    where
544        F: Fn(&T) -> &Hash + std::marker::Sync,
545        T: std::marker::Sync,
546    {
547        if hashes.is_empty() {
548            return Hasher::default().result();
549        }
550
551        let mut time = Measure::start("time");
552
553        let total_hashes = hashes.len();
554        let chunks = Self::div_ceil(total_hashes, fanout);
555
556        let result: Vec<_> = (0..chunks)
557            .into_par_iter()
558            .map(|i| {
559                let start_index = i * fanout;
560                let end_index = std::cmp::min(start_index + fanout, total_hashes);
561
562                let mut hasher = Hasher::default();
563                for item in hashes.iter().take(end_index).skip(start_index) {
564                    let h = extractor(item);
565                    hasher.hash(h.as_ref());
566                }
567
568                hasher.result()
569            })
570            .collect();
571        time.stop();
572        debug!("hashing {} {}", total_hashes, time);
573
574        if result.len() == 1 {
575            result[0]
576        } else {
577            Self::compute_merkle_root_recurse(result, fanout)
578        }
579    }
580
581    fn calculate_three_level_chunks(
582        total_hashes: usize,
583        fanout: usize,
584        max_levels_per_pass: Option<usize>,
585        specific_level_count: Option<usize>,
586    ) -> (usize, usize, bool) {
587        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
588        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
589
590        // Only use the 3 level optimization if we have at least 4 levels of data.
591        // Otherwise, we'll be serializing a parallel operation.
592        let threshold = target * fanout;
593        let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
594            && total_hashes >= threshold;
595        if three_level {
596            if let Some(specific_level_count_value) = specific_level_count {
597                three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
598            }
599        }
600        let (num_hashes_per_chunk, levels_hashed) = if three_level {
601            (target, THREE_LEVEL_OPTIMIZATION)
602        } else {
603            (fanout, 1)
604        };
605        (num_hashes_per_chunk, levels_hashed, three_level)
606    }
607
608    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
609    // The caller provides a function to return a slice from the source data.
610    fn compute_merkle_root_from_slices<'b, F, T>(
611        total_hashes: usize,
612        fanout: usize,
613        max_levels_per_pass: Option<usize>,
614        get_hash_slice_starting_at_index: F,
615        specific_level_count: Option<usize>,
616    ) -> (Hash, Vec<Hash>)
617    where
618        // returns a slice of hashes starting at the given overall index
619        F: Fn(usize) -> &'b [T] + std::marker::Sync,
620        T: Borrow<Hash> + std::marker::Sync + 'b,
621    {
622        if total_hashes == 0 {
623            return (Hasher::default().result(), vec![]);
624        }
625
626        let mut time = Measure::start("time");
627
628        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
629            total_hashes,
630            fanout,
631            max_levels_per_pass,
632            specific_level_count,
633        );
634
635        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
636
637        // initial fetch - could return entire slice
638        let data = get_hash_slice_starting_at_index(0);
639        let data_len = data.len();
640
641        let result: Vec<_> = (0..chunks)
642            .into_par_iter()
643            .map(|i| {
644                // summary:
645                // this closure computes 1 or 3 levels of merkle tree (all chunks will be 1 or all will be 3)
646                // for a subset (our chunk) of the input data [start_index..end_index]
647
648                // index into get_hash_slice_starting_at_index where this chunk's range begins
649                let start_index = i * num_hashes_per_chunk;
650                // index into get_hash_slice_starting_at_index where this chunk's range ends
651                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);
652
653                // will compute the final result for this closure
654                let mut hasher = Hasher::default();
655
656                // index into 'data' where we are currently pulling data
657                // if we exhaust our data, then we will request a new slice, and data_index resets to 0, the beginning of the new slice
658                let mut data_index = start_index;
659                // source data, which we may refresh when we exhaust
660                let mut data = data;
661                // len of the source data
662                let mut data_len = data_len;
663
664                if !three_level {
665                    // 1 group of fanout
666                    // The result of this loop is a single hash value from fanout input hashes.
667                    for i in start_index..end_index {
668                        if data_index >= data_len {
669                            // we exhausted our data, fetch next slice starting at i
670                            data = get_hash_slice_starting_at_index(i);
671                            data_len = data.len();
672                            data_index = 0;
673                        }
674                        hasher.hash(data[data_index].borrow().as_ref());
675                        data_index += 1;
676                    }
677                } else {
678                    // hash 3 levels of fanout simultaneously.
679                    // This codepath produces 1 hash value for between 1..=fanout^3 input hashes.
680                    // It is equivalent to running the normal merkle tree calculation 3 iterations on the input.
681                    //
682                    // big idea:
683                    //  merkle trees usually reduce the input vector by a factor of fanout with each iteration
684                    //  example with fanout 2:
685                    //   start:     [0,1,2,3,4,5,6,7]      in our case: [...16M...] or really, 1B
686                    //   iteration0 [.5, 2.5, 4.5, 6.5]                 [... 1M...]
687                    //   iteration1 [1.5, 5.5]                          [...65k...]
688                    //   iteration2 3.5                                 [...4k... ]
689                    //  So iteration 0 consumes N elements, hashes them in groups of 'fanout' and produces a vector of N/fanout elements
690                    //   and the process repeats until there is only 1 hash left.
691                    //
692                    //  With the three_level code path, we make each chunk we iterate of size fanout^3 (4096)
693                    //  So, the input could be 16M hashes and the output will be 4k hashes, or N/fanout^3
694                    //  The goal is to reduce the amount of data that has to be constructed and held in memory.
695                    //  When we know we have enough hashes, then, in 1 pass, we hash 3 levels simultaneously, storing far fewer intermediate hashes.
696                    //
697                    // Now, some details:
698                    // The result of this loop is a single hash value from fanout^3 input hashes.
699                    // concepts:
700                    //  what we're conceptually hashing: "raw_hashes"[start_index..end_index]
701                    //   example: [a,b,c,d,e,f]
702                    //   but... hashes[] may really be multiple vectors that are pieced together.
703                    //   example: [[a,b],[c],[d,e,f]]
704                    //   get_hash_slice_starting_at_index(any_index) abstracts that and returns a slice starting at raw_hashes[any_index..]
705                    //   such that the end of get_hash_slice_starting_at_index may be <, >, or = end_index
706                    //   example: get_hash_slice_starting_at_index(1) returns [b]
707                    //            get_hash_slice_starting_at_index(3) returns [d,e,f]
708                    // This code is basically 3 iterations of merkle tree hashing occurring simultaneously.
709                    // The first fanout raw hashes are hashed in hasher_k. This is iteration0
710                    // Once hasher_k has hashed fanout hashes, hasher_k's result hash is hashed in hasher_j and then discarded
711                    // hasher_k then starts over fresh and hashes the next fanout raw hashes. This is iteration0 again for a new set of data.
712                    // Once hasher_j has hashed fanout hashes (from k), hasher_j's result hash is hashed in hasher and then discarded
713                    // Once hasher has hashed fanout hashes (from j), then the result of hasher is the hash for fanout^3 raw hashes.
714                    // If there are < fanout^3 hashes, then this code stops when it runs out of raw hashes and returns whatever it hashed.
715                    // This is always how the very last elements work in a merkle tree.
716                    let mut i = start_index;
717                    while i < end_index {
718                        let mut hasher_j = Hasher::default();
719                        for _j in 0..fanout {
720                            let mut hasher_k = Hasher::default();
721                            let end = std::cmp::min(end_index - i, fanout);
722                            for _k in 0..end {
723                                if data_index >= data_len {
724                                    // we exhausted our data, fetch next slice starting at i
725                                    data = get_hash_slice_starting_at_index(i);
726                                    data_len = data.len();
727                                    data_index = 0;
728                                }
729                                hasher_k.hash(data[data_index].borrow().as_ref());
730                                data_index += 1;
731                                i += 1;
732                            }
733                            hasher_j.hash(hasher_k.result().as_ref());
734                            if i >= end_index {
735                                break;
736                            }
737                        }
738                        hasher.hash(hasher_j.result().as_ref());
739                    }
740                }
741
742                hasher.result()
743            })
744            .collect();
745        time.stop();
746        debug!("hashing {} {}", total_hashes, time);
747
748        if let Some(mut specific_level_count_value) = specific_level_count {
749            specific_level_count_value -= levels_hashed;
750            if specific_level_count_value == 0 {
751                (Hash::default(), result)
752            } else {
753                assert!(specific_level_count_value > 0);
754                // We did not hash the number of levels required by 'specific_level_count', so repeat
755                Self::compute_merkle_root_from_slices_recurse(
756                    result,
757                    fanout,
758                    max_levels_per_pass,
759                    Some(specific_level_count_value),
760                )
761            }
762        } else {
763            (
764                if result.len() == 1 {
765                    result[0]
766                } else {
767                    Self::compute_merkle_root_recurse(result, fanout)
768                },
769                vec![], // no intermediate results needed by caller
770            )
771        }
772    }
773
774    fn compute_merkle_root_from_slices_recurse(
775        hashes: Vec<Hash>,
776        fanout: usize,
777        max_levels_per_pass: Option<usize>,
778        specific_level_count: Option<usize>,
779    ) -> (Hash, Vec<Hash>) {
780        Self::compute_merkle_root_from_slices(
781            hashes.len(),
782            fanout,
783            max_levels_per_pass,
784            |start| &hashes[start..],
785            specific_level_count,
786        )
787    }
788
789    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, AccountHash)>) -> Hash {
790        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
791        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1 .0)
792    }
793
794    pub fn compare_two_hash_entries(
795        a: &CalculateHashIntermediate,
796        b: &CalculateHashIntermediate,
797    ) -> std::cmp::Ordering {
798        // note partial_cmp only returns None with floating point comparisons
799        a.pubkey.partial_cmp(&b.pubkey).unwrap()
800    }
801
802    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
803        balance.try_into().unwrap_or_else(|_| {
804            panic!("overflow is detected while summing capitalization: {balance}")
805        })
806    }
807
808    /// returns:
809    /// Vec, with one entry per bin
810    ///  for each entry, Vec<Hash> in pubkey order
811    /// If return Vec<AccountHashesFile> was flattened, it would be all hashes, in pubkey order.
812    fn de_dup_accounts(
813        &self,
814        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
815        stats: &mut HashStats,
816        max_bin: usize,
817    ) -> (Vec<AccountHashesFile>, u64) {
818        // 1. eliminate zero lamport accounts
819        // 2. pick the highest slot or (slot = and highest version) of each pubkey
820        // 3. produce this output:
821        // a. vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
822        //      vec: individual hashes in pubkey order, 1 hash per
823        // b. lamports
824        let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);
825
826        #[derive(Default)]
827        struct DedupResult {
828            hashes_files: Vec<AccountHashesFile>,
829            hashes_count: usize,
830            lamports_sum: u64,
831        }
832
833        let mut zeros = Measure::start("eliminate zeros");
834        let DedupResult {
835            hashes_files: hashes,
836            hashes_count: hash_total,
837            lamports_sum: lamports_total,
838        } = (0..max_bin)
839            .into_par_iter()
840            .fold(DedupResult::default, |mut accum, bin| {
841                let (hashes_file, lamports_bin) =
842                    self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin, max_bin, stats);
843
844                accum.lamports_sum = accum
845                    .lamports_sum
846                    .checked_add(lamports_bin)
847                    .expect("summing capitalization cannot overflow");
848                accum.hashes_count += hashes_file.count();
849                accum.hashes_files.push(hashes_file);
850                accum
851            })
852            .reduce(
853                || {
854                    DedupResult {
855                        // Allocate with Vec::new() so that no allocation actually happens. See
856                        // https://github.com/anza-xyz/agave/pull/1308.
857                        hashes_files: Vec::new(),
858                        ..Default::default()
859                    }
860                },
861                |mut a, mut b| {
862                    a.lamports_sum = a
863                        .lamports_sum
864                        .checked_add(b.lamports_sum)
865                        .expect("summing capitalization cannot overflow");
866                    a.hashes_count += b.hashes_count;
867                    a.hashes_files.append(&mut b.hashes_files);
868                    a
869                },
870            );
871        zeros.stop();
872        stats.zeros_time_total_us += zeros.as_us();
873        stats.hash_total += hash_total;
874        (hashes, lamports_total)
875    }
876
877    /// Given the item location, return the item in the `CalculatedHashIntermediate` slices and the next item location in the same bin.
878    /// If the end of the `CalculatedHashIntermediate` slice is reached or all the accounts in current bin have been exhausted, return `None` for next item location.
879    fn get_item<'b>(
880        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
881        bin: usize,
882        binner: &PubkeyBinCalculator24,
883        item_loc: &ItemLocation<'b>,
884    ) -> (&'b CalculateHashIntermediate, Option<ItemLocation<'b>>) {
885        let division_data = &sorted_data_by_pubkey[item_loc.pointer.slot_group_index];
886        let mut index = item_loc.pointer.offset;
887        index += 1;
888        let mut next = None;
889
890        while index < division_data.len() {
891            // still more items where we found the previous key, so just increment the index for that slot group, skipping all pubkeys that are equal
892            let next_key = &division_data[index].pubkey;
893            if next_key == item_loc.key {
894                index += 1;
895                continue; // duplicate entries of same pubkey, so keep skipping
896            }
897
898            if binner.bin_from_pubkey(next_key) > bin {
899                // the next pubkey is not in our bin
900                break;
901            }
902
903            // point to the next pubkey > key
904            next = Some(ItemLocation {
905                key: next_key,
906                pointer: SlotGroupPointer {
907                    slot_group_index: item_loc.pointer.slot_group_index,
908                    offset: index,
909                },
910            });
911            break;
912        }
913
914        // this is the previous first item that was requested
915        (&division_data[index - 1], next)
916    }
917
918    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
919    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
920    fn binary_search_for_first_pubkey_in_bin(
921        hash_data: &[CalculateHashIntermediate],
922        bin: usize,
923        binner: &PubkeyBinCalculator24,
924    ) -> Option<usize> {
925        let potential_index = if bin == 0 {
926            // `bin` == 0 is special because there cannot be `bin`-1
927            // so either element[0] is in bin 0 or there is nothing in bin 0.
928            0
929        } else {
930            // search for the first pubkey that is in `bin`
931            // There could be many keys in a row with the same `bin`.
932            // So, for each pubkey, use calculated_bin * 2 + 1 as the bin of a given pubkey for binary search.
933            // And compare the bin of each pubkey with `bin` * 2.
934            // So all keys that are in `bin` will compare as `bin` * 2 + 1
935            // all keys that are in `bin`-1 will compare as ((`bin` - 1) * 2 + 1), which is (`bin` * 2 - 1)
936            // NO keys will compare as `bin` * 2 because we add 1.
937            // So, the binary search will NEVER return Ok(found_index), but will always return Err(index of first key in `bin`).
938            // Note that if NO key is in `bin`, then the key at the found index will be in a bin > `bin`, so return None.
939            let just_prior_to_desired_bin = bin * 2;
940            let search = hash_data.binary_search_by(|data| {
941                (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
942            });
943            // returns Err(index where item should be) since the desired item will never exist
944            search.expect_err("it is impossible to find a matching bin")
945        };
946        // note that `potential_index` could be == hash_data.len(). This indicates the first key in `bin` would be
947        // after the data we have. Thus, no key is in `bin`.
948        // This also handles the case where `hash_data` is empty, since len() will be 0 and `get` will return None.
949        hash_data.get(potential_index).and_then(|potential_data| {
950            (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
951        })
952    }
953
954    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
955    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
956    fn find_first_pubkey_in_bin(
957        hash_data: &[CalculateHashIntermediate],
958        bin: usize,
959        bins: usize,
960        binner: &PubkeyBinCalculator24,
961        stats: &HashStats,
962    ) -> Option<usize> {
963        if hash_data.is_empty() {
964            return None;
965        }
966        let (result, us) = measure_us!({
967            // assume uniform distribution of pubkeys and choose first guess based on bin we're looking for
968            let i = hash_data.len() * bin / bins;
969            let estimate = &hash_data[i];
970
971            let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
972            let range = if pubkey_bin >= bin {
973                // i pubkey matches or is too large, so look <= i for the first pubkey in the right bin
974                // i+1 could be the first pubkey in the right bin
975                0..(i + 1)
976            } else {
977                // i pubkey is too small, so look after i
978                (i + 1)..hash_data.len()
979            };
980            Some(
981                range.start +
982                // binary search the subset
983                Self::binary_search_for_first_pubkey_in_bin(
984                    &hash_data[range],
985                    bin,
986                    binner,
987                )?,
988            )
989        });
990        stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
991        result
992    }
993
994    /// Return the working_set and max number of pubkeys for hash dedup.
995    /// `working_set` holds SlotGroupPointer {slot_group_index, offset} for items in account's pubkey descending order.
996    fn initialize_dedup_working_set(
997        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
998        pubkey_bin: usize,
999        bins: usize,
1000        binner: &PubkeyBinCalculator24,
1001        stats: &HashStats,
1002    ) -> (
1003        Vec<SlotGroupPointer>, /* working_set */
1004        usize,                 /* max_inclusive_num_pubkeys */
1005    ) {
1006        // working_set holds the lowest items for each slot_group sorted by pubkey descending (min_key is the last)
1007        let mut working_set: Vec<SlotGroupPointer> = Vec::default();
1008
1009        // Initialize 'working_set', which holds the current lowest item in each slot group.
1010        // `working_set` should be initialized in reverse order of slot_groups. Later slot_groups are
1011        // processed first. For each slot_group, if the lowest item for current slot group is
1012        // already in working_set (i.e. inserted by a later slot group), the next lowest item
1013        // in this slot group is searched and checked, until either one that is `not` in the
1014        // working_set is found, which will then be inserted, or no next lowest item is found.
1015        // Iterating in reverse order of slot_group will guarantee that each slot group will be
1016        // scanned only once and scanned continuously. Therefore, it can achieve better data
1017        // locality during the scan.
1018        let max_inclusive_num_pubkeys = sorted_data_by_pubkey
1019            .iter()
1020            .enumerate()
1021            .rev()
1022            .map(|(i, hash_data)| {
1023                let first_pubkey_in_bin =
1024                    Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, binner, stats);
1025
1026                if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
1027                    let mut next = Some(ItemLocation {
1028                        key: &hash_data[first_pubkey_in_bin].pubkey,
1029                        pointer: SlotGroupPointer {
1030                            slot_group_index: i,
1031                            offset: first_pubkey_in_bin,
1032                        },
1033                    });
1034
1035                    Self::add_next_item(
1036                        &mut next,
1037                        &mut working_set,
1038                        sorted_data_by_pubkey,
1039                        pubkey_bin,
1040                        binner,
1041                    );
1042
1043                    let mut first_pubkey_in_next_bin = first_pubkey_in_bin + 1;
1044                    while first_pubkey_in_next_bin < hash_data.len() {
1045                        if binner.bin_from_pubkey(&hash_data[first_pubkey_in_next_bin].pubkey)
1046                            != pubkey_bin
1047                        {
1048                            break;
1049                        }
1050                        first_pubkey_in_next_bin += 1;
1051                    }
1052                    first_pubkey_in_next_bin - first_pubkey_in_bin
1053                } else {
1054                    0
1055                }
1056            })
1057            .sum::<usize>();
1058
1059        (working_set, max_inclusive_num_pubkeys)
1060    }
1061
1062    /// Add next item into hash dedup working set
1063    fn add_next_item<'b>(
1064        next: &mut Option<ItemLocation<'b>>,
1065        working_set: &mut Vec<SlotGroupPointer>,
1066        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
1067        pubkey_bin: usize,
1068        binner: &PubkeyBinCalculator24,
1069    ) {
1070        // looping to add next item to working set
1071        while let Some(ItemLocation { key, pointer }) = std::mem::take(next) {
1072            // if `new key` is less than the min key in the working set, skip binary search and
1073            // insert item to the end vec directly
1074            if let Some(SlotGroupPointer {
1075                slot_group_index: current_min_slot_group_index,
1076                offset: current_min_offset,
1077            }) = working_set.last()
1078            {
1079                let current_min_key = &sorted_data_by_pubkey[*current_min_slot_group_index]
1080                    [*current_min_offset]
1081                    .pubkey;
1082                if key < current_min_key {
1083                    working_set.push(pointer);
1084                    break;
1085                }
1086            }
1087
1088            let found = working_set.binary_search_by(|pointer| {
1089                let prob = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1090                (*key).cmp(prob)
1091            });
1092
1093            match found {
1094                Err(index) => {
1095                    // found a new new key, insert into the working_set. This is O(n/2) on
1096                    // average. Theoretically, this operation could be expensive and may be further
1097                    // optimized in future.
1098                    working_set.insert(index, pointer);
1099                    break;
1100                }
1101                Ok(index) => {
1102                    let found = &mut working_set[index];
1103                    if found.slot_group_index > pointer.slot_group_index {
1104                        // There is already a later slot group that contains this key in the working_set,
1105                        // look up again.
1106                        let (_item, new_next) = Self::get_item(
1107                            sorted_data_by_pubkey,
1108                            pubkey_bin,
1109                            binner,
1110                            &ItemLocation { key, pointer },
1111                        );
1112                        *next = new_next;
1113                    } else {
1114                        // A previous slot contains this key, replace it, and look for next item in the previous slot group.
1115                        let (_item, new_next) = Self::get_item(
1116                            sorted_data_by_pubkey,
1117                            pubkey_bin,
1118                            binner,
1119                            &ItemLocation {
1120                                key,
1121                                pointer: *found,
1122                            },
1123                        );
1124                        *found = pointer;
1125                        *next = new_next;
1126                    }
1127                }
1128            }
1129        }
1130    }
1131
1132    // go through: [..][pubkey_bin][..] and return hashes and lamport sum
1133    //   slot groups^                ^accounts found in a slot group, sorted by pubkey, higher slot, write_version
1134    // 1. handle zero lamport accounts
1135    // 2. pick the highest slot or (slot = and highest version) of each pubkey
1136    // 3. produce this output:
1137    //   a. AccountHashesFile: individual account hashes in pubkey order
1138    //   b. lamport sum
1139    fn de_dup_accounts_in_parallel(
1140        &self,
1141        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1142        pubkey_bin: usize,
1143        bins: usize,
1144        stats: &HashStats,
1145    ) -> (AccountHashesFile, u64) {
1146        let binner = PubkeyBinCalculator24::new(bins);
1147
1148        // working_set hold the lowest items for each slot_group sorted by pubkey descending (min_key is the last)
1149        let (mut working_set, max_inclusive_num_pubkeys) = Self::initialize_dedup_working_set(
1150            sorted_data_by_pubkey,
1151            pubkey_bin,
1152            bins,
1153            &binner,
1154            stats,
1155        );
1156
1157        let mut hashes = AccountHashesFile {
1158            writer: None,
1159            dir_for_temp_cache_files: self.dir_for_temp_cache_files.clone(),
1160            capacity: max_inclusive_num_pubkeys * std::mem::size_of::<Hash>(),
1161        };
1162
1163        let mut overall_sum: u64 = 0;
1164
1165        while let Some(pointer) = working_set.pop() {
1166            let key = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1167
1168            // get the min item, add lamports, get hash
1169            let (item, mut next) = Self::get_item(
1170                sorted_data_by_pubkey,
1171                pubkey_bin,
1172                &binner,
1173                &ItemLocation { key, pointer },
1174            );
1175
1176            // add lamports and get hash
1177            if item.lamports != 0 {
1178                overall_sum = overall_sum
1179                    .checked_add(item.lamports)
1180                    .expect("summing lamports cannot overflow");
1181                hashes.write(&item.hash.0);
1182            } else {
1183                stats
1184                    .num_zero_lamport_accounts
1185                    .fetch_add(1, Ordering::Relaxed);
1186                // if lamports == 0, check if they should be included
1187                if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
1188                    // For incremental accounts hash, the hash of a zero lamport account is
1189                    // the hash of its pubkey
1190                    let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
1191                    let hash = Hash::new_from_array(hash.into());
1192                    hashes.write(&hash);
1193                }
1194            }
1195
1196            Self::add_next_item(
1197                &mut next,
1198                &mut working_set,
1199                sorted_data_by_pubkey,
1200                pubkey_bin,
1201                &binner,
1202            );
1203        }
1204
1205        (hashes, overall_sum)
1206    }
1207
1208    /// input:
1209    /// vec: group of slot data, ordered by Slot (low to high)
1210    ///   vec: [..] - items found in that slot range Sorted by: Pubkey, higher Slot, higher Write version (if pubkey =)
1211    pub fn rest_of_hash_calculation(
1212        &self,
1213        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1214        stats: &mut HashStats,
1215    ) -> (Hash, u64) {
1216        let (hashes, total_lamports) = self.de_dup_accounts(
1217            sorted_data_by_pubkey,
1218            stats,
1219            PUBKEY_BINS_FOR_CALCULATING_HASHES,
1220        );
1221
1222        let cumulative = CumulativeHashesFromFiles::from_files(hashes);
1223
1224        let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
1225        let mut hash_time = Measure::start("hash");
1226        let (hash, _) = Self::compute_merkle_root_from_slices(
1227            cumulative.total_count(),
1228            MERKLE_FANOUT,
1229            None,
1230            |start| cumulative.get_slice(start),
1231            None,
1232        );
1233        hash_time.stop();
1234        stats.hash_time_total_us += hash_time.as_us();
1235        (hash, total_lamports)
1236    }
1237}
1238
1239/// How should zero-lamport accounts be treated by the accounts hasher?
1240#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1241pub enum ZeroLamportAccounts {
1242    Excluded,
1243    Included,
1244}
1245
1246/// Hash of an account
1247#[repr(transparent)]
1248#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1249#[derive(Debug, Copy, Clone, Eq, PartialEq, Pod, Zeroable)]
1250pub struct AccountHash(pub Hash);
1251
1252// Ensure the newtype wrapper never changes size from the underlying Hash
1253// This also ensures there are no padding bytes, which is required to safely implement Pod
1254const _: () = assert!(std::mem::size_of::<AccountHash>() == std::mem::size_of::<Hash>());
1255
1256/// The AccountHash for a zero-lamport account
1257pub const ZERO_LAMPORT_ACCOUNT_HASH: AccountHash =
1258    AccountHash(Hash::new_from_array([0; HASH_BYTES]));
1259
1260/// Lattice hash of an account
1261#[derive(Debug, Clone, Eq, PartialEq)]
1262pub struct AccountLtHash(pub LtHash);
1263
1264/// The AccountLtHash for a zero-lamport account
1265pub const ZERO_LAMPORT_ACCOUNT_LT_HASH: AccountLtHash = AccountLtHash(LtHash::identity());
1266
1267/// Lattice hash of all accounts
1268#[derive(Debug, Clone, Eq, PartialEq)]
1269pub struct AccountsLtHash(pub LtHash);
1270
1271/// Hash of accounts
1272#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1273pub enum AccountsHashKind {
1274    Full(AccountsHash),
1275    Incremental(IncrementalAccountsHash),
1276}
1277impl AccountsHashKind {
1278    pub fn as_hash(&self) -> &Hash {
1279        match self {
1280            AccountsHashKind::Full(AccountsHash(hash))
1281            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
1282        }
1283    }
1284}
1285impl From<AccountsHash> for AccountsHashKind {
1286    fn from(accounts_hash: AccountsHash) -> Self {
1287        AccountsHashKind::Full(accounts_hash)
1288    }
1289}
1290impl From<IncrementalAccountsHash> for AccountsHashKind {
1291    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
1292        AccountsHashKind::Incremental(incremental_accounts_hash)
1293    }
1294}
1295
1296/// Hash of accounts
1297#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1298pub struct AccountsHash(pub Hash);
1299/// Hash of accounts that includes zero-lamport accounts
1300/// Used with incremental snapshots
1301#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1302pub struct IncrementalAccountsHash(pub Hash);
1303
1304/// Hash of accounts written in a single slot
1305#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1306pub struct AccountsDeltaHash(pub Hash);
1307
1308/// Snapshot serde-safe accounts delta hash
1309#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1310#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1311pub struct SerdeAccountsDeltaHash(pub Hash);
1312
1313impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
1314    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
1315        Self(accounts_delta_hash.0)
1316    }
1317}
1318impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
1319    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
1320        Self(accounts_delta_hash.0)
1321    }
1322}
1323
1324/// Snapshot serde-safe accounts hash
1325#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1326#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1327pub struct SerdeAccountsHash(pub Hash);
1328
1329impl From<SerdeAccountsHash> for AccountsHash {
1330    fn from(accounts_hash: SerdeAccountsHash) -> Self {
1331        Self(accounts_hash.0)
1332    }
1333}
1334impl From<AccountsHash> for SerdeAccountsHash {
1335    fn from(accounts_hash: AccountsHash) -> Self {
1336        Self(accounts_hash.0)
1337    }
1338}
1339
1340/// Snapshot serde-safe incremental accounts hash
1341#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
1342#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
1343pub struct SerdeIncrementalAccountsHash(pub Hash);
1344
1345impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
1346    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
1347        Self(incremental_accounts_hash.0)
1348    }
1349}
1350impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
1351    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
1352        Self(incremental_accounts_hash.0)
1353    }
1354}
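
// The serde-safe wrappers carry the `Serialize`/`Deserialize` derives needed by
// snapshots while the runtime newtypes stay serde-free. Conversions are lossless
// in both directions; a small sketch:
//
//     let accounts_hash = AccountsHash(Hash::default());
//     let serde_hash = SerdeAccountsHash::from(accounts_hash);
//     assert_eq!(AccountsHash::from(serde_hash), accounts_hash);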
1355
1356#[cfg(test)]
1357mod tests {
1358    use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};
1359
1360    lazy_static! {
1361        static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
1362    }
1363
1364    impl<'a> AccountsHasher<'a> {
1365        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1366            Self {
1367                zero_lamport_accounts: ZeroLamportAccounts::Excluded,
1368                dir_for_temp_cache_files,
1369                active_stats: &ACTIVE_STATS,
1370            }
1371        }
1372    }
1373
1374    impl AccountHashesFile {
1375        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1376            Self {
1377                writer: None,
1378                dir_for_temp_cache_files,
1379                capacity: 1024, /* default 1k for tests */
1380            }
1381        }
1382    }
1383
1384    impl CumulativeOffsets {
1385        fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
1386            let mut total_count: usize = 0;
1387            let mut cumulative_offsets = Vec::with_capacity(0);
1388            for (i, v_outer) in raw.iter().enumerate() {
1389                for (j, v) in v_outer.iter().enumerate() {
1390                    let len = v.len();
1391                    if len > 0 {
1392                        if cumulative_offsets.is_empty() {
1393                            // the first inner, non-empty vector we find gives us an approximate rectangular shape
1394                            cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
1395                        }
1396                        cumulative_offsets.push(CumulativeOffset {
1397                            index: [i, j],
1398                            start_offset: total_count,
1399                        });
1400                        total_count += len;
1401                    }
1402                }
1403            }
1404
1405            Self {
1406                cumulative_offsets,
1407                total_count,
1408            }
1409        }
1410    }
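
    // For example, `from_raw_2d(&[vec![vec![0, 1], vec![], vec![2, 3, 4]]])`
    // yields two `CumulativeOffset` entries, `{ index: [0, 0], start_offset: 0 }`
    // and `{ index: [0, 2], start_offset: 2 }`, with `total_count == 5`; see
    // `test_accountsdb_cumulative_offsets2_d` below.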
1411
1412    #[test]
1413    fn test_find_first_pubkey_in_bin() {
1414        let stats = HashStats::default();
1415        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
1416            let bins: usize = bins;
1417            let binner = PubkeyBinCalculator24::new(bins);
1418
1419            let mut count = 0usize;
1420            // # pubkeys in each bin are permutations of these
1421            // 0 means none in this bin
1422        // a large count (20) means the first key found in the bin lands well before or after the index predicted by a uniform-distribution assumption
1423            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
1424                count += 1;
1425                let hash_data = counts
1426                    .iter()
1427                    .enumerate()
1428                    .flat_map(|(bin, count)| {
1429                        (0..*count).map(move |_| {
1430                            let binner = PubkeyBinCalculator24::new(bins);
1431                            CalculateHashIntermediate {
1432                                hash: AccountHash(Hash::default()),
1433                                lamports: 0,
1434                                pubkey: binner.lowest_pubkey_from_bin(bin, bins),
1435                            }
1436                        })
1437                    })
1438                    .collect::<Vec<_>>();
1439                // look for the first pubkey in each bin
1440                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
1441                    let first = AccountsHasher::find_first_pubkey_in_bin(
1442                        &hash_data, bin, bins, &binner, &stats,
1443                    );
1444                    // test both functions
1445                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
1446                        &hash_data, bin, &binner,
1447                    );
1448                    assert_eq!(first, first_again);
1449                    assert_eq!(first.is_none(), count_in_bin == &0);
1450                    if let Some(first) = first {
1451                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
1452                        if first > 0 {
1453                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
1454                        }
1455                    }
1456                }
1457            }
1458            assert_eq!(
1459                count, expected_count,
1460                "too few iterations in test. bins: {bins}"
1461            );
1462        }
1463    }
1464
1465    #[test]
1466    fn test_account_hashes_file() {
1467        let dir_for_temp_cache_files = tempdir().unwrap();
1468        // 0 hashes
1469        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
1470        assert!(file.get_reader().is_none());
1471        let hashes = (0..2).map(|i| Hash::new(&[i; 32])).collect::<Vec<_>>();
1472
1473        // 1 hash
1474        file.write(&hashes[0]);
1475        let reader = file.get_reader().unwrap();
1476        assert_eq!(&[hashes[0]][..], reader.read(0));
1477        assert!(reader.read(1).is_empty());
1478
1479        // multiple hashes
1480        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
1481        assert!(file.get_reader().is_none());
1482        hashes.iter().for_each(|hash| file.write(hash));
1483        let reader = file.get_reader().unwrap();
1484        (0..2).for_each(|i| assert_eq!(&hashes[i..], reader.read(i)));
1485        assert!(reader.read(2).is_empty());
1486    }
1487
1488    #[test]
1489    fn test_cumulative_hashes_from_files() {
1490        let dir_for_temp_cache_files = tempdir().unwrap();
1491        (0..4).for_each(|permutation| {
1492            let hashes = (0..2).map(|i| Hash::new(&[i + 1; 32])).collect::<Vec<_>>();
1493
1494            let mut combined = Vec::default();
1495
1496            // 0 hashes
1497            let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
1498
1499            // 1 hash
1500            let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
1501            file1.write(&hashes[0]);
1502            combined.push(hashes[0]);
1503
1504            // multiple hashes
1505            let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
1506            hashes.iter().for_each(|hash| {
1507                file2.write(hash);
1508                combined.push(*hash);
1509            });
1510
1511            let hashes = if permutation == 0 {
1512                vec![file0, file1, file2]
1513            } else if permutation == 1 {
1514                // include more empty files
1515                vec![
1516                    file0,
1517                    file1,
1518                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
1519                    file2,
1520                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
1521                ]
1522            } else if permutation == 2 {
1523                vec![file1, file2]
1524            } else {
1525                // swap the order of file2 and file1
1526                let one = combined.remove(0);
1527                combined.push(one);
1528                vec![
1529                    file2,
1530                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
1531                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
1532                    file1,
1533                ]
1534            };
1535
1536            let cumulative = CumulativeHashesFromFiles::from_files(hashes);
1537            let len = combined.len();
1538            assert_eq!(cumulative.total_count(), len);
1539            (0..combined.len()).for_each(|start| {
1540                let mut retrieved = Vec::default();
1541                let mut cumulative_start = start;
1542                // read all data
1543                while retrieved.len() < (len - start) {
1544                    let this_one = cumulative.get_slice(cumulative_start);
1545                    retrieved.extend(this_one.iter());
1546                    cumulative_start += this_one.len();
1547                    assert_ne!(0, this_one.len());
1548                }
1549                assert_eq!(
1550                    &combined[start..],
1551                    &retrieved[..],
1552                    "permutation: {permutation}"
1553                );
1554            });
1555        });
1556    }
1557
1558    #[test]
1559    fn test_accountsdb_div_ceil() {
1560        assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
1561        assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
1562        assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
1563        assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
1564        assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
1565    }
1566
1567    #[test]
1568    #[should_panic(expected = "attempt to divide by zero")]
1569    fn test_accountsdb_div_ceil_fail() {
1570        assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
1571    }
1572
1573    fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
1574        vec![original]
1575    }
1576
1577    #[test]
1578    fn test_accountsdb_rest_of_hash_calculation() {
1579        solana_logger::setup();
1580
1581        let mut account_maps = Vec::new();
1582
1583        let pubkey = Pubkey::from([11u8; 32]);
1584        let hash = AccountHash(Hash::new(&[1u8; 32]));
1585        let val = CalculateHashIntermediate {
1586            hash,
1587            lamports: 88,
1588            pubkey,
1589        };
1590        account_maps.push(val);
1591
1592        // 2nd key - zero lamports, so will be removed
1593        let pubkey = Pubkey::from([12u8; 32]);
1594        let hash = AccountHash(Hash::new(&[2u8; 32]));
1595        let val = CalculateHashIntermediate {
1596            hash,
1597            lamports: 0,
1598            pubkey,
1599        };
1600        account_maps.push(val);
1601
1602        let dir_for_temp_cache_files = tempdir().unwrap();
1603        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1604        let result = accounts_hash
1605            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
1606        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
1607        assert_eq!((result.0, result.1), (expected_hash, 88));
1608
1609        // 3rd key - with pubkey value before 1st key so it will be sorted first
1610        let pubkey = Pubkey::from([10u8; 32]);
1611        let hash = AccountHash(Hash::new(&[2u8; 32]));
1612        let val = CalculateHashIntermediate {
1613            hash,
1614            lamports: 20,
1615            pubkey,
1616        };
1617        account_maps.insert(0, val);
1618
1619        let result = accounts_hash
1620            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
1621        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
1622        assert_eq!((result.0, result.1), (expected_hash, 108));
1623
1624        // 3rd key again - same pubkey, but from a later slot, so it supersedes the earlier entry
1625        let pubkey = Pubkey::from([10u8; 32]);
1626        let hash = AccountHash(Hash::new(&[99u8; 32]));
1627        let val = CalculateHashIntermediate {
1628            hash,
1629            lamports: 30,
1630            pubkey,
1631        };
1632        account_maps.insert(1, val);
1633
1634        let result = accounts_hash
1635            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
1636        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
1637        assert_eq!((result.0, result.1), (expected_hash, 118));
1638    }
1639
1640    fn one_range() -> usize {
1641        1
1642    }
1643
1644    fn zero_range() -> usize {
1645        0
1646    }
1647
1648    #[test]
1649    fn test_accountsdb_de_dup_accounts_zero_chunks() {
1650        let vec = [vec![CalculateHashIntermediate {
1651            lamports: 1,
1652            hash: AccountHash(Hash::default()),
1653            pubkey: Pubkey::default(),
1654        }]];
1655        let temp_vec = vec.to_vec();
1656        let slice = convert_to_slice(&temp_vec);
1657        let dir_for_temp_cache_files = tempdir().unwrap();
1658        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1659        let (mut hashes, lamports) =
1660            accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
1661        assert_eq!(&[Hash::default()], hashes.get_reader().unwrap().read(0));
1662        assert_eq!(lamports, 1);
1663    }
1664
1665    fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
1666        hashes.into_iter().map(get_vec).collect()
1667    }
1668    fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
1669        hashes
1670            .get_reader()
1671            .map(|r| r.read(0).to_vec())
1672            .unwrap_or_default()
1673    }
1674
1675    #[test]
1676    fn test_accountsdb_de_dup_accounts_empty() {
1677        solana_logger::setup();
1678        let dir_for_temp_cache_files = tempdir().unwrap();
1679        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1680
1681        let empty = [];
1682        let vec = &empty;
1683        let (hashes, lamports) =
1684            accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
1685        assert_eq!(
1686            Vec::<Hash>::new(),
1687            get_vec_vec(hashes)
1688                .into_iter()
1689                .flatten()
1690                .collect::<Vec<_>>(),
1691        );
1692        assert_eq!(lamports, 0);
1693        let vec = vec![];
1694        let (hashes, lamports) =
1695            accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
1696        let empty: Vec<Vec<Hash>> = Vec::default();
1697        assert_eq!(empty, get_vec_vec(hashes));
1698        assert_eq!(lamports, 0);
1699
1700        let (hashes, lamports) =
1701            accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
1702        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1703        assert_eq!(lamports, 0);
1704
1705        let (hashes, lamports) =
1706            accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
1707        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1708        assert_eq!(lamports, 0);
1709    }
1710
1711    #[test]
1712    fn test_accountsdb_de_dup_accounts_from_stores() {
1713        solana_logger::setup();
1714
1715        let key_a = Pubkey::from([1u8; 32]);
1716        let key_b = Pubkey::from([2u8; 32]);
1717        let key_c = Pubkey::from([3u8; 32]);
1718        const COUNT: usize = 6;
1719        let hashes = (0..COUNT).map(|i| AccountHash(Hash::new(&[i as u8; 32])));
1720        // create this vector
1721        // abbbcc
1722        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];
1723
1724        let accounts: Vec<_> = hashes
1725            .zip(keys.iter())
1726            .enumerate()
1727            .map(|(i, (hash, &pubkey))| CalculateHashIntermediate {
1728                hash,
1729                lamports: (i + 1) as u64,
1730                pubkey,
1731            })
1732            .collect();
1733
1734        type ExpectedType = (String, bool, u64, String);
1735        let expected:Vec<ExpectedType> = vec![
1736            // ("key/lamports key2/lamports ...",
1737            // is_last_slice
1738            // result lamports
1739            // result hashes)
1740            // e.g. "a5" would mean key_a with 5 lamports
1741            ("a1", false, 1, "[11111111111111111111111111111111]"),
1742            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1743            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1744            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1745            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1746            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1747            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1748            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1749            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1750            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1751            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1752            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1753            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1754            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1755            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1756            ("a1", true, 1, "[11111111111111111111111111111111]"),
1757            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1758            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1759            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1760            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1761            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
1762            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1763            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1764            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1765            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
1766            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1767            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1768            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
1769            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1770            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
1771            ].into_iter().map(|item| {
1772                let result: ExpectedType = (
1773                    item.0.to_string(),
1774                    item.1,
1775                    item.2,
1776                    item.3.to_string(),
1777                );
1778                result
1779            }).collect();
1780
1781        let dir_for_temp_cache_files = tempdir().unwrap();
1782        let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1783        let mut expected_index = 0;
1784        for last_slice in 0..2 {
1785            for start in 0..COUNT {
1786                for end in start + 1..COUNT {
1787                    let is_last_slice = last_slice == 1;
1788                    let accounts = accounts.clone();
1789                    let slice = &accounts[start..end];
1790
1791                    let slice2 = vec![slice.to_vec()];
1792                    let slice = &slice2[..];
1793                    let slice_temp = convert_to_slice(&slice2);
1794                    let (hashes2, lamports2) =
1795                        hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
1796                    let slice3 = convert_to_slice(&slice2);
1797                    let (hashes3, lamports3) =
1798                        hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
1799                    let vec = slice.to_vec();
1800                    let slice4 = convert_to_slice(&vec);
1801                    let mut max_bin = end - start;
1802                    if !max_bin.is_power_of_two() {
1803                        max_bin = 1;
1804                    }
1805
1806                    let (hashes4, lamports4) =
1807                        hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
1808                    let vec = slice.to_vec();
1809                    let slice5 = convert_to_slice(&vec);
1810                    let (hashes5, lamports5) =
1811                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
1812                    let vec = slice.to_vec();
1813                    let slice5 = convert_to_slice(&vec);
1814                    let (hashes6, lamports6) =
1815                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
1816
1817                    let hashes2 = get_vec(hashes2);
1818                    let hashes3 = get_vec(hashes3);
1819                    let hashes4 = get_vec_vec(hashes4);
1820                    let hashes5 = get_vec_vec(hashes5);
1821                    let hashes6 = get_vec_vec(hashes6);
1822
1823                    assert_eq!(hashes2, hashes3);
1824                    let expected2 = hashes2.clone();
1825                    assert_eq!(
1826                        expected2,
1827                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
1828                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
1829                    );
1830                    assert_eq!(
1831                        expected2.clone(),
1832                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
1833                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
1834                    );
1835                    assert_eq!(
1836                        expected2.clone(),
1837                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
1838                    );
1839                    assert_eq!(lamports2, lamports3);
1840                    assert_eq!(lamports2, lamports4);
1841                    assert_eq!(lamports2, lamports5);
1842                    assert_eq!(lamports2, lamports6);
1843
1844                    let human_readable = slice[0]
1845                        .iter()
1846                        .map(|v| {
1847                            let mut s = (if v.pubkey == key_a {
1848                                "a"
1849                            } else if v.pubkey == key_b {
1850                                "b"
1851                            } else {
1852                                "c"
1853                            })
1854                            .to_string();
1855
1856                            s.push_str(&v.lamports.to_string());
1857                            s
1858                        })
1859                        .collect::<String>();
1860
1861                    let hash_result_as_string = format!("{hashes2:?}");
1862
1863                    let packaged_result: ExpectedType = (
1864                        human_readable,
1865                        is_last_slice,
1866                        lamports2,
1867                        hash_result_as_string,
1868                    );
1869                    assert_eq!(expected[expected_index], packaged_result);
1870
1871                    // for generating expected results
1872                    // error!("{:?},", packaged_result);
1873                    expected_index += 1;
1874                }
1875            }
1876        }
1877    }
1878
1879    #[test]
1880    fn test_accountsdb_compare_two_hash_entries() {
1881        solana_logger::setup();
1882        let pubkey = Pubkey::new_unique();
1883        let hash = AccountHash(Hash::new_unique());
1884        let val = CalculateHashIntermediate {
1885            hash,
1886            lamports: 1,
1887            pubkey,
1888        };
1889
1890        // slot same, version <
1891        let hash2 = AccountHash(Hash::new_unique());
1892        let val2 = CalculateHashIntermediate {
1893            hash: hash2,
1894            lamports: 4,
1895            pubkey,
1896        };
1897        assert_eq!(
1898            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1899            AccountsHasher::compare_two_hash_entries(&val, &val2)
1900        );
1901
1902        // slot same, vers =
1903        let hash3 = AccountHash(Hash::new_unique());
1904        let val3 = CalculateHashIntermediate {
1905            hash: hash3,
1906            lamports: 2,
1907            pubkey,
1908        };
1909        assert_eq!(
1910            std::cmp::Ordering::Equal,
1911            AccountsHasher::compare_two_hash_entries(&val, &val3)
1912        );
1913
1914        // slot same, vers >
1915        let hash4 = AccountHash(Hash::new_unique());
1916        let val4 = CalculateHashIntermediate {
1917            hash: hash4,
1918            lamports: 6,
1919            pubkey,
1920        };
1921        assert_eq!(
1922            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1923            AccountsHasher::compare_two_hash_entries(&val, &val4)
1924        );
1925
1926        // slot >, version <
1927        let hash5 = AccountHash(Hash::new_unique());
1928        let val5 = CalculateHashIntermediate {
1929            hash: hash5,
1930            lamports: 8,
1931            pubkey,
1932        };
1933        assert_eq!(
1934            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1935            AccountsHasher::compare_two_hash_entries(&val, &val5)
1936        );
1937    }
1938
1939    fn test_de_dup_accounts_in_parallel<'a>(
1940        account_maps: &'a [&'a [CalculateHashIntermediate]],
1941    ) -> (AccountHashesFile, u64) {
1942        let dir_for_temp_cache_files = tempdir().unwrap();
1943        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1944        accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
1945    }
1946
1947    #[test]
1948    fn test_accountsdb_remove_zero_balance_accounts() {
1949        solana_logger::setup();
1950
1951        let pubkey = Pubkey::new_unique();
1952        let hash = AccountHash(Hash::new_unique());
1953        let mut account_maps = Vec::new();
1954        let val = CalculateHashIntermediate {
1955            hash,
1956            lamports: 1,
1957            pubkey,
1958        };
1959        account_maps.push(val);
1960
1961        let vecs = vec![account_maps.to_vec()];
1962        let slice = convert_to_slice(&vecs);
1963        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1964        assert_eq!(
1965            (get_vec(hashfile), lamports),
1966            (vec![val.hash.0], val.lamports)
1967        );
1968
1969        // zero original lamports, higher version
1970        let val = CalculateHashIntermediate {
1971            hash,
1972            lamports: 0,
1973            pubkey,
1974        };
1975        account_maps.push(val); // has to be after previous entry since account_maps are in slot order
1976
1977        let vecs = vec![account_maps.to_vec()];
1978        let slice = convert_to_slice(&vecs);
1979        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1980        assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
1981    }
1982
1983    #[test]
1984    fn test_accountsdb_dup_pubkey_2_chunks() {
1985        // 2 chunks, a dup pubkey in each chunk
1986        for reverse in [false, true] {
1987            let key = Pubkey::new_from_array([1; 32]); // key is BEFORE key2
1988            let key2 = Pubkey::new_from_array([2; 32]);
1989            let hash = AccountHash(Hash::new_unique());
1990            let mut account_maps = Vec::new();
1991            let mut account_maps2 = Vec::new();
1992            let val = CalculateHashIntermediate {
1993                hash,
1994                lamports: 1,
1995                pubkey: key,
1996            };
1997            account_maps.push(val);
1998            let val2 = CalculateHashIntermediate {
1999                hash,
2000                lamports: 2,
2001                pubkey: key2,
2002            };
2003            account_maps.push(val2);
2004            let val3 = CalculateHashIntermediate {
2005                hash,
2006                lamports: 3,
2007                pubkey: key2,
2008            };
2009            account_maps2.push(val3);
2010
2011            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
2012            if reverse {
2013                vecs = vecs.into_iter().rev().collect();
2014            }
2015            let slice = convert_to_slice(&vecs);
2016            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
2017            assert_eq!(
2018                (get_vec(hashfile), lamports),
2019                (
2020                    vec![val.hash.0, if reverse { val2.hash.0 } else { val3.hash.0 }],
2021                    val.lamports
2022                        + if reverse {
2023                            val2.lamports
2024                        } else {
2025                            val3.lamports
2026                        }
2027                ),
2028                "reverse: {reverse}"
2029            );
2030        }
2031    }
2032
2033    #[test]
2034    fn test_accountsdb_dup_pubkey_2_chunks_backwards() {
2035        // 2 chunks, a dup pubkey in each chunk
2036        for reverse in [false, true] {
2037            let key = Pubkey::new_from_array([3; 32]); // key is AFTER key2
2038            let key2 = Pubkey::new_from_array([2; 32]);
2039            let hash = AccountHash(Hash::new_unique());
2040            let mut account_maps = Vec::new();
2041            let mut account_maps2 = Vec::new();
2042            let val2 = CalculateHashIntermediate {
2043                hash,
2044                lamports: 2,
2045                pubkey: key2,
2046            };
2047            account_maps.push(val2);
2048            let val = CalculateHashIntermediate {
2049                hash,
2050                lamports: 1,
2051                pubkey: key,
2052            };
2053            account_maps.push(val);
2054            let val3 = CalculateHashIntermediate {
2055                hash,
2056                lamports: 3,
2057                pubkey: key2,
2058            };
2059            account_maps2.push(val3);
2060
2061            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
2062            if reverse {
2063                vecs = vecs.into_iter().rev().collect();
2064            }
2065            let slice = convert_to_slice(&vecs);
2066            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
2067            assert_eq!(
2068                (get_vec(hashfile), lamports),
2069                (
2070                    vec![if reverse { val2.hash.0 } else { val3.hash.0 }, val.hash.0],
2071                    val.lamports
2072                        + if reverse {
2073                            val2.lamports
2074                        } else {
2075                            val3.lamports
2076                        }
2077                ),
2078                "reverse: {reverse}"
2079            );
2080        }
2081    }
2082
2083    #[test]
2084    fn test_accountsdb_cumulative_offsets1_d() {
2085        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
2086        let cumulative = CumulativeOffsets::from_raw(&input);
2087
2088        let src: Vec<_> = input.clone().into_iter().flatten().collect();
2089        let len = src.len();
2090        assert_eq!(cumulative.total_count, len);
2091        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2092
2093        const DIMENSION: usize = 0;
2094        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
2095        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);
2096
2097        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2098        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2099
2100        for start in 0..len {
2101            let slice = cumulative.get_slice(&input, start);
2102            let len = slice.len();
2103            assert!(len > 0);
2104            assert_eq!(&src[start..(start + len)], slice);
2105        }
2106
2107        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
2108        let cumulative = CumulativeOffsets::from_raw(&input);
2109
2110        let src: Vec<_> = input.clone().into_iter().flatten().collect();
2111        let len = src.len();
2112        assert_eq!(cumulative.total_count, len);
2113        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2114
2115        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
2116        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);
2117
2118        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2119        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2120
2121        for start in 0..len {
2122            let slice = cumulative.get_slice(&input, start);
2123            let len = slice.len();
2124            assert!(len > 0);
2125            assert_eq!(&src[start..(start + len)], slice);
2126        }
2127
2128        let input: Vec<Vec<u32>> = vec![vec![]];
2129        let cumulative = CumulativeOffsets::from_raw(&input);
2130
2131        let len = input.into_iter().flatten().count();
2132        assert_eq!(cumulative.total_count, len);
2133        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
2134    }
2135
2136    #[should_panic(expected = "is_empty")]
2137    #[test]
2138    fn test_accountsdb_cumulative_find_empty() {
2139        let input = CumulativeOffsets {
2140            cumulative_offsets: vec![],
2141            total_count: 0,
2142        };
2143        input.find(0);
2144    }
2145
2146    #[test]
2147    fn test_accountsdb_cumulative_find() {
2148        let input = CumulativeOffsets {
2149            cumulative_offsets: vec![CumulativeOffset {
2150                index: [0; 2],
2151                start_offset: 0,
2152            }],
2153            total_count: 0,
2154        };
2155        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));
2156
2157        let input = CumulativeOffsets {
2158            cumulative_offsets: vec![
2159                CumulativeOffset {
2160                    index: [0; 2],
2161                    start_offset: 0,
2162                },
2163                CumulativeOffset {
2164                    index: [1; 2],
2165                    start_offset: 2,
2166                },
2167            ],
2168            total_count: 0,
2169        };
2170        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0])); // = first start_offset
2171        assert_eq!(input.find(1), (1, &input.cumulative_offsets[0])); // > first start_offset
2172        assert_eq!(input.find(2), (0, &input.cumulative_offsets[1])); // = last start_offset
2173        assert_eq!(input.find(3), (1, &input.cumulative_offsets[1])); // > last start_offset
2174    }
2175
2176    #[test]
2177    fn test_accountsdb_cumulative_offsets2_d() {
2178        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
2179        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2180
2181        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2182        let len = src.len();
2183        assert_eq!(cumulative.total_count, len);
2184        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2185
2186        const DIMENSION_0: usize = 0;
2187        const DIMENSION_1: usize = 1;
2188        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2189        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
2190        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
2191        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);
2192
2193        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2194        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2195
2196        for start in 0..len {
2197            let slice: &[u64] = cumulative.get_slice(&input, start);
2198            let len = slice.len();
2199            assert!(len > 0);
2200            assert_eq!(&src[start..(start + len)], slice);
2201        }
2202
2203        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
2204        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2205
2206        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2207        let len = src.len();
2208        assert_eq!(cumulative.total_count, len);
2209        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2210
2211        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2212        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
2213        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
2214        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);
2215
2216        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2217        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2218
2219        for start in 0..len {
2220            let slice: &[u64] = cumulative.get_slice(&input, start);
2221            let len = slice.len();
2222            assert!(len > 0);
2223            assert_eq!(&src[start..(start + len)], slice);
2224        }
2225
2226        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
2227        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2228
2229        let len = input.into_iter().flatten().count();
2230        assert_eq!(cumulative.total_count, len);
2231        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
2232
2233        let input = vec![
2234            vec![vec![0, 1]],
2235            vec![vec![]],
2236            vec![vec![], vec![2, 3, 4], vec![]],
2237        ];
2238        let cumulative = CumulativeOffsets::from_raw_2d(&input);
2239
2240        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
2241        let len = src.len();
2242        assert_eq!(cumulative.total_count, len);
2243        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
2244
2245        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
2246        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
2247        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
2248        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);
2249
2250        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
2251        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
2252
2253        for start in 0..len {
2254            let slice: &[u64] = cumulative.get_slice(&input, start);
2255            let len = slice.len();
2256            assert!(len > 0);
2257            assert_eq!(&src[start..(start + len)], slice);
2258        }
2259    }
2260
2261    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
2262        let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
2263        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
2264        let result2 = test_hashing(reduced, fanout);
2265        assert_eq!(result, result2, "len: {}", hashes.len());
2266        result
2267    }
2268
2269    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
2270        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
2271        let result = AccountsHasher::compute_merkle_root(temp, fanout);
2272        let reduced: Vec<_> = hashes.clone();
2273        let result2 = AccountsHasher::compute_merkle_root_from_slices(
2274            hashes.len(),
2275            fanout,
2276            None,
2277            |start| &reduced[start..],
2278            None,
2279        );
2280        assert_eq!(result, result2.0, "len: {}", hashes.len());
2281
2282        let result2 = AccountsHasher::compute_merkle_root_from_slices(
2283            hashes.len(),
2284            fanout,
2285            Some(1),
2286            |start| &reduced[start..],
2287            None,
2288        );
2289        assert_eq!(result, result2.0, "len: {}", hashes.len());
2290
2291        let max = std::cmp::min(reduced.len(), fanout * 2);
2292        for left in 0..max {
2293            for right in left + 1..max {
2294                let src = vec![
2295                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
2296                    vec![reduced[right..].to_vec()],
2297                ];
2298                let offsets = CumulativeOffsets::from_raw_2d(&src);
2299
2300                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
2301                let result2 = AccountsHasher::compute_merkle_root_from_slices(
2302                    offsets.total_count,
2303                    fanout,
2304                    None,
2305                    get_slice,
2306                    None,
2307                );
2308                assert_eq!(result, result2.0);
2309            }
2310        }
2311        result
2312    }
2313
2314    #[test]
2315    fn test_accountsdb_compute_merkle_root_large() {
2316        solana_logger::setup();
2317
2318        // handle fanout^x -1, +0, +1 for a few 'x's
2319        const FANOUT: usize = 3;
2320        let mut hash_counts: Vec<_> = (1..6)
2321            .flat_map(|x| {
2322                let mark = FANOUT.pow(x);
2323                vec![mark - 1, mark, mark + 1]
2324            })
2325            .collect();
2326
2327        // saturate the test space from just below `threshold` through `threshold + target`
2328        // this hits right before the 3-deep optimization kicks in and covers every possible size of partial last chunk
2329        let target = FANOUT.pow(3);
2330        let threshold = target * FANOUT;
2331        hash_counts.extend(threshold - 1..=threshold + target);
2332
2333        for hash_count in hash_counts {
2334            let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();
2335
2336            test_hashing(hashes, FANOUT);
2337        }
2338    }
2339
2340    #[test]
2341    fn test_accountsdb_compute_merkle_root() {
2342        solana_logger::setup();
2343
2344        let expected_results = vec![
2345            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
2346            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
2347            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
2348            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
2349            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
2350            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
2351            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
2352            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
2353            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
2354            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
2355            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
2356            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
2357            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
2358            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
2359            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
2360            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
2361            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
2362            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
2363            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
2364            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
2365        ];
2366
2367        let mut expected_index = 0;
2368        let start = 0;
2369        let default_fanout = 2;
2370        // test 0..3 levels of recursion (at fanout = 2) plus a 1-item remainder. Internally, the first loop is a special case; all subsequent loops behave the same.
2371        let iterations = default_fanout * default_fanout * default_fanout + 2;
2372        for pass in 0..2 {
2373            let fanout = if pass == 0 {
2374                default_fanout
2375            } else {
2376                MERKLE_FANOUT
2377            };
2378            for count in start..iterations {
2379                let mut input: Vec<_> = (0..count)
2380                    .map(|i| {
2381                        let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
2382                        let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
2383                        (key, hash)
2384                    })
2385                    .collect();
2386
2387                let result = if pass == 0 {
2388                    test_hashing_larger(input, fanout)
2389                } else {
2390                    // this sorts inside
2391                    let early_result = AccountsHasher::accumulate_account_hashes(
2392                        input
2393                            .iter()
2394                            .map(|i| (i.0, AccountHash(i.1)))
2395                            .collect::<Vec<_>>(),
2396                    );
2397
2398                    input.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
2399                    let result = AccountsHasher::compute_merkle_root(input, fanout);
2400                    assert_eq!(early_result, result);
2401                    result
2402                };
2403                // compare against captured, expected results for hash (and lamports)
2404                assert_eq!(
2405                    (
2406                        pass,
2407                        count,
2408                        &*(result.to_string()),
2409                        expected_results[expected_index].3
2410                    ), // we no longer calculate lamports
2411                    expected_results[expected_index]
2412                );
2413                expected_index += 1;
2414            }
2415        }
2416    }
2417
2418    #[test]
2419    #[should_panic(expected = "summing lamports cannot overflow")]
2420    fn test_accountsdb_lamport_overflow() {
2421        solana_logger::setup();
2422
2423        let offset = 2;
2424        let input = vec![
2425            CalculateHashIntermediate {
2426                hash: AccountHash(Hash::new(&[1u8; 32])),
2427                lamports: u64::MAX - offset,
2428                pubkey: Pubkey::new_unique(),
2429            },
2430            CalculateHashIntermediate {
2431                hash: AccountHash(Hash::new(&[2u8; 32])),
2432                lamports: offset + 1,
2433                pubkey: Pubkey::new_unique(),
2434            },
2435        ];
2436        let dir_for_temp_cache_files = tempdir().unwrap();
2437        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2438        accounts_hasher.de_dup_accounts_in_parallel(
2439            &convert_to_slice(&[input]),
2440            0,
2441            1,
2442            &HashStats::default(),
2443        );
2444    }
2445
2446    fn convert_to_slice(
2447        input: &[Vec<CalculateHashIntermediate>],
2448    ) -> Vec<&[CalculateHashIntermediate]> {
2449        input.iter().map(|v| &v[..]).collect::<Vec<_>>()
2450    }
2451
2452    #[test]
2453    #[should_panic(expected = "summing lamports cannot overflow")]
2454    fn test_accountsdb_lamport_overflow2() {
2455        solana_logger::setup();
2456
2457        let offset = 2;
2458        let input = vec![
2459            vec![CalculateHashIntermediate {
2460                hash: AccountHash(Hash::new(&[1u8; 32])),
2461                lamports: u64::MAX - offset,
2462                pubkey: Pubkey::new_unique(),
2463            }],
2464            vec![CalculateHashIntermediate {
2465                hash: AccountHash(Hash::new(&[2u8; 32])),
2466                lamports: offset + 1,
2467                pubkey: Pubkey::new_unique(),
2468            }],
2469        ];
2470        let dir_for_temp_cache_files = tempdir().unwrap();
2471        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2472        accounts_hasher.de_dup_accounts(
2473            &convert_to_slice(&input),
2474            &mut HashStats::default(),
2475            2, // accounts above are in 2 groups
2476        );
2477    }
2478}