1use {
2 crate::{
3 accounts_db::{AccountStorageEntry, PUBKEY_BINS_FOR_CALCULATING_HASHES},
4 active_stats::{ActiveStatItem, ActiveStats},
5 ancestors::Ancestors,
6 pubkey_bins::PubkeyBinCalculator24,
7 },
8 bytemuck_derive::{Pod, Zeroable},
9 log::*,
10 memmap2::MmapMut,
11 rayon::prelude::*,
12 solana_lattice_hash::lt_hash::LtHash,
13 solana_measure::{measure::Measure, measure_us},
14 solana_sdk::{
15 hash::{Hash, Hasher, HASH_BYTES},
16 pubkey::Pubkey,
17 rent_collector::RentCollector,
18 slot_history::Slot,
19 sysvar::epoch_schedule::EpochSchedule,
20 },
21 std::{
22 borrow::Borrow,
23 convert::TryInto,
24 io::{Seek, SeekFrom, Write},
25 path::PathBuf,
26 sync::{
27 atomic::{AtomicU64, AtomicUsize, Ordering},
28 Arc,
29 },
30 thread, time,
31 },
32 tempfile::tempfile_in,
33};
/// Number of children per node in the merkle tree built over account hashes.
pub const MERKLE_FANOUT: usize = 16;
35
/// A memory-mapped temp file holding a flat array of `Hash` values.
struct MmapAccountHashesFile {
    /// The memory map backing the hashes.
    mmap: MmapMut,
    /// Number of hashes written so far.
    count: usize,
}
43
impl MmapAccountHashesFile {
    /// Returns the hashes from `index` (inclusive) through the last written
    /// hash, as a typed slice view directly into the mmap.
    fn read(&self, index: usize) -> &[Hash] {
        let start = std::mem::size_of::<Hash>() * index;
        let end = std::mem::size_of::<Hash>() * self.count;
        let bytes = &self.mmap[start..end];
        // Safe reinterpretation: Hash is plain bytes (see the Pod-based casts
        // used elsewhere in this module).
        bytemuck::cast_slice(bytes)
    }

    /// Appends `hash` at the current write position and bumps the count.
    /// Panics via slice indexing if the mmap's capacity is exceeded.
    fn write(&mut self, hash: &Hash) {
        let start = self.count * std::mem::size_of::<Hash>();
        let end = start + std::mem::size_of::<Hash>();
        self.mmap[start..end].copy_from_slice(hash.as_ref());
        self.count += 1;
    }
}
61
/// A file of hashes, created lazily: `writer` stays `None` until the first write.
struct AccountHashesFile {
    /// Once created, the mmap-backed writer holding the hashes.
    writer: Option<MmapAccountHashesFile>,
    /// Directory in which the backing temp file is created.
    dir_for_temp_cache_files: PathBuf,
    /// Size in bytes to allocate for the backing file.
    capacity: usize,
}
71
72impl AccountHashesFile {
73 fn get_reader(&mut self) -> Option<MmapAccountHashesFile> {
75 std::mem::take(&mut self.writer)
76 }
77
78 fn count(&self) -> usize {
80 self.writer
81 .as_ref()
82 .map(|writer| writer.count)
83 .unwrap_or_default()
84 }
85
86 fn write(&mut self, hash: &Hash) {
89 if self.writer.is_none() {
90 let get_file = || -> Result<_, std::io::Error> {
93 let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
94 panic!(
95 "Unable to create file within {}: {err}",
96 self.dir_for_temp_cache_files.display()
97 )
98 });
99
100 assert!(self.capacity > 0);
104 data.seek(SeekFrom::Start((self.capacity - 1) as u64))?;
105 data.write_all(&[0])?;
106 data.rewind()?;
107 data.flush()?;
108 Ok(data)
109 };
110
111 let mut num_retries = 0;
115 let data = loop {
116 num_retries += 1;
117
118 match get_file() {
119 Ok(data) => {
120 break data;
121 }
122 Err(err) => {
123 info!(
124 "Unable to create account hashes file within {}: {}, retry counter {}",
125 self.dir_for_temp_cache_files.display(),
126 err,
127 num_retries
128 );
129
130 if num_retries > 5 {
131 panic!(
132 "Unable to create account hashes file within {}: after {} retries",
133 self.dir_for_temp_cache_files.display(),
134 num_retries
135 );
136 }
137 datapoint_info!(
138 "retry_account_hashes_file_allocation",
139 ("retry", num_retries, i64)
140 );
141 thread::sleep(time::Duration::from_millis(num_retries * 100));
142 }
143 }
144 };
145
146 let map = unsafe { MmapMut::map_mut(&data) };
148 let map = map.unwrap_or_else(|e| {
149 error!(
150 "Failed to map the data file (size: {}): {}.\n
151 Please increase sysctl vm.max_map_count or equivalent for your platform.",
152 self.capacity, e
153 );
154 std::process::exit(1);
155 });
156
157 self.writer = Some(MmapAccountHashesFile {
158 mmap: map,
159 count: 0,
160 });
161 }
162 self.writer.as_mut().unwrap().write(hash);
163 }
164}
165
/// Configuration for an accounts-hash calculation.
#[derive(Debug)]
pub struct CalcAccountsHashConfig<'a> {
    /// NOTE(review): name suggests the calculation runs on a background
    /// thread pool when true — confirm against callers.
    pub use_bg_thread_pool: bool,
    /// Ancestors to use for the calculation, if any.
    pub ancestors: Option<&'a Ancestors>,
    pub epoch_schedule: &'a EpochSchedule,
    pub rent_collector: &'a RentCollector,
    /// If true, store detailed debug info on failure.
    pub store_detailed_debug_info_on_failure: bool,
}
180
/// `[min, 1st quartile, 2nd quartile (median), 3rd quartile, max, average]`
/// of storage capacities (filled in by `HashStats::calc_storage_size_quartiles`).
pub type StorageSizeQuartileStats = [usize; 6];

/// Timing and counting statistics gathered while calculating the accounts
/// hash. The `Atomic*` fields may be updated concurrently from parallel
/// workers; the plain fields are written from single-threaded phases.
#[derive(Debug, Default)]
pub struct HashStats {
    pub total_us: u64,
    pub mark_time_us: u64,
    pub cache_hash_data_us: u64,
    pub scan_time_total_us: u64,
    pub zeros_time_total_us: u64,
    pub hash_time_total_us: u64,
    pub sort_time_total_us: u64,
    /// Total number of hashes that fed the merkle computation.
    pub hash_total: usize,
    pub num_snapshot_storage: usize,
    pub scan_chunks: usize,
    pub num_slots: usize,
    pub num_dirty_slots: usize,
    pub collect_snapshots_us: u64,
    pub storage_sort_us: u64,
    /// Distribution of storage capacities; see `StorageSizeQuartileStats`.
    pub storage_size_quartiles: StorageSizeQuartileStats,
    pub oldest_root: Slot,
    pub roots_older_than_epoch: AtomicUsize,
    pub accounts_in_roots_older_than_epoch: AtomicUsize,
    pub append_vec_sizes_older_than_epoch: AtomicUsize,
    pub longest_ancient_scan_us: AtomicU64,
    pub sum_ancient_scans_us: AtomicU64,
    pub count_ancient_scans: AtomicU64,
    /// Time spent locating bin boundaries in sorted hash data.
    pub pubkey_bin_search_us: AtomicU64,
    pub num_zero_lamport_accounts: AtomicU64,
    pub num_zero_lamport_accounts_ancient: Arc<AtomicU64>,
}
impl HashStats {
    /// Computes min / quartiles / max / mean of the given storages' capacities
    /// and stores them in `self.storage_size_quartiles`.
    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
        let mut sum = 0;
        let mut sizes = storages
            .iter()
            .map(|storage| {
                let cap = storage.accounts.capacity() as usize;
                sum += cap;
                cap
            })
            .collect::<Vec<_>>();
        sizes.sort_unstable();
        let len = sizes.len();
        self.storage_size_quartiles = if len == 0 {
            StorageSizeQuartileStats::default()
        } else {
            // [min, q1, q2 (median-ish), q3, max, mean]
            [
                *sizes.first().unwrap(),
                sizes[len / 4],
                sizes[len * 2 / 4],
                sizes[len * 3 / 4],
                *sizes.last().unwrap(),
                sum / len,
            ]
        };
    }

    /// Emits all collected stats as one metrics datapoint.
    pub fn log(&self) {
        datapoint_info!(
            "calculate_accounts_hash_from_storages",
            ("total_us", self.total_us, i64),
            ("mark_time_us", self.mark_time_us, i64),
            ("cache_hash_data_us", self.cache_hash_data_us, i64),
            ("accounts_scan_us", self.scan_time_total_us, i64),
            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
            ("hash_us", self.hash_time_total_us, i64),
            ("sort_us", self.sort_time_total_us, i64),
            ("hash_total", self.hash_total, i64),
            ("storage_sort_us", self.storage_sort_us, i64),
            ("collect_snapshots_us", self.collect_snapshots_us, i64),
            ("num_snapshot_storage", self.num_snapshot_storage, i64),
            ("scan_chunks", self.scan_chunks, i64),
            ("num_slots", self.num_slots, i64),
            ("num_dirty_slots", self.num_dirty_slots, i64),
            ("storage_size_min", self.storage_size_quartiles[0], i64),
            (
                "storage_size_quartile_1",
                self.storage_size_quartiles[1],
                i64
            ),
            (
                "storage_size_quartile_2",
                self.storage_size_quartiles[2],
                i64
            ),
            (
                "storage_size_quartile_3",
                self.storage_size_quartiles[3],
                i64
            ),
            ("storage_size_max", self.storage_size_quartiles[4], i64),
            ("storage_size_avg", self.storage_size_quartiles[5], i64),
            (
                "roots_older_than_epoch",
                self.roots_older_than_epoch.load(Ordering::Relaxed),
                i64
            ),
            ("oldest_root", self.oldest_root, i64),
            (
                "longest_ancient_scan_us",
                self.longest_ancient_scan_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "sum_ancient_scans_us",
                self.sum_ancient_scans_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "count_ancient_scans",
                self.count_ancient_scans.load(Ordering::Relaxed),
                i64
            ),
            (
                "append_vec_sizes_older_than_epoch",
                self.append_vec_sizes_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "accounts_in_roots_older_than_epoch",
                self.accounts_in_roots_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "pubkey_bin_search_us",
                self.pubkey_bin_search_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "num_zero_lamport_accounts",
                self.num_zero_lamport_accounts.load(Ordering::Relaxed),
                i64
            ),
            (
                "num_zero_lamport_accounts_ancient",
                self.num_zero_lamport_accounts_ancient
                    .load(Ordering::Relaxed),
                i64
            ),
        );
    }
}
326
/// One account's contribution to the accounts hash: its hash, lamports, and
/// pubkey. `#[repr(C)]` + `Pod` because instances are cast to/from raw bytes,
/// so the layout must contain no padding (verified by the assertion below).
#[repr(C)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)]
pub struct CalculateHashIntermediate {
    pub hash: AccountHash,
    pub lamports: u64,
    pub pubkey: Pubkey,
}

// Compile-time proof that the struct has no padding bytes: its size equals
// the sum of its fields' sizes.
const _: () = assert!(
    std::mem::size_of::<CalculateHashIntermediate>()
        == std::mem::size_of::<AccountHash>()
            + std::mem::size_of::<u64>()
            + std::mem::size_of::<Pubkey>(),
    "CalculateHashIntermediate cannot have any padding"
);
346
/// Marks where one non-empty data source begins within a flattened,
/// cumulative view over several sources.
#[derive(Debug, PartialEq, Eq)]
struct CumulativeOffset {
    /// Source location: `[outer, inner]` for 2-D data; for 1-D data the outer
    /// index is duplicated (see `CumulativeOffsets::new`).
    index: [usize; 2],
    /// Offset in the flattened view where this source's first element lives.
    start_offset: usize,
}
353
/// Extracts, from nested raw data, the slice that begins at `start` within
/// the source identified by `offset`.
trait ExtractSliceFromRawData<'b, T: 'b> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
}

// 1-D nesting: only the outer index of `offset` is meaningful.
impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][start..]
    }
}

// 2-D nesting: both indices of `offset` are used.
impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][offset.index[1]][start..]
    }
}
369
/// Lookup table mapping a flattened index to the data source that contains it.
#[derive(Default, Debug)]
struct CumulativeOffsets {
    /// One entry per non-empty source, sorted by `start_offset`.
    cumulative_offsets: Vec<CumulativeOffset>,
    /// Total number of elements across all sources.
    total_count: usize,
}

/// Hashes spread across several mmap-backed files, addressable through one
/// flattened index space.
#[derive(Default)]
struct CumulativeHashesFromFiles {
    /// One reader per non-empty hash file.
    readers: Vec<MmapAccountHashesFile>,
    /// Maps a flattened hash index to (reader, local offset).
    cumulative: CumulativeOffsets,
}
386
387impl CumulativeHashesFromFiles {
388 fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
391 let mut readers = Vec::with_capacity(hashes.len());
392 let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
393 hash_file.get_reader().map(|reader| {
395 let count = reader.count;
396 readers.push(reader);
397 count
398 })
399 }));
400 Self {
401 cumulative,
402 readers,
403 }
404 }
405
406 fn total_count(&self) -> usize {
408 self.cumulative.total_count
409 }
410
411 fn get_slice(&self, start: usize) -> &[Hash] {
413 let (start, offset) = self.cumulative.find(start);
414 let data_source_index = offset.index[0];
415 let data = &self.readers[data_source_index];
416 data.read(start)
418 }
419}
420
impl CumulativeOffsets {
    /// Builds offsets from an iterator of per-source lengths.
    /// Zero-length sources get no entry.
    fn new<I>(iter: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        let mut total_count: usize = 0;
        let cumulative_offsets: Vec<_> = iter
            .enumerate()
            .filter_map(|(i, len)| {
                if len > 0 {
                    let result = CumulativeOffset {
                        // 1-D data: duplicate the outer index.
                        index: [i, i],
                        start_offset: total_count,
                    };
                    total_count += len;
                    Some(result)
                } else {
                    None
                }
            })
            .collect();

        Self {
            cumulative_offsets,
            total_count,
        }
    }

    /// Builds offsets from 1-D nested data.
    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
        Self::new(raw.iter().map(|v| v.len()))
    }

    /// Index of the `CumulativeOffset` entry whose range contains flattened
    /// index `start`. Panics if there are no entries.
    fn find_index(&self, start: usize) -> usize {
        assert!(!self.cumulative_offsets.is_empty());
        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
            // `start` is exactly a source's first element.
            Ok(index) => index,
            // `start` falls inside the preceding source's range; the binary
            // search's insertion point is one past it.
            Err(index) => index - 1, }
    }

    /// Resolves flattened index `start` to (offset local to the source, the
    /// source's `CumulativeOffset` entry).
    fn find(&self, start: usize) -> (usize, &CumulativeOffset) {
        let index = self.find_index(start);
        let index = &self.cumulative_offsets[index];
        let start = start - index.start_offset;
        (start, index)
    }

    /// Returns the slice of `raw` that begins at flattened index `start`,
    /// limited to the single source containing it.
    fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
    where
        U: ExtractSliceFromRawData<'b, T> + 'b,
    {
        let (start, index) = self.find(start);
        raw.extract(index, start)
    }
}
481
/// Calculates the accounts hash.
#[derive(Debug)]
pub struct AccountsHasher<'a> {
    /// Whether zero-lamport accounts contribute to the hash.
    pub zero_lamport_accounts: ZeroLamportAccounts,
    /// Directory where the temporary hash cache files are created.
    pub dir_for_temp_cache_files: PathBuf,
    pub(crate) active_stats: &'a ActiveStats,
}

/// Location of an entry: which slot group, and the offset within it.
#[derive(Debug, Clone, Copy)]
struct SlotGroupPointer {
    /// Index of the slot group.
    slot_group_index: usize,
    /// Offset within that slot group's data.
    offset: usize,
}

/// A pubkey together with the location of its entry.
#[derive(Debug)]
struct ItemLocation<'a> {
    /// The account's pubkey at this location.
    key: &'a Pubkey,
    /// Where the entry lives.
    pointer: SlotGroupPointer,
}
507
508impl<'a> AccountsHasher<'a> {
    /// Computes the merkle root over all `hashes` (grouped per source),
    /// returning the root and the total number of leaf hashes.
    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);

        let hash_total = cumulative_offsets.total_count;
        let result = AccountsHasher::compute_merkle_root_from_slices(
            hash_total,
            MERKLE_FANOUT,
            None,
            // Serve leaf hashes lazily from the nested vectors.
            |start: usize| cumulative_offsets.get_slice(&hashes, start),
            None,
        );
        (result.0, hash_total)
    }
522
    /// Merkle root over (pubkey, hash) pairs; only the hash part is hashed.
    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
    }

    /// Merkle root over a plain vector of hashes (used for upper tree levels).
    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| t)
    }
531
532 pub fn div_ceil(x: usize, y: usize) -> usize {
533 let mut result = x / y;
534 if x % y != 0 {
535 result += 1;
536 }
537 result
538 }
539
    /// Reduces `hashes` to a single merkle root: hashes chunks of `fanout`
    /// items in parallel into parent hashes, then recurses on the parents
    /// until one hash remains. `extractor` pulls the `Hash` out of each item.
    /// Returns the default hasher result for empty input.
    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
    where
        F: Fn(&T) -> &Hash + std::marker::Sync,
        T: std::marker::Sync,
    {
        if hashes.is_empty() {
            return Hasher::default().result();
        }

        let mut time = Measure::start("time");

        let total_hashes = hashes.len();
        let chunks = Self::div_ceil(total_hashes, fanout);

        // Hash each fanout-sized chunk in parallel.
        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                let start_index = i * fanout;
                let end_index = std::cmp::min(start_index + fanout, total_hashes);

                let mut hasher = Hasher::default();
                for item in hashes.iter().take(end_index).skip(start_index) {
                    let h = extractor(item);
                    hasher.hash(h.as_ref());
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        // One parent left -> that's the root; otherwise reduce another level.
        if result.len() == 1 {
            result[0]
        } else {
            Self::compute_merkle_root_recurse(result, fanout)
        }
    }
580
581 fn calculate_three_level_chunks(
582 total_hashes: usize,
583 fanout: usize,
584 max_levels_per_pass: Option<usize>,
585 specific_level_count: Option<usize>,
586 ) -> (usize, usize, bool) {
587 const THREE_LEVEL_OPTIMIZATION: usize = 3; let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
589
590 let threshold = target * fanout;
593 let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
594 && total_hashes >= threshold;
595 if three_level {
596 if let Some(specific_level_count_value) = specific_level_count {
597 three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
598 }
599 }
600 let (num_hashes_per_chunk, levels_hashed) = if three_level {
601 (target, THREE_LEVEL_OPTIMIZATION)
602 } else {
603 (fanout, 1)
604 };
605 (num_hashes_per_chunk, levels_hashed, three_level)
606 }
607
    /// Computes a merkle root over `total_hashes` leaves that are fetched
    /// lazily through `get_hash_slice_starting_at_index` (which returns the
    /// contiguous slice beginning at a given flattened index).
    ///
    /// If `specific_level_count` is `Some(n)`, only `n` levels are collapsed
    /// and the resulting level of hashes is returned (with a default root);
    /// otherwise the full root is returned with an empty vector.
    fn compute_merkle_root_from_slices<'b, F, T>(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        get_hash_slice_starting_at_index: F,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>)
    where
        F: Fn(usize) -> &'b [T] + std::marker::Sync,
        T: Borrow<Hash> + std::marker::Sync + 'b,
    {
        if total_hashes == 0 {
            return (Hasher::default().result(), vec![]);
        }

        let mut time = Measure::start("time");

        // Either 1 level (fanout leaves/chunk) or 3 levels (fanout^3) per pass.
        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
            total_hashes,
            fanout,
            max_levels_per_pass,
            specific_level_count,
        );

        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);

        // Prime the first slice so each parallel task starts with valid data.
        let data = get_hash_slice_starting_at_index(0);
        let data_len = data.len();

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                let start_index = i * num_hashes_per_chunk;
                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);

                let mut hasher = Hasher::default();

                // `data_index` walks within the current slice; when it runs
                // off the end, fetch the slice containing the next leaf.
                let mut data_index = start_index;
                let mut data = data;
                let mut data_len = data_len;

                if !three_level {
                    // One level: hash the chunk's leaves directly.
                    for i in start_index..end_index {
                        if data_index >= data_len {
                            data = get_hash_slice_starting_at_index(i);
                            data_len = data.len();
                            data_index = 0;
                        }
                        hasher.hash(data[data_index].borrow().as_ref());
                        data_index += 1;
                    }
                } else {
                    // Three levels at once: leaves -> k-hashes -> j-hashes ->
                    // this chunk's hash, without materializing intermediates.
                    let mut i = start_index;
                    while i < end_index {
                        let mut hasher_j = Hasher::default();
                        for _j in 0..fanout {
                            let mut hasher_k = Hasher::default();
                            let end = std::cmp::min(end_index - i, fanout);
                            for _k in 0..end {
                                if data_index >= data_len {
                                    data = get_hash_slice_starting_at_index(i);
                                    data_len = data.len();
                                    data_index = 0;
                                }
                                hasher_k.hash(data[data_index].borrow().as_ref());
                                data_index += 1;
                                i += 1;
                            }
                            hasher_j.hash(hasher_k.result().as_ref());
                            if i >= end_index {
                                break;
                            }
                        }
                        hasher.hash(hasher_j.result().as_ref());
                    }
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if let Some(mut specific_level_count_value) = specific_level_count {
            specific_level_count_value -= levels_hashed;
            if specific_level_count_value == 0 {
                // Exactly the requested levels were collapsed; hand back the
                // current level's hashes (root is not meaningful here).
                (Hash::default(), result)
            } else {
                assert!(specific_level_count_value > 0);
                // Keep collapsing until the requested level is reached.
                Self::compute_merkle_root_from_slices_recurse(
                    result,
                    fanout,
                    max_levels_per_pass,
                    Some(specific_level_count_value),
                )
            }
        } else {
            (
                if result.len() == 1 {
                    result[0]
                } else {
                    Self::compute_merkle_root_recurse(result, fanout)
                },
                vec![],
            )
        }
    }
773
    /// Continues a partial merkle computation on the in-memory `hashes`
    /// produced by a previous `compute_merkle_root_from_slices` pass.
    fn compute_merkle_root_from_slices_recurse(
        hashes: Vec<Hash>,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>) {
        Self::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            max_levels_per_pass,
            |start| &hashes[start..],
            specific_level_count,
        )
    }
788
    /// Sorts the (pubkey, hash) pairs by pubkey and returns their merkle root.
    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, AccountHash)>) -> Hash {
        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1 .0)
    }
793
794 pub fn compare_two_hash_entries(
795 a: &CalculateHashIntermediate,
796 b: &CalculateHashIntermediate,
797 ) -> std::cmp::Ordering {
798 a.pubkey.partial_cmp(&b.pubkey).unwrap()
800 }
801
802 pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
803 balance.try_into().unwrap_or_else(|_| {
804 panic!("overflow is detected while summing capitalization: {balance}")
805 })
806 }
807
    /// De-duplicates all `max_bin` pubkey bins in parallel (one rayon task per
    /// bin via fold/reduce). Returns one hash file per non-empty bin plus the
    /// total lamports across all bins; also accumulates timing/count stats.
    fn de_dup_accounts(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
        max_bin: usize,
    ) -> (Vec<AccountHashesFile>, u64) {
        let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);

        // Per-worker accumulator for the parallel fold below.
        #[derive(Default)]
        struct DedupResult {
            hashes_files: Vec<AccountHashesFile>,
            hashes_count: usize,
            lamports_sum: u64,
        }

        let mut zeros = Measure::start("eliminate zeros");
        let DedupResult {
            hashes_files: hashes,
            hashes_count: hash_total,
            lamports_sum: lamports_total,
        } = (0..max_bin)
            .into_par_iter()
            .fold(DedupResult::default, |mut accum, bin| {
                let (hashes_file, lamports_bin) =
                    self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin, max_bin, stats);

                // Overflow of total capitalization indicates corrupt input.
                accum.lamports_sum = accum
                    .lamports_sum
                    .checked_add(lamports_bin)
                    .expect("summing capitalization cannot overflow");
                accum.hashes_count += hashes_file.count();
                accum.hashes_files.push(hashes_file);
                accum
            })
            .reduce(
                || {
                    DedupResult {
                        hashes_files: Vec::new(),
                        ..Default::default()
                    }
                },
                |mut a, mut b| {
                    a.lamports_sum = a
                        .lamports_sum
                        .checked_add(b.lamports_sum)
                        .expect("summing capitalization cannot overflow");
                    a.hashes_count += b.hashes_count;
                    a.hashes_files.append(&mut b.hashes_files);
                    a
                },
            );
        zeros.stop();
        stats.zeros_time_total_us += zeros.as_us();
        stats.hash_total += hash_total;
        (hashes, lamports_total)
    }
876
    /// Returns the entry for `item_loc` and, after skipping any consecutive
    /// entries with the same pubkey in the same slot group, the next
    /// distinct-pubkey item in that group — but only if it still falls in
    /// `bin` (otherwise the group is exhausted for this bin).
    fn get_item<'b>(
        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
        bin: usize,
        binner: &PubkeyBinCalculator24,
        item_loc: &ItemLocation<'b>,
    ) -> (&'b CalculateHashIntermediate, Option<ItemLocation<'b>>) {
        let division_data = &sorted_data_by_pubkey[item_loc.pointer.slot_group_index];
        let mut index = item_loc.pointer.offset;
        index += 1;
        let mut next = None;

        while index < division_data.len() {
            let next_key = &division_data[index].pubkey;

            // Skip over consecutive duplicates of the same pubkey.
            if next_key == item_loc.key {
                index += 1;
                continue; }

            // Next key belongs to a later bin: stop, no candidate for this bin.
            if binner.bin_from_pubkey(next_key) > bin {
                break;
            }

            next = Some(ItemLocation {
                key: next_key,
                pointer: SlotGroupPointer {
                    slot_group_index: item_loc.pointer.slot_group_index,
                    offset: index,
                },
            });
            break;
        }

        // `index - 1` is the last consecutive entry for `item_loc.key` in
        // this slot group (it may differ from the original offset if
        // duplicates were skipped above).
        (&division_data[index - 1], next)
    }
917
    /// Returns the index of the first entry in `hash_data` whose pubkey falls
    /// in `bin`, or `None` if the bin has no entries. `hash_data` must be
    /// sorted by pubkey.
    fn binary_search_for_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        binner: &PubkeyBinCalculator24,
    ) -> Option<usize> {
        let potential_index = if bin == 0 {
            // Every pubkey's bin is >= 0, so the first entry is the candidate.
            0
        } else {
            // Map each entry's bin `b` to the odd value `2b + 1` and search
            // for the even value `2 * bin`: the comparator can never return
            // Equal, so the search always yields Err — the insertion point,
            // i.e. the first entry whose bin is >= `bin`.
            let just_prior_to_desired_bin = bin * 2;
            let search = hash_data.binary_search_by(|data| {
                (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
            });
            // Guaranteed Err by the odd-vs-even construction above.
            search.expect_err("it is impossible to find a matching bin")
        };
        // The candidate may belong to a later bin when `bin` itself is empty;
        // confirm before returning.
        hash_data.get(potential_index).and_then(|potential_data| {
            (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
        })
    }
953
    /// Like `binary_search_for_first_pubkey_in_bin`, but first narrows the
    /// search range with a proportional estimate of where `bin` should begin
    /// (pubkeys are assumed roughly uniform across bins — the result is
    /// verified by the inner binary search either way). Records elapsed
    /// microseconds in `stats`.
    fn find_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        bins: usize,
        binner: &PubkeyBinCalculator24,
        stats: &HashStats,
    ) -> Option<usize> {
        if hash_data.is_empty() {
            return None;
        }
        let (result, us) = measure_us!({
            // Proportional guess at where `bin` starts.
            let i = hash_data.len() * bin / bins;
            let estimate = &hash_data[i];

            let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
            let range = if pubkey_bin >= bin {
                // Guess landed at or past the bin: search the prefix.
                0..(i + 1)
            } else {
                // Guess fell short: search the suffix.
                (i + 1)..hash_data.len()
            };
            Some(
                range.start +
                Self::binary_search_for_first_pubkey_in_bin(
                    &hash_data[range],
                    bin,
                    binner,
                )?,
            )
        });
        stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
        result
    }
993
    /// Seeds the de-dup working set with each slot group's first entry in
    /// `pubkey_bin`. Returns the working set and an inclusive upper bound on
    /// the number of pubkeys this bin holds across all slot groups (duplicates
    /// counted), used to size the output file.
    fn initialize_dedup_working_set(
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        pubkey_bin: usize,
        bins: usize,
        binner: &PubkeyBinCalculator24,
        stats: &HashStats,
    ) -> (
        Vec<SlotGroupPointer>, usize, ) {
        // Working set of at most one candidate per slot group, maintained in
        // descending pubkey order so `pop` yields the minimum (see
        // `add_next_item`).
        let mut working_set: Vec<SlotGroupPointer> = Vec::default();

        // Iterate in reverse (largest slot group index first) so that on a
        // duplicate pubkey the entry from the larger-index group is the one
        // retained by `add_next_item`.
        let max_inclusive_num_pubkeys = sorted_data_by_pubkey
            .iter()
            .enumerate()
            .rev()
            .map(|(i, hash_data)| {
                let first_pubkey_in_bin =
                    Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, binner, stats);

                if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
                    let mut next = Some(ItemLocation {
                        key: &hash_data[first_pubkey_in_bin].pubkey,
                        pointer: SlotGroupPointer {
                            slot_group_index: i,
                            offset: first_pubkey_in_bin,
                        },
                    });

                    Self::add_next_item(
                        &mut next,
                        &mut working_set,
                        sorted_data_by_pubkey,
                        pubkey_bin,
                        binner,
                    );

                    // Count how many consecutive entries of this group fall in
                    // the bin (data is sorted, so they are contiguous).
                    let mut first_pubkey_in_next_bin = first_pubkey_in_bin + 1;
                    while first_pubkey_in_next_bin < hash_data.len() {
                        if binner.bin_from_pubkey(&hash_data[first_pubkey_in_next_bin].pubkey)
                            != pubkey_bin
                        {
                            break;
                        }
                        first_pubkey_in_next_bin += 1;
                    }
                    first_pubkey_in_next_bin - first_pubkey_in_bin
                } else {
                    0
                }
            })
            .sum::<usize>();

        (working_set, max_inclusive_num_pubkeys)
    }
1061
    /// Inserts `next` into the working set, which is kept sorted by pubkey in
    /// descending order (so the minimum is at the end, ready to `pop`).
    ///
    /// If the working set already contains an entry with the same pubkey from
    /// another slot group, only the entry from the larger-index slot group is
    /// kept; the other group advances to its next item and the loop retries
    /// with that new candidate.
    fn add_next_item<'b>(
        next: &mut Option<ItemLocation<'b>>,
        working_set: &mut Vec<SlotGroupPointer>,
        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
        pubkey_bin: usize,
        binner: &PubkeyBinCalculator24,
    ) {
        // Loop because resolving a duplicate produces a new candidate.
        while let Some(ItemLocation { key, pointer }) = std::mem::take(next) {
            // Fast path: smaller than the current minimum -> new minimum,
            // push onto the end.
            if let Some(SlotGroupPointer {
                slot_group_index: current_min_slot_group_index,
                offset: current_min_offset,
            }) = working_set.last()
            {
                let current_min_key = &sorted_data_by_pubkey[*current_min_slot_group_index]
                    [*current_min_offset]
                    .pubkey;
                if key < current_min_key {
                    working_set.push(pointer);
                    break;
                }
            }

            // Reversed comparison (`key.cmp(probe)`) because the set is
            // sorted in descending pubkey order.
            let found = working_set.binary_search_by(|pointer| {
                let prob = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
                (*key).cmp(prob)
            });

            match found {
                Err(index) => {
                    // No duplicate pubkey: insert at the sorted position.
                    working_set.insert(index, pointer);
                    break;
                }
                Ok(index) => {
                    // Duplicate pubkey from another slot group: keep the entry
                    // from the larger slot-group index, advance the other.
                    let found = &mut working_set[index];
                    if found.slot_group_index > pointer.slot_group_index {
                        // Existing entry wins; advance the incoming group.
                        let (_item, new_next) = Self::get_item(
                            sorted_data_by_pubkey,
                            pubkey_bin,
                            binner,
                            &ItemLocation { key, pointer },
                        );
                        *next = new_next;
                    } else {
                        // Incoming entry wins; replace and advance the old group.
                        let (_item, new_next) = Self::get_item(
                            sorted_data_by_pubkey,
                            pubkey_bin,
                            binner,
                            &ItemLocation {
                                key,
                                pointer: *found,
                            },
                        );
                        *found = pointer;
                        *next = new_next;
                    }
                }
            }
        }
    }
1131
    /// De-duplicates and accumulates one pubkey bin: merges the sorted slot
    /// groups in ascending pubkey order, keeping a single entry per pubkey
    /// (duplicates resolved in favor of the larger slot-group index; see
    /// `add_next_item`). Returns the bin's hash file and its lamport total.
    fn de_dup_accounts_in_parallel(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        pubkey_bin: usize,
        bins: usize,
        stats: &HashStats,
    ) -> (AccountHashesFile, u64) {
        let binner = PubkeyBinCalculator24::new(bins);

        // Seed with each slot group's first entry in this bin.
        let (mut working_set, max_inclusive_num_pubkeys) = Self::initialize_dedup_working_set(
            sorted_data_by_pubkey,
            pubkey_bin,
            bins,
            &binner,
            stats,
        );

        // Size the file for the worst case: every entry is a distinct pubkey.
        let mut hashes = AccountHashesFile {
            writer: None,
            dir_for_temp_cache_files: self.dir_for_temp_cache_files.clone(),
            capacity: max_inclusive_num_pubkeys * std::mem::size_of::<Hash>(),
        };

        let mut overall_sum: u64 = 0;

        // The working set is sorted descending by pubkey, so `pop` yields the
        // smallest remaining pubkey each iteration.
        while let Some(pointer) = working_set.pop() {
            let key = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;

            // Resolve duplicates for this pubkey and fetch the slot group's
            // next candidate in this bin.
            let (item, mut next) = Self::get_item(
                sorted_data_by_pubkey,
                pubkey_bin,
                &binner,
                &ItemLocation { key, pointer },
            );

            if item.lamports != 0 {
                overall_sum = overall_sum
                    .checked_add(item.lamports)
                    .expect("summing lamports cannot overflow");
                hashes.write(&item.hash.0);
            } else {
                // Zero-lamport accounts are counted and, when configured as
                // Included, contribute a hash derived from the pubkey alone.
                stats
                    .num_zero_lamport_accounts
                    .fetch_add(1, Ordering::Relaxed);
                if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
                    let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
                    let hash = Hash::new_from_array(hash.into());
                    hashes.write(&hash);
                }
            }

            // Feed the next candidate back into the working set.
            Self::add_next_item(
                &mut next,
                &mut working_set,
                sorted_data_by_pubkey,
                pubkey_bin,
                &binner,
            );
        }

        (hashes, overall_sum)
    }
1207
    /// Final stage of the accounts-hash calculation: de-duplicates the sorted
    /// per-bin data, then computes the merkle root across the resulting hash
    /// files. Returns `(accounts hash, total lamports)`.
    pub fn rest_of_hash_calculation(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
    ) -> (Hash, u64) {
        let (hashes, total_lamports) = self.de_dup_accounts(
            sorted_data_by_pubkey,
            stats,
            PUBKEY_BINS_FOR_CALCULATING_HASHES,
        );

        // View the per-bin files as one flattened sequence of hashes.
        let cumulative = CumulativeHashesFromFiles::from_files(hashes);

        let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
        let mut hash_time = Measure::start("hash");
        let (hash, _) = Self::compute_merkle_root_from_slices(
            cumulative.total_count(),
            MERKLE_FANOUT,
            None,
            |start| cumulative.get_slice(start),
            None,
        );
        hash_time.stop();
        stats.hash_time_total_us += hash_time.as_us();
        (hash, total_lamports)
    }
1237}
1238
/// Whether zero-lamport accounts contribute to the accounts hash
/// (see `de_dup_accounts_in_parallel`).
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ZeroLamportAccounts {
    Excluded,
    Included,
}

/// Hash of an individual account. `repr(transparent)` + the size assertion
/// below guarantee it is layout-compatible with `Hash` for byte-level casts.
#[repr(transparent)]
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Pod, Zeroable)]
pub struct AccountHash(pub Hash);

// Compile-time layout check backing the transparent-wrapper guarantee.
const _: () = assert!(std::mem::size_of::<AccountHash>() == std::mem::size_of::<Hash>());

/// The `AccountHash` used for zero-lamport accounts: all zero bytes.
pub const ZERO_LAMPORT_ACCOUNT_HASH: AccountHash =
    AccountHash(Hash::new_from_array([0; HASH_BYTES]));

/// Lattice hash (`LtHash`) of an individual account.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AccountLtHash(pub LtHash);

/// The `AccountLtHash` used for zero-lamport accounts: the LtHash identity.
pub const ZERO_LAMPORT_ACCOUNT_LT_HASH: AccountLtHash = AccountLtHash(LtHash::identity());

/// Lattice hash over accounts.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AccountsLtHash(pub LtHash);
1270
/// An accounts hash of either kind: full or incremental.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum AccountsHashKind {
    Full(AccountsHash),
    Incremental(IncrementalAccountsHash),
}
impl AccountsHashKind {
    /// The underlying `Hash`, regardless of kind.
    pub fn as_hash(&self) -> &Hash {
        match self {
            AccountsHashKind::Full(AccountsHash(hash))
            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
        }
    }
}
impl From<AccountsHash> for AccountsHashKind {
    fn from(accounts_hash: AccountsHash) -> Self {
        AccountsHashKind::Full(accounts_hash)
    }
}
impl From<IncrementalAccountsHash> for AccountsHashKind {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        AccountsHashKind::Incremental(incremental_accounts_hash)
    }
}
1295
/// The full accounts hash.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsHash(pub Hash);
/// The incremental accounts hash.
/// NOTE(review): name suggests it covers accounts changed since a base slot —
/// confirm with callers.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct IncrementalAccountsHash(pub Hash);

/// The accounts-delta hash.
/// NOTE(review): name suggests a per-slot hash of written accounts — confirm.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsDeltaHash(pub Hash);

/// Serde-compatible wrapper for `AccountsDeltaHash`.
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeAccountsDeltaHash(pub Hash);

impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}
impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}

/// Serde-compatible wrapper for `AccountsHash`.
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeAccountsHash(pub Hash);

impl From<SerdeAccountsHash> for AccountsHash {
    fn from(accounts_hash: SerdeAccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}
impl From<AccountsHash> for SerdeAccountsHash {
    fn from(accounts_hash: AccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}

/// Serde-compatible wrapper for `IncrementalAccountsHash`.
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeIncrementalAccountsHash(pub Hash);

impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
1355
1356#[cfg(test)]
1357mod tests {
1358 use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};
1359
1360 lazy_static! {
1361 static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
1362 }
1363
1364 impl<'a> AccountsHasher<'a> {
1365 fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1366 Self {
1367 zero_lamport_accounts: ZeroLamportAccounts::Excluded,
1368 dir_for_temp_cache_files,
1369 active_stats: &ACTIVE_STATS,
1370 }
1371 }
1372 }
1373
1374 impl AccountHashesFile {
1375 fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1376 Self {
1377 writer: None,
1378 dir_for_temp_cache_files,
1379 capacity: 1024, }
1381 }
1382 }
1383
1384 impl CumulativeOffsets {
1385 fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
1386 let mut total_count: usize = 0;
1387 let mut cumulative_offsets = Vec::with_capacity(0);
1388 for (i, v_outer) in raw.iter().enumerate() {
1389 for (j, v) in v_outer.iter().enumerate() {
1390 let len = v.len();
1391 if len > 0 {
1392 if cumulative_offsets.is_empty() {
1393 cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
1395 }
1396 cumulative_offsets.push(CumulativeOffset {
1397 index: [i, j],
1398 start_offset: total_count,
1399 });
1400 total_count += len;
1401 }
1402 }
1403 }
1404
1405 Self {
1406 cumulative_offsets,
1407 total_count,
1408 }
1409 }
1410 }
1411
    #[test]
    // Cross-checks the linear-scan `find_first_pubkey_in_bin` against the
    // binary-search variant over every permutation of per-bin counts.
    // `expected_count` = 5 P bins permutations of [0, 1, 2, 20, 0]: 5, 20, 120.
    fn test_find_first_pubkey_in_bin() {
        let stats = HashStats::default();
        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
            let bins: usize = bins;
            let binner = PubkeyBinCalculator24::new(bins);

            let mut count = 0usize;
            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
                count += 1;
                // Build sorted-by-bin data: `counts[bin]` entries, each at the
                // lowest pubkey belonging to that bin.
                let hash_data = counts
                    .iter()
                    .enumerate()
                    .flat_map(|(bin, count)| {
                        (0..*count).map(move |_| {
                            let binner = PubkeyBinCalculator24::new(bins);
                            CalculateHashIntermediate {
                                hash: AccountHash(Hash::default()),
                                lamports: 0,
                                pubkey: binner.lowest_pubkey_from_bin(bin, bins),
                            }
                        })
                    })
                    .collect::<Vec<_>>();
                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
                    let first = AccountsHasher::find_first_pubkey_in_bin(
                        &hash_data, bin, bins, &binner, &stats,
                    );
                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
                        &hash_data, bin, &binner,
                    );
                    // Both search strategies must agree.
                    assert_eq!(first, first_again);
                    // A bin with zero entries has no first element.
                    assert_eq!(first.is_none(), count_in_bin == &0);
                    if let Some(first) = first {
                        // Found element is in the bin, and its predecessor (if
                        // any) is in a strictly earlier bin.
                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
                        if first > 0 {
                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
                        }
                    }
                }
            }
            assert_eq!(
                count, expected_count,
                "too few iterations in test. bins: {bins}"
            );
        }
    }
1464
    #[test]
    // Exercises AccountHashesFile write/read round-trips: a fresh file has no
    // reader, reads at an index return the suffix starting there, and reads at
    // `count` return an empty slice.
    fn test_account_hashes_file() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        // 0 hashes: no mmap was ever created, so there is nothing to read.
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        let hashes = (0..2).map(|i| Hash::new(&[i; 32])).collect::<Vec<_>>();

        // 1 hash: read(0) yields it; read(1) is past the end.
        file.write(&hashes[0]);
        let reader = file.get_reader().unwrap();
        assert_eq!(&[hashes[0]][..], reader.read(0));
        assert!(reader.read(1).is_empty());

        // 2 hashes: read(i) yields the tail starting at i.
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        hashes.iter().for_each(|hash| file.write(hash));
        let reader = file.get_reader().unwrap();
        (0..2).for_each(|i| assert_eq!(&hashes[i..], reader.read(i)));
        assert!(reader.read(2).is_empty());
    }
1487
    #[test]
    // Verifies CumulativeHashesFromFiles stitches multiple AccountHashesFiles
    // (including empty ones, in several orderings) into one logical sequence
    // that `get_slice` can walk from any starting offset.
    fn test_cumulative_hashes_from_files() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        (0..4).for_each(|permutation| {
            let hashes = (0..2).map(|i| Hash::new(&[i + 1; 32])).collect::<Vec<_>>();

            // `combined` tracks the expected flattened order of all hashes.
            let mut combined = Vec::default();

            // file0: empty. file1: one hash. file2: two hashes.
            let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());

            let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
            file1.write(&hashes[0]);
            combined.push(hashes[0]);

            let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
            hashes.iter().for_each(|hash| {
                file2.write(hash);
                combined.push(*hash);
            });

            // Interleave empty files / reorder to exercise different layouts.
            let hashes = if permutation == 0 {
                vec![file0, file1, file2]
            } else if permutation == 1 {
                vec![
                    file0,
                    file1,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                ]
            } else if permutation == 2 {
                vec![file1, file2]
            } else {
                // file2 comes first here, so rotate `combined` to match.
                let one = combined.remove(0);
                combined.push(one);
                vec![
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    file1,
                ]
            };

            let cumulative = CumulativeHashesFromFiles::from_files(hashes);
            let len = combined.len();
            assert_eq!(cumulative.total_count(), len);
            // From every starting offset, repeatedly fetching slices must
            // reconstruct exactly the remaining suffix of `combined`.
            (0..combined.len()).for_each(|start| {
                let mut retrieved = Vec::default();
                let mut cumulative_start = start;
                while retrieved.len() < (len - start) {
                    let this_one = cumulative.get_slice(cumulative_start);
                    retrieved.extend(this_one.iter());
                    cumulative_start += this_one.len();
                    // Each fetch must make progress or we'd loop forever.
                    assert_ne!(0, this_one.len());
                }
                assert_eq!(
                    &combined[start..],
                    &retrieved[..],
                    "permutation: {permutation}"
                );
            });
        });
    }
1557
1558 #[test]
1559 fn test_accountsdb_div_ceil() {
1560 assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
1561 assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
1562 assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
1563 assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
1564 assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
1565 }
1566
1567 #[test]
1568 #[should_panic(expected = "attempt to divide by zero")]
1569 fn test_accountsdb_div_ceil_fail() {
1570 assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
1571 }
1572
1573 fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
1574 vec![original]
1575 }
1576
    #[test]
    // Feeds small hand-built account sets through `rest_of_hash_calculation`
    // and pins the resulting (merkle root, total lamports) pairs.
    fn test_accountsdb_rest_of_hash_calculation() {
        solana_logger::setup();

        let mut account_maps = Vec::new();

        let pubkey = Pubkey::from([11u8; 32]);
        let hash = AccountHash(Hash::new(&[1u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 88,
            pubkey,
        };
        account_maps.push(val);

        // 2nd key - zero-lamport, so it contributes nothing to the total.
        let pubkey = Pubkey::from([12u8; 32]);
        let hash = AccountHash(Hash::new(&[2u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 0,
            pubkey,
        };
        account_maps.push(val);

        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 88));

        // 3rd key - sorts before the others (lower pubkey); lamports add up.
        let pubkey = Pubkey::from([10u8; 32]);
        let hash = AccountHash(Hash::new(&[2u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 20,
            pubkey,
        };
        account_maps.insert(0, val);

        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 108));

        // 4th entry - same pubkey [10u8; 32] as above; the later duplicate's
        // lamports (30) replace the earlier 20 in the total (108 - 20 + 30).
        let pubkey = Pubkey::from([10u8; 32]);
        let hash = AccountHash(Hash::new(&[99u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 30,
            pubkey,
        };
        account_maps.insert(1, val);

        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 118));
    }
1639
    // Bin count for tests that want everything in a single bin.
    fn one_range() -> usize {
        1
    }
1643
    // Degenerate bin count used to exercise the zero-bins path.
    fn zero_range() -> usize {
        0
    }
1647
    #[test]
    // A single one-account chunk de-duped in parallel (bin 0 of 1) yields that
    // account's hash and lamports unchanged.
    fn test_accountsdb_de_dup_accounts_zero_chunks() {
        let vec = [vec![CalculateHashIntermediate {
            lamports: 1,
            hash: AccountHash(Hash::default()),
            pubkey: Pubkey::default(),
        }]];
        let temp_vec = vec.to_vec();
        let slice = convert_to_slice(&temp_vec);
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let (mut hashes, lamports) =
            accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
        assert_eq!(&[Hash::default()], hashes.get_reader().unwrap().read(0));
        assert_eq!(lamports, 1);
    }
1664
1665 fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
1666 hashes.into_iter().map(get_vec).collect()
1667 }
1668 fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
1669 hashes
1670 .get_reader()
1671 .map(|r| r.read(0).to_vec())
1672 .unwrap_or_default()
1673 }
1674
1675 #[test]
1676 fn test_accountsdb_de_dup_accounts_empty() {
1677 solana_logger::setup();
1678 let dir_for_temp_cache_files = tempdir().unwrap();
1679 let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
1680
1681 let empty = [];
1682 let vec = ∅
1683 let (hashes, lamports) =
1684 accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
1685 assert_eq!(
1686 Vec::<Hash>::new(),
1687 get_vec_vec(hashes)
1688 .into_iter()
1689 .flatten()
1690 .collect::<Vec<_>>(),
1691 );
1692 assert_eq!(lamports, 0);
1693 let vec = vec![];
1694 let (hashes, lamports) =
1695 accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
1696 let empty: Vec<Vec<Hash>> = Vec::default();
1697 assert_eq!(empty, get_vec_vec(hashes));
1698 assert_eq!(lamports, 0);
1699
1700 let (hashes, lamports) =
1701 accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
1702 assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1703 assert_eq!(lamports, 0);
1704
1705 let (hashes, lamports) =
1706 accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
1707 assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
1708 assert_eq!(lamports, 0);
1709 }
1710
    #[test]
    // Exhaustively de-dups every contiguous sub-slice of a 6-account fixture
    // (keys a, b, b, b, c, c — so duplicates abound) through several
    // equivalent call paths, and compares each result against a hand-written
    // expected table keyed by a human-readable "a1b2..." encoding.
    fn test_accountsdb_de_dup_accounts_from_stores() {
        solana_logger::setup();

        let key_a = Pubkey::from([1u8; 32]);
        let key_b = Pubkey::from([2u8; 32]);
        let key_c = Pubkey::from([3u8; 32]);
        const COUNT: usize = 6;
        let hashes = (0..COUNT).map(|i| AccountHash(Hash::new(&[i as u8; 32])));
        // Keys are sorted; b and c repeat so de-dup keeps only the last entry
        // of each duplicate run.
        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];

        // Account i gets lamports i+1 so totals identify which entries won.
        let accounts: Vec<_> = hashes
            .zip(keys.iter())
            .enumerate()
            .map(|(i, (hash, &pubkey))| CalculateHashIntermediate {
                hash,
                lamports: (i + 1) as u64,
                pubkey,
            })
            .collect();

        // (human-readable input, is_last_slice, expected lamports, expected hashes)
        type ExpectedType = (String, bool, u64, String);
        let expected:Vec<ExpectedType> = vec![
            ("a1", false, 1, "[11111111111111111111111111111111]"),
            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("a1", true, 1, "[11111111111111111111111111111111]"),
            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ].into_iter().map(|item| {
            let result: ExpectedType = (
                item.0.to_string(),
                item.1,
                item.2,
                item.3.to_string(),
            );
            result
        }).collect();

        let dir_for_temp_cache_files = tempdir().unwrap();
        let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let mut expected_index = 0;
        for last_slice in 0..2 {
            for start in 0..COUNT {
                for end in start + 1..COUNT {
                    let is_last_slice = last_slice == 1;
                    let accounts = accounts.clone();
                    let slice = &accounts[start..end];

                    let slice2 = vec![slice.to_vec()];
                    let slice = &slice2[..];
                    let slice_temp = convert_to_slice(&slice2);
                    // Same data through de_dup_accounts_in_parallel twice...
                    let (hashes2, lamports2) =
                        hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
                    let slice3 = convert_to_slice(&slice2);
                    let (hashes3, lamports3) =
                        hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
                    let vec = slice.to_vec();
                    let slice4 = convert_to_slice(&vec);
                    // ...and through de_dup_accounts with a power-of-two bin
                    // count (or 1) three times; all must agree.
                    let mut max_bin = end - start;
                    if !max_bin.is_power_of_two() {
                        max_bin = 1;
                    }

                    let (hashes4, lamports4) =
                        hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes5, lamports5) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes6, lamports6) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);

                    let hashes2 = get_vec(hashes2);
                    let hashes3 = get_vec(hashes3);
                    let hashes4 = get_vec_vec(hashes4);
                    let hashes5 = get_vec_vec(hashes5);
                    let hashes6 = get_vec_vec(hashes6);

                    assert_eq!(hashes2, hashes3);
                    let expected2 = hashes2.clone();
                    assert_eq!(
                        expected2,
                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
                    );
                    assert_eq!(lamports2, lamports3);
                    assert_eq!(lamports2, lamports4);
                    assert_eq!(lamports2, lamports5);
                    assert_eq!(lamports2, lamports6);

                    // Re-derive the "a1b2..." encoding of this slice to index
                    // into the expected table.
                    let human_readable = slice[0]
                        .iter()
                        .map(|v| {
                            let mut s = (if v.pubkey == key_a {
                                "a"
                            } else if v.pubkey == key_b {
                                "b"
                            } else {
                                "c"
                            })
                            .to_string();

                            s.push_str(&v.lamports.to_string());
                            s
                        })
                        .collect::<String>();

                    let hash_result_as_string = format!("{hashes2:?}");

                    let packaged_result: ExpectedType = (
                        human_readable,
                        is_last_slice,
                        lamports2,
                        hash_result_as_string,
                    );
                    assert_eq!(expected[expected_index], packaged_result);

                    expected_index += 1;
                }
            }
        }
    }
1878
    #[test]
    // Entries with the same pubkey compare Equal regardless of differing
    // hashes or lamports — compare_two_hash_entries orders by pubkey only.
    fn test_accountsdb_compare_two_hash_entries() {
        solana_logger::setup();
        let pubkey = Pubkey::new_unique();
        let hash = AccountHash(Hash::new_unique());
        let val = CalculateHashIntermediate {
            hash,
            lamports: 1,
            pubkey,
        };

        // Same pubkey, different hash and lamports: still Equal.
        let hash2 = AccountHash(Hash::new_unique());
        let val2 = CalculateHashIntermediate {
            hash: hash2,
            lamports: 4,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val2)
        );

        let hash3 = AccountHash(Hash::new_unique());
        let val3 = CalculateHashIntermediate {
            hash: hash3,
            lamports: 2,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val3)
        );

        let hash4 = AccountHash(Hash::new_unique());
        let val4 = CalculateHashIntermediate {
            hash: hash4,
            lamports: 6,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val4)
        );

        let hash5 = AccountHash(Hash::new_unique());
        let val5 = CalculateHashIntermediate {
            hash: hash5,
            lamports: 8,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val5)
        );
    }
1938
    // Runs de_dup_accounts_in_parallel over `account_maps` with a fresh hasher
    // (bin 0 of 1) and returns the resulting hashes file + lamport total.
    // NOTE(review): the tempdir is dropped when this returns while callers
    // still read the returned AccountHashesFile — presumably safe because the
    // backing temp file handle is already open; confirm.
    fn test_de_dup_accounts_in_parallel<'a>(
        account_maps: &'a [&'a [CalculateHashIntermediate]],
    ) -> (AccountHashesFile, u64) {
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
    }
1946
1947 #[test]
1948 fn test_accountsdb_remove_zero_balance_accounts() {
1949 solana_logger::setup();
1950
1951 let pubkey = Pubkey::new_unique();
1952 let hash = AccountHash(Hash::new_unique());
1953 let mut account_maps = Vec::new();
1954 let val = CalculateHashIntermediate {
1955 hash,
1956 lamports: 1,
1957 pubkey,
1958 };
1959 account_maps.push(val);
1960
1961 let vecs = vec![account_maps.to_vec()];
1962 let slice = convert_to_slice(&vecs);
1963 let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1964 assert_eq!(
1965 (get_vec(hashfile), lamports),
1966 (vec![val.hash.0], val.lamports)
1967 );
1968
1969 let val = CalculateHashIntermediate {
1971 hash,
1972 lamports: 0,
1973 pubkey,
1974 };
1975 account_maps.push(val); let vecs = vec![account_maps.to_vec()];
1978 let slice = convert_to_slice(&vecs);
1979 let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
1980 assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
1981 }
1982
    #[test]
    // The same pubkey (key2) appears in two different chunks; whichever chunk
    // comes later wins. `reverse` swaps chunk order to check both outcomes.
    fn test_accountsdb_dup_pubkey_2_chunks() {
        for reverse in [false, true] {
            let key = Pubkey::new_from_array([1; 32]);
            let key2 = Pubkey::new_from_array([2; 32]);
            let hash = AccountHash(Hash::new_unique());
            let mut account_maps = Vec::new();
            let mut account_maps2 = Vec::new();
            let val = CalculateHashIntermediate {
                hash,
                lamports: 1,
                pubkey: key,
            };
            account_maps.push(val);
            // key2 in chunk 1 with lamports 2...
            let val2 = CalculateHashIntermediate {
                hash,
                lamports: 2,
                pubkey: key2,
            };
            account_maps.push(val2);
            // ...and again in chunk 2 with lamports 3.
            let val3 = CalculateHashIntermediate {
                hash,
                lamports: 3,
                pubkey: key2,
            };
            account_maps2.push(val3);

            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
            if reverse {
                vecs = vecs.into_iter().rev().collect();
            }
            let slice = convert_to_slice(&vecs);
            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
            // The entry from the later chunk survives: val3 normally, val2
            // when the chunks are reversed.
            assert_eq!(
                (get_vec(hashfile), lamports),
                (
                    vec![val.hash.0, if reverse { val2.hash.0 } else { val3.hash.0 }],
                    val.lamports
                        + if reverse {
                            val2.lamports
                        } else {
                            val3.lamports
                        }
                ),
                "reverse: {reverse}"
            );
        }
    }
2032
    #[test]
    // Like test_accountsdb_dup_pubkey_2_chunks, but the duplicated pubkey
    // (key2) sorts BEFORE the unique key within its chunk, exercising the
    // de-dup path where the duplicate is not the last entry of the chunk.
    fn test_accountsdb_dup_pubkey_2_chunks_backwards() {
        for reverse in [false, true] {
            let key = Pubkey::new_from_array([3; 32]);
            let key2 = Pubkey::new_from_array([2; 32]);
            let hash = AccountHash(Hash::new_unique());
            let mut account_maps = Vec::new();
            let mut account_maps2 = Vec::new();
            // key2 first (lower pubkey), then key in chunk 1.
            let val2 = CalculateHashIntermediate {
                hash,
                lamports: 2,
                pubkey: key2,
            };
            account_maps.push(val2);
            let val = CalculateHashIntermediate {
                hash,
                lamports: 1,
                pubkey: key,
            };
            account_maps.push(val);
            // key2 again in chunk 2 with different lamports.
            let val3 = CalculateHashIntermediate {
                hash,
                lamports: 3,
                pubkey: key2,
            };
            account_maps2.push(val3);

            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
            if reverse {
                vecs = vecs.into_iter().rev().collect();
            }
            let slice = convert_to_slice(&vecs);
            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
            // Later chunk's key2 entry wins; result is ordered by pubkey, so
            // key2's hash comes before key's.
            assert_eq!(
                (get_vec(hashfile), lamports),
                (
                    vec![if reverse { val2.hash.0 } else { val3.hash.0 }, val.hash.0],
                    val.lamports
                        + if reverse {
                            val2.lamports
                        } else {
                            val3.lamports
                        }
                ),
                "reverse: {reverse}"
            );
        }
    }
2082
    #[test]
    // CumulativeOffsets::from_raw over 1-D ragged input: empty inner vecs get
    // no offset entry, and get_slice reconstructs the flattened sequence from
    // any starting offset.
    fn test_accountsdb_cumulative_offsets1_d() {
        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        // Only the 2 non-empty vecs (indexes 0 and 2) produce offsets.
        assert_eq!(cumulative.cumulative_offsets.len(), 2);
        const DIMENSION: usize = 0;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // Same data but shifted: leading empty vec moves the indexes to 1 and 3.
        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // All-empty input: zero total, zero offsets.
        let input: Vec<Vec<u32>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0);
    }
2135
    // find() on an empty offsets list must panic (there is nothing to find).
    #[should_panic(expected = "is_empty")]
    #[test]
    fn test_accountsdb_cumulative_find_empty() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![],
            total_count: 0,
        };
        input.find(0);
    }
2145
    #[test]
    // find(n) maps a flat offset to (offset within segment, segment entry).
    fn test_accountsdb_cumulative_find() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![CumulativeOffset {
                index: [0; 2],
                start_offset: 0,
            }],
            total_count: 0,
        };
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));

        // Two segments, second starting at flat offset 2.
        let input = CumulativeOffsets {
            cumulative_offsets: vec![
                CumulativeOffset {
                    index: [0; 2],
                    start_offset: 0,
                },
                CumulativeOffset {
                    index: [1; 2],
                    start_offset: 2,
                },
            ],
            total_count: 0,
        };
        // Offsets 0-1 land in segment 0 (at 0 and 1), 2-3 in segment 1.
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));
        assert_eq!(input.find(1), (1, &input.cumulative_offsets[0]));
        assert_eq!(input.find(2), (0, &input.cumulative_offsets[1]));
        assert_eq!(input.find(3), (1, &input.cumulative_offsets[1]));
    }
2175
    #[test]
    // from_raw_2d over nested (2-D) ragged input: only non-empty innermost
    // vecs get offset entries, indexed by [outer, inner]; get_slice walks the
    // flattened sequence from any starting offset.
    fn test_accountsdb_cumulative_offsets2_d() {
        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        // Non-empty inner vecs are at [0,0] and [0,2].
        assert_eq!(cumulative.cumulative_offsets.len(), 2);
        const DIMENSION_0: usize = 0;
        const DIMENSION_1: usize = 1;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // Leading empty inner vec shifts the inner indexes to 1 and 3.
        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // All-empty input: zero total, zero offsets.
        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0);

        // Data spread across different OUTER vecs: offsets at [0,0] and [2,1].
        let input = vec![
            vec![vec![0, 1]],
            vec![vec![]],
            vec![vec![], vec![2, 3, 4], vec![]],
        ];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }
    }
2260
    // Computes the merkle root of (pubkey, hash) pairs and cross-checks it
    // against test_hashing on the hashes alone (pubkeys don't affect the root).
    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
        let result2 = test_hashing(reduced, fanout);
        assert_eq!(result, result2, "len: {}", hashes.len());
        result
    }
2268
    // Computes the merkle root several equivalent ways — direct, from slices
    // (with and without a specialization level), and via every 2-way split of
    // the data into ragged 2-D chunks — asserting they all agree.
    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
        let result = AccountsHasher::compute_merkle_root(temp, fanout);
        let reduced: Vec<_> = hashes.clone();
        // Slice-based computation, no specialization.
        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            None,
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        // Slice-based computation with specialization level 1.
        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            Some(1),
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        // Split the data at every (left, right) boundary pair near the front
        // and feed it through CumulativeOffsets to exercise chunked access.
        let max = std::cmp::min(reduced.len(), fanout * 2);
        for left in 0..max {
            for right in left + 1..max {
                let src = vec![
                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
                    vec![reduced[right..].to_vec()],
                ];
                let offsets = CumulativeOffsets::from_raw_2d(&src);

                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
                let result2 = AccountsHasher::compute_merkle_root_from_slices(
                    offsets.total_count,
                    fanout,
                    None,
                    get_slice,
                    None,
                );
                assert_eq!(result, result2.0);
            }
        }
        result
    }
2313
    #[test]
    // Stress-tests merkle-root computation at sizes straddling fanout powers
    // (mark-1, mark, mark+1 for FANOUT^1..^5) plus a dense range around
    // FANOUT^4, where tree-depth transitions happen.
    fn test_accountsdb_compute_merkle_root_large() {
        solana_logger::setup();

        const FANOUT: usize = 3;
        let mut hash_counts: Vec<_> = (1..6)
            .flat_map(|x| {
                let mark = FANOUT.pow(x);
                // One below, at, and above each power-of-fanout boundary.
                vec![mark - 1, mark, mark + 1]
            })
            .collect();

        // Dense coverage around threshold = FANOUT^4.
        let target = FANOUT.pow(3);
        let threshold = target * FANOUT;
        hash_counts.extend(threshold - 1..=threshold + target);

        for hash_count in hash_counts {
            let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();

            test_hashing(hashes, FANOUT);
        }
    }
2339
    #[test]
    // Pins merkle roots for 0..iterations inputs at two fanouts (2 and
    // MERKLE_FANOUT) against a golden table of (pass, count, root, checksum).
    fn test_accountsdb_compute_merkle_root() {
        solana_logger::setup();

        let expected_results = vec![
            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
        ];

        let mut expected_index = 0;
        let start = 0;
        let default_fanout = 2;
        // A couple of iterations past the second tree level.
        let iterations = default_fanout * default_fanout * default_fanout + 2;
        for pass in 0..2 {
            let fanout = if pass == 0 {
                default_fanout
            } else {
                MERKLE_FANOUT
            };
            for count in start..iterations {
                // Deterministic (key, hash) inputs derived from pass/count/i.
                let mut input: Vec<_> = (0..count)
                    .map(|i| {
                        let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
                        let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
                        (key, hash)
                    })
                    .collect();

                let result = if pass == 0 {
                    test_hashing_larger(input, fanout)
                } else {
                    // At MERKLE_FANOUT, also check accumulate_account_hashes
                    // matches sort + compute_merkle_root.
                    let early_result = AccountsHasher::accumulate_account_hashes(
                        input
                            .iter()
                            .map(|i| (i.0, AccountHash(i.1)))
                            .collect::<Vec<_>>(),
                    );

                    input.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
                    let result = AccountsHasher::compute_merkle_root(input, fanout);
                    assert_eq!(early_result, result);
                    result
                };
                assert_eq!(
                    (
                        pass,
                        count,
                        &*(result.to_string()),
                        expected_results[expected_index].3
                    ),
                    expected_results[expected_index]
                );
                expected_index += 1;
            }
        }
    }
2417
2418 #[test]
2419 #[should_panic(expected = "summing lamports cannot overflow")]
2420 fn test_accountsdb_lamport_overflow() {
2421 solana_logger::setup();
2422
2423 let offset = 2;
2424 let input = vec![
2425 CalculateHashIntermediate {
2426 hash: AccountHash(Hash::new(&[1u8; 32])),
2427 lamports: u64::MAX - offset,
2428 pubkey: Pubkey::new_unique(),
2429 },
2430 CalculateHashIntermediate {
2431 hash: AccountHash(Hash::new(&[2u8; 32])),
2432 lamports: offset + 1,
2433 pubkey: Pubkey::new_unique(),
2434 },
2435 ];
2436 let dir_for_temp_cache_files = tempdir().unwrap();
2437 let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2438 accounts_hasher.de_dup_accounts_in_parallel(
2439 &convert_to_slice(&[input]),
2440 0,
2441 1,
2442 &HashStats::default(),
2443 );
2444 }
2445
2446 fn convert_to_slice(
2447 input: &[Vec<CalculateHashIntermediate>],
2448 ) -> Vec<&[CalculateHashIntermediate]> {
2449 input.iter().map(|v| &v[..]).collect::<Vec<_>>()
2450 }
2451
2452 #[test]
2453 #[should_panic(expected = "summing lamports cannot overflow")]
2454 fn test_accountsdb_lamport_overflow2() {
2455 solana_logger::setup();
2456
2457 let offset = 2;
2458 let input = vec![
2459 vec![CalculateHashIntermediate {
2460 hash: AccountHash(Hash::new(&[1u8; 32])),
2461 lamports: u64::MAX - offset,
2462 pubkey: Pubkey::new_unique(),
2463 }],
2464 vec![CalculateHashIntermediate {
2465 hash: AccountHash(Hash::new(&[2u8; 32])),
2466 lamports: offset + 1,
2467 pubkey: Pubkey::new_unique(),
2468 }],
2469 ];
2470 let dir_for_temp_cache_files = tempdir().unwrap();
2471 let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2472 accounts_hasher.de_dup_accounts(
2473 &convert_to_slice(&input),
2474 &mut HashStats::default(),
2475 2, );
2477 }
2478}