1use {
2 crate::{
3 accounts_db::AccountStorageEntry,
4 active_stats::{ActiveStatItem, ActiveStats},
5 ancestors::Ancestors,
6 pubkey_bins::PubkeyBinCalculator24,
7 },
8 bytemuck_derive::{Pod, Zeroable},
9 log::*,
10 rayon::prelude::*,
11 solana_lattice_hash::lt_hash::LtHash,
12 solana_measure::{measure::Measure, measure_us},
13 solana_pubkey::Pubkey,
14 solana_sdk::{
15 hash::{Hash, Hasher, HASH_BYTES},
16 rent_collector::RentCollector,
17 slot_history::Slot,
18 sysvar::epoch_schedule::EpochSchedule,
19 },
20 std::{
21 clone,
22 convert::TryInto,
23 fs::File,
24 io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write},
25 path::{Path, PathBuf},
26 sync::{
27 atomic::{AtomicU64, AtomicUsize, Ordering},
28 Arc, Mutex,
29 },
30 },
31 tempfile::tempfile_in,
32};
/// Fan-out (branching factor) used when building the accounts Merkle tree.
pub const MERKLE_FANOUT: usize = 16;
34
/// Temp-file-backed, append-only buffer of account hashes.
///
/// Hashes are appended with `write` and read back (once) via `get_reader`.
struct AccountHashesFile {
    // Writer over the backing temp file; `None` after `get_reader` consumes it.
    writer: Option<BufWriter<File>>,
    // Number of hashes written so far.
    count: usize,
}
43
impl AccountHashesFile {
    /// Creates an `AccountHashesFile` backed by an anonymous temp file inside
    /// `dir_for_temp_cache_files`.
    ///
    /// Panics if the temp file cannot be created.
    fn new(dir_for_temp_cache_files: impl AsRef<Path>) -> Self {
        let file = tempfile_in(dir_for_temp_cache_files.as_ref()).unwrap_or_else(|err| {
            panic!(
                "Unable to create file within {}: {err}",
                dir_for_temp_cache_files.as_ref().display()
            )
        });
        let writer = Some(BufWriter::new(file));
        Self { writer, count: 0 }
    }

    /// Consumes the writer and returns a buffered reader over the hashes
    /// written so far, or `None` if nothing was written.
    ///
    /// The writer is taken (and the temp file released) even when the count
    /// is zero.
    fn get_reader(&mut self) -> Option<Mutex<BufReader<File>>> {
        // Take the writer first so the file is dropped even if empty.
        let writer = self.writer.take();
        if self.count == 0 {
            return None;
        }

        writer.map(|writer| {
            // `into_inner` flushes buffered bytes before we start reading.
            let reader = BufReader::new(writer.into_inner().unwrap());
            Mutex::new(reader)
        })
    }

    /// Number of hashes written so far.
    fn count(&self) -> usize {
        self.count
    }

    /// Appends `hash` to the file.
    ///
    /// Must not be called after `get_reader` has consumed the writer.
    fn write(&mut self, hash: &Hash) {
        debug_assert!(self.writer.is_some());
        self.writer
            .as_mut()
            .unwrap()
            .write_all(hash.as_ref())
            .expect("write hash success");
        self.count += 1;
    }
}
90
/// Configuration options for calculating the accounts hash.
#[derive(Debug)]
pub struct CalcAccountsHashConfig<'a> {
    // Run the calculation on the background thread pool.
    pub use_bg_thread_pool: bool,
    // Optional ancestors to consult during the calculation.
    pub ancestors: Option<&'a Ancestors>,
    pub epoch_schedule: &'a EpochSchedule,
    pub rent_collector: &'a RentCollector,
    // Dump extra debug info if the calculated hash fails verification.
    pub store_detailed_debug_info_on_failure: bool,
}

/// Storage-size summary: [min, 1st quartile, median, 3rd quartile, max, mean].
pub type StorageSizeQuartileStats = [usize; 6];
108
/// Timing and counting statistics gathered while calculating the accounts
/// hash; reported via `log()`.
#[derive(Debug, Default)]
pub struct HashStats {
    pub total_us: u64,
    pub mark_time_us: u64,
    pub cache_hash_data_us: u64,
    pub scan_time_total_us: u64,
    pub zeros_time_total_us: u64,
    pub hash_time_total_us: u64,
    pub drop_hash_files_us: u64,
    pub sort_time_total_us: u64,
    pub hash_total: usize,
    pub num_snapshot_storage: usize,
    pub scan_chunks: usize,
    pub num_slots: usize,
    pub num_dirty_slots: usize,
    pub collect_snapshots_us: u64,
    pub storage_sort_us: u64,
    // Populated by `calc_storage_size_quartiles`.
    pub storage_size_quartiles: StorageSizeQuartileStats,
    pub oldest_root: Slot,
    // Atomic fields below are updated concurrently from parallel scan/dedup code.
    pub roots_older_than_epoch: AtomicUsize,
    pub accounts_in_roots_older_than_epoch: AtomicUsize,
    pub append_vec_sizes_older_than_epoch: AtomicUsize,
    pub longest_ancient_scan_us: AtomicU64,
    pub sum_ancient_scans_us: AtomicU64,
    pub count_ancient_scans: AtomicU64,
    pub pubkey_bin_search_us: AtomicU64,
    pub num_zero_lamport_accounts: AtomicU64,
    pub num_zero_lamport_accounts_ancient: Arc<AtomicU64>,
}
impl HashStats {
    /// Computes [min, q1, median, q3, max, mean] over the capacities of
    /// `storages` and stores the result in `storage_size_quartiles`.
    /// Zeroes the stats when `storages` is empty.
    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
        let mut sum = 0;
        let mut sizes = storages
            .iter()
            .map(|storage| {
                let cap = storage.accounts.capacity() as usize;
                sum += cap;
                cap
            })
            .collect::<Vec<_>>();
        sizes.sort_unstable();
        let len = sizes.len();
        self.storage_size_quartiles = if len == 0 {
            StorageSizeQuartileStats::default()
        } else {
            [
                *sizes.first().unwrap(),
                sizes[len / 4],
                sizes[len * 2 / 4],
                sizes[len * 3 / 4],
                *sizes.last().unwrap(),
                sum / len,
            ]
        };
    }

    /// Reports all gathered statistics as a single metrics datapoint.
    pub fn log(&self) {
        datapoint_info!(
            "calculate_accounts_hash_from_storages",
            ("total_us", self.total_us, i64),
            ("mark_time_us", self.mark_time_us, i64),
            ("cache_hash_data_us", self.cache_hash_data_us, i64),
            ("accounts_scan_us", self.scan_time_total_us, i64),
            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
            ("hash_us", self.hash_time_total_us, i64),
            ("drop_hash_files_us", self.drop_hash_files_us, i64),
            ("sort_us", self.sort_time_total_us, i64),
            ("hash_total", self.hash_total, i64),
            ("storage_sort_us", self.storage_sort_us, i64),
            ("collect_snapshots_us", self.collect_snapshots_us, i64),
            ("num_snapshot_storage", self.num_snapshot_storage, i64),
            ("scan_chunks", self.scan_chunks, i64),
            ("num_slots", self.num_slots, i64),
            ("num_dirty_slots", self.num_dirty_slots, i64),
            ("storage_size_min", self.storage_size_quartiles[0], i64),
            (
                "storage_size_quartile_1",
                self.storage_size_quartiles[1],
                i64
            ),
            (
                "storage_size_quartile_2",
                self.storage_size_quartiles[2],
                i64
            ),
            (
                "storage_size_quartile_3",
                self.storage_size_quartiles[3],
                i64
            ),
            ("storage_size_max", self.storage_size_quartiles[4], i64),
            ("storage_size_avg", self.storage_size_quartiles[5], i64),
            (
                "roots_older_than_epoch",
                self.roots_older_than_epoch.load(Ordering::Relaxed),
                i64
            ),
            ("oldest_root", self.oldest_root, i64),
            (
                "longest_ancient_scan_us",
                self.longest_ancient_scan_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "sum_ancient_scans_us",
                self.sum_ancient_scans_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "count_ancient_scans",
                self.count_ancient_scans.load(Ordering::Relaxed),
                i64
            ),
            (
                "append_vec_sizes_older_than_epoch",
                self.append_vec_sizes_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "accounts_in_roots_older_than_epoch",
                self.accounts_in_roots_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "pubkey_bin_search_us",
                self.pubkey_bin_search_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "num_zero_lamport_accounts",
                self.num_zero_lamport_accounts.load(Ordering::Relaxed),
                i64
            ),
            (
                "num_zero_lamport_accounts_ancient",
                self.num_zero_lamport_accounts_ancient
                    .load(Ordering::Relaxed),
                i64
            ),
        );
    }
}
253
/// Per-account intermediate record used while calculating the accounts hash.
///
/// `#[repr(C)]` plus the padding assertion below make this safe to cast as
/// raw bytes (`Pod`).
#[repr(C)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)]
pub struct CalculateHashIntermediate {
    pub hash: AccountHash,
    pub lamports: u64,
    pub pubkey: Pubkey,
}

// Compile-time proof that the struct has no padding bytes; required for the
// `Pod` derive to be sound when the bytes are reinterpreted.
const _: () = assert!(
    std::mem::size_of::<CalculateHashIntermediate>()
        == std::mem::size_of::<AccountHash>()
            + std::mem::size_of::<u64>()
            + std::mem::size_of::<Pubkey>(),
    "CalculateHashIntermediate cannot have any padding"
);
273
/// One entry in `CumulativeOffsets`: where a non-empty source slice begins in
/// the flattened, cumulative index space.
#[derive(Debug, PartialEq, Eq)]
struct CumulativeOffset {
    // Source position: outer (and, for 2-D sources, inner) indices.
    index: [usize; 2],
    // Cumulative count of items before this source.
    start_offset: usize,
}

/// Extracts the tail of the source slice identified by a `CumulativeOffset`,
/// starting at `start` within that slice.
trait ExtractSliceFromRawData<'b, T: 'b> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
}
284
// 1-D source: only `index[0]` is used.
impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][start..]
    }
}

// 2-D source: `index[0]` selects the outer vec, `index[1]` the inner one.
impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][offset.index[1]][start..]
    }
}
296
/// Flattened view over several sources: maps a global index to the source
/// (and offset within it) that holds that item.
#[derive(Default, Debug)]
struct CumulativeOffsets {
    // One entry per non-empty source, in order.
    cumulative_offsets: Vec<CumulativeOffset>,
    // Total number of items across all sources.
    total_count: usize,
}

/// Flattened, readable view over the hashes stored in several
/// `AccountHashesFile`s.
#[derive(Default)]
struct CumulativeHashesFromFiles {
    // One reader per non-empty hash file, parallel to `cumulative`'s entries.
    readers: Vec<Mutex<BufReader<File>>>,
    // Maps a global hash index to (reader, offset within that file).
    cumulative: CumulativeOffsets,
}
313
impl CumulativeHashesFromFiles {
    /// Builds a cumulative view over `hashes`, keeping a reader for each
    /// non-empty file and skipping empty ones.
    fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
        let mut readers = Vec::with_capacity(hashes.len());
        let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
            // `get_reader` returns None for empty files, so `readers` stays
            // parallel to the offsets produced by `CumulativeOffsets::new`.
            let count = hash_file.count();
            hash_file.get_reader().map(|reader| {
                readers.push(reader);
                count
            })
        }));
        Self {
            cumulative,
            readers,
        }
    }

    /// Total number of hashes across all files.
    fn total_count(&self) -> usize {
        self.cumulative.total_count
    }

    /// Returns a chunk of hashes beginning at global index `start`.
    ///
    /// The chunk never crosses a file boundary and is capped at
    /// `MAX_BUFFER_SIZE_IN_HASH` hashes, so callers may get back fewer hashes
    /// than remain and must call again with an advanced `start`.
    fn get_data(&self, start: usize) -> Box<[Hash]> {
        let (start, offset, num_hashes) = self.cumulative.find(start);
        let data_source_index = offset.index[0];
        let mut data = self.readers[data_source_index].lock().unwrap();

        // Seek to the `start`-th hash within this file.
        let file_offset_in_bytes = std::mem::size_of::<Hash>() * start;
        data.seek(SeekFrom::Start(file_offset_in_bytes.try_into().unwrap()))
            .unwrap();

        // Small cap under test to exercise the chunked-read paths.
        #[cfg(test)]
        const MAX_BUFFER_SIZE_IN_HASH: usize = 4;
        #[cfg(not(test))]
        const MAX_BUFFER_SIZE_IN_HASH: usize = 2 * 1024 * 1024;

        let remaining_num_hashes = num_hashes - start;
        let num_hashes_to_read = remaining_num_hashes.min(MAX_BUFFER_SIZE_IN_HASH);
        let mut hashes = vec![Hash::default(); num_hashes_to_read].into_boxed_slice();

        // Read raw bytes directly into the Hash buffer.
        data.read_exact(bytemuck::must_cast_slice_mut(hashes.as_mut()))
            .expect("read hash success");

        hashes
    }
}
370
/// Abstraction over an indexable collection of hashes, allowing the merkle
/// code to work with both borrowed slices and owned, shareable buffers.
trait AsHashSlice: std::marker::Sync + std::marker::Send + clone::Clone {
    /// Number of hashes available.
    fn num_hashes(&self) -> usize;
    /// Returns the `i`-th hash. Panics if `i` is out of range.
    fn get(&self, i: usize) -> &Hash;
}

impl AsHashSlice for &[Hash] {
    fn num_hashes(&self) -> usize {
        self.len()
    }
    fn get(&self, i: usize) -> &Hash {
        &self[i]
    }
}

// `Arc<Box<[Hash]>>` lets owned buffers be cloned cheaply across rayon tasks.
impl AsHashSlice for Arc<Box<[Hash]>> {
    fn num_hashes(&self) -> usize {
        self.len()
    }
    fn get(&self, i: usize) -> &Hash {
        &self[i]
    }
}
393
impl CumulativeOffsets {
    /// Builds offsets from an iterator of per-source lengths, skipping
    /// zero-length sources. Both elements of `index` are set to the source's
    /// position (2-D sources use `from_raw_2d` in tests instead).
    fn new<I>(iter: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        let mut total_count: usize = 0;
        let cumulative_offsets: Vec<_> = iter
            .enumerate()
            .filter_map(|(i, len)| {
                if len > 0 {
                    let result = CumulativeOffset {
                        index: [i, i],
                        start_offset: total_count,
                    };
                    total_count += len;
                    Some(result)
                } else {
                    None
                }
            })
            .collect();

        Self {
            cumulative_offsets,
            total_count,
        }
    }

    /// Builds offsets from a 1-D collection of slices.
    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
        Self::new(raw.iter().map(|v| v.len()))
    }

    /// Returns the index of the offset entry whose range contains global
    /// index `start`. Panics if there are no offsets.
    fn find_index(&self, start: usize) -> usize {
        assert!(!self.cumulative_offsets.is_empty());
        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
            Ok(index) => index,
            // `Err` is the insertion point, so the containing range starts at
            // the previous entry.
            Err(index) => index - 1,
        }
    }

    /// Resolves global index `start` to:
    /// (offset within the source, the source's `CumulativeOffset`,
    /// total number of items in that source).
    fn find(&self, start: usize) -> (usize, &CumulativeOffset, usize) {
        let i = self.find_index(start);
        let index = &self.cumulative_offsets[i];
        let start = start - index.start_offset;

        // The source's length is the distance to the next entry's start
        // (or to `total_count` for the last source).
        let i_next = i + 1;
        let next_start_offset = if i_next == self.cumulative_offsets.len() {
            self.total_count
        } else {
            let next = &self.cumulative_offsets[i_next];
            next.start_offset
        };
        let num_hashes = next_start_offset - index.start_offset;
        (start, index, num_hashes)
    }

    /// Borrows the tail of the source slice containing global index `start`.
    fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
    where
        U: ExtractSliceFromRawData<'b, T> + 'b,
    {
        let (start, index, _) = self.find(start);
        raw.extract(index, start)
    }
}
464
/// Calculates the accounts hash from intermediate per-account data.
#[derive(Debug)]
pub struct AccountsHasher<'a> {
    /// Whether zero-lamport accounts contribute to the hash.
    pub zero_lamport_accounts: ZeroLamportAccounts,
    /// Directory in which temporary hash-cache files are created.
    pub dir_for_temp_cache_files: PathBuf,
    pub(crate) active_stats: &'a ActiveStats,
}

/// Location of one item: which slot group it is in, and where in that group.
#[derive(Debug, Clone, Copy)]
struct SlotGroupPointer {
    // Index of the slot group within `sorted_data_by_pubkey`.
    slot_group_index: usize,
    // Offset of the item within that slot group's slice.
    offset: usize,
}

/// A pubkey together with the location of its data.
#[derive(Debug)]
struct ItemLocation<'a> {
    // The pubkey at this location.
    key: &'a Pubkey,
    // Where the item lives.
    pointer: SlotGroupPointer,
}
490
491impl AccountsHasher<'_> {
492 pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
493 let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
494
495 let hash_total = cumulative_offsets.total_count;
496 let result = AccountsHasher::compute_merkle_root_from_slices(
497 hash_total,
498 MERKLE_FANOUT,
499 None,
500 |start: usize| cumulative_offsets.get_slice(&hashes, start),
501 None,
502 );
503 (result.0, hash_total)
504 }
505
506 pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
507 Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
508 }
509
510 pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
512 Self::compute_merkle_root_loop(hashes, fanout, |t| t)
513 }
514
515 pub fn div_ceil(x: usize, y: usize) -> usize {
516 let mut result = x / y;
517 if x % y != 0 {
518 result += 1;
519 }
520 result
521 }
522
523 pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
526 where
527 F: Fn(&T) -> &Hash + std::marker::Sync,
528 T: std::marker::Sync,
529 {
530 if hashes.is_empty() {
531 return Hasher::default().result();
532 }
533
534 let mut time = Measure::start("time");
535
536 let total_hashes = hashes.len();
537 let chunks = Self::div_ceil(total_hashes, fanout);
538
539 let result: Vec<_> = (0..chunks)
540 .into_par_iter()
541 .map(|i| {
542 let start_index = i * fanout;
543 let end_index = std::cmp::min(start_index + fanout, total_hashes);
544
545 let mut hasher = Hasher::default();
546 for item in hashes.iter().take(end_index).skip(start_index) {
547 let h = extractor(item);
548 hasher.hash(h.as_ref());
549 }
550
551 hasher.result()
552 })
553 .collect();
554 time.stop();
555 debug!("hashing {} {}", total_hashes, time);
556
557 if result.len() == 1 {
558 result[0]
559 } else {
560 Self::compute_merkle_root_recurse(result, fanout)
561 }
562 }
563
564 fn calculate_three_level_chunks(
565 total_hashes: usize,
566 fanout: usize,
567 max_levels_per_pass: Option<usize>,
568 specific_level_count: Option<usize>,
569 ) -> (usize, usize, bool) {
570 const THREE_LEVEL_OPTIMIZATION: usize = 3; let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
572
573 let threshold = target * fanout;
576 let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
577 && total_hashes >= threshold;
578 if three_level {
579 if let Some(specific_level_count_value) = specific_level_count {
580 three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
581 }
582 }
583 let (num_hashes_per_chunk, levels_hashed) = if three_level {
584 (target, THREE_LEVEL_OPTIMIZATION)
585 } else {
586 (fanout, 1)
587 };
588 (num_hashes_per_chunk, levels_hashed, three_level)
589 }
590
591 fn compute_merkle_root_from_slices<'b, F, U>(
594 total_hashes: usize,
595 fanout: usize,
596 max_levels_per_pass: Option<usize>,
597 get_hash_slice_starting_at_index: F,
598 specific_level_count: Option<usize>,
599 ) -> (Hash, Vec<Hash>)
600 where
601 F: Fn(usize) -> U + std::marker::Sync,
603 U: AsHashSlice + 'b,
604 {
605 if total_hashes == 0 {
606 return (Hasher::default().result(), vec![]);
607 }
608
609 let mut time = Measure::start("time");
610
611 let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
612 total_hashes,
613 fanout,
614 max_levels_per_pass,
615 specific_level_count,
616 );
617
618 let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
619
620 let data = get_hash_slice_starting_at_index(0);
622 let data_len = data.num_hashes();
623
624 let result: Vec<_> = (0..chunks)
625 .into_par_iter()
626 .map(|i| {
627 let start_index = i * num_hashes_per_chunk;
633 let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);
635
636 let mut hasher = Hasher::default();
638
639 let mut data_index = start_index;
642 let mut data = data.clone();
644 let mut data_len = data_len;
646
647 if !three_level {
648 for i in start_index..end_index {
651 if data_index >= data_len {
652 data = get_hash_slice_starting_at_index(i);
654 data_len = data.num_hashes();
655 data_index = 0;
656 }
657 hasher.hash(data.get(data_index).as_ref());
658 data_index += 1;
659 }
660 } else {
661 let mut i = start_index;
700 while i < end_index {
701 let mut hasher_j = Hasher::default();
702 for _j in 0..fanout {
703 let mut hasher_k = Hasher::default();
704 let end = std::cmp::min(end_index - i, fanout);
705 for _k in 0..end {
706 if data_index >= data_len {
707 data = get_hash_slice_starting_at_index(i);
709 data_len = data.num_hashes();
710 data_index = 0;
711 }
712 hasher_k.hash(data.get(data_index).as_ref());
713 data_index += 1;
714 i += 1;
715 }
716 hasher_j.hash(hasher_k.result().as_ref());
717 if i >= end_index {
718 break;
719 }
720 }
721 hasher.hash(hasher_j.result().as_ref());
722 }
723 }
724
725 hasher.result()
726 })
727 .collect();
728 time.stop();
729 debug!("hashing {} {}", total_hashes, time);
730
731 if let Some(mut specific_level_count_value) = specific_level_count {
732 specific_level_count_value -= levels_hashed;
733 if specific_level_count_value == 0 {
734 (Hash::default(), result)
735 } else {
736 assert!(specific_level_count_value > 0);
737 Self::compute_merkle_root_from_slices_recurse(
739 result,
740 fanout,
741 max_levels_per_pass,
742 Some(specific_level_count_value),
743 )
744 }
745 } else {
746 (
747 if result.len() == 1 {
748 result[0]
749 } else {
750 Self::compute_merkle_root_recurse(result, fanout)
751 },
752 vec![], )
754 }
755 }
756
757 fn compute_merkle_root_from_slices_recurse(
758 hashes: Vec<Hash>,
759 fanout: usize,
760 max_levels_per_pass: Option<usize>,
761 specific_level_count: Option<usize>,
762 ) -> (Hash, Vec<Hash>) {
763 Self::compute_merkle_root_from_slices(
764 hashes.len(),
765 fanout,
766 max_levels_per_pass,
767 |start| &hashes[start..],
768 specific_level_count,
769 )
770 }
771
772 pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, AccountHash)>) -> Hash {
773 hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
774 Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1 .0)
775 }
776
777 pub fn compare_two_hash_entries(
778 a: &CalculateHashIntermediate,
779 b: &CalculateHashIntermediate,
780 ) -> std::cmp::Ordering {
781 a.pubkey.partial_cmp(&b.pubkey).unwrap()
783 }
784
785 pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
786 balance.try_into().unwrap_or_else(|_| {
787 panic!("overflow is detected while summing capitalization: {balance}")
788 })
789 }
790
791 fn de_dup_accounts(
796 &self,
797 sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
798 stats: &mut HashStats,
799 max_bin: usize,
800 ) -> (Vec<AccountHashesFile>, u64) {
801 let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);
808
809 #[derive(Default)]
810 struct DedupResult {
811 hashes_files: Vec<AccountHashesFile>,
812 hashes_count: usize,
813 lamports_sum: u64,
814 }
815
816 let mut zeros = Measure::start("eliminate zeros");
817 let DedupResult {
818 hashes_files: hashes,
819 hashes_count: hash_total,
820 lamports_sum: lamports_total,
821 } = (0..max_bin)
822 .into_par_iter()
823 .fold(DedupResult::default, |mut accum, bin| {
824 let (hashes_file, lamports_bin) =
825 self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin, max_bin, stats);
826
827 accum.lamports_sum = accum
828 .lamports_sum
829 .checked_add(lamports_bin)
830 .expect("summing capitalization cannot overflow");
831 accum.hashes_count += hashes_file.count();
832 accum.hashes_files.push(hashes_file);
833 accum
834 })
835 .reduce(
836 || {
837 DedupResult {
838 hashes_files: Vec::new(),
841 ..Default::default()
842 }
843 },
844 |mut a, mut b| {
845 a.lamports_sum = a
846 .lamports_sum
847 .checked_add(b.lamports_sum)
848 .expect("summing capitalization cannot overflow");
849 a.hashes_count += b.hashes_count;
850 a.hashes_files.append(&mut b.hashes_files);
851 a
852 },
853 );
854 zeros.stop();
855 stats.zeros_time_total_us += zeros.as_us();
856 stats.hash_total += hash_total;
857 (hashes, lamports_total)
858 }
859
860 fn get_item<'b>(
863 sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
864 bin: usize,
865 binner: &PubkeyBinCalculator24,
866 item_loc: &ItemLocation<'b>,
867 ) -> (&'b CalculateHashIntermediate, Option<ItemLocation<'b>>) {
868 let division_data = &sorted_data_by_pubkey[item_loc.pointer.slot_group_index];
869 let mut index = item_loc.pointer.offset;
870 index += 1;
871 let mut next = None;
872
873 while index < division_data.len() {
874 let next_key = &division_data[index].pubkey;
876 if next_key == item_loc.key {
877 index += 1;
878 continue; }
880
881 if binner.bin_from_pubkey(next_key) > bin {
882 break;
884 }
885
886 next = Some(ItemLocation {
888 key: next_key,
889 pointer: SlotGroupPointer {
890 slot_group_index: item_loc.pointer.slot_group_index,
891 offset: index,
892 },
893 });
894 break;
895 }
896
897 (&division_data[index - 1], next)
899 }
900
901 fn binary_search_for_first_pubkey_in_bin(
904 hash_data: &[CalculateHashIntermediate],
905 bin: usize,
906 binner: &PubkeyBinCalculator24,
907 ) -> Option<usize> {
908 let potential_index = if bin == 0 {
909 0
912 } else {
913 let just_prior_to_desired_bin = bin * 2;
923 let search = hash_data.binary_search_by(|data| {
924 (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
925 });
926 search.expect_err("it is impossible to find a matching bin")
928 };
929 hash_data.get(potential_index).and_then(|potential_data| {
933 (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
934 })
935 }
936
937 fn find_first_pubkey_in_bin(
940 hash_data: &[CalculateHashIntermediate],
941 bin: usize,
942 bins: usize,
943 binner: &PubkeyBinCalculator24,
944 stats: &HashStats,
945 ) -> Option<usize> {
946 if hash_data.is_empty() {
947 return None;
948 }
949 let (result, us) = measure_us!({
950 let i = hash_data.len() * bin / bins;
952 let estimate = &hash_data[i];
953
954 let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
955 let range = if pubkey_bin >= bin {
956 0..(i + 1)
959 } else {
960 (i + 1)..hash_data.len()
962 };
963 Some(
964 range.start +
965 Self::binary_search_for_first_pubkey_in_bin(
967 &hash_data[range],
968 bin,
969 binner,
970 )?,
971 )
972 });
973 stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
974 result
975 }
976
977 fn initialize_dedup_working_set(
980 sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
981 pubkey_bin: usize,
982 bins: usize,
983 binner: &PubkeyBinCalculator24,
984 stats: &HashStats,
985 ) -> (
986 Vec<SlotGroupPointer>, usize, ) {
989 let mut working_set: Vec<SlotGroupPointer> = Vec::default();
991
992 let max_inclusive_num_pubkeys = sorted_data_by_pubkey
1002 .iter()
1003 .enumerate()
1004 .rev()
1005 .map(|(i, hash_data)| {
1006 let first_pubkey_in_bin =
1007 Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, binner, stats);
1008
1009 if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
1010 let mut next = Some(ItemLocation {
1011 key: &hash_data[first_pubkey_in_bin].pubkey,
1012 pointer: SlotGroupPointer {
1013 slot_group_index: i,
1014 offset: first_pubkey_in_bin,
1015 },
1016 });
1017
1018 Self::add_next_item(
1019 &mut next,
1020 &mut working_set,
1021 sorted_data_by_pubkey,
1022 pubkey_bin,
1023 binner,
1024 );
1025
1026 let mut first_pubkey_in_next_bin = first_pubkey_in_bin + 1;
1027 while first_pubkey_in_next_bin < hash_data.len() {
1028 if binner.bin_from_pubkey(&hash_data[first_pubkey_in_next_bin].pubkey)
1029 != pubkey_bin
1030 {
1031 break;
1032 }
1033 first_pubkey_in_next_bin += 1;
1034 }
1035 first_pubkey_in_next_bin - first_pubkey_in_bin
1036 } else {
1037 0
1038 }
1039 })
1040 .sum::<usize>();
1041
1042 (working_set, max_inclusive_num_pubkeys)
1043 }
1044
1045 fn add_next_item<'b>(
1047 next: &mut Option<ItemLocation<'b>>,
1048 working_set: &mut Vec<SlotGroupPointer>,
1049 sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
1050 pubkey_bin: usize,
1051 binner: &PubkeyBinCalculator24,
1052 ) {
1053 while let Some(ItemLocation { key, pointer }) = std::mem::take(next) {
1055 if let Some(SlotGroupPointer {
1058 slot_group_index: current_min_slot_group_index,
1059 offset: current_min_offset,
1060 }) = working_set.last()
1061 {
1062 let current_min_key = &sorted_data_by_pubkey[*current_min_slot_group_index]
1063 [*current_min_offset]
1064 .pubkey;
1065 if key < current_min_key {
1066 working_set.push(pointer);
1067 break;
1068 }
1069 }
1070
1071 let found = working_set.binary_search_by(|pointer| {
1072 let prob = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1073 (*key).cmp(prob)
1074 });
1075
1076 match found {
1077 Err(index) => {
1078 working_set.insert(index, pointer);
1082 break;
1083 }
1084 Ok(index) => {
1085 let found = &mut working_set[index];
1086 if found.slot_group_index > pointer.slot_group_index {
1087 let (_item, new_next) = Self::get_item(
1090 sorted_data_by_pubkey,
1091 pubkey_bin,
1092 binner,
1093 &ItemLocation { key, pointer },
1094 );
1095 *next = new_next;
1096 } else {
1097 let (_item, new_next) = Self::get_item(
1099 sorted_data_by_pubkey,
1100 pubkey_bin,
1101 binner,
1102 &ItemLocation {
1103 key,
1104 pointer: *found,
1105 },
1106 );
1107 *found = pointer;
1108 *next = new_next;
1109 }
1110 }
1111 }
1112 }
1113 }
1114
1115 fn de_dup_accounts_in_parallel(
1123 &self,
1124 sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1125 pubkey_bin: usize,
1126 bins: usize,
1127 stats: &HashStats,
1128 ) -> (AccountHashesFile, u64) {
1129 let binner = PubkeyBinCalculator24::new(bins);
1130
1131 let (mut working_set, _max_inclusive_num_pubkeys) = Self::initialize_dedup_working_set(
1133 sorted_data_by_pubkey,
1134 pubkey_bin,
1135 bins,
1136 &binner,
1137 stats,
1138 );
1139
1140 let mut hashes = AccountHashesFile::new(&self.dir_for_temp_cache_files);
1141
1142 let mut overall_sum: u64 = 0;
1143
1144 while let Some(pointer) = working_set.pop() {
1145 let key = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
1146
1147 let (item, mut next) = Self::get_item(
1149 sorted_data_by_pubkey,
1150 pubkey_bin,
1151 &binner,
1152 &ItemLocation { key, pointer },
1153 );
1154
1155 if item.lamports != 0 {
1157 overall_sum = overall_sum
1158 .checked_add(item.lamports)
1159 .expect("summing lamports cannot overflow");
1160 hashes.write(&item.hash.0);
1161 } else {
1162 stats
1163 .num_zero_lamport_accounts
1164 .fetch_add(1, Ordering::Relaxed);
1165 if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
1167 let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
1170 let hash = Hash::new_from_array(hash.into());
1171 hashes.write(&hash);
1172 }
1173 }
1174
1175 Self::add_next_item(
1176 &mut next,
1177 &mut working_set,
1178 sorted_data_by_pubkey,
1179 pubkey_bin,
1180 &binner,
1181 );
1182 }
1183
1184 (hashes, overall_sum)
1185 }
1186
1187 pub fn rest_of_hash_calculation(
1191 &self,
1192 sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
1193 bins: usize,
1194 stats: &mut HashStats,
1195 ) -> (Hash, u64) {
1196 let (hashes, total_lamports) = self.de_dup_accounts(sorted_data_by_pubkey, stats, bins);
1197
1198 let cumulative = CumulativeHashesFromFiles::from_files(hashes);
1199
1200 let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
1201 let mut hash_time = Measure::start("hash");
1202 let (hash, _) = Self::compute_merkle_root_from_slices(
1203 cumulative.total_count(),
1204 MERKLE_FANOUT,
1205 None,
1206 |start| Arc::new(cumulative.get_data(start)),
1207 None,
1208 );
1209 hash_time.stop();
1210 stats.hash_time_total_us += hash_time.as_us();
1211
1212 let (_, drop_us) = measure_us!(drop(cumulative));
1213 stats.drop_hash_files_us += drop_us;
1214 (hash, total_lamports)
1215 }
1216}
1217
/// Whether zero-lamport accounts are included in the accounts hash.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ZeroLamportAccounts {
    Excluded,
    Included,
}

/// Newtype over `Hash` for an individual account's hash.
#[repr(transparent)]
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Pod, Zeroable)]
pub struct AccountHash(pub Hash);

// `AccountHash` is a transparent wrapper, so it must be exactly the size of
// `Hash` (required for the byte-level casts done via bytemuck).
const _: () = assert!(std::mem::size_of::<AccountHash>() == std::mem::size_of::<Hash>());

/// The `AccountHash` used for zero-lamport accounts (all-zero bytes).
pub const ZERO_LAMPORT_ACCOUNT_HASH: AccountHash =
    AccountHash(Hash::new_from_array([0; HASH_BYTES]));
1245
/// Newtype over `LtHash` for an individual account's lattice hash.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AccountLtHash(pub LtHash);

/// The `AccountLtHash` used for zero-lamport accounts (the lattice identity).
pub const ZERO_LAMPORT_ACCOUNT_LT_HASH: AccountLtHash = AccountLtHash(LtHash::identity());

/// Newtype over `LtHash` for the aggregate accounts lattice hash.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AccountsLtHash(pub LtHash);

/// Either a merkle-based or a lattice-based accounts hash.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum MerkleOrLatticeAccountsHash {
    // Merkle-based accounts hash (full or incremental).
    Merkle(AccountsHashKind),
    // Lattice-based accounts hash.
    Lattice,
}

/// Kind of merkle-based accounts hash: full or incremental.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum AccountsHashKind {
    Full(AccountsHash),
    Incremental(IncrementalAccountsHash),
}
impl AccountsHashKind {
    /// Borrows the inner `Hash`, regardless of kind.
    pub fn as_hash(&self) -> &Hash {
        match self {
            AccountsHashKind::Full(AccountsHash(hash))
            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
        }
    }
}
impl From<AccountsHash> for AccountsHashKind {
    fn from(accounts_hash: AccountsHash) -> Self {
        AccountsHashKind::Full(accounts_hash)
    }
}
impl From<IncrementalAccountsHash> for AccountsHashKind {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        AccountsHashKind::Incremental(incremental_accounts_hash)
    }
}
1283
/// Newtype over `Hash` for the full accounts hash.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsHash(pub Hash);
/// Newtype over `Hash` for the incremental accounts hash.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct IncrementalAccountsHash(pub Hash);

/// Newtype over `Hash` for the accounts delta hash.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsDeltaHash(pub Hash);

/// Serializable version of `AccountsDeltaHash` (e.g. for snapshots).
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeAccountsDeltaHash(pub Hash);
1300
// Conversions between the runtime hash newtypes and their serializable
// counterparts; all are trivial wrappers around the inner `Hash`.
impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}
impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}

/// Serializable version of `AccountsHash` (e.g. for snapshots).
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeAccountsHash(pub Hash);

impl From<SerdeAccountsHash> for AccountsHash {
    fn from(accounts_hash: SerdeAccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}
impl From<AccountsHash> for SerdeAccountsHash {
    fn from(accounts_hash: AccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}

/// Serializable version of `IncrementalAccountsHash` (e.g. for snapshots).
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SerdeIncrementalAccountsHash(pub Hash);

impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
1343
1344#[cfg(test)]
1345mod tests {
1346 use {
1347 super::*, crate::accounts_db::DEFAULT_HASH_CALCULATION_PUBKEY_BINS, itertools::Itertools,
1348 std::str::FromStr, tempfile::tempdir,
1349 };
1350
1351 lazy_static! {
1352 static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
1353 }
1354
1355 impl AccountsHasher<'_> {
1356 fn new(dir_for_temp_cache_files: PathBuf) -> Self {
1357 Self {
1358 zero_lamport_accounts: ZeroLamportAccounts::Excluded,
1359 dir_for_temp_cache_files,
1360 active_stats: &ACTIVE_STATS,
1361 }
1362 }
1363 }
1364
    impl CumulativeOffsets {
        /// Test-only helper: builds `CumulativeOffsets` from a 2-D ragged
        /// array, recording an `[outer, inner]` index and the running start
        /// offset for every non-empty innermost vector.
        fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
            let mut total_count: usize = 0;
            // Allocated lazily: real capacity is reserved only once the first
            // non-empty inner vector is encountered.
            let mut cumulative_offsets = Vec::with_capacity(0);
            for (i, v_outer) in raw.iter().enumerate() {
                for (j, v) in v_outer.iter().enumerate() {
                    let len = v.len();
                    if len > 0 {
                        if cumulative_offsets.is_empty() {
                            // Capacity guess based on the current outer row's width.
                            cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
                        }
                        cumulative_offsets.push(CumulativeOffset {
                            index: [i, j],
                            start_offset: total_count,
                        });
                        total_count += len;
                    }
                }
            }

            Self {
                cumulative_offsets,
                total_count,
            }
        }
    }
1392
    /// Cross-checks `find_first_pubkey_in_bin` against
    /// `binary_search_for_first_pubkey_in_bin` on every permutation of
    /// per-bin account counts, for several bin widths.
    #[test]
    fn test_find_first_pubkey_in_bin() {
        let stats = HashStats::default();
        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
            let bins: usize = bins;
            let binner = PubkeyBinCalculator24::new(bins);

            let mut count = 0usize;
            // Each permutation assigns one of these counts to each bin.
            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
                count += 1;
                // Build sorted hash data: `count` entries per bin, each using
                // that bin's lowest possible pubkey.
                let hash_data = counts
                    .iter()
                    .enumerate()
                    .flat_map(|(bin, count)| {
                        (0..*count).map(move |_| {
                            let binner = PubkeyBinCalculator24::new(bins);
                            CalculateHashIntermediate {
                                hash: AccountHash(Hash::default()),
                                lamports: 0,
                                pubkey: binner.lowest_pubkey_from_bin(bin, bins),
                            }
                        })
                    })
                    .collect::<Vec<_>>();
                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
                    let first = AccountsHasher::find_first_pubkey_in_bin(
                        &hash_data, bin, bins, &binner, &stats,
                    );
                    // Both lookup strategies must agree.
                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
                        &hash_data, bin, &binner,
                    );
                    assert_eq!(first, first_again);
                    assert_eq!(first.is_none(), count_in_bin == &0);
                    if let Some(first) = first {
                        // `first` must point at the earliest entry of this bin.
                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
                        if first > 0 {
                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
                        }
                    }
                }
            }
            // Guard against the permutation loop silently running too few times.
            assert_eq!(
                count, expected_count,
                "too few iterations in test. bins: {bins}"
            );
        }
    }
1445
    /// Exercises `AccountHashesFile`: an empty file yields no reader, and
    /// written hashes can be read back starting from any index.
    #[test]
    fn test_account_hashes_file() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        // Nothing written: `get_reader` returns None.
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
        assert!(file.get_reader().is_none());
        let hashes = (0..2)
            .map(|i| Hash::new_from_array([i; 32]))
            .collect::<Vec<_>>();

        // One hash written: readable at index 0, empty past the end.
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
        file.write(&hashes[0]);
        let reader = file.get_reader();
        assert_eq!(&[hashes[0]][..], read(&reader, 0).unwrap());
        assert!(read(&reader, 1).unwrap().is_empty());

        // Two hashes written: reading from index i returns the tail at i.
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path());
        hashes.iter().for_each(|hash| file.write(hash));
        let reader = file.get_reader();
        (0..2).for_each(|i| assert_eq!(&hashes[i..], read(&reader, i).unwrap()));
        assert!(read(&reader, 2).unwrap().is_empty());
    }
1470
    /// Builds `CumulativeHashesFromFiles` from several `AccountHashesFile`s
    /// (including empty ones, in multiple orderings) and verifies `get_data`
    /// returns the combined hashes from every possible start offset.
    #[test]
    fn test_cumulative_hashes_from_files() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        (0..4).for_each(|permutation| {
            let hashes = (0..2)
                .map(|i| Hash::new_from_array([i + 1; 32]))
                .collect::<Vec<_>>();

            // `combined` tracks the expected concatenation of all file contents.
            let mut combined = Vec::default();

            // file0: empty, file1: one hash, file2: two hashes.
            let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path());

            let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path());
            file1.write(&hashes[0]);
            combined.push(hashes[0]);

            let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path());
            hashes.iter().for_each(|hash| {
                file2.write(hash);
                combined.push(*hash);
            });

            // Interleave empty files differently per permutation; permutation 3
            // also swaps file order, so rotate `combined` to match.
            let hashes = if permutation == 0 {
                vec![file0, file1, file2]
            } else if permutation == 1 {
                vec![
                    file0,
                    file1,
                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
                ]
            } else if permutation == 2 {
                vec![file1, file2]
            } else {
                let one = combined.remove(0);
                combined.push(one);
                vec![
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
                    AccountHashesFile::new(dir_for_temp_cache_files.path()),
                    file1,
                ]
            };

            let cumulative = CumulativeHashesFromFiles::from_files(hashes);
            let len = combined.len();
            assert_eq!(cumulative.total_count(), len);
            // From every start offset, walking `get_data` must reproduce the
            // expected tail, with each chunk non-empty.
            (0..combined.len()).for_each(|start| {
                let mut retrieved = Vec::default();
                let mut cumulative_start = start;
                while retrieved.len() < (len - start) {
                    let this_one_bytes = cumulative.get_data(cumulative_start);
                    let this_one = bytemuck::cast_slice(&this_one_bytes);
                    retrieved.extend(this_one.iter());
                    cumulative_start += this_one.len();
                    assert_ne!(0, this_one.len());
                }
                assert_eq!(
                    &combined[start..],
                    &retrieved[..],
                    "permutation: {permutation}"
                );
            });
        });
    }
1543
1544 #[test]
1545 fn test_accountsdb_div_ceil() {
1546 assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
1547 assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
1548 assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
1549 assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
1550 assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
1551 }
1552
    /// `div_ceil` with a zero denominator must panic with the standard
    /// divide-by-zero message.
    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn test_accountsdb_div_ceil_fail() {
        assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
    }
1558
1559 fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
1560 vec![original]
1561 }
1562
    /// Runs `rest_of_hash_calculation` over a growing account set and pins
    /// the resulting (merkle root, total lamports) against known-good values.
    /// Covers: a zero-lamport account, and a duplicate pubkey where the later
    /// entry must win.
    #[test]
    fn test_accountsdb_rest_of_hash_calculation() {
        solana_logger::setup();

        let mut account_maps = Vec::new();

        let pubkey = Pubkey::from([11u8; 32]);
        let hash = AccountHash(Hash::new_from_array([1u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 88,
            pubkey,
        };
        account_maps.push(val);

        // A zero-lamport account: contributes nothing to the lamport total.
        let pubkey = Pubkey::from([12u8; 32]);
        let hash = AccountHash(Hash::new_from_array([2u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 0,
            pubkey,
        };
        account_maps.push(val);

        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let result = accounts_hash.rest_of_hash_calculation(
            &for_rest(&account_maps),
            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
            &mut HashStats::default(),
        );
        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 88));

        // Add an account that sorts first; total becomes 88 + 20.
        let pubkey = Pubkey::from([10u8; 32]);
        let hash = AccountHash(Hash::new_from_array([2u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 20,
            pubkey,
        };
        account_maps.insert(0, val);

        let result = accounts_hash.rest_of_hash_calculation(
            &for_rest(&account_maps),
            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
            &mut HashStats::default(),
        );
        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 108));

        // Duplicate of pubkey [10u8; 32]: the later entry (30 lamports)
        // supersedes the earlier one, so the total is 88 + 30.
        let pubkey = Pubkey::from([10u8; 32]);
        let hash = AccountHash(Hash::new_from_array([99u8; 32]));
        let val = CalculateHashIntermediate {
            hash,
            lamports: 30,
            pubkey,
        };
        account_maps.insert(1, val);

        let result = accounts_hash.rest_of_hash_calculation(
            &for_rest(&account_maps),
            DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
            &mut HashStats::default(),
        );
        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 118));
    }
1634
    /// Bin count of 1 for `de_dup_accounts` calls in tests.
    fn one_range() -> usize {
        1
    }
1638
    /// Bin count of 0 for `de_dup_accounts` calls in tests.
    fn zero_range() -> usize {
        0
    }
1642
    /// `de_dup_accounts_in_parallel` on a single one-account chunk returns
    /// that account's hash and lamports.
    #[test]
    fn test_accountsdb_de_dup_accounts_zero_chunks() {
        let vec = [vec![CalculateHashIntermediate {
            lamports: 1,
            hash: AccountHash(Hash::default()),
            pubkey: Pubkey::default(),
        }]];
        let temp_vec = vec.to_vec();
        let slice = convert_to_slice(&temp_vec);
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let (mut hashes, lamports) =
            accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
        let reader = hashes.get_reader();
        assert_eq!(&[Hash::default()], &read(&reader, 0).unwrap()[..]);
        assert_eq!(lamports, 1);
    }
1660
1661 fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
1662 hashes.into_iter().map(get_vec).collect()
1663 }
1664 fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
1665 let reader = hashes.get_reader();
1666 read(&reader, 0).unwrap_or_default()
1667 }
1668 fn read(reader: &Option<Mutex<BufReader<File>>>, index: usize) -> std::io::Result<Vec<Hash>> {
1669 let file_offset_in_bytes = std::mem::size_of::<Hash>() * index;
1670 if reader.is_none() {
1671 return Ok(vec![]);
1672 }
1673 let mut reader = reader.as_ref().unwrap().lock().unwrap();
1674 reader.seek(SeekFrom::Start(file_offset_in_bytes.try_into().unwrap()))?;
1675 let mut result_bytes: Vec<u8> = vec![];
1676 reader.read_to_end(&mut result_bytes)?;
1677 let result: Vec<Hash> = bytemuck::cast_slice(&result_bytes).to_vec();
1678 Ok(result)
1679 }
1680
    /// De-dup over empty input produces no hashes and zero lamports, for
    /// both `de_dup_accounts` and `de_dup_accounts_in_parallel`.
    #[test]
    fn test_accountsdb_de_dup_accounts_empty() {
        solana_logger::setup();
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());

        let empty = [];
        let vec = &empty;
        let (hashes, lamports) =
            accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
        assert_eq!(
            Vec::<Hash>::new(),
            get_vec_vec(hashes)
                .into_iter()
                .flatten()
                .collect::<Vec<_>>(),
        );
        assert_eq!(lamports, 0);
        let vec = vec![];
        let (hashes, lamports) =
            accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
        let empty: Vec<Vec<Hash>> = Vec::default();
        assert_eq!(empty, get_vec_vec(hashes));
        assert_eq!(lamports, 0);

        // Parallel variant with different bin arguments, still empty input.
        let (hashes, lamports) =
            accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
        assert_eq!(lamports, 0);

        let (hashes, lamports) =
            accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
        assert_eq!(Vec::<Hash>::new(), get_vec(hashes));
        assert_eq!(lamports, 0);
    }
1716
    /// Runs the de-dup pipeline over every contiguous sub-slice of a fixed
    /// 6-account set (with duplicate pubkeys) and checks all code paths —
    /// `de_dup_accounts_in_parallel` and `de_dup_accounts` with varying bin
    /// counts — agree with each other and with a precomputed expected table
    /// of (keys-as-string, is_last_slice, lamports, hashes-as-string).
    #[test]
    fn test_accountsdb_de_dup_accounts_from_stores() {
        solana_logger::setup();

        let key_a = Pubkey::from([1u8; 32]);
        let key_b = Pubkey::from([2u8; 32]);
        let key_c = Pubkey::from([3u8; 32]);
        const COUNT: usize = 6;
        let hashes = (0..COUNT).map(|i| AccountHash(Hash::new_from_array([i as u8; 32])));
        // Keys deliberately repeat (b x3, c x2) so de-dup has work to do.
        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];

        let accounts: Vec<_> = hashes
            .zip(keys.iter())
            .enumerate()
            .map(|(i, (hash, &pubkey))| CalculateHashIntermediate {
                hash,
                lamports: (i + 1) as u64,
                pubkey,
            })
            .collect();

        type ExpectedType = (String, bool, u64, String);
        let expected:Vec<ExpectedType> = vec![
            ("a1", false, 1, "[11111111111111111111111111111111]"),
            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("a1", true, 1, "[11111111111111111111111111111111]"),
            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ].into_iter().map(|item| {
            let result: ExpectedType = (
                item.0.to_string(),
                item.1,
                item.2,
                item.3.to_string(),
            );
            result
        }).collect();

        let dir_for_temp_cache_files = tempdir().unwrap();
        let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let mut expected_index = 0;
        for last_slice in 0..2 {
            for start in 0..COUNT {
                for end in start + 1..COUNT {
                    let is_last_slice = last_slice == 1;
                    let accounts = accounts.clone();
                    let slice = &accounts[start..end];

                    let slice2 = vec![slice.to_vec()];
                    let slice = &slice2[..];
                    let slice_temp = convert_to_slice(&slice2);
                    // Run the same input through both parallel calls and
                    // three `de_dup_accounts` calls; all must agree.
                    let (hashes2, lamports2) =
                        hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
                    let slice3 = convert_to_slice(&slice2);
                    let (hashes3, lamports3) =
                        hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
                    let vec = slice.to_vec();
                    let slice4 = convert_to_slice(&vec);
                    let mut max_bin = end - start;
                    if !max_bin.is_power_of_two() {
                        max_bin = 1;
                    }

                    let (hashes4, lamports4) =
                        hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes5, lamports5) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes6, lamports6) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);

                    let hashes2 = get_vec(hashes2);
                    let hashes3 = get_vec(hashes3);
                    let hashes4 = get_vec_vec(hashes4);
                    let hashes5 = get_vec_vec(hashes5);
                    let hashes6 = get_vec_vec(hashes6);

                    assert_eq!(hashes2, hashes3);
                    let expected2 = hashes2.clone();
                    assert_eq!(
                        expected2,
                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
                    );
                    assert_eq!(lamports2, lamports3);
                    assert_eq!(lamports2, lamports4);
                    assert_eq!(lamports2, lamports5);
                    assert_eq!(lamports2, lamports6);

                    // Encode the slice as e.g. "a1b2b3" for table lookup.
                    let human_readable = slice[0]
                        .iter()
                        .map(|v| {
                            let mut s = (if v.pubkey == key_a {
                                "a"
                            } else if v.pubkey == key_b {
                                "b"
                            } else {
                                "c"
                            })
                            .to_string();

                            s.push_str(&v.lamports.to_string());
                            s
                        })
                        .collect::<String>();

                    let hash_result_as_string = format!("{hashes2:?}");

                    let packaged_result: ExpectedType = (
                        human_readable,
                        is_last_slice,
                        lamports2,
                        hash_result_as_string,
                    );
                    assert_eq!(expected[expected_index], packaged_result);

                    expected_index += 1;
                }
            }
        }
    }
1884
    /// Entries with the same pubkey compare as `Equal` regardless of their
    /// hash or lamport values — i.e. `compare_two_hash_entries` orders by
    /// pubkey only.
    #[test]
    fn test_accountsdb_compare_two_hash_entries() {
        solana_logger::setup();
        let pubkey = Pubkey::new_unique();
        let hash = AccountHash(Hash::new_unique());
        let val = CalculateHashIntermediate {
            hash,
            lamports: 1,
            pubkey,
        };

        // Same pubkey, different hash and lamports: still Equal.
        let hash2 = AccountHash(Hash::new_unique());
        let val2 = CalculateHashIntermediate {
            hash: hash2,
            lamports: 4,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal, AccountsHasher::compare_two_hash_entries(&val, &val2)
        );

        let hash3 = AccountHash(Hash::new_unique());
        let val3 = CalculateHashIntermediate {
            hash: hash3,
            lamports: 2,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val3)
        );

        let hash4 = AccountHash(Hash::new_unique());
        let val4 = CalculateHashIntermediate {
            hash: hash4,
            lamports: 6,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal, AccountsHasher::compare_two_hash_entries(&val, &val4)
        );

        let hash5 = AccountHash(Hash::new_unique());
        let val5 = CalculateHashIntermediate {
            hash: hash5,
            lamports: 8,
            pubkey,
        };
        assert_eq!(
            std::cmp::Ordering::Equal, AccountsHasher::compare_two_hash_entries(&val, &val5)
        );
    }
1944
    /// Runs `de_dup_accounts_in_parallel` over `account_maps` with a fresh
    /// hasher backed by a throwaway temp dir.
    ///
    /// NOTE(review): the tempdir is dropped when this function returns while
    /// the returned `AccountHashesFile` may still hold an open handle into
    /// it — presumably relies on unlink-while-open semantics; confirm on
    /// non-unix platforms.
    fn test_de_dup_accounts_in_parallel<'a>(
        account_maps: &'a [&'a [CalculateHashIntermediate]],
    ) -> (AccountHashesFile, u64) {
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
    }
1952
    /// A later zero-lamport entry for the same pubkey removes the account
    /// entirely from the de-dup result (hashes and lamports both empty).
    #[test]
    fn test_accountsdb_remove_zero_balance_accounts() {
        solana_logger::setup();

        let pubkey = Pubkey::new_unique();
        let hash = AccountHash(Hash::new_unique());
        let mut account_maps = Vec::new();
        let val = CalculateHashIntermediate {
            hash,
            lamports: 1,
            pubkey,
        };
        account_maps.push(val);

        // With a nonzero balance the account is present.
        let vecs = vec![account_maps.to_vec()];
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!(
            (get_vec(hashfile), lamports),
            (vec![val.hash.0], val.lamports)
        );

        // Later zero-lamport update for the same pubkey wins: account vanishes.
        let val = CalculateHashIntermediate {
            hash,
            lamports: 0,
            pubkey,
        };
        account_maps.push(val); let vecs = vec![account_maps.to_vec()];
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
    }
1988
    /// A pubkey duplicated across two chunks: the entry from the later chunk
    /// wins. Running with the chunks reversed flips which entry survives.
    #[test]
    fn test_accountsdb_dup_pubkey_2_chunks() {
        for reverse in [false, true] {
            let key = Pubkey::new_from_array([1; 32]); let key2 = Pubkey::new_from_array([2; 32]);
            let hash = AccountHash(Hash::new_unique());
            let mut account_maps = Vec::new();
            let mut account_maps2 = Vec::new();
            let val = CalculateHashIntermediate {
                hash,
                lamports: 1,
                pubkey: key,
            };
            account_maps.push(val);
            // key2 appears in chunk 1 (lamports 2) and chunk 2 (lamports 3).
            let val2 = CalculateHashIntermediate {
                hash,
                lamports: 2,
                pubkey: key2,
            };
            account_maps.push(val2);
            let val3 = CalculateHashIntermediate {
                hash,
                lamports: 3,
                pubkey: key2,
            };
            account_maps2.push(val3);

            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
            if reverse {
                vecs = vecs.into_iter().rev().collect();
            }
            let slice = convert_to_slice(&vecs);
            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
            // The later chunk's entry for key2 is kept.
            assert_eq!(
                (get_vec(hashfile), lamports),
                (
                    vec![val.hash.0, if reverse { val2.hash.0 } else { val3.hash.0 }],
                    val.lamports
                        + if reverse {
                            val2.lamports
                        } else {
                            val3.lamports
                        }
                ),
                "reverse: {reverse}"
            );
        }
    }
2038
    /// Same as `test_accountsdb_dup_pubkey_2_chunks`, but the duplicated
    /// pubkey sorts *before* the unique one, exercising the other ordering
    /// within a chunk.
    #[test]
    fn test_accountsdb_dup_pubkey_2_chunks_backwards() {
        for reverse in [false, true] {
            let key = Pubkey::new_from_array([3; 32]); let key2 = Pubkey::new_from_array([2; 32]);
            let hash = AccountHash(Hash::new_unique());
            let mut account_maps = Vec::new();
            let mut account_maps2 = Vec::new();
            // key2 appears in chunk 1 (lamports 2) and chunk 2 (lamports 3).
            let val2 = CalculateHashIntermediate {
                hash,
                lamports: 2,
                pubkey: key2,
            };
            account_maps.push(val2);
            let val = CalculateHashIntermediate {
                hash,
                lamports: 1,
                pubkey: key,
            };
            account_maps.push(val);
            let val3 = CalculateHashIntermediate {
                hash,
                lamports: 3,
                pubkey: key2,
            };
            account_maps2.push(val3);

            let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
            if reverse {
                vecs = vecs.into_iter().rev().collect();
            }
            let slice = convert_to_slice(&vecs);
            let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
            // The later chunk's entry for key2 is kept; key2 sorts first.
            assert_eq!(
                (get_vec(hashfile), lamports),
                (
                    vec![if reverse { val2.hash.0 } else { val3.hash.0 }, val.hash.0],
                    val.lamports
                        + if reverse {
                            val2.lamports
                        } else {
                            val3.lamports
                        }
                ),
                "reverse: {reverse}"
            );
        }
    }
2088
    /// `CumulativeOffsets::from_raw` over 1-D ragged input: empty inner vecs
    /// produce no offsets, and `get_slice` reconstructs the flattened data
    /// from any start position.
    #[test]
    fn test_accountsdb_cumulative_offsets1_d() {
        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        // Only the two non-empty vecs get offsets.
        assert_eq!(cumulative.cumulative_offsets.len(), 2); const DIMENSION: usize = 0;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // Leading empty vec shifts the recorded indices but not the offsets.
        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // All-empty input: no offsets at all.
        let input: Vec<Vec<u32>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0); }
2141
    /// `find` on a `CumulativeOffsets` with no offsets must panic
    /// (the implementation asserts non-emptiness).
    #[should_panic(expected = "is_empty")]
    #[test]
    fn test_accountsdb_cumulative_find_empty() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![],
            total_count: 0,
        };
        input.find(0);
    }
2151
    /// `find` maps a flat index to (offset-within-entry, entry, remaining)
    /// across entry boundaries.
    #[test]
    fn test_accountsdb_cumulative_find() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![CumulativeOffset {
                index: [0; 2],
                start_offset: 0,
            }],
            total_count: 0,
        };
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0], 0));

        // Two entries of length 2 each: indices 0-1 land in the first entry,
        // 2-3 in the second.
        let input = CumulativeOffsets {
            cumulative_offsets: vec![
                CumulativeOffset {
                    index: [0; 2],
                    start_offset: 0,
                },
                CumulativeOffset {
                    index: [1; 2],
                    start_offset: 2,
                },
            ],
            total_count: 2,
        };
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0], 2)); assert_eq!(input.find(1), (1, &input.cumulative_offsets[0], 2)); assert_eq!(input.find(2), (0, &input.cumulative_offsets[1], 0)); assert_eq!(input.find(3), (1, &input.cumulative_offsets[1], 0)); }
2181
    /// `from_raw_2d` over 2-D ragged input: empty inner vecs produce no
    /// offsets, `[outer, inner]` indices are recorded, and `get_slice`
    /// reconstructs the flattened data from any start position.
    #[test]
    fn test_accountsdb_cumulative_offsets2_d() {
        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        // Only the two non-empty innermost vecs get offsets.
        assert_eq!(cumulative.cumulative_offsets.len(), 2); const DIMENSION_0: usize = 0;
        const DIMENSION_1: usize = 1;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // Leading empty innermost vec shifts the inner indices.
        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // All-empty input: no offsets; then non-empty data spread across
        // multiple outer vecs.
        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0); let input = vec![
            vec![vec![0, 1]],
            vec![vec![]],
            vec![vec![], vec![2, 3, 4], vec![]],
        ];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }
    }
2266
    /// Computes the merkle root of (pubkey, hash) pairs and asserts it equals
    /// the root computed from the hashes alone via `test_hashing`.
    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
        let result2 = test_hashing(reduced, fanout);
        assert_eq!(result, result2, "len: {}", hashes.len());
        result
    }
2274
    /// Asserts `compute_merkle_root` and `compute_merkle_root_from_slices`
    /// agree: with contiguous slices, with a max-levels limit of 1, and with
    /// the input split across every 2-D partition produced by
    /// `CumulativeOffsets`. Returns the agreed-upon root.
    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
        let result = AccountsHasher::compute_merkle_root(temp, fanout);
        let reduced: Vec<_> = hashes.clone();
        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            None,
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        // Same computation with a one-level limit.
        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            Some(1),
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        // Split the input at every (left, right) cut point and verify the
        // slice-based root is unaffected by the partitioning.
        let max = std::cmp::min(reduced.len(), fanout * 2);
        for left in 0..max {
            for right in left + 1..max {
                let src = vec![
                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
                    vec![reduced[right..].to_vec()],
                ];
                let offsets = CumulativeOffsets::from_raw_2d(&src);

                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
                let result2 = AccountsHasher::compute_merkle_root_from_slices(
                    offsets.total_count,
                    fanout,
                    None,
                    get_slice,
                    None,
                );
                assert_eq!(result, result2.0);
            }
        }
        result
    }
2319
    /// Stresses merkle-root computation at sizes around fanout-power
    /// boundaries (mark-1, mark, mark+1) and around the multi-level
    /// recursion threshold.
    #[test]
    fn test_accountsdb_compute_merkle_root_large() {
        solana_logger::setup();

        const FANOUT: usize = 3;
        let mut hash_counts: Vec<_> = (1..6)
            .flat_map(|x| {
                let mark = FANOUT.pow(x);
                vec![mark - 1, mark, mark + 1]
            })
            .collect();

        // Cover the sizes right around target * FANOUT where the
        // slice-based computation changes shape.
        let target = FANOUT.pow(3);
        let threshold = target * FANOUT;
        hash_counts.extend(threshold - 1..=threshold + target);

        for hash_count in hash_counts {
            let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();

            test_hashing(hashes, FANOUT);
        }
    }
2345
    /// Pins merkle roots for inputs of size 0..=9 against a golden table,
    /// in two passes: fanout 2 (via `test_hashing_larger`) and the real
    /// `MERKLE_FANOUT` (via `accumulate_account_hashes`, cross-checked
    /// against a sorted `compute_merkle_root`).
    #[test]
    fn test_accountsdb_compute_merkle_root() {
        solana_logger::setup();

        // (pass, count, expected root, unused-in-comparison column)
        let expected_results = vec![
            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
        ];

        let mut expected_index = 0;
        let start = 0;
        let default_fanout = 2;
        let iterations = default_fanout * default_fanout * default_fanout + 2;
        for pass in 0..2 {
            let fanout = if pass == 0 {
                default_fanout
            } else {
                MERKLE_FANOUT
            };
            for count in start..iterations {
                // Deterministic input: key/hash bytes derived from pass/count/i.
                let mut input: Vec<_> = (0..count)
                    .map(|i| {
                        let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
                        let hash =
                            Hash::new_from_array([(pass * iterations + count + i + 1) as u8; 32]);
                        (key, hash)
                    })
                    .collect();

                let result = if pass == 0 {
                    test_hashing_larger(input, fanout)
                } else {
                    // `accumulate_account_hashes` must match a manual
                    // sort + `compute_merkle_root`.
                    let early_result = AccountsHasher::accumulate_account_hashes(
                        input
                            .iter()
                            .map(|i| (i.0, AccountHash(i.1)))
                            .collect::<Vec<_>>(),
                    );

                    input.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
                    let result = AccountsHasher::compute_merkle_root(input, fanout);
                    assert_eq!(early_result, result);
                    result
                };
                assert_eq!(
                    (
                        pass,
                        count,
                        &*(result.to_string()),
                        expected_results[expected_index].3
                    ), expected_results[expected_index]
                );
                expected_index += 1;
            }
        }
    }
2424
2425 #[test]
2426 #[should_panic(expected = "summing lamports cannot overflow")]
2427 fn test_accountsdb_lamport_overflow() {
2428 solana_logger::setup();
2429
2430 let offset = 2;
2431 let input = vec![
2432 CalculateHashIntermediate {
2433 hash: AccountHash(Hash::new_from_array([1u8; 32])),
2434 lamports: u64::MAX - offset,
2435 pubkey: Pubkey::new_unique(),
2436 },
2437 CalculateHashIntermediate {
2438 hash: AccountHash(Hash::new_from_array([2u8; 32])),
2439 lamports: offset + 1,
2440 pubkey: Pubkey::new_unique(),
2441 },
2442 ];
2443 let dir_for_temp_cache_files = tempdir().unwrap();
2444 let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2445 accounts_hasher.de_dup_accounts_in_parallel(
2446 &convert_to_slice(&[input]),
2447 0,
2448 1,
2449 &HashStats::default(),
2450 );
2451 }
2452
2453 fn convert_to_slice(
2454 input: &[Vec<CalculateHashIntermediate>],
2455 ) -> Vec<&[CalculateHashIntermediate]> {
2456 input.iter().map(|v| &v[..]).collect::<Vec<_>>()
2457 }
2458
2459 #[test]
2460 #[should_panic(expected = "summing lamports cannot overflow")]
2461 fn test_accountsdb_lamport_overflow2() {
2462 solana_logger::setup();
2463
2464 let offset = 2;
2465 let input = vec![
2466 vec![CalculateHashIntermediate {
2467 hash: AccountHash(Hash::new_from_array([1u8; 32])),
2468 lamports: u64::MAX - offset,
2469 pubkey: Pubkey::new_unique(),
2470 }],
2471 vec![CalculateHashIntermediate {
2472 hash: AccountHash(Hash::new_from_array([2u8; 32])),
2473 lamports: offset + 1,
2474 pubkey: Pubkey::new_unique(),
2475 }],
2476 ];
2477 let dir_for_temp_cache_files = tempdir().unwrap();
2478 let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
2479 accounts_hasher.de_dup_accounts(
2480 &convert_to_slice(&input),
2481 &mut HashStats::default(),
2482 2, );
2484 }
2485
2486 #[test]
2487 fn test_get_data() {
2488 let temp_dir = tempdir().unwrap();
2490
2491 const MAX_BUFFER_SIZE_IN_BYTES: usize = 128;
2492 let extra_size = 64;
2493
2494 let file_path = temp_dir.path().join("test_file");
2496 let mut file = File::create(&file_path).unwrap();
2497 let test_data: Vec<u8> = (0..(MAX_BUFFER_SIZE_IN_BYTES + extra_size) as u8).collect(); file.write_all(&test_data).unwrap();
2499 file.seek(SeekFrom::Start(0)).unwrap();
2500 drop(file);
2501
2502 let test_data: &[Hash] = bytemuck::cast_slice(&test_data);
2503
2504 let file = File::open(&file_path).unwrap();
2506 let reader = BufReader::new(file);
2507 let readers = vec![Mutex::new(reader)];
2508
2509 let cumulative_offsets = CumulativeOffsets {
2511 cumulative_offsets: vec![CumulativeOffset {
2512 index: [0, 0],
2513 start_offset: 0,
2514 }],
2515 total_count: test_data.len(),
2516 };
2517
2518 let cumulative_hashes = CumulativeHashesFromFiles {
2520 readers,
2521 cumulative: cumulative_offsets,
2522 };
2523
2524 let start_index = 0;
2527 let result = cumulative_hashes.get_data(start_index);
2528 assert_eq!(
2529 result.len(),
2530 MAX_BUFFER_SIZE_IN_BYTES / std::mem::size_of::<Hash>()
2531 );
2532 assert_eq!(&test_data[..result.len()], &result[..]);
2533
2534 let start_index = result.len();
2536 let result = cumulative_hashes.get_data(start_index);
2537 assert_eq!(result.len(), extra_size / std::mem::size_of::<Hash>());
2538 assert_eq!(
2539 &test_data[start_index..start_index + result.len()],
2540 &result[..]
2541 );
2542 }
2543}