1#[cfg(not(feature = "force_hash_collisions"))]
21use std::sync::Arc;
22
23use ahash::RandomState;
24use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
25use arrow::array::*;
26use arrow::datatypes::*;
27#[cfg(not(feature = "force_hash_collisions"))]
28use arrow::{downcast_dictionary_array, downcast_primitive_array};
29
30#[cfg(not(feature = "force_hash_collisions"))]
31use crate::cast::{
32 as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
33 as_generic_binary_array, as_large_list_array, as_list_array, as_map_array,
34 as_string_array, as_string_view_array, as_struct_array,
35};
36use crate::error::Result;
37#[cfg(not(feature = "force_hash_collisions"))]
38use crate::error::_internal_err;
39
40#[inline]
42pub fn combine_hashes(l: u64, r: u64) -> u64 {
43 let hash = (17 * 37u64).wrapping_add(l);
44 hash.wrapping_mul(37).wrapping_add(r)
45}
46
47#[cfg(not(feature = "force_hash_collisions"))]
48fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) {
49 if mul_col {
50 hashes_buffer.iter_mut().for_each(|hash| {
51 *hash = combine_hashes(random_state.hash_one(1), *hash);
53 })
54 } else {
55 hashes_buffer.iter_mut().for_each(|hash| {
56 *hash = random_state.hash_one(1);
57 })
58 }
59}
60
61pub trait HashValue {
62 fn hash_one(&self, state: &RandomState) -> u64;
63}
64
65impl<T: HashValue + ?Sized> HashValue for &T {
66 fn hash_one(&self, state: &RandomState) -> u64 {
67 T::hash_one(self, state)
68 }
69}
70
71macro_rules! hash_value {
72 ($($t:ty),+) => {
73 $(impl HashValue for $t {
74 fn hash_one(&self, state: &RandomState) -> u64 {
75 state.hash_one(self)
76 }
77 })+
78 };
79}
80hash_value!(i8, i16, i32, i64, i128, i256, u8, u16, u32, u64);
81hash_value!(bool, str, [u8], IntervalDayTime, IntervalMonthDayNano);
82
83macro_rules! hash_float_value {
84 ($(($t:ty, $i:ty)),+) => {
85 $(impl HashValue for $t {
86 fn hash_one(&self, state: &RandomState) -> u64 {
87 state.hash_one(<$i>::from_ne_bytes(self.to_ne_bytes()))
88 }
89 })+
90 };
91}
92hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
93
94#[cfg(not(feature = "force_hash_collisions"))]
98fn hash_array_primitive<T>(
99 array: &PrimitiveArray<T>,
100 random_state: &RandomState,
101 hashes_buffer: &mut [u64],
102 rehash: bool,
103) where
104 T: ArrowPrimitiveType<Native: HashValue>,
105{
106 assert_eq!(
107 hashes_buffer.len(),
108 array.len(),
109 "hashes_buffer and array should be of equal length"
110 );
111
112 if array.null_count() == 0 {
113 if rehash {
114 for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
115 *hash = combine_hashes(value.hash_one(random_state), *hash);
116 }
117 } else {
118 for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
119 *hash = value.hash_one(random_state);
120 }
121 }
122 } else if rehash {
123 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
124 if !array.is_null(i) {
125 let value = unsafe { array.value_unchecked(i) };
126 *hash = combine_hashes(value.hash_one(random_state), *hash);
127 }
128 }
129 } else {
130 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
131 if !array.is_null(i) {
132 let value = unsafe { array.value_unchecked(i) };
133 *hash = value.hash_one(random_state);
134 }
135 }
136 }
137}
138
139#[cfg(not(feature = "force_hash_collisions"))]
143fn hash_array<T>(
144 array: T,
145 random_state: &RandomState,
146 hashes_buffer: &mut [u64],
147 rehash: bool,
148) where
149 T: ArrayAccessor,
150 T::Item: HashValue,
151{
152 assert_eq!(
153 hashes_buffer.len(),
154 array.len(),
155 "hashes_buffer and array should be of equal length"
156 );
157
158 if array.null_count() == 0 {
159 if rehash {
160 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
161 let value = unsafe { array.value_unchecked(i) };
162 *hash = combine_hashes(value.hash_one(random_state), *hash);
163 }
164 } else {
165 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
166 let value = unsafe { array.value_unchecked(i) };
167 *hash = value.hash_one(random_state);
168 }
169 }
170 } else if rehash {
171 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
172 if !array.is_null(i) {
173 let value = unsafe { array.value_unchecked(i) };
174 *hash = combine_hashes(value.hash_one(random_state), *hash);
175 }
176 }
177 } else {
178 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
179 if !array.is_null(i) {
180 let value = unsafe { array.value_unchecked(i) };
181 *hash = value.hash_one(random_state);
182 }
183 }
184 }
185}
186
187#[cfg(not(feature = "force_hash_collisions"))]
189fn hash_dictionary<K: ArrowDictionaryKeyType>(
190 array: &DictionaryArray<K>,
191 random_state: &RandomState,
192 hashes_buffer: &mut [u64],
193 multi_col: bool,
194) -> Result<()> {
195 let values = Arc::clone(array.values());
199 let mut dict_hashes = vec![0; values.len()];
200 create_hashes(&[values], random_state, &mut dict_hashes)?;
201
202 if multi_col {
204 for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) {
205 if let Some(key) = key {
206 *hash = combine_hashes(dict_hashes[key.as_usize()], *hash)
207 } }
209 } else {
210 for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) {
211 if let Some(key) = key {
212 *hash = dict_hashes[key.as_usize()]
213 } }
215 }
216 Ok(())
217}
218
219#[cfg(not(feature = "force_hash_collisions"))]
220fn hash_struct_array(
221 array: &StructArray,
222 random_state: &RandomState,
223 hashes_buffer: &mut [u64],
224) -> Result<()> {
225 let nulls = array.nulls();
226 let row_len = array.len();
227
228 let valid_row_indices: Vec<usize> = if let Some(nulls) = nulls {
229 nulls.valid_indices().collect()
230 } else {
231 (0..row_len).collect()
232 };
233
234 let mut values_hashes = vec![0u64; row_len];
236 create_hashes(array.columns(), random_state, &mut values_hashes)?;
237
238 for i in valid_row_indices {
239 let hash = &mut hashes_buffer[i];
240 *hash = combine_hashes(*hash, values_hashes[i]);
241 }
242
243 Ok(())
244}
245
246#[cfg(not(feature = "force_hash_collisions"))]
248fn hash_map_array(
249 array: &MapArray,
250 random_state: &RandomState,
251 hashes_buffer: &mut [u64],
252) -> Result<()> {
253 let nulls = array.nulls();
254 let offsets = array.offsets();
255
256 let mut values_hashes = vec![0u64; array.entries().len()];
258 create_hashes(array.entries().columns(), random_state, &mut values_hashes)?;
259
260 if let Some(nulls) = nulls {
262 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
263 if nulls.is_valid(i) {
264 let hash = &mut hashes_buffer[i];
265 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
266 *hash = combine_hashes(*hash, *values_hash);
267 }
268 }
269 }
270 } else {
271 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
272 let hash = &mut hashes_buffer[i];
273 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
274 *hash = combine_hashes(*hash, *values_hash);
275 }
276 }
277 }
278
279 Ok(())
280}
281
282#[cfg(not(feature = "force_hash_collisions"))]
283fn hash_list_array<OffsetSize>(
284 array: &GenericListArray<OffsetSize>,
285 random_state: &RandomState,
286 hashes_buffer: &mut [u64],
287) -> Result<()>
288where
289 OffsetSize: OffsetSizeTrait,
290{
291 let values = Arc::clone(array.values());
292 let offsets = array.value_offsets();
293 let nulls = array.nulls();
294 let mut values_hashes = vec![0u64; values.len()];
295 create_hashes(&[values], random_state, &mut values_hashes)?;
296 if let Some(nulls) = nulls {
297 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
298 if nulls.is_valid(i) {
299 let hash = &mut hashes_buffer[i];
300 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
301 *hash = combine_hashes(*hash, *values_hash);
302 }
303 }
304 }
305 } else {
306 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
307 let hash = &mut hashes_buffer[i];
308 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
309 *hash = combine_hashes(*hash, *values_hash);
310 }
311 }
312 }
313 Ok(())
314}
315
316#[cfg(not(feature = "force_hash_collisions"))]
317fn hash_fixed_list_array(
318 array: &FixedSizeListArray,
319 random_state: &RandomState,
320 hashes_buffer: &mut [u64],
321) -> Result<()> {
322 let values = Arc::clone(array.values());
323 let value_length = array.value_length() as usize;
324 let nulls = array.nulls();
325 let mut values_hashes = vec![0u64; values.len()];
326 create_hashes(&[values], random_state, &mut values_hashes)?;
327 if let Some(nulls) = nulls {
328 for i in 0..array.len() {
329 if nulls.is_valid(i) {
330 let hash = &mut hashes_buffer[i];
331 for values_hash in
332 &values_hashes[i * value_length..(i + 1) * value_length]
333 {
334 *hash = combine_hashes(*hash, *values_hash);
335 }
336 }
337 }
338 } else {
339 for i in 0..array.len() {
340 let hash = &mut hashes_buffer[i];
341 for values_hash in &values_hashes[i * value_length..(i + 1) * value_length] {
342 *hash = combine_hashes(*hash, *values_hash);
343 }
344 }
345 }
346 Ok(())
347}
348
349#[cfg(feature = "force_hash_collisions")]
354pub fn create_hashes<'a>(
355 _arrays: &[ArrayRef],
356 _random_state: &RandomState,
357 hashes_buffer: &'a mut Vec<u64>,
358) -> Result<&'a mut Vec<u64>> {
359 for hash in hashes_buffer.iter_mut() {
360 *hash = 0
361 }
362 Ok(hashes_buffer)
363}
364
365#[cfg(not(feature = "force_hash_collisions"))]
371pub fn create_hashes<'a>(
372 arrays: &[ArrayRef],
373 random_state: &RandomState,
374 hashes_buffer: &'a mut Vec<u64>,
375) -> Result<&'a mut Vec<u64>> {
376 for (i, col) in arrays.iter().enumerate() {
377 let array = col.as_ref();
378 let rehash = i >= 1;
380 downcast_primitive_array! {
381 array => hash_array_primitive(array, random_state, hashes_buffer, rehash),
382 DataType::Null => hash_null(random_state, hashes_buffer, rehash),
383 DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash),
384 DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash),
385 DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash),
386 DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
387 DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
388 DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
389 DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
390 DataType::FixedSizeBinary(_) => {
391 let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap();
392 hash_array(array, random_state, hashes_buffer, rehash)
393 }
394 DataType::Dictionary(_, _) => downcast_dictionary_array! {
395 array => hash_dictionary(array, random_state, hashes_buffer, rehash)?,
396 _ => unreachable!()
397 }
398 DataType::Struct(_) => {
399 let array = as_struct_array(array)?;
400 hash_struct_array(array, random_state, hashes_buffer)?;
401 }
402 DataType::List(_) => {
403 let array = as_list_array(array)?;
404 hash_list_array(array, random_state, hashes_buffer)?;
405 }
406 DataType::LargeList(_) => {
407 let array = as_large_list_array(array)?;
408 hash_list_array(array, random_state, hashes_buffer)?;
409 }
410 DataType::Map(_, _) => {
411 let array = as_map_array(array)?;
412 hash_map_array(array, random_state, hashes_buffer)?;
413 }
414 DataType::FixedSizeList(_,_) => {
415 let array = as_fixed_size_list_array(array)?;
416 hash_fixed_list_array(array, random_state, hashes_buffer)?;
417 }
418 _ => {
419 return _internal_err!(
421 "Unsupported data type in hasher: {}",
422 col.data_type()
423 );
424 }
425 }
426 }
427 Ok(hashes_buffer)
428}
429
430#[cfg(test)]
431mod tests {
432 use std::sync::Arc;
433
434 use arrow::array::*;
435 #[cfg(not(feature = "force_hash_collisions"))]
436 use arrow::datatypes::*;
437
438 use super::*;
439
440 #[test]
441 fn create_hashes_for_decimal_array() -> Result<()> {
442 let array = vec![1, 2, 3, 4]
443 .into_iter()
444 .map(Some)
445 .collect::<Decimal128Array>()
446 .with_precision_and_scale(20, 3)
447 .unwrap();
448 let array_ref = Arc::new(array);
449 let random_state = RandomState::with_seeds(0, 0, 0, 0);
450 let hashes_buff = &mut vec![0; array_ref.len()];
451 let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?;
452 assert_eq!(hashes.len(), 4);
453 Ok(())
454 }
455
456 #[test]
457 fn create_hashes_for_empty_fixed_size_lit() -> Result<()> {
458 let empty_array = FixedSizeListBuilder::new(StringBuilder::new(), 1).finish();
459 let random_state = RandomState::with_seeds(0, 0, 0, 0);
460 let hashes_buff = &mut vec![0; 0];
461 let hashes = create_hashes(&[Arc::new(empty_array)], &random_state, hashes_buff)?;
462 assert_eq!(hashes, &Vec::<u64>::new());
463 Ok(())
464 }
465
466 #[test]
467 fn create_hashes_for_float_arrays() -> Result<()> {
468 let f32_arr = Arc::new(Float32Array::from(vec![0.12, 0.5, 1f32, 444.7]));
469 let f64_arr = Arc::new(Float64Array::from(vec![0.12, 0.5, 1f64, 444.7]));
470
471 let random_state = RandomState::with_seeds(0, 0, 0, 0);
472 let hashes_buff = &mut vec![0; f32_arr.len()];
473 let hashes = create_hashes(&[f32_arr], &random_state, hashes_buff)?;
474 assert_eq!(hashes.len(), 4,);
475
476 let hashes = create_hashes(&[f64_arr], &random_state, hashes_buff)?;
477 assert_eq!(hashes.len(), 4,);
478
479 Ok(())
480 }
481
482 macro_rules! create_hash_binary {
483 ($NAME:ident, $ARRAY:ty) => {
484 #[cfg(not(feature = "force_hash_collisions"))]
485 #[test]
486 fn $NAME() {
487 let binary = [
488 Some(b"short".to_byte_slice()),
489 None,
490 Some(b"long but different 12 bytes string"),
491 Some(b"short2"),
492 Some(b"Longer than 12 bytes string"),
493 Some(b"short"),
494 Some(b"Longer than 12 bytes string"),
495 ];
496
497 let binary_array = Arc::new(binary.iter().cloned().collect::<$ARRAY>());
498 let ref_array = Arc::new(binary.iter().cloned().collect::<BinaryArray>());
499
500 let random_state = RandomState::with_seeds(0, 0, 0, 0);
501
502 let mut binary_hashes = vec![0; binary.len()];
503 create_hashes(&[binary_array], &random_state, &mut binary_hashes)
504 .unwrap();
505
506 let mut ref_hashes = vec![0; binary.len()];
507 create_hashes(&[ref_array], &random_state, &mut ref_hashes).unwrap();
508
509 for (val, hash) in binary.iter().zip(binary_hashes.iter()) {
511 match val {
512 Some(_) => assert_ne!(*hash, 0),
513 None => assert_eq!(*hash, 0),
514 }
515 }
516
517 assert_eq!(binary_hashes, ref_hashes);
519
520 assert_eq!(binary[0], binary[5]);
522 assert_eq!(binary[4], binary[6]);
523
524 assert_ne!(binary[0], binary[2]);
526 }
527 };
528 }
529
530 create_hash_binary!(binary_array, BinaryArray);
531 create_hash_binary!(binary_view_array, BinaryViewArray);
532
533 #[test]
534 fn create_hashes_fixed_size_binary() -> Result<()> {
535 let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]];
536 let fixed_size_binary_array =
537 Arc::new(FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap());
538
539 let random_state = RandomState::with_seeds(0, 0, 0, 0);
540 let hashes_buff = &mut vec![0; fixed_size_binary_array.len()];
541 let hashes =
542 create_hashes(&[fixed_size_binary_array], &random_state, hashes_buff)?;
543 assert_eq!(hashes.len(), 3,);
544
545 Ok(())
546 }
547
548 macro_rules! create_hash_string {
549 ($NAME:ident, $ARRAY:ty) => {
550 #[cfg(not(feature = "force_hash_collisions"))]
551 #[test]
552 fn $NAME() {
553 let strings = [
554 Some("short"),
555 None,
556 Some("long but different 12 bytes string"),
557 Some("short2"),
558 Some("Longer than 12 bytes string"),
559 Some("short"),
560 Some("Longer than 12 bytes string"),
561 ];
562
563 let string_array = Arc::new(strings.iter().cloned().collect::<$ARRAY>());
564 let dict_array = Arc::new(
565 strings
566 .iter()
567 .cloned()
568 .collect::<DictionaryArray<Int8Type>>(),
569 );
570
571 let random_state = RandomState::with_seeds(0, 0, 0, 0);
572
573 let mut string_hashes = vec![0; strings.len()];
574 create_hashes(&[string_array], &random_state, &mut string_hashes)
575 .unwrap();
576
577 let mut dict_hashes = vec![0; strings.len()];
578 create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap();
579
580 for (val, hash) in strings.iter().zip(string_hashes.iter()) {
582 match val {
583 Some(_) => assert_ne!(*hash, 0),
584 None => assert_eq!(*hash, 0),
585 }
586 }
587
588 assert_eq!(string_hashes, dict_hashes);
590
591 assert_eq!(strings[0], strings[5]);
593 assert_eq!(strings[4], strings[6]);
594
595 assert_ne!(strings[0], strings[2]);
597 }
598 };
599 }
600
601 create_hash_string!(string_array, StringArray);
602 create_hash_string!(large_string_array, LargeStringArray);
603 create_hash_string!(string_view_array, StringArray);
604 create_hash_string!(dict_string_array, DictionaryArray<Int8Type>);
605
606 #[test]
607 #[cfg(not(feature = "force_hash_collisions"))]
609 fn create_hashes_for_dict_arrays() {
610 let strings = [Some("foo"), None, Some("bar"), Some("foo"), None];
611
612 let string_array = Arc::new(strings.iter().cloned().collect::<StringArray>());
613 let dict_array = Arc::new(
614 strings
615 .iter()
616 .cloned()
617 .collect::<DictionaryArray<Int8Type>>(),
618 );
619
620 let random_state = RandomState::with_seeds(0, 0, 0, 0);
621
622 let mut string_hashes = vec![0; strings.len()];
623 create_hashes(&[string_array], &random_state, &mut string_hashes).unwrap();
624
625 let mut dict_hashes = vec![0; strings.len()];
626 create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap();
627
628 for (val, hash) in strings.iter().zip(string_hashes.iter()) {
630 match val {
631 Some(_) => assert_ne!(*hash, 0),
632 None => assert_eq!(*hash, 0),
633 }
634 }
635
636 assert_eq!(string_hashes, dict_hashes);
638
639 assert_eq!(strings[1], strings[4]);
641 assert_eq!(dict_hashes[1], dict_hashes[4]);
642 assert_eq!(strings[0], strings[3]);
643 assert_eq!(dict_hashes[0], dict_hashes[3]);
644
645 assert_ne!(strings[0], strings[2]);
647 assert_ne!(dict_hashes[0], dict_hashes[2]);
648 }
649
650 #[test]
651 #[cfg(not(feature = "force_hash_collisions"))]
653 fn create_hashes_for_list_arrays() {
654 let data = vec![
655 Some(vec![Some(0), Some(1), Some(2)]),
656 None,
657 Some(vec![Some(3), None, Some(5)]),
658 Some(vec![Some(3), None, Some(5)]),
659 None,
660 Some(vec![Some(0), Some(1), Some(2)]),
661 Some(vec![]),
662 ];
663 let list_array =
664 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(data)) as ArrayRef;
665 let random_state = RandomState::with_seeds(0, 0, 0, 0);
666 let mut hashes = vec![0; list_array.len()];
667 create_hashes(&[list_array], &random_state, &mut hashes).unwrap();
668 assert_eq!(hashes[0], hashes[5]);
669 assert_eq!(hashes[1], hashes[4]);
670 assert_eq!(hashes[2], hashes[3]);
671 assert_eq!(hashes[1], hashes[6]); }
673
674 #[test]
675 #[cfg(not(feature = "force_hash_collisions"))]
677 fn create_hashes_for_fixed_size_list_arrays() {
678 let data = vec![
679 Some(vec![Some(0), Some(1), Some(2)]),
680 None,
681 Some(vec![Some(3), None, Some(5)]),
682 Some(vec![Some(3), None, Some(5)]),
683 None,
684 Some(vec![Some(0), Some(1), Some(2)]),
685 ];
686 let list_array =
687 Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
688 data, 3,
689 )) as ArrayRef;
690 let random_state = RandomState::with_seeds(0, 0, 0, 0);
691 let mut hashes = vec![0; list_array.len()];
692 create_hashes(&[list_array], &random_state, &mut hashes).unwrap();
693 assert_eq!(hashes[0], hashes[5]);
694 assert_eq!(hashes[1], hashes[4]);
695 assert_eq!(hashes[2], hashes[3]);
696 }
697
698 #[test]
699 #[cfg(not(feature = "force_hash_collisions"))]
701 fn create_hashes_for_struct_arrays() {
702 use arrow::buffer::Buffer;
703
704 let boolarr = Arc::new(BooleanArray::from(vec![
705 false, false, true, true, true, true,
706 ]));
707 let i32arr = Arc::new(Int32Array::from(vec![10, 10, 20, 20, 30, 31]));
708
709 let struct_array = StructArray::from((
710 vec![
711 (
712 Arc::new(Field::new("bool", DataType::Boolean, false)),
713 Arc::clone(&boolarr) as ArrayRef,
714 ),
715 (
716 Arc::new(Field::new("i32", DataType::Int32, false)),
717 Arc::clone(&i32arr) as ArrayRef,
718 ),
719 (
720 Arc::new(Field::new("i32", DataType::Int32, false)),
721 Arc::clone(&i32arr) as ArrayRef,
722 ),
723 (
724 Arc::new(Field::new("bool", DataType::Boolean, false)),
725 Arc::clone(&boolarr) as ArrayRef,
726 ),
727 ],
728 Buffer::from(&[0b001011]),
729 ));
730
731 assert!(struct_array.is_valid(0));
732 assert!(struct_array.is_valid(1));
733 assert!(struct_array.is_null(2));
734 assert!(struct_array.is_valid(3));
735 assert!(struct_array.is_null(4));
736 assert!(struct_array.is_null(5));
737
738 let array = Arc::new(struct_array) as ArrayRef;
739
740 let random_state = RandomState::with_seeds(0, 0, 0, 0);
741 let mut hashes = vec![0; array.len()];
742 create_hashes(&[array], &random_state, &mut hashes).unwrap();
743 assert_eq!(hashes[0], hashes[1]);
744 assert_ne!(hashes[2], hashes[3]);
746 assert_eq!(hashes[4], hashes[5]);
748 }
749
750 #[test]
751 #[cfg(not(feature = "force_hash_collisions"))]
753 fn create_hashes_for_struct_arrays_more_column_than_row() {
754 let struct_array = StructArray::from(vec![
755 (
756 Arc::new(Field::new("bool", DataType::Boolean, false)),
757 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
758 ),
759 (
760 Arc::new(Field::new("i32-1", DataType::Int32, false)),
761 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
762 ),
763 (
764 Arc::new(Field::new("i32-2", DataType::Int32, false)),
765 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
766 ),
767 (
768 Arc::new(Field::new("i32-3", DataType::Int32, false)),
769 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
770 ),
771 ]);
772
773 assert!(struct_array.is_valid(0));
774 assert!(struct_array.is_valid(1));
775
776 let array = Arc::new(struct_array) as ArrayRef;
777 let random_state = RandomState::with_seeds(0, 0, 0, 0);
778 let mut hashes = vec![0; array.len()];
779 create_hashes(&[array], &random_state, &mut hashes).unwrap();
780 assert_eq!(hashes[0], hashes[1]);
781 }
782
783 #[test]
784 #[cfg(not(feature = "force_hash_collisions"))]
786 fn create_hashes_for_map_arrays() {
787 let mut builder =
788 MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
789 builder.keys().append_value("key1");
791 builder.keys().append_value("key2");
792 builder.values().append_value(1);
793 builder.values().append_value(2);
794 builder.append(true).unwrap();
795 builder.keys().append_value("key1");
797 builder.keys().append_value("key2");
798 builder.values().append_value(1);
799 builder.values().append_value(2);
800 builder.append(true).unwrap();
801 builder.keys().append_value("key1");
803 builder.keys().append_value("key2");
804 builder.values().append_value(1);
805 builder.values().append_value(3);
806 builder.append(true).unwrap();
807 builder.keys().append_value("key1");
809 builder.keys().append_value("key3");
810 builder.values().append_value(1);
811 builder.values().append_value(2);
812 builder.append(true).unwrap();
813 builder.keys().append_value("key1");
815 builder.values().append_value(1);
816 builder.append(true).unwrap();
817 builder.keys().append_value("key1");
819 builder.values().append_null();
820 builder.append(true).unwrap();
821 builder.append(true).unwrap();
823 builder.keys().append_value("key1");
825 builder.values().append_value(1);
826 builder.append(false).unwrap();
827
828 let array = Arc::new(builder.finish()) as ArrayRef;
829
830 let random_state = RandomState::with_seeds(0, 0, 0, 0);
831 let mut hashes = vec![0; array.len()];
832 create_hashes(&[array], &random_state, &mut hashes).unwrap();
833 assert_eq!(hashes[0], hashes[1]); assert_ne!(hashes[0], hashes[2]); assert_ne!(hashes[0], hashes[3]); assert_ne!(hashes[0], hashes[4]); assert_ne!(hashes[4], hashes[5]); assert_eq!(hashes[6], hashes[7]); }
840
841 #[test]
842 #[cfg(not(feature = "force_hash_collisions"))]
844 fn create_multi_column_hash_for_dict_arrays() {
845 let strings1 = [Some("foo"), None, Some("bar")];
846 let strings2 = [Some("blarg"), Some("blah"), None];
847
848 let string_array = Arc::new(strings1.iter().cloned().collect::<StringArray>());
849 let dict_array = Arc::new(
850 strings2
851 .iter()
852 .cloned()
853 .collect::<DictionaryArray<Int32Type>>(),
854 );
855
856 let random_state = RandomState::with_seeds(0, 0, 0, 0);
857
858 let mut one_col_hashes = vec![0; strings1.len()];
859 create_hashes(
860 &[Arc::clone(&dict_array) as ArrayRef],
861 &random_state,
862 &mut one_col_hashes,
863 )
864 .unwrap();
865
866 let mut two_col_hashes = vec![0; strings1.len()];
867 create_hashes(
868 &[dict_array, string_array],
869 &random_state,
870 &mut two_col_hashes,
871 )
872 .unwrap();
873
874 assert_eq!(one_col_hashes.len(), 3);
875 assert_eq!(two_col_hashes.len(), 3);
876
877 assert_ne!(one_col_hashes, two_col_hashes);
878 }
879}