1use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
19use crate::cast::AsArray;
20use crate::iterator::ArrayIter;
21use crate::types::*;
22use crate::{
23 make_array, Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar,
24 StringArray,
25};
26use arrow_buffer::bit_util::set_bit;
27use arrow_buffer::buffer::NullBuffer;
28use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
29use arrow_data::ArrayData;
30use arrow_schema::{ArrowError, DataType};
31use std::any::Any;
32use std::sync::Arc;
33
34pub type Int8DictionaryArray = DictionaryArray<Int8Type>;
49
50pub type Int16DictionaryArray = DictionaryArray<Int16Type>;
65
66pub type Int32DictionaryArray = DictionaryArray<Int32Type>;
81
82pub type Int64DictionaryArray = DictionaryArray<Int64Type>;
97
98pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;
113
114pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;
129
130pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;
145
146pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
161
162pub struct DictionaryArray<K: ArrowDictionaryKeyType> {
244 data_type: DataType,
245
246 keys: PrimitiveArray<K>,
251
252 values: ArrayRef,
254
255 is_ordered: bool,
257}
258
259impl<K: ArrowDictionaryKeyType> Clone for DictionaryArray<K> {
260 fn clone(&self) -> Self {
261 Self {
262 data_type: self.data_type.clone(),
263 keys: self.keys.clone(),
264 values: self.values.clone(),
265 is_ordered: self.is_ordered,
266 }
267 }
268}
269
270impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
271 pub fn new(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
279 Self::try_new(keys, values).unwrap()
280 }
281
282 pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
290 let data_type = DataType::Dictionary(
291 Box::new(keys.data_type().clone()),
292 Box::new(values.data_type().clone()),
293 );
294
295 let zero = K::Native::usize_as(0);
296 let values_len = values.len();
297
298 if let Some((idx, v)) =
299 keys.values().iter().enumerate().find(|(idx, v)| {
300 (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
301 })
302 {
303 return Err(ArrowError::InvalidArgumentError(format!(
304 "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
305 )));
306 }
307
308 Ok(Self {
309 data_type,
310 keys,
311 values,
312 is_ordered: false,
313 })
314 }
315
316 pub fn new_scalar<T: Array + 'static>(value: Scalar<T>) -> Scalar<Self> {
318 Scalar::new(Self::new(
319 PrimitiveArray::new(vec![K::Native::usize_as(0)].into(), None),
320 Arc::new(value.into_inner()),
321 ))
322 }
323
324 pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
330 let data_type = DataType::Dictionary(
331 Box::new(keys.data_type().clone()),
332 Box::new(values.data_type().clone()),
333 );
334
335 Self {
336 data_type,
337 keys,
338 values,
339 is_ordered: false,
340 }
341 }
342
343 pub fn into_parts(self) -> (PrimitiveArray<K>, ArrayRef) {
345 (self.keys, self.values)
346 }
347
348 pub fn keys(&self) -> &PrimitiveArray<K> {
350 &self.keys
351 }
352
353 pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
359 let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
360
361 (0..rd_buf.len())
362 .position(|i| rd_buf.value(i) == value)
363 .and_then(K::Native::from_usize)
364 }
365
366 pub fn values(&self) -> &ArrayRef {
368 &self.values
369 }
370
371 pub fn value_type(&self) -> DataType {
373 self.values.data_type().clone()
374 }
375
376 pub fn len(&self) -> usize {
378 self.keys.len()
379 }
380
381 pub fn is_empty(&self) -> bool {
383 self.keys.is_empty()
384 }
385
386 pub fn is_ordered(&self) -> bool {
388 self.is_ordered
389 }
390
391 pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
393 self.keys.iter().map(|key| key.map(|k| k.as_usize()))
394 }
395
396 pub fn key(&self, i: usize) -> Option<usize> {
399 self.keys.is_valid(i).then(|| self.keys.value(i).as_usize())
400 }
401
402 pub fn slice(&self, offset: usize, length: usize) -> Self {
404 Self {
405 data_type: self.data_type.clone(),
406 keys: self.keys.slice(offset, length),
407 values: self.values.clone(),
408 is_ordered: self.is_ordered,
409 }
410 }
411
412 pub fn downcast_dict<V: 'static>(&self) -> Option<TypedDictionaryArray<'_, K, V>> {
426 let values = self.values.as_any().downcast_ref()?;
427 Some(TypedDictionaryArray {
428 dictionary: self,
429 values,
430 })
431 }
432
433 pub fn with_values(&self, values: ArrayRef) -> Self {
471 assert!(values.len() >= self.values.len());
472 let data_type =
473 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
474 Self {
475 data_type,
476 keys: self.keys.clone(),
477 values,
478 is_ordered: false,
479 }
480 }
481
482 pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
485 where
486 V: ArrowPrimitiveType,
487 {
488 if !self.value_type().is_primitive() {
489 return Err(self);
490 }
491
492 let key_array = self.keys().clone();
493 let value_array = self.values().as_primitive::<V>().clone();
494
495 drop(self.keys);
496 drop(self.values);
497
498 let key_builder = key_array.into_builder();
499 let value_builder = value_array.into_builder();
500
501 match (key_builder, value_builder) {
502 (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
503 PrimitiveDictionaryBuilder::new_from_builders(key_builder, value_builder)
504 }),
505 (Err(key_array), Ok(mut value_builder)) => {
506 Err(Self::try_new(key_array, Arc::new(value_builder.finish())).unwrap())
507 }
508 (Ok(mut key_builder), Err(value_array)) => {
509 Err(Self::try_new(key_builder.finish(), Arc::new(value_array)).unwrap())
510 }
511 (Err(key_array), Err(value_array)) => {
512 Err(Self::try_new(key_array, Arc::new(value_array)).unwrap())
513 }
514 }
515 }
516
517 pub fn unary_mut<F, V>(self, op: F) -> Result<DictionaryArray<K>, DictionaryArray<K>>
541 where
542 V: ArrowPrimitiveType,
543 F: Fn(V::Native) -> V::Native,
544 {
545 let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
546 builder
547 .values_slice_mut()
548 .iter_mut()
549 .for_each(|v| *v = op(*v));
550 Ok(builder.finish())
551 }
552
553 pub fn occupancy(&self) -> BooleanBuffer {
558 let len = self.values.len();
559 let mut builder = BooleanBufferBuilder::new(len);
560 builder.resize(len);
561 let slice = builder.as_slice_mut();
562 match self.keys.nulls().filter(|n| n.null_count() > 0) {
563 Some(n) => {
564 let v = self.keys.values();
565 n.valid_indices()
566 .for_each(|idx| set_bit(slice, v[idx].as_usize()))
567 }
568 None => {
569 let v = self.keys.values();
570 v.iter().for_each(|v| set_bit(slice, v.as_usize()))
571 }
572 }
573 builder.finish()
574 }
575}
576
577impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
579 fn from(data: ArrayData) -> Self {
580 assert_eq!(
581 data.buffers().len(),
582 1,
583 "DictionaryArray data should contain a single buffer only (keys)."
584 );
585 assert_eq!(
586 data.child_data().len(),
587 1,
588 "DictionaryArray should contain a single child array (values)."
589 );
590
591 if let DataType::Dictionary(key_data_type, _) = data.data_type() {
592 assert_eq!(
593 &T::DATA_TYPE,
594 key_data_type.as_ref(),
595 "DictionaryArray's data type must match, expected {} got {}",
596 T::DATA_TYPE,
597 key_data_type
598 );
599
600 let values = make_array(data.child_data()[0].clone());
601 let data_type = data.data_type().clone();
602
603 let keys = PrimitiveArray::<T>::from(unsafe {
608 data.into_builder()
609 .data_type(T::DATA_TYPE)
610 .child_data(vec![])
611 .build_unchecked()
612 });
613
614 Self {
615 data_type,
616 keys,
617 values,
618 is_ordered: false,
619 }
620 } else {
621 panic!("DictionaryArray must have Dictionary data type.")
622 }
623 }
624}
625
626impl<T: ArrowDictionaryKeyType> From<DictionaryArray<T>> for ArrayData {
627 fn from(array: DictionaryArray<T>) -> Self {
628 let builder = array
629 .keys
630 .into_data()
631 .into_builder()
632 .data_type(array.data_type)
633 .child_data(vec![array.values.to_data()]);
634
635 unsafe { builder.build_unchecked() }
636 }
637}
638
639impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for DictionaryArray<T> {
656 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
657 let it = iter.into_iter();
658 let (lower, _) = it.size_hint();
659 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
660 builder.extend(it);
661 builder.finish()
662 }
663}
664
665impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T> {
680 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
681 let it = iter.into_iter();
682 let (lower, _) = it.size_hint();
683 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
684 it.for_each(|i| {
685 builder
686 .append(i)
687 .expect("Unable to append a value to a dictionary array.");
688 });
689
690 builder.finish()
691 }
692}
693
694impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
695 fn as_any(&self) -> &dyn Any {
696 self
697 }
698
699 fn to_data(&self) -> ArrayData {
700 self.clone().into()
701 }
702
703 fn into_data(self) -> ArrayData {
704 self.into()
705 }
706
707 fn data_type(&self) -> &DataType {
708 &self.data_type
709 }
710
711 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
712 Arc::new(self.slice(offset, length))
713 }
714
715 fn len(&self) -> usize {
716 self.keys.len()
717 }
718
719 fn is_empty(&self) -> bool {
720 self.keys.is_empty()
721 }
722
723 fn shrink_to_fit(&mut self) {
724 self.keys.shrink_to_fit();
725 self.values.shrink_to_fit();
726 }
727
728 fn offset(&self) -> usize {
729 self.keys.offset()
730 }
731
732 fn nulls(&self) -> Option<&NullBuffer> {
733 self.keys.nulls()
734 }
735
736 fn logical_nulls(&self) -> Option<NullBuffer> {
737 match self.values.logical_nulls() {
738 None => self.nulls().cloned(),
739 Some(value_nulls) => {
740 let mut builder = BooleanBufferBuilder::new(self.len());
741 match self.keys.nulls() {
742 Some(n) => builder.append_buffer(n.inner()),
743 None => builder.append_n(self.len(), true),
744 }
745 for (idx, k) in self.keys.values().iter().enumerate() {
746 let k = k.as_usize();
747 if k < value_nulls.len() && value_nulls.is_null(k) {
749 builder.set_bit(idx, false);
750 }
751 }
752 Some(builder.finish().into())
753 }
754 }
755 }
756
757 fn logical_null_count(&self) -> usize {
758 match (self.keys.nulls(), self.values.logical_nulls()) {
759 (None, None) => 0,
760 (Some(key_nulls), None) => key_nulls.null_count(),
761 (None, Some(value_nulls)) => self
762 .keys
763 .values()
764 .iter()
765 .filter(|k| value_nulls.is_null(k.as_usize()))
766 .count(),
767 (Some(key_nulls), Some(value_nulls)) => self
768 .keys
769 .values()
770 .iter()
771 .enumerate()
772 .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize()))
773 .count(),
774 }
775 }
776
777 fn is_nullable(&self) -> bool {
778 !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable())
779 }
780
781 fn get_buffer_memory_size(&self) -> usize {
782 self.keys.get_buffer_memory_size() + self.values.get_buffer_memory_size()
783 }
784
785 fn get_array_memory_size(&self) -> usize {
786 std::mem::size_of::<Self>()
787 + self.keys.get_buffer_memory_size()
788 + self.values.get_array_memory_size()
789 }
790}
791
792impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
793 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
794 writeln!(
795 f,
796 "DictionaryArray {{keys: {:?} values: {:?}}}",
797 self.keys, self.values
798 )
799 }
800}
801
802pub struct TypedDictionaryArray<'a, K: ArrowDictionaryKeyType, V> {
820 dictionary: &'a DictionaryArray<K>,
822 values: &'a V,
824}
825
826impl<K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'_, K, V> {
828 fn clone(&self) -> Self {
829 *self
830 }
831}
832
833impl<K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'_, K, V> {}
834
835impl<K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'_, K, V> {
836 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
837 writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
838 }
839}
840
841impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
842 pub fn keys(&self) -> &'a PrimitiveArray<K> {
844 self.dictionary.keys()
845 }
846
847 pub fn values(&self) -> &'a V {
849 self.values
850 }
851}
852
853impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
854 fn as_any(&self) -> &dyn Any {
855 self.dictionary
856 }
857
858 fn to_data(&self) -> ArrayData {
859 self.dictionary.to_data()
860 }
861
862 fn into_data(self) -> ArrayData {
863 self.dictionary.into_data()
864 }
865
866 fn data_type(&self) -> &DataType {
867 self.dictionary.data_type()
868 }
869
870 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
871 Arc::new(self.dictionary.slice(offset, length))
872 }
873
874 fn len(&self) -> usize {
875 self.dictionary.len()
876 }
877
878 fn is_empty(&self) -> bool {
879 self.dictionary.is_empty()
880 }
881
882 fn offset(&self) -> usize {
883 self.dictionary.offset()
884 }
885
886 fn nulls(&self) -> Option<&NullBuffer> {
887 self.dictionary.nulls()
888 }
889
890 fn logical_nulls(&self) -> Option<NullBuffer> {
891 self.dictionary.logical_nulls()
892 }
893
894 fn logical_null_count(&self) -> usize {
895 self.dictionary.logical_null_count()
896 }
897
898 fn is_nullable(&self) -> bool {
899 self.dictionary.is_nullable()
900 }
901
902 fn get_buffer_memory_size(&self) -> usize {
903 self.dictionary.get_buffer_memory_size()
904 }
905
906 fn get_array_memory_size(&self) -> usize {
907 self.dictionary.get_array_memory_size()
908 }
909}
910
911impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
912where
913 K: ArrowDictionaryKeyType,
914 Self: ArrayAccessor,
915{
916 type Item = Option<<Self as ArrayAccessor>::Item>;
917 type IntoIter = ArrayIter<Self>;
918
919 fn into_iter(self) -> Self::IntoIter {
920 ArrayIter::new(self)
921 }
922}
923
924impl<'a, K, V> ArrayAccessor for TypedDictionaryArray<'a, K, V>
925where
926 K: ArrowDictionaryKeyType,
927 V: Sync + Send,
928 &'a V: ArrayAccessor,
929 <&'a V as ArrayAccessor>::Item: Default,
930{
931 type Item = <&'a V as ArrayAccessor>::Item;
932
933 fn value(&self, index: usize) -> Self::Item {
934 assert!(
935 index < self.len(),
936 "Trying to access an element at index {} from a TypedDictionaryArray of length {}",
937 index,
938 self.len()
939 );
940 unsafe { self.value_unchecked(index) }
941 }
942
943 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
944 let val = self.dictionary.keys.value_unchecked(index);
945 let value_idx = val.as_usize();
946
947 match value_idx < self.values.len() {
950 true => self.values.value_unchecked(value_idx),
951 false => Default::default(),
952 }
953 }
954}
955
956pub trait AnyDictionaryArray: Array {
1001 fn keys(&self) -> &dyn Array;
1003
1004 fn values(&self) -> &ArrayRef;
1006
1007 fn normalized_keys(&self) -> Vec<usize>;
1016
1017 fn with_values(&self, values: ArrayRef) -> ArrayRef;
1021}
1022
1023impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
1024 fn keys(&self) -> &dyn Array {
1025 &self.keys
1026 }
1027
1028 fn values(&self) -> &ArrayRef {
1029 self.values()
1030 }
1031
1032 fn normalized_keys(&self) -> Vec<usize> {
1033 let v_len = self.values().len();
1034 assert_ne!(v_len, 0);
1035 let iter = self.keys().values().iter();
1036 iter.map(|x| x.as_usize().min(v_len - 1)).collect()
1037 }
1038
1039 fn with_values(&self, values: ArrayRef) -> ArrayRef {
1040 Arc::new(self.with_values(values))
1041 }
1042}
1043
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::as_dictionary_array;
    use crate::{Int16Array, Int32Array, Int8Array, RunArray};
    use arrow_buffer::{Buffer, ToByteSlice};

    /// Builds dictionary arrays from raw `ArrayData`, with and without an
    /// offset, and checks keys, values and lengths.
    #[test]
    fn test_dictionary_array() {
        // Dictionary values: eight i8 entries.
        let value_buffer = Buffer::from([10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice());
        let value_data = ArrayData::builder(DataType::Int8)
            .len(8)
            .add_buffer(value_buffer)
            .build()
            .unwrap();

        // Keys pointing at positions 2, 3 and 4 of the values.
        let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

        let dict_data_type =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int8));
        let full_data = ArrayData::builder(dict_data_type.clone())
            .len(3)
            .add_buffer(keys.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let full_array = Int16DictionaryArray::from(full_data);

        assert_eq!(value_data, full_array.values().to_data());
        assert_eq!(DataType::Int8, full_array.value_type());
        assert_eq!(3, full_array.len());

        // Neither the keys nor the values contain nulls.
        assert_eq!(0, full_array.null_count());
        assert_eq!(0, full_array.values().null_count());
        assert_eq!(full_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));

        // Same buffers, but skipping the first key through an offset.
        let offset_data = ArrayData::builder(dict_data_type)
            .len(2)
            .offset(1)
            .add_buffer(keys)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let offset_array = Int16DictionaryArray::from(offset_data);

        assert_eq!(value_data, offset_array.values().to_data());
        assert_eq!(DataType::Int8, offset_array.value_type());
        assert_eq!(2, offset_array.len());
        assert_eq!(offset_array.keys(), &Int16Array::from(vec![3_i16, 4]));
    }

    /// Repeated appends to a primitive dictionary builder reuse keys.
    #[test]
    fn test_dictionary_builder_append_many() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();

        builder.append(1).unwrap();
        builder.append_n(2, 2).unwrap();
        builder.append_options(None, 2);
        builder.append_options(Some(3), 3);

        let array = builder.finish();

        let dictionary_values: Vec<_> = array
            .values()
            .as_primitive::<UInt32Type>()
            .iter()
            .map(Option::unwrap)
            .collect();
        assert_eq!(dictionary_values, &[1, 2, 3]);

        let keys: Vec<_> = array.keys().iter().collect();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Repeated appends to a string dictionary builder reuse keys.
    #[test]
    fn test_string_dictionary_builder_append_many() {
        let mut builder = StringDictionaryBuilder::<Int8Type>::new();

        builder.append("a").unwrap();
        builder.append_n("b", 2).unwrap();
        builder.append_options(None::<&str>, 2);
        builder.append_options(Some("c"), 3);

        let array = builder.finish();

        let dictionary_values: Vec<_> = array
            .values()
            .as_string::<i32>()
            .iter()
            .map(Option::unwrap)
            .collect();
        assert_eq!(dictionary_values, &["a", "b", "c"]);

        let keys: Vec<_> = array.keys().iter().collect();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Checks the exact `Debug` output for a short and a longer array.
    #[test]
    fn test_dictionary_array_fmt_debug() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
        builder.append(12345678).unwrap();
        builder.append_null();
        builder.append(22345678).unwrap();
        let short_array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
            format!("{short_array:?}")
        );

        // Twenty copies of the same value share a single dictionary entry.
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
        for _ in 0..20 {
            builder.append(1).unwrap();
        }
        let repeated_array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{repeated_array:?}")
        );
    }

    /// Collects from iterators of `Option<&str>` and of `&str`.
    #[test]
    fn test_dictionary_array_from_iter() {
        let input = vec!["a", "a", "b", "c"];

        // Replace "b" with a null before collecting.
        let with_null: DictionaryArray<Int8Type> = input
            .iter()
            .map(|&item| if item == "b" { None } else { Some(item) })
            .collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
            format!("{with_null:?}")
        );

        let without_null: DictionaryArray<Int8Type> = input.into_iter().collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{without_null:?}")
        );
    }

    /// `lookup_key` returns the key of an existing value, or `None`.
    #[test]
    fn test_dictionary_array_reverse_lookup_key() {
        let input = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = input.into_iter().collect();

        assert_eq!(array.lookup_key("c"), Some(2));

        // Keys are assigned in order of first appearance: t3, t2, t1, t4, t0.
        let input = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
        let array: DictionaryArray<Int8Type> = input.into_iter().collect();

        assert_eq!(array.lookup_key("t1"), Some(2));
        assert_eq!(array.lookup_key("non-existent"), None);
    }

    /// Keys of a null-free dictionary are exposed as a primitive array.
    #[test]
    fn test_dictionary_keys_as_primitive_array() {
        let input = vec!["a", "b", "c", "a"];
        let array: DictionaryArray<Int8Type> = input.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int8, keys.data_type());
        assert_eq!(0, keys.null_count());
        assert_eq!(&[0, 1, 2, 0], keys.values());
    }

    /// Null inputs become null keys.
    #[test]
    fn test_dictionary_keys_as_primitive_array_with_null() {
        let input = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = input.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int32, keys.data_type());
        assert_eq!(3, keys.null_count());

        let expected_validity = [true, false, true, false, false, true];
        for (idx, expected) in expected_validity.iter().enumerate() {
            assert_eq!(keys.is_valid(idx), *expected);
        }

        // Only the valid slots have a meaningful key.
        assert_eq!(0, keys.value(0));
        assert_eq!(1, keys.value(2));
        assert_eq!(0, keys.value(5));
    }

    /// An all-null dictionary still produces valid `ArrayData`.
    #[test]
    fn test_dictionary_all_nulls() {
        let input = vec![None, None, None];
        let array: DictionaryArray<Int32Type> = input.into_iter().collect();
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// `keys_iter` yields the keys and can drive `take_iter` on the values.
    #[test]
    fn test_dictionary_iter() {
        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
        let keys = Int16Array::from_iter_values([2_i16, 3, 4]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let mut key_iter = dict_array.keys_iter();
        for expected_key in [2, 3, 4] {
            assert_eq!(expected_key, key_iter.next().unwrap().unwrap());
        }
        assert!(key_iter.next().is_none());

        let mut value_iter = dict_array
            .values()
            .as_any()
            .downcast_ref::<Int8Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter());

        for expected_value in [12, 13, 14] {
            assert_eq!(expected_value, value_iter.next().unwrap().unwrap());
        }
        assert!(value_iter.next().is_none());
    }

    /// `take_iter` driven by `keys_iter` yields `None` for null keys.
    #[test]
    fn test_dictionary_iter_with_null() {
        let input = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = input.into_iter().collect();

        let mut value_iter = array
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .take_iter(array.keys_iter());

        assert_eq!("a", value_iter.next().unwrap().unwrap());
        assert!(value_iter.next().unwrap().is_none());
        assert_eq!("b", value_iter.next().unwrap().unwrap());
        assert!(value_iter.next().unwrap().is_none());
        assert!(value_iter.next().unwrap().is_none());
        assert_eq!("a", value_iter.next().unwrap().unwrap());
        assert!(value_iter.next().is_none());
    }

    /// `key` returns the key as `usize`, or `None` for a null slot.
    #[test]
    fn test_dictionary_key() {
        let keys = Int8Array::from(vec![Some(2), None, Some(1)]);
        let values = StringArray::from(vec!["foo", "bar", "baz", "blarg"]);

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.key(0), Some(2));
        assert_eq!(array.key(1), None);
        assert_eq!(array.key(2), Some(1));
    }

    /// A valid keys/values pair constructs successfully.
    #[test]
    fn test_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let keys: Int32Array = [Some(0), Some(2), None, Some(1)].into_iter().collect();

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.keys().data_type(), &DataType::Int32);
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);

        let array_keys = array.keys();
        assert!(array_keys.is_valid(0));
        assert!(array_keys.is_valid(1));
        assert!(array_keys.is_null(2));
        assert!(array_keys.is_valid(3));

        assert_eq!(array_keys.value(0), 0);
        assert_eq!(array_keys.value(1), 2);
        assert_eq!(array_keys.value(3), 1);

        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int32>\n[\n 0,\n 2,\n null,\n 1,\n] values: StringArray\n[\n \"foo\",\n \"bar\",\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// A key beyond the end of the values is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
    fn test_try_new_index_too_large() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        // Key 3 does not exist in a two-entry dictionary.
        let keys: Int32Array = [Some(0), Some(3)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// A negative key is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
    fn test_try_new_index_too_small() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(-100)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// Converting `ArrayData` with the wrong key type panics.
    #[test]
    #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
    fn test_from_array_data_validation() {
        let int32_keyed = DictionaryArray::<Int32Type>::from_iter(["32"]);
        let _ = DictionaryArray::<Int64Type>::from(int32_keyed.into_data());
    }

    /// With no other owner of the buffers, the array converts into a builder
    /// whose values can be edited in place.
    #[test]
    fn test_into_primitive_dict_builder() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);
        let col: DictionaryArray<Int8Type> = as_dictionary_array(&boxed).clone();

        // Drop the other handle before converting `col`.
        drop(boxed);

        let mut builder = col.into_primitive_dict_builder::<Int32Type>().unwrap();

        let slice = builder.values_slice_mut();
        assert_eq!(slice, &[10, 12, 15]);

        slice[0] = 4;
        slice[1] = 2;
        slice[2] = 1;

        let expected_values = Int32Array::from_iter_values([4_i32, 2, 1]);
        let expected_keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
        let expected = DictionaryArray::new(expected_keys, Arc::new(expected_values));

        let new_array = builder.finish();
        assert_eq!(expected, new_array);
    }

    /// While `boxed` is still alive, the conversion fails and hands back an
    /// equal array.
    #[test]
    fn test_into_primitive_dict_builder_cloned_array() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);

        let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
        let result = col.into_primitive_dict_builder::<Int32Type>();

        let returned = result.unwrap_err();

        let expected_values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let expected_keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
        let expected = DictionaryArray::new(expected_keys, Arc::new(expected_values));
        assert_eq!(expected, returned);
    }

    /// `occupancy` marks exactly the values referenced by valid keys.
    #[test]
    fn test_occupancy() {
        // No nulls: values 100..200 are referenced.
        let keys = Int32Array::new((100..200).collect(), None);
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, occupied) in dict.occupancy().iter().enumerate() {
            let expected = (100..200).contains(&idx);
            assert_eq!(occupied, expected, "{idx}");
        }

        // Only every fourth key is valid.
        let keys = Int32Array::new(
            (0..100).collect(),
            Some((0..100).map(|x| x % 4 == 0).collect()),
        );
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, occupied) in dict.occupancy().iter().enumerate() {
            let expected = idx % 4 == 0 && idx < 100;
            assert_eq!(occupied, expected, "{idx}");
        }
    }

    /// Iterating a typed dictionary yields `None` for null keys and for keys
    /// that point at null values.
    #[test]
    fn test_iterator_nulls() {
        // The null slot holds key 700, which is out of range for the values.
        let keys = Int32Array::new(
            vec![0, 700, 1, 2].into(),
            Some(NullBuffer::from(vec![true, false, true, true])),
        );
        let values = Int32Array::from(vec![Some(50), None, Some(2)]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        let collected: Vec<_> = dict
            .downcast_dict::<Int32Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(collected, &[Some(50), None, None, Some(2)])
    }

    /// Logical nulls combine key nulls with logical nulls of the values.
    #[test]
    fn test_logical_nulls() -> Result<(), ArrowError> {
        let values = Arc::new(RunArray::try_new(
            &Int32Array::from(vec![1, 3, 7]),
            &Int32Array::from(vec![Some(1), None, Some(3)]),
        )?) as ArrayRef;

        // The run array reports no physical nulls but two logical nulls.
        assert_eq!(values.null_count(), 0);
        assert_eq!(values.logical_null_count(), 2);

        // One key per value, none of them null.
        let all_valid_keys = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
            Arc::clone(&values),
        )?;

        assert_eq!(all_valid_keys.null_count(), 0);
        assert_eq!(
            all_valid_keys.logical_null_count(),
            values.logical_null_count()
        );
        assert_eq!(all_valid_keys.logical_nulls(), values.logical_nulls());
        assert!(all_valid_keys.is_nullable());

        // Same keys, except that the first one is null.
        let first_key_null = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from(
                (0..values.len())
                    .map(|i| i as i8)
                    .map(|i| if i == 0 { None } else { Some(i) })
                    .collect::<Vec<_>>(),
            ),
            Arc::clone(&values),
        )?;

        assert_eq!(first_key_null.null_count(), 1);

        assert_eq!(
            first_key_null.logical_null_count(),
            values.logical_null_count() + 1
        );
        assert!(first_key_null.is_nullable());

        Ok(())
    }

    /// `normalized_keys` clamps an out-of-range key stored in a null slot.
    #[test]
    fn test_normalized_keys() {
        let raw_keys = vec![132, 0, 1].into();
        let nulls = NullBuffer::from(vec![false, true, true]);
        let keys = Int32Array::new(raw_keys, Some(nulls));
        let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2)));
        assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1])
    }
}