1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 builder::StringRunBuilder,
27 make_array,
28 run_iterator::RunArrayIter,
29 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
30 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
31};
32
/// An array of run-end encoded values.
///
/// The array is stored as a buffer of run ends plus a child array of values.
/// As implemented by `get_physical_indices` in this file, every logical index
/// that is smaller than a run end (relative to the array offset) and not
/// smaller than the previous run end maps to that run's physical index in
/// `values`.
pub struct RunArray<R: RunEndIndexType> {
    /// The array's data type. `From<ArrayData>` in this file panics unless it
    /// is `DataType::RunEndEncoded`.
    data_type: DataType,
    /// The run ends together with the logical offset and length of this array.
    run_ends: RunEndBuffer<R::Native>,
    /// The child array holding the value of each run.
    values: ArrayRef,
}
68
69impl<R: RunEndIndexType> Clone for RunArray<R> {
70 fn clone(&self) -> Self {
71 Self {
72 data_type: self.data_type.clone(),
73 run_ends: self.run_ends.clone(),
74 values: self.values.clone(),
75 }
76 }
77}
78
79impl<R: RunEndIndexType> RunArray<R> {
80 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
83 let len = run_ends.len();
84 if len == 0 {
85 return 0;
86 }
87 run_ends.value(len - 1).as_usize()
88 }
89
90 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
94 let run_ends_type = run_ends.data_type().clone();
95 let values_type = values.data_type().clone();
96 let ree_array_type = DataType::RunEndEncoded(
97 Arc::new(Field::new("run_ends", run_ends_type, false)),
98 Arc::new(Field::new("values", values_type, true)),
99 );
100 let len = RunArray::logical_len(run_ends);
101 let builder = ArrayDataBuilder::new(ree_array_type)
102 .len(len)
103 .add_child_data(run_ends.to_data())
104 .add_child_data(values.to_data());
105
106 let array_data = unsafe { builder.build_unchecked() };
108
109 array_data.validate_data()?;
116
117 Ok(array_data.into())
118 }
119
120 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
122 &self.run_ends
123 }
124
125 pub fn values(&self) -> &ArrayRef {
130 &self.values
131 }
132
133 pub fn get_start_physical_index(&self) -> usize {
135 self.run_ends.get_start_physical_index()
136 }
137
138 pub fn get_end_physical_index(&self) -> usize {
140 self.run_ends.get_end_physical_index()
141 }
142
143 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
157 let values = self.values.as_any().downcast_ref()?;
158 Some(TypedRunArray {
159 run_array: self,
160 values,
161 })
162 }
163
164 pub fn get_physical_index(&self, logical_index: usize) -> usize {
170 self.run_ends.get_physical_index(logical_index)
171 }
172
173 #[inline]
181 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
182 where
183 I: ArrowNativeType,
184 {
185 let len = self.run_ends().len();
186 let offset = self.run_ends().offset();
187
188 let indices_len = logical_indices.len();
189
190 if indices_len == 0 {
191 return Ok(vec![]);
192 }
193
194 let mut ordered_indices: Vec<usize> = (0..indices_len).collect();
197
198 ordered_indices.sort_unstable_by(|lhs, rhs| {
201 logical_indices[*lhs]
202 .partial_cmp(&logical_indices[*rhs])
203 .unwrap()
204 });
205
206 let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
208 if largest_logical_index >= len {
209 return Err(ArrowError::InvalidArgumentError(format!(
210 "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
211 )));
212 }
213
214 let skip_value = self.get_start_physical_index();
216
217 let mut physical_indices = vec![0; indices_len];
218
219 let mut ordered_index = 0_usize;
220 for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
221 {
222 let run_end_value = run_end.as_usize() - offset;
224
225 while ordered_index < indices_len
228 && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
229 {
230 physical_indices[ordered_indices[ordered_index]] = physical_index;
231 ordered_index += 1;
232 }
233 }
234
235 if ordered_index < logical_indices.len() {
238 let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
239 return Err(ArrowError::InvalidArgumentError(format!(
240 "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
241 )));
242 }
243 Ok(physical_indices)
244 }
245
246 pub fn slice(&self, offset: usize, length: usize) -> Self {
248 Self {
249 data_type: self.data_type.clone(),
250 run_ends: self.run_ends.slice(offset, length),
251 values: self.values.clone(),
252 }
253 }
254}
255
256impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
257 fn from(data: ArrayData) -> Self {
259 match data.data_type() {
260 DataType::RunEndEncoded(_, _) => {}
261 _ => {
262 panic!("Invalid data type for RunArray. The data type should be DataType::RunEndEncoded");
263 }
264 }
265
266 let child = &data.child_data()[0];
269 assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
270 let run_ends = unsafe {
271 let scalar = child.buffers()[0].clone().into();
272 RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
273 };
274
275 let values = make_array(data.child_data()[1].clone());
276 Self {
277 data_type: data.data_type().clone(),
278 run_ends,
279 values,
280 }
281 }
282}
283
284impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
285 fn from(array: RunArray<R>) -> Self {
286 let len = array.run_ends.len();
287 let offset = array.run_ends.offset();
288
289 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
290 .len(array.run_ends.values().len())
291 .buffers(vec![array.run_ends.into_inner().into_inner()]);
292
293 let run_ends = unsafe { run_ends.build_unchecked() };
294
295 let builder = ArrayDataBuilder::new(array.data_type)
296 .len(len)
297 .offset(offset)
298 .child_data(vec![run_ends, array.values.to_data()]);
299
300 unsafe { builder.build_unchecked() }
301 }
302}
303
304impl<T: RunEndIndexType> Array for RunArray<T> {
305 fn as_any(&self) -> &dyn Any {
306 self
307 }
308
309 fn to_data(&self) -> ArrayData {
310 self.clone().into()
311 }
312
313 fn into_data(self) -> ArrayData {
314 self.into()
315 }
316
317 fn data_type(&self) -> &DataType {
318 &self.data_type
319 }
320
321 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
322 Arc::new(self.slice(offset, length))
323 }
324
325 fn len(&self) -> usize {
326 self.run_ends.len()
327 }
328
329 fn is_empty(&self) -> bool {
330 self.run_ends.is_empty()
331 }
332
333 fn shrink_to_fit(&mut self) {
334 self.run_ends.shrink_to_fit();
335 self.values.shrink_to_fit();
336 }
337
338 fn offset(&self) -> usize {
339 self.run_ends.offset()
340 }
341
342 fn nulls(&self) -> Option<&NullBuffer> {
343 None
344 }
345
346 fn logical_nulls(&self) -> Option<NullBuffer> {
347 let len = self.len();
348 let nulls = self.values.logical_nulls()?;
349 let mut out = BooleanBufferBuilder::new(len);
350 let offset = self.run_ends.offset();
351 let mut valid_start = 0;
352 let mut last_end = 0;
353 for (idx, end) in self.run_ends.values().iter().enumerate() {
354 let end = end.as_usize();
355 if end < offset {
356 continue;
357 }
358 let end = (end - offset).min(len);
359 if nulls.is_null(idx) {
360 if valid_start < last_end {
361 out.append_n(last_end - valid_start, true);
362 }
363 out.append_n(end - last_end, false);
364 valid_start = end;
365 }
366 last_end = end;
367 if end == len {
368 break;
369 }
370 }
371 if valid_start < len {
372 out.append_n(len - valid_start, true)
373 }
374 assert_eq!(out.len(), len);
376 Some(out.finish().into())
377 }
378
379 fn is_nullable(&self) -> bool {
380 !self.is_empty() && self.values.is_nullable()
381 }
382
383 fn get_buffer_memory_size(&self) -> usize {
384 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
385 }
386
387 fn get_array_memory_size(&self) -> usize {
388 std::mem::size_of::<Self>()
389 + self.run_ends.inner().inner().capacity()
390 + self.values.get_array_memory_size()
391 }
392}
393
impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
    /// Writes `RunArray {run_ends: .., values: ..}` followed by a newline,
    /// using the `Debug` output of the run-end values and of the values child.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(
            f,
            "RunArray {{run_ends: {:?}, values: {:?}}}",
            self.run_ends.values(),
            self.values
        )
    }
}
404
405impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
422 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
423 let it = iter.into_iter();
424 let (lower, _) = it.size_hint();
425 let mut builder = StringRunBuilder::with_capacity(lower, 256);
426 it.for_each(|i| {
427 builder.append_option(i);
428 });
429
430 builder.finish()
431 }
432}
433
434impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
449 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
450 let it = iter.into_iter();
451 let (lower, _) = it.size_hint();
452 let mut builder = StringRunBuilder::with_capacity(lower, 256);
453 it.for_each(|i| {
454 builder.append_value(i);
455 });
456
457 builder.finish()
458 }
459}
460
/// A [`RunArray`] whose run ends use [`Int16Type`].
pub type Int16RunArray = RunArray<Int16Type>;

/// A [`RunArray`] whose run ends use [`Int32Type`].
pub type Int32RunArray = RunArray<Int32Type>;

/// A [`RunArray`] whose run ends use [`Int64Type`].
pub type Int64RunArray = RunArray<Int64Type>;
505
/// A [`RunArray`] paired with its values child downcast to the concrete type `V`.
///
/// Instances are created by [`RunArray::downcast`].
pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
    /// The run array being viewed.
    run_array: &'a RunArray<R>,

    /// The values child of `run_array`, downcast to `V`.
    values: &'a V,
}
530
// Implemented by hand so that no `V: Clone` bound is required: the struct only
// holds references.
impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
    /// Returns a bitwise copy of `self` (the type is `Copy`).
    fn clone(&self) -> Self {
        *self
    }
}
537
// `TypedRunArray` only holds two shared references, so it is cheap to copy.
impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
539
impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
    /// Writes `TypedRunArray(..)` followed by a newline, where `..` is the
    /// `Debug` output of the wrapped run array.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "TypedRunArray({:?})", self.run_array)
    }
}
545
impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
    /// Returns the run-end buffer of the wrapped run array.
    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
        self.run_array.run_ends()
    }

    /// Returns the values child, already downcast to `V`.
    pub fn values(&self) -> &'a V {
        self.values
    }

    /// Returns the wrapped [`RunArray`].
    pub fn run_array(&self) -> &'a RunArray<R> {
        self.run_array
    }
}
562
// Every method forwards to the wrapped `RunArray`; the typed view adds no
// state of its own.
impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
    /// Returns the wrapped [`RunArray`] (not the `TypedRunArray`) as [`Any`],
    /// so downcasting yields `RunArray<R>`.
    fn as_any(&self) -> &dyn Any {
        self.run_array
    }

    /// Returns the [`ArrayData`] of the wrapped run array.
    fn to_data(&self) -> ArrayData {
        self.run_array.to_data()
    }

    /// Returns the [`ArrayData`] of the wrapped run array; the run array
    /// itself is only borrowed, so it is not consumed.
    fn into_data(self) -> ArrayData {
        self.run_array.into_data()
    }

    /// Returns the data type of the wrapped run array.
    fn data_type(&self) -> &DataType {
        self.run_array.data_type()
    }

    /// Slices the wrapped run array; the result is a plain `RunArray`, not a
    /// typed view.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.run_array.slice(offset, length))
    }

    /// Returns the logical length of the wrapped run array.
    fn len(&self) -> usize {
        self.run_array.len()
    }

    /// Returns whether the wrapped run array is empty.
    fn is_empty(&self) -> bool {
        self.run_array.is_empty()
    }

    /// Returns the logical offset of the wrapped run array.
    fn offset(&self) -> usize {
        self.run_array.offset()
    }

    /// Forwards to the wrapped run array's `nulls`.
    fn nulls(&self) -> Option<&NullBuffer> {
        self.run_array.nulls()
    }

    /// Forwards to the wrapped run array's `logical_nulls`.
    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.run_array.logical_nulls()
    }

    /// Forwards to the wrapped run array's `logical_null_count`.
    fn logical_null_count(&self) -> usize {
        self.run_array.logical_null_count()
    }

    /// Forwards to the wrapped run array's `is_nullable`.
    fn is_nullable(&self) -> bool {
        self.run_array.is_nullable()
    }

    /// Forwards to the wrapped run array's `get_buffer_memory_size`.
    fn get_buffer_memory_size(&self) -> usize {
        self.run_array.get_buffer_memory_size()
    }

    /// Forwards to the wrapped run array's `get_array_memory_size`.
    fn get_array_memory_size(&self) -> usize {
        self.run_array.get_array_memory_size()
    }
}
620
621impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
624where
625 R: RunEndIndexType,
626 V: Sync + Send,
627 &'a V: ArrayAccessor,
628 <&'a V as ArrayAccessor>::Item: Default,
629{
630 type Item = <&'a V as ArrayAccessor>::Item;
631
632 fn value(&self, logical_index: usize) -> Self::Item {
633 assert!(
634 logical_index < self.len(),
635 "Trying to access an element at index {} from a TypedRunArray of length {}",
636 logical_index,
637 self.len()
638 );
639 unsafe { self.value_unchecked(logical_index) }
640 }
641
642 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
643 let physical_index = self.run_array.get_physical_index(logical_index);
644 self.values().value_unchecked(physical_index)
645 }
646}
647
impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
where
    R: RunEndIndexType,
    V: Sync + Send,
    &'a V: ArrayAccessor,
    <&'a V as ArrayAccessor>::Item: Default,
{
    // Items are optional values of the typed values child.
    type Item = Option<<&'a V as ArrayAccessor>::Item>;
    type IntoIter = RunArrayIter<'a, R, V>;

    /// Creates a [`RunArrayIter`] over this typed run array.
    fn into_iter(self) -> Self::IntoIter {
        RunArrayIter::new(self)
    }
}
662
#[cfg(test)]
mod tests {
    use rand::seq::SliceRandom;
    use rand::thread_rng;
    use rand::Rng;

    use super::*;
    use crate::builder::PrimitiveRunBuilder;
    use crate::cast::AsArray;
    use crate::types::{Int8Type, UInt32Type};
    use crate::{Int32Array, StringArray};

    /// Builds a random logical array of exactly `size` elements made of runs
    /// of repeated values (including nulls), for use as test input.
    fn build_input_array(size: usize) -> Vec<Option<i32>> {
        // Distinct values to draw runs from; reshuffled on every full pass.
        let mut seed: Vec<Option<i32>> = vec![
            None,
            None,
            None,
            Some(1),
            Some(2),
            Some(3),
            Some(4),
            Some(5),
            Some(6),
            Some(7),
            Some(8),
            Some(9),
        ];
        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
        let mut ix = 0;
        let mut rng = thread_rng();
        // Runs are between 1 and 8 elements long, and at most half of `size`.
        let max_run_length = 8_usize.min(1_usize.max(size / 2));
        while result.len() < size {
            // Shuffle the seed array at the start of every pass over it.
            if ix == 0 {
                seed.shuffle(&mut rng);
            }
            // `gen_range` is already bounded by `max_run_length`.
            let num = rng.gen_range(1..=max_run_length);
            for _ in 0..num {
                result.push(seed[ix]);
            }
            ix += 1;
            if ix == seed.len() {
                ix = 0
            }
        }
        // The last run may overshoot; trim back to exactly `size` elements.
        result.resize(size, None);
        result
    }

    /// Asserts that each physical index points at the value (or null) that the
    /// corresponding logical index holds in `logical_array`.
    fn compare_logical_and_physical_indices(
        logical_indices: &[u32],
        logical_array: &[Option<i32>],
        physical_indices: &[usize],
        physical_array: &PrimitiveArray<Int32Type>,
    ) {
        assert_eq!(logical_indices.len(), physical_indices.len());

        logical_indices
            .iter()
            .map(|f| f.as_usize())
            .zip(physical_indices.iter())
            .for_each(|(logical_ix, physical_ix)| {
                let expected = logical_array[logical_ix];
                match expected {
                    Some(val) => {
                        assert!(physical_array.is_valid(*physical_ix));
                        let actual = physical_array.value(*physical_ix);
                        assert_eq!(val, actual);
                    }
                    None => {
                        assert!(physical_array.is_null(*physical_ix))
                    }
                };
            });
    }

    #[test]
    fn test_run_array() {
        // Values child.
        let value_data =
            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);

        // Run ends child.
        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
        let run_ends_data =
            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());

        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();

        // The logical length is the last run end.
        assert_eq!(ree_array.len(), 22);
        assert_eq!(ree_array.null_count(), 0);

        let values = ree_array.values();
        assert_eq!(value_data.into_data(), values.to_data());
        assert_eq!(&DataType::Int8, values.data_type());

        let run_ends = ree_array.run_ends();
        assert_eq!(run_ends.values(), &run_ends_values);
    }

    #[test]
    fn test_run_array_fmt_debug() {
        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
        builder.append_value(12345678);
        builder.append_null();
        builder.append_value(22345678);
        let array = builder.finish();
        // NOTE(review): the expected text must match the values child's `Debug`
        // output exactly, including its indentation.
        assert_eq!(
            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        // Twenty identical values collapse into a single run.
        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
        for _ in 0..20 {
            builder.append_value(1);
        }
        let array = builder.finish();

        assert_eq!(array.len(), 20);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 0);

        assert_eq!(
            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }

    #[test]
    fn test_run_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        // Replace "b" with a null to exercise the `Option<&str>` impl.
        let array: RunArray<Int16Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        assert_eq!(array.len(), 4);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 1);

        // The `&str` impl never produces nulls.
        let array: RunArray<Int16Type> = test.into_iter().collect();
        assert_eq!(
            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }

    #[test]
    fn test_run_array_run_ends_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: RunArray<Int16Type> = test.into_iter().collect();

        assert_eq!(array.len(), 4);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 0);

        let run_ends = array.run_ends();
        assert_eq!(&[1, 2, 3, 4], run_ends.values());
    }

    #[test]
    fn test_run_array_as_primitive_array_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: RunArray<Int32Type> = test.into_iter().collect();

        assert_eq!(array.len(), 6);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 3);

        // The two adjacent nulls form a single run.
        let run_ends = array.run_ends();
        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());

        let values_data = array.values();
        assert_eq!(2, values_data.null_count());
        assert_eq!(5, values_data.len());
    }

    #[test]
    fn test_run_array_all_nulls() {
        let test = vec![None, None, None];
        let array: RunArray<Int32Type> = test.into_iter().collect();

        assert_eq!(array.len(), 3);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 3);

        // One run of three nulls: logical length 3, a single run end.
        let run_ends = array.run_ends();
        assert_eq!(3, run_ends.len());
        assert_eq!(&[3], run_ends.values());

        let values_data = array.values();
        assert_eq!(1, values_data.null_count());
    }

    #[test]
    fn test_run_array_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();

        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 1);
        assert_eq!(array.len(), 4);
        assert_eq!(array.values().null_count(), 1);

        assert_eq!(
            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    #[test]
    fn test_run_array_int16_type_definition() {
        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
        assert_eq!(array.values(), &values);
    }

    #[test]
    fn test_run_array_empty_string() {
        // Empty strings are ordinary values and form their own run.
        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
        assert_eq!(array.values(), &values);
    }

    #[test]
    fn test_run_array_length_mismatch() {
        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    #[test]
    fn test_run_array_run_ends_with_null() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError(
            "Found null values in run_ends array. The run_ends array should not have null values."
                .to_string(),
        );
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    #[test]
    fn test_run_array_run_ends_with_zeroes() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    #[test]
    fn test_run_array_run_ends_non_increasing() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    #[test]
    #[should_panic(expected = "Incorrect run ends type")]
    fn test_run_array_run_ends_data_type_mismatch() {
        // Int32 run ends must not be reinterpreted as Int64 run ends.
        let a = RunArray::<Int32Type>::from_iter(["32"]);
        let _ = RunArray::<Int64Type>::from(a.into_data());
    }

    #[test]
    fn test_ree_array_accessor() {
        let input_array = build_input_array(256);

        // Encode the input array as a run array.
        let mut builder =
            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
        builder.extend(input_array.iter().copied());
        let run_array = builder.finish();
        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();

        // Access every logical index and compare with the input.
        for (i, inp_val) in input_array.iter().enumerate() {
            if let Some(val) = inp_val {
                let actual = typed.value(i);
                assert_eq!(*val, actual)
            } else {
                let physical_ix = run_array.get_physical_index(i);
                assert!(typed.values().is_null(physical_ix));
            };
        }
    }

    #[test]
    // NOTE(review): ignored under Miri, presumably because it is too slow — confirm.
    #[cfg_attr(miri, ignore)]
    fn test_get_physical_indices() {
        // Test for logical lengths starting from 0 up to 240.
        for logical_len in (0..250).step_by(10) {
            let input_array = build_input_array(logical_len);

            // Encode the input array as a run array.
            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
            builder.extend(input_array.iter().copied());

            let run_array = builder.finish();
            let physical_values_array = run_array.values().as_primitive::<Int32Type>();

            // Every logical index twice, in random order.
            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
            logical_indices.append(&mut logical_indices.clone());
            let mut rng = thread_rng();
            logical_indices.shuffle(&mut rng);

            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();

            assert_eq!(logical_indices.len(), physical_indices.len());

            // Check the values behind the logical and physical indices match.
            compare_logical_and_physical_indices(
                &logical_indices,
                &input_array,
                &physical_indices,
                physical_values_array,
            );
        }
    }

    #[test]
    // NOTE(review): ignored under Miri, presumably because it is too slow — confirm.
    #[cfg_attr(miri, ignore)]
    fn test_get_physical_indices_sliced() {
        let total_len = 80;
        let input_array = build_input_array(total_len);

        // Encode the input array as a run array.
        let mut builder =
            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
        builder.extend(input_array.iter().copied());
        let run_array = builder.finish();
        let physical_values_array = run_array.values().as_primitive::<Int32Type>();

        // Test every slice length from 1 to `total_len`.
        for slice_len in 1..=total_len {
            // Every logical index of the slice twice, in random order.
            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
            logical_indices.append(&mut logical_indices.clone());
            let mut rng = thread_rng();
            logical_indices.shuffle(&mut rng);

            // Slice taken from the start of the array; round-tripped through
            // `ArrayData` to exercise both conversions.
            let sliced_input_array = &input_array[0..slice_len];

            let sliced_run_array: RunArray<Int16Type> =
                run_array.slice(0, slice_len).into_data().into();

            let physical_indices = sliced_run_array
                .get_physical_indices(&logical_indices)
                .unwrap();

            compare_logical_and_physical_indices(
                &logical_indices,
                sliced_input_array,
                &physical_indices,
                physical_values_array,
            );

            // Slice taken from the end of the array.
            let sliced_input_array = &input_array[total_len - slice_len..total_len];

            let sliced_run_array: RunArray<Int16Type> = run_array
                .slice(total_len - slice_len, slice_len)
                .into_data()
                .into();

            let physical_indices = sliced_run_array
                .get_physical_indices(&logical_indices)
                .unwrap();

            compare_logical_and_physical_indices(
                &logical_indices,
                sliced_input_array,
                &physical_indices,
                physical_values_array,
            );
        }
    }

    #[test]
    fn test_logical_nulls() {
        // Four runs of three elements each; runs 1 and 3 are null.
        let run = Int32Array::from(vec![3, 6, 9, 12]);
        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
        let array = RunArray::try_new(&run, &values).unwrap();

        let expected = [
            true, true, true, false, false, false, true, true, true, false, false, false,
        ];

        let n = array.logical_nulls().unwrap();
        assert_eq!(n.null_count(), 6);

        // Each (offset, length) slice must expose the matching window of `expected`.
        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
        for (offset, length) in slices {
            let a = array.slice(offset, length);
            let n = a.logical_nulls().unwrap();
            let n = n.into_iter().collect::<Vec<_>>();
            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
        }
    }
}