1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22 ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41 const IS_LARGE: bool;
43 const PREFIX: &'static str;
45}
46
47impl OffsetSizeTrait for i32 {
48 const IS_LARGE: bool = false;
49 const PREFIX: &'static str = "";
50}
51
52impl OffsetSizeTrait for i64 {
53 const IS_LARGE: bool = true;
54 const PREFIX: &'static str = "Large";
55}
56
57pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
166 data_type: DataType,
167 nulls: Option<NullBuffer>,
168 values: ArrayRef,
169 value_offsets: OffsetBuffer<OffsetSize>,
170}
171
172impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
173 fn clone(&self) -> Self {
174 Self {
175 data_type: self.data_type.clone(),
176 nulls: self.nulls.clone(),
177 values: self.values.clone(),
178 value_offsets: self.value_offsets.clone(),
179 }
180 }
181}
182
183impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
184 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
188 DataType::LargeList
189 } else {
190 DataType::List
191 };
192
193 pub fn try_new(
204 field: FieldRef,
205 offsets: OffsetBuffer<OffsetSize>,
206 values: ArrayRef,
207 nulls: Option<NullBuffer>,
208 ) -> Result<Self, ArrowError> {
209 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
211 if end_offset > values.len() {
214 return Err(ArrowError::InvalidArgumentError(format!(
215 "Max offset of {end_offset} exceeds length of values {}",
216 values.len()
217 )));
218 }
219
220 if let Some(n) = nulls.as_ref() {
221 if n.len() != len {
222 return Err(ArrowError::InvalidArgumentError(format!(
223 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
224 OffsetSize::PREFIX,
225 n.len(),
226 )));
227 }
228 }
229 if !field.is_nullable() && values.is_nullable() {
230 return Err(ArrowError::InvalidArgumentError(format!(
231 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
232 OffsetSize::PREFIX,
233 field.name()
234 )));
235 }
236
237 if field.data_type() != values.data_type() {
238 return Err(ArrowError::InvalidArgumentError(format!(
239 "{}ListArray expected data type {} got {} for {:?}",
240 OffsetSize::PREFIX,
241 field.data_type(),
242 values.data_type(),
243 field.name()
244 )));
245 }
246
247 Ok(Self {
248 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
249 nulls,
250 values,
251 value_offsets: offsets,
252 })
253 }
254
255 pub fn new(
261 field: FieldRef,
262 offsets: OffsetBuffer<OffsetSize>,
263 values: ArrayRef,
264 nulls: Option<NullBuffer>,
265 ) -> Self {
266 Self::try_new(field, offsets, values, nulls).unwrap()
267 }
268
269 pub fn new_null(field: FieldRef, len: usize) -> Self {
271 let values = new_empty_array(field.data_type());
272 Self {
273 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
274 nulls: Some(NullBuffer::new_null(len)),
275 value_offsets: OffsetBuffer::new_zeroed(len),
276 values,
277 }
278 }
279
280 pub fn into_parts(
282 self,
283 ) -> (
284 FieldRef,
285 OffsetBuffer<OffsetSize>,
286 ArrayRef,
287 Option<NullBuffer>,
288 ) {
289 let f = match self.data_type {
290 DataType::List(f) | DataType::LargeList(f) => f,
291 _ => unreachable!(),
292 };
293 (f, self.value_offsets, self.values, self.nulls)
294 }
295
296 #[inline]
305 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
306 &self.value_offsets
307 }
308
309 #[inline]
316 pub fn values(&self) -> &ArrayRef {
317 &self.values
318 }
319
320 pub fn value_type(&self) -> DataType {
322 self.values.data_type().clone()
323 }
324
325 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
329 let end = self.value_offsets().get_unchecked(i + 1).as_usize();
330 let start = self.value_offsets().get_unchecked(i).as_usize();
331 self.values.slice(start, end - start)
332 }
333
334 pub fn value(&self, i: usize) -> ArrayRef {
336 let end = self.value_offsets()[i + 1].as_usize();
337 let start = self.value_offsets()[i].as_usize();
338 self.values.slice(start, end - start)
339 }
340
341 #[inline]
345 pub fn value_offsets(&self) -> &[OffsetSize] {
346 &self.value_offsets
347 }
348
349 #[inline]
351 pub fn value_length(&self, i: usize) -> OffsetSize {
352 let offsets = self.value_offsets();
353 offsets[i + 1] - offsets[i]
354 }
355
356 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
358 GenericListArrayIter::<'a, OffsetSize>::new(self)
359 }
360
361 #[inline]
362 fn get_type(data_type: &DataType) -> Option<&DataType> {
363 match (OffsetSize::IS_LARGE, data_type) {
364 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
365 Some(child.data_type())
366 }
367 _ => None,
368 }
369 }
370
371 pub fn slice(&self, offset: usize, length: usize) -> Self {
377 Self {
378 data_type: self.data_type.clone(),
379 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
380 values: self.values.clone(),
381 value_offsets: self.value_offsets.slice(offset, length),
382 }
383 }
384
385 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
401 where
402 T: ArrowPrimitiveType,
403 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
404 I: IntoIterator<Item = Option<P>>,
405 {
406 let iter = iter.into_iter();
407 let size_hint = iter.size_hint().0;
408 let mut builder =
409 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
410
411 for i in iter {
412 match i {
413 Some(p) => {
414 for t in p {
415 builder.values().append_option(t);
416 }
417 builder.append(true);
418 }
419 None => builder.append(false),
420 }
421 }
422 builder.finish()
423 }
424}
425
426impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
427 fn from(data: ArrayData) -> Self {
428 Self::try_new_from_array_data(data)
429 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
430 }
431}
432
433impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
434 fn from(array: GenericListArray<OffsetSize>) -> Self {
435 let len = array.len();
436 let builder = ArrayDataBuilder::new(array.data_type)
437 .len(len)
438 .nulls(array.nulls)
439 .buffers(vec![array.value_offsets.into_inner().into_inner()])
440 .child_data(vec![array.values.to_data()]);
441
442 unsafe { builder.build_unchecked() }
443 }
444}
445
446impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
447 fn from(value: FixedSizeListArray) -> Self {
448 let (field, size) = match value.data_type() {
449 DataType::FixedSizeList(f, size) => (f, *size as usize),
450 _ => unreachable!(),
451 };
452
453 let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
454
455 Self {
456 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
457 nulls: value.nulls().cloned(),
458 values: value.values().clone(),
459 value_offsets: offsets,
460 }
461 }
462}
463
464impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
465 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
466 if data.buffers().len() != 1 {
467 return Err(ArrowError::InvalidArgumentError(format!(
468 "ListArray data should contain a single buffer only (value offsets), had {}",
469 data.buffers().len()
470 )));
471 }
472
473 if data.child_data().len() != 1 {
474 return Err(ArrowError::InvalidArgumentError(format!(
475 "ListArray should contain a single child array (values array), had {}",
476 data.child_data().len()
477 )));
478 }
479
480 let values = data.child_data()[0].clone();
481
482 if let Some(child_data_type) = Self::get_type(data.data_type()) {
483 if values.data_type() != child_data_type {
484 return Err(ArrowError::InvalidArgumentError(format!(
485 "[Large]ListArray's child datatype {:?} does not \
486 correspond to the List's datatype {:?}",
487 values.data_type(),
488 child_data_type
489 )));
490 }
491 } else {
492 return Err(ArrowError::InvalidArgumentError(format!(
493 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
494 data.data_type()
495 )));
496 }
497
498 let values = make_array(values);
499 let value_offsets = unsafe { get_offsets(&data) };
502
503 Ok(Self {
504 data_type: data.data_type().clone(),
505 nulls: data.nulls().cloned(),
506 values,
507 value_offsets,
508 })
509 }
510}
511
512impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
513 fn as_any(&self) -> &dyn Any {
514 self
515 }
516
517 fn to_data(&self) -> ArrayData {
518 self.clone().into()
519 }
520
521 fn into_data(self) -> ArrayData {
522 self.into()
523 }
524
525 fn data_type(&self) -> &DataType {
526 &self.data_type
527 }
528
529 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
530 Arc::new(self.slice(offset, length))
531 }
532
533 fn len(&self) -> usize {
534 self.value_offsets.len() - 1
535 }
536
537 fn is_empty(&self) -> bool {
538 self.value_offsets.len() <= 1
539 }
540
541 fn shrink_to_fit(&mut self) {
542 if let Some(nulls) = &mut self.nulls {
543 nulls.shrink_to_fit();
544 }
545 self.values.shrink_to_fit();
546 self.value_offsets.shrink_to_fit();
547 }
548
549 fn offset(&self) -> usize {
550 0
551 }
552
553 fn nulls(&self) -> Option<&NullBuffer> {
554 self.nulls.as_ref()
555 }
556
557 fn logical_null_count(&self) -> usize {
558 self.null_count()
560 }
561
562 fn get_buffer_memory_size(&self) -> usize {
563 let mut size = self.values.get_buffer_memory_size();
564 size += self.value_offsets.inner().inner().capacity();
565 if let Some(n) = self.nulls.as_ref() {
566 size += n.buffer().capacity();
567 }
568 size
569 }
570
571 fn get_array_memory_size(&self) -> usize {
572 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
573 size += self.value_offsets.inner().inner().capacity();
574 if let Some(n) = self.nulls.as_ref() {
575 size += n.buffer().capacity();
576 }
577 size
578 }
579}
580
581impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
582 type Item = ArrayRef;
583
584 fn value(&self, index: usize) -> Self::Item {
585 GenericListArray::value(self, index)
586 }
587
588 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
589 GenericListArray::value(self, index)
590 }
591}
592
593impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
594 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
595 let prefix = OffsetSize::PREFIX;
596
597 write!(f, "{prefix}ListArray\n[\n")?;
598 print_long_array(self, f, |array, index, f| {
599 std::fmt::Debug::fmt(&array.value(index), f)
600 })?;
601 write!(f, "]")
602 }
603}
604
605pub type ListArray = GenericListArray<i32>;
609
610pub type LargeListArray = GenericListArray<i64>;
614
615#[cfg(test)]
616mod tests {
617 use super::*;
618 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
619 use crate::cast::AsArray;
620 use crate::types::Int32Type;
621 use crate::{Int32Array, Int64Array};
622 use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
623 use arrow_schema::Field;
624
625 fn create_from_buffers() -> ListArray {
626 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
628 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
629 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
630 ListArray::new(field, offsets, Arc::new(values), None)
631 }
632
/// `from_iter_primitive` must yield the same array as building it from
/// explicit offsets and values.
#[test]
fn test_from_iter_primitive() {
    let rows = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let from_iter = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);
    let from_buffers = create_from_buffers();

    assert_eq!(from_iter, from_buffers)
}
645
/// A list array built from empty value and offset buffers has length zero.
#[test]
fn test_empty_list_array() {
    // Empty child array.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(0)
        .add_buffer(Buffer::from([]))
        .build()
        .unwrap();

    // Empty offsets buffer.
    let offsets = Buffer::from([]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list_data = ArrayData::builder(list_type)
        .len(0)
        .add_buffer(offsets)
        .add_child_data(child_data)
        .build()
        .unwrap();

    let array = ListArray::from(list_data);
    assert_eq!(array.len(), 0)
}
671
/// Exercises the accessors of a `ListArray` built from `ArrayData`, first
/// without and then with an `ArrayData` offset of 1.
#[test]
fn test_list_array() {
    // Child values 0..=7.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // Offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));

    // Whole array.
    let full_data = ArrayData::builder(list_type.clone())
        .len(3)
        .add_buffer(offsets.clone())
        .add_child_data(child_data.clone())
        .build()
        .unwrap();
    let full = ListArray::from(full_data);

    assert_eq!(child_data, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));

    let first = full.value(0);
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    let first_unchecked = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first_unchecked.as_primitive::<Int32Type>().value(0));

    for i in 0..3 {
        assert!(full.is_valid(i));
        assert!(!full.is_null(i));
    }

    // Same buffers, but viewed with an offset of 1 and a length of 2.
    let shifted_data = ArrayData::builder(list_type)
        .len(2)
        .offset(1)
        .add_buffer(offsets)
        .add_child_data(child_data.clone())
        .build()
        .unwrap();
    let shifted = ListArray::from(shifted_data);

    assert_eq!(child_data, shifted.values().to_data());
    assert_eq!(DataType::Int32, shifted.value_type());
    assert_eq!(2, shifted.len());
    assert_eq!(0, shifted.null_count());
    assert_eq!(6, shifted.value_offsets()[1]);
    assert_eq!(2, shifted.value_length(1));

    let first = shifted.value(0);
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
    let first_unchecked = unsafe { shifted.value_unchecked(0) };
    assert_eq!(3, first_unchecked.as_primitive::<Int32Type>().value(0));
}
741
/// Exercises the accessors of a `LargeListArray` built from `ArrayData`,
/// first without and then with an `ArrayData` offset of 1.
#[test]
fn test_large_list_array() {
    // Child values 0..=7.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // 64-bit offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);

    let list_type = DataType::new_large_list(DataType::Int32, false);

    // Whole array.
    let full_data = ArrayData::builder(list_type.clone())
        .len(3)
        .add_buffer(offsets.clone())
        .add_child_data(child_data.clone())
        .build()
        .unwrap();
    let full = LargeListArray::from(full_data);

    assert_eq!(child_data, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));

    let first = full.value(0);
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    let first_unchecked = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first_unchecked.as_primitive::<Int32Type>().value(0));

    for i in 0..3 {
        assert!(full.is_valid(i));
        assert!(!full.is_null(i));
    }

    // Same buffers, but viewed with an offset of 1 and a length of 2.
    let shifted_data = ArrayData::builder(list_type)
        .len(2)
        .offset(1)
        .add_buffer(offsets)
        .add_child_data(child_data.clone())
        .build()
        .unwrap();
    let shifted = LargeListArray::from(shifted_data);

    assert_eq!(child_data, shifted.values().to_data());
    assert_eq!(DataType::Int32, shifted.value_type());
    assert_eq!(2, shifted.len());
    assert_eq!(0, shifted.null_count());
    assert_eq!(6, shifted.value_offsets()[1]);
    assert_eq!(2, shifted.value_length(1));

    let first = shifted.value(0);
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
    let first_unchecked = unsafe { shifted.value_unchecked(0) };
    assert_eq!(3, first_unchecked.as_primitive::<Int32Type>().value(0));
}
810
/// Slicing a `ListArray` with nulls keeps validity, offsets and lengths
/// consistent with the unsliced array.
#[test]
fn test_list_array_slice() {
    // Child values 0..=9.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Nine list elements; several of them are empty.
    let offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Elements 0, 3, 4, 6 and 8 are valid, the remaining four are null.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list_data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child_data.clone())
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = ListArray::from(list_data);

    assert_eq!(child_data, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let sliced_array = list_array.slice(1, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Slice position `i` corresponds to position `1 + i` of the original.
    for i in 0..sliced_array.len() {
        let expected_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expected_valid {
            assert!(sliced_array.is_valid(i));
        } else {
            assert!(sliced_array.is_null(i));
        }
    }

    // The slice must still downcast to a ListArray.
    let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(2, sliced_list_array.value_offsets()[2]);
    assert_eq!(2, sliced_list_array.value_length(2));
    assert_eq!(4, sliced_list_array.value_offsets()[3]);
    assert_eq!(2, sliced_list_array.value_length(3));
    assert_eq!(6, sliced_list_array.value_offsets()[5]);
    assert_eq!(3, sliced_list_array.value_length(5));
}
872
/// Slicing a `LargeListArray` with nulls keeps validity, offsets and lengths
/// consistent with the unsliced array.
#[test]
fn test_large_list_array_slice() {
    // Child values 0..=9.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Nine list elements described by 64-bit offsets; several are empty.
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Elements 0, 3, 4, 6 and 8 are valid, the remaining four are null.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let list_data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child_data.clone())
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = LargeListArray::from(list_data);

    assert_eq!(child_data, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let sliced_array = list_array.slice(1, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Slice position `i` corresponds to position `1 + i` of the original.
    for i in 0..sliced_array.len() {
        let expected_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expected_valid {
            assert!(sliced_array.is_valid(i));
        } else {
            assert!(sliced_array.is_null(i));
        }
    }

    // The slice must still downcast to a LargeListArray.
    let sliced_list_array = sliced_array
        .as_any()
        .downcast_ref::<LargeListArray>()
        .unwrap();
    assert_eq!(2, sliced_list_array.value_offsets()[2]);
    assert_eq!(2, sliced_list_array.value_length(2));
    assert_eq!(4, sliced_list_array.value_offsets()[3]);
    assert_eq!(2, sliced_list_array.value_length(3));
    assert_eq!(6, sliced_list_array.value_offsets()[5]);
    assert_eq!(3, sliced_list_array.value_length(5));
}
936
/// `value` must panic when asked for an element past the end of the array.
#[test]
#[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
fn test_list_array_index_out_of_bound() {
    // Child values 0..=9.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Ten offsets, i.e. nine list elements.
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Elements 0, 3, 4, 6 and 8 are valid.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let list_data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child_data)
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = LargeListArray::from(list_data);
    assert_eq!(9, list_array.len());

    // Element 10 needs offset 11, which is past the ten stored offsets.
    list_array.value(10);
}
/// Converting `ArrayData` that has no offsets buffer must panic.
#[test]
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
    let child_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build_unchecked()
    };
    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    // No buffer is added on purpose; `build_unchecked` is used to construct
    // this malformed data.
    let list_data = unsafe {
        ArrayData::builder(list_type)
            .len(3)
            .add_child_data(child_data)
            .build_unchecked()
    };
    drop(ListArray::from(list_data));
}
994
/// Converting `ArrayData` that has no child array must panic.
#[test]
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
    let offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    // No child data is added on purpose; `build_unchecked` is used to
    // construct this malformed data.
    let list_data = unsafe {
        ArrayData::builder(list_type)
            .len(3)
            .add_buffer(offsets)
            .build_unchecked()
    };
    drop(ListArray::from(list_data));
}
1012
/// Data of a `List` array must be rejected when converted to `LargeListArray`.
#[test]
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
fn test_from_array_data_validation() {
    // Build a one-element ListArray holding [1].
    let mut list_builder = ListBuilder::new(Int32Builder::new());
    list_builder.values().append_value(1);
    list_builder.append(true);
    let small_list = list_builder.finish();

    let _ = LargeListArray::from(small_list.into_data());
}
1022
/// Offsets whose first entry is non-zero are accepted and produce the
/// expected element lengths.
#[test]
fn test_list_array_offsets_need_not_start_at_zero() {
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // The first offset is 2 rather than 0.
    let offsets = Buffer::from_slice_ref([2, 2, 5, 7]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list_data = ArrayData::builder(list_type)
        .len(3)
        .add_buffer(offsets)
        .add_child_data(child_data)
        .build()
        .unwrap();

    let array = ListArray::from(list_data);
    assert_eq!(array.value_length(0), 0);
    assert_eq!(array.value_length(1), 3);
    assert_eq!(array.value_length(2), 2);
}
1047
/// Building an `Int32Array` on top of a sliced `u64` buffer must panic with
/// the alignment message.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // NOTE(review): `slice(1)` presumably skips a single byte, which is what
    // breaks the alignment — confirm against `Buffer::slice`.
    let misaligned = aligned.slice(1);
    let array_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(misaligned)
            .build_unchecked()
    };
    drop(Int32Array::from(array_data));
}
1063
/// Building a `ListArray` whose offsets come from a sliced `u64` buffer must
/// panic with the alignment message.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // NOTE(review): `slice(1)` presumably skips a single byte, which is what
    // breaks the alignment — confirm against `Buffer::slice`.
    let misaligned_offsets = aligned.slice(1);

    let zeros: [i32; 8] = [0; 8];
    let child_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(Buffer::from_slice_ref(zeros))
            .build_unchecked()
    };

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list_data = unsafe {
        ArrayData::builder(list_type)
            .add_buffer(misaligned_offsets)
            .add_child_data(child_data)
            .build_unchecked()
    };
    drop(ListArray::from(list_data));
}
1090
/// Equality of list arrays depends on both validity and contents, for the
/// regular and the large flavour alike.
#[test]
fn list_array_equality() {
    type Rows = Vec<Option<Vec<Option<i32>>>>;

    /// Builds both inputs as `ListArray` and as `LargeListArray` and checks
    /// that `==` yields `should_equal` in both cases.
    fn do_comparison(lhs_data: Rows, rhs_data: Rows, should_equal: bool) {
        let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
        let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
        assert_eq!(lhs == rhs, should_equal);

        let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
        let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
        assert_eq!(lhs == rhs, should_equal);
    }

    let reference: Rows = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let leading_nulls: Rows = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let leading_nulls_other_tail: Rows = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(0), Some(0)]),
    ];

    // Identical contents compare equal.
    do_comparison(reference.clone(), reference.clone(), true);

    // A null first element differs from a populated one.
    do_comparison(leading_nulls.clone(), reference, false);

    // Same validity, different values in the last element.
    do_comparison(leading_nulls, leading_nulls_other_tail, false);

    // Same validity, different values in the only valid element.
    do_comparison(
        vec![None, None, Some(vec![Some(1)])],
        vec![None, None, Some(vec![Some(2)])],
        false,
    );
}
1162
/// `ArrayData` with an empty offsets buffer converts into an array whose
/// offsets are `[0]`.
#[test]
fn test_empty_offsets() {
    let element = Arc::new(Field::new("element", DataType::Int32, true));

    let list_data = ArrayData::builder(DataType::List(element.clone()))
        .buffers(vec![Buffer::from(&[])])
        .add_child_data(ArrayData::new_empty(&DataType::Int32))
        .build()
        .unwrap();
    let list = ListArray::from(list_data);
    assert_eq!(list.value_offsets(), &[0]);

    let large_list_data = ArrayData::builder(DataType::LargeList(element))
        .buffers(vec![Buffer::from(&[])])
        .add_child_data(ArrayData::new_empty(&DataType::Int32))
        .build()
        .unwrap();
    let large_list = LargeListArray::from(large_list_data);
    assert_eq!(large_list.len(), 0);
    assert_eq!(large_list.value_offsets(), &[0]);
}
1184
/// Checks the successful paths of `new` and every error message of `try_new`.
#[test]
fn test_try_new() {
    let small_offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
    let int32_values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
    let int32_values = Arc::new(int32_values) as ArrayRef;

    // Valid input, with and without a null buffer of the right length.
    let int32_field = Arc::new(Field::new("element", DataType::Int32, false));
    ListArray::new(
        int32_field.clone(),
        small_offsets.clone(),
        int32_values.clone(),
        None,
    );

    let nulls = NullBuffer::new_null(3);
    ListArray::new(
        int32_field.clone(),
        small_offsets,
        int32_values.clone(),
        Some(nulls),
    );

    // Four elements, but a null buffer of length three.
    let nulls = NullBuffer::new_null(3);
    let large_offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
    let err = LargeListArray::try_new(
        int32_field,
        large_offsets.clone(),
        int32_values.clone(),
        Some(nulls),
    )
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
    );

    // Field declares Int64 while the values are Int32.
    let int64_field = Arc::new(Field::new("element", DataType::Int64, false));
    let err = LargeListArray::try_new(
        int64_field.clone(),
        large_offsets.clone(),
        int32_values.clone(),
        None,
    )
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
    );

    // Values carrying a null buffer with a non-nullable field.
    let nulls = NullBuffer::new_null(7);
    let nullable_values = Int64Array::new(vec![0; 7].into(), Some(nulls));
    let nullable_values = Arc::new(nullable_values);

    let err = LargeListArray::try_new(
        int64_field,
        large_offsets.clone(),
        nullable_values.clone(),
        None,
    )
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
    );

    // The same values are accepted once the field is nullable.
    let nullable_field = Arc::new(Field::new("element", DataType::Int64, true));
    LargeListArray::new(
        nullable_field.clone(),
        large_offsets.clone(),
        nullable_values,
        None,
    );

    // The last offset (5) exceeds the number of values (2).
    let short_values = Int64Array::new(vec![0; 2].into(), None);
    let err = LargeListArray::try_new(
        nullable_field,
        large_offsets,
        Arc::new(short_values),
        None,
    )
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: Max offset of 5 exceeds length of values 2"
    );
}
1239
/// Converting a `FixedSizeListArray` keeps both the values and the nulls.
#[test]
fn test_from_fixed_size_list() {
    let mut fixed_builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    fixed_builder.values().append_slice(&[1, 2, 3]);
    fixed_builder.append(true);
    // The middle element is null; its three slots still hold values.
    fixed_builder.values().append_slice(&[0, 0, 0]);
    fixed_builder.append(false);
    fixed_builder.values().append_slice(&[4, 5, 6]);
    fixed_builder.append(true);
    let list: ListArray = fixed_builder.finish().into();

    let collected: Vec<_> = list
        .iter()
        .map(|element| element.map(|arr| arr.as_primitive::<Int32Type>().values().to_vec()))
        .collect();
    assert_eq!(
        collected,
        vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]
    )
}
1257
/// A dense union array can be the child of a list with a non-nullable field.
#[test]
fn test_nullable_union() {
    let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());

    let mut union_builder = UnionBuilder::new_dense();
    union_builder.append::<Int32Type>("a", 1).unwrap();
    union_builder.append::<Int32Type>("b", 2).unwrap();
    union_builder.append::<Int32Type>("b", 3).unwrap();
    union_builder.append::<Int32Type>("a", 4).unwrap();
    union_builder.append::<Int32Type>("a", 5).unwrap();
    let union_values = union_builder.build().unwrap();

    let field = Arc::new(Field::new(
        "element",
        union_values.data_type().clone(),
        false,
    ));
    // `new` panics on invalid input, so simply returning is the assertion.
    ListArray::new(field.clone(), offsets, Arc::new(union_values), None);
}
1271}