1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
20use arrow_buffer::NullBufferBuilder;
21use arrow_buffer::{Buffer, OffsetBuffer};
22use arrow_schema::{Field, FieldRef};
23use std::any::Any;
24use std::sync::Arc;
25
/// Builder for a [`GenericListArray`] whose child values are produced by a nested
/// [`ArrayBuilder`] of type `T`.
///
/// Child elements are pushed into the values builder; every time a list slot is
/// appended, the values builder's current length is recorded as that slot's end offset.
#[derive(Debug)]
pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
    /// List boundaries; seeded with a single zero offset on construction and after `finish`.
    offsets_builder: BufferBuilder<OffsetSize>,
    /// Validity of each appended list slot.
    null_buffer_builder: NullBufferBuilder,
    /// Builder for the flattened child values shared by all list slots.
    values_builder: T,
    /// Optional child field override; when `None`, `finish` falls back to
    /// `Field::new_list_field(<values data type>, true)`.
    field: Option<FieldRef>,
}
94
95impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> {
96 fn default() -> Self {
97 Self::new(T::default())
98 }
99}
100
101impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
102 pub fn new(values_builder: T) -> Self {
104 let capacity = values_builder.len();
105 Self::with_capacity(values_builder, capacity)
106 }
107
108 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
111 let mut offsets_builder = BufferBuilder::<OffsetSize>::new(capacity + 1);
112 offsets_builder.append(OffsetSize::zero());
113 Self {
114 offsets_builder,
115 null_buffer_builder: NullBufferBuilder::new(capacity),
116 values_builder,
117 field: None,
118 }
119 }
120
121 pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
128 Self {
129 field: Some(field.into()),
130 ..self
131 }
132 }
133}
134
135impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
136 for GenericListBuilder<OffsetSize, T>
137where
138 T: 'static,
139{
140 fn as_any(&self) -> &dyn Any {
142 self
143 }
144
145 fn as_any_mut(&mut self) -> &mut dyn Any {
147 self
148 }
149
150 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
152 self
153 }
154
155 fn len(&self) -> usize {
157 self.null_buffer_builder.len()
158 }
159
160 fn finish(&mut self) -> ArrayRef {
162 Arc::new(self.finish())
163 }
164
165 fn finish_cloned(&self) -> ArrayRef {
167 Arc::new(self.finish_cloned())
168 }
169}
170
171impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
172where
173 T: 'static,
174{
175 pub fn values(&mut self) -> &mut T {
180 &mut self.values_builder
181 }
182
183 pub fn values_ref(&self) -> &T {
185 &self.values_builder
186 }
187
188 #[inline]
194 pub fn append(&mut self, is_valid: bool) {
195 self.offsets_builder.append(self.next_offset());
196 self.null_buffer_builder.append(is_valid);
197 }
198
199 #[inline]
205 fn next_offset(&self) -> OffsetSize {
206 OffsetSize::from_usize(self.values_builder.len()).unwrap()
207 }
208
209 #[inline]
256 pub fn append_value<I, V>(&mut self, i: I)
257 where
258 T: Extend<Option<V>>,
259 I: IntoIterator<Item = Option<V>>,
260 {
261 self.extend(std::iter::once(Some(i)))
262 }
263
264 #[inline]
268 pub fn append_null(&mut self) {
269 self.offsets_builder.append(self.next_offset());
270 self.null_buffer_builder.append_null();
271 }
272
273 #[inline]
277 pub fn append_option<I, V>(&mut self, i: Option<I>)
278 where
279 T: Extend<Option<V>>,
280 I: IntoIterator<Item = Option<V>>,
281 {
282 match i {
283 Some(i) => self.append_value(i),
284 None => self.append_null(),
285 }
286 }
287
288 pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
290 let values = self.values_builder.finish();
291 let nulls = self.null_buffer_builder.finish();
292
293 let offsets = self.offsets_builder.finish();
294 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
296 self.offsets_builder.append(OffsetSize::zero());
297
298 let field = match &self.field {
299 Some(f) => f.clone(),
300 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
301 };
302
303 GenericListArray::new(field, offsets, values, nulls)
304 }
305
306 pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
308 let values = self.values_builder.finish_cloned();
309 let nulls = self.null_buffer_builder.finish_cloned();
310
311 let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
312 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
314
315 let field = match &self.field {
316 Some(f) => f.clone(),
317 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
318 };
319
320 GenericListArray::new(field, offsets, values, nulls)
321 }
322
323 pub fn offsets_slice(&self) -> &[OffsetSize] {
325 self.offsets_builder.as_slice()
326 }
327
328 pub fn validity_slice(&self) -> Option<&[u8]> {
330 self.null_buffer_builder.as_slice()
331 }
332}
333
334impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
335where
336 O: OffsetSizeTrait,
337 B: ArrayBuilder + Extend<E>,
338 V: IntoIterator<Item = E>,
339{
340 #[inline]
341 fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
342 for v in iter {
343 match v {
344 Some(elements) => {
345 self.values_builder.extend(elements);
346 self.append(true);
347 }
348 None => self.append(false),
349 }
350 }
351 }
352}
353
354#[cfg(test)]
355mod tests {
356 use super::*;
357 use crate::builder::{make_builder, Int32Builder, ListBuilder};
358 use crate::cast::AsArray;
359 use crate::types::Int32Type;
360 use crate::Int32Array;
361 use arrow_schema::DataType;
362
363 fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
364 let values_builder = Int32Builder::with_capacity(10);
365 let mut builder = GenericListBuilder::<O, _>::new(values_builder);
366
367 builder.values().append_value(0);
369 builder.values().append_value(1);
370 builder.values().append_value(2);
371 builder.append(true);
372 builder.values().append_value(3);
373 builder.values().append_value(4);
374 builder.values().append_value(5);
375 builder.append(true);
376 builder.values().append_value(6);
377 builder.values().append_value(7);
378 builder.append(true);
379 let list_array = builder.finish();
380
381 let list_values = list_array.values().as_primitive::<Int32Type>();
382 assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]);
383 assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as));
384 assert_eq!(DataType::Int32, list_array.value_type());
385 assert_eq!(3, list_array.len());
386 assert_eq!(0, list_array.null_count());
387 assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]);
388 assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2));
389 for i in 0..3 {
390 assert!(list_array.is_valid(i));
391 assert!(!list_array.is_null(i));
392 }
393 }
394
395 #[test]
396 fn test_list_array_builder() {
397 _test_generic_list_array_builder::<i32>()
398 }
399
400 #[test]
401 fn test_large_list_array_builder() {
402 _test_generic_list_array_builder::<i64>()
403 }
404
405 fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() {
406 let values_builder = Int32Builder::with_capacity(10);
407 let mut builder = GenericListBuilder::<O, _>::new(values_builder);
408
409 builder.values().append_value(0);
411 builder.values().append_value(1);
412 builder.values().append_value(2);
413 builder.append(true);
414 builder.append(false);
415 builder.values().append_value(3);
416 builder.values().append_null();
417 builder.values().append_value(5);
418 builder.append(true);
419 builder.values().append_value(6);
420 builder.values().append_value(7);
421 builder.append(true);
422
423 let list_array = builder.finish();
424
425 assert_eq!(DataType::Int32, list_array.value_type());
426 assert_eq!(4, list_array.len());
427 assert_eq!(1, list_array.null_count());
428 assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
429 assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
430 }
431
432 #[test]
433 fn test_list_array_builder_nulls() {
434 _test_generic_list_array_builder_nulls::<i32>()
435 }
436
437 #[test]
438 fn test_large_list_array_builder_nulls() {
439 _test_generic_list_array_builder_nulls::<i64>()
440 }
441
442 #[test]
443 fn test_list_array_builder_finish() {
444 let values_builder = Int32Array::builder(5);
445 let mut builder = ListBuilder::new(values_builder);
446
447 builder.values().append_slice(&[1, 2, 3]);
448 builder.append(true);
449 builder.values().append_slice(&[4, 5, 6]);
450 builder.append(true);
451
452 let mut arr = builder.finish();
453 assert_eq!(2, arr.len());
454 assert!(builder.is_empty());
455
456 builder.values().append_slice(&[7, 8, 9]);
457 builder.append(true);
458 arr = builder.finish();
459 assert_eq!(1, arr.len());
460 assert!(builder.is_empty());
461 }
462
463 #[test]
464 fn test_list_array_builder_finish_cloned() {
465 let values_builder = Int32Array::builder(5);
466 let mut builder = ListBuilder::new(values_builder);
467
468 builder.values().append_slice(&[1, 2, 3]);
469 builder.append(true);
470 builder.values().append_slice(&[4, 5, 6]);
471 builder.append(true);
472
473 let mut arr = builder.finish_cloned();
474 assert_eq!(2, arr.len());
475 assert!(!builder.is_empty());
476
477 builder.values().append_slice(&[7, 8, 9]);
478 builder.append(true);
479 arr = builder.finish();
480 assert_eq!(3, arr.len());
481 assert!(builder.is_empty());
482 }
483
484 #[test]
485 fn test_list_list_array_builder() {
486 let primitive_builder = Int32Builder::with_capacity(10);
487 let values_builder = ListBuilder::new(primitive_builder);
488 let mut builder = ListBuilder::new(values_builder);
489
490 builder.values().values().append_value(1);
492 builder.values().values().append_value(2);
493 builder.values().append(true);
494 builder.values().values().append_value(3);
495 builder.values().values().append_value(4);
496 builder.values().append(true);
497 builder.append(true);
498
499 builder.values().values().append_value(5);
500 builder.values().values().append_value(6);
501 builder.values().values().append_value(7);
502 builder.values().append(true);
503 builder.values().append(false);
504 builder.values().values().append_value(8);
505 builder.values().append(true);
506 builder.append(true);
507
508 builder.append(false);
509
510 builder.values().values().append_value(9);
511 builder.values().values().append_value(10);
512 builder.values().append(true);
513 builder.append(true);
514
515 let l1 = builder.finish();
516
517 assert_eq!(4, l1.len());
518 assert_eq!(1, l1.null_count());
519
520 assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
521 let l2 = l1.values().as_list::<i32>();
522
523 assert_eq!(6, l2.len());
524 assert_eq!(1, l2.null_count());
525 assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);
526
527 let i1 = l2.values().as_primitive::<Int32Type>();
528 assert_eq!(10, i1.len());
529 assert_eq!(0, i1.null_count());
530 assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
531 }
532
533 #[test]
534 fn test_extend() {
535 let mut builder = ListBuilder::new(Int32Builder::new());
536 builder.extend([
537 Some(vec![Some(1), Some(2), Some(7), None]),
538 Some(vec![]),
539 Some(vec![Some(4), Some(5)]),
540 None,
541 ]);
542
543 let array = builder.finish();
544 assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
545 assert_eq!(array.null_count(), 1);
546 assert_eq!(array.logical_null_count(), 1);
547 assert!(array.is_null(3));
548 let elements = array.values().as_primitive::<Int32Type>();
549 assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
550 assert_eq!(elements.null_count(), 1);
551 assert_eq!(elements.logical_null_count(), 1);
552 assert!(elements.is_null(3));
553 }
554
555 #[test]
556 fn test_boxed_primitive_array_builder() {
557 let values_builder = make_builder(&DataType::Int32, 5);
558 let mut builder = ListBuilder::new(values_builder);
559
560 builder
561 .values()
562 .as_any_mut()
563 .downcast_mut::<Int32Builder>()
564 .expect("should be an Int32Builder")
565 .append_slice(&[1, 2, 3]);
566 builder.append(true);
567
568 builder
569 .values()
570 .as_any_mut()
571 .downcast_mut::<Int32Builder>()
572 .expect("should be an Int32Builder")
573 .append_slice(&[4, 5, 6]);
574 builder.append(true);
575
576 let arr = builder.finish();
577 assert_eq!(2, arr.len());
578
579 let elements = arr.values().as_primitive::<Int32Type>();
580 assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
581 }
582
583 #[test]
584 fn test_boxed_list_list_array_builder() {
585 let values_builder = make_builder(
587 &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
588 10,
589 );
590 test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder);
591 }
592
593 #[test]
594 fn test_boxed_large_list_large_list_array_builder() {
595 let values_builder = make_builder(
597 &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))),
598 10,
599 );
600 test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder);
601 }
602
603 fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>(
604 values_builder: Box<dyn ArrayBuilder>,
605 ) {
606 let mut builder: GenericListBuilder<O, Box<dyn ArrayBuilder>> =
607 GenericListBuilder::<O, Box<dyn ArrayBuilder>>::new(values_builder);
608
609 builder
611 .values()
612 .as_any_mut()
613 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
614 .expect("should be an (Large)ListBuilder")
615 .values()
616 .as_any_mut()
617 .downcast_mut::<Int32Builder>()
618 .expect("should be an Int32Builder")
619 .append_value(1);
620 builder
621 .values()
622 .as_any_mut()
623 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
624 .expect("should be an (Large)ListBuilder")
625 .values()
626 .as_any_mut()
627 .downcast_mut::<Int32Builder>()
628 .expect("should be an Int32Builder")
629 .append_value(2);
630 builder
631 .values()
632 .as_any_mut()
633 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
634 .expect("should be an (Large)ListBuilder")
635 .append(true);
636 builder
637 .values()
638 .as_any_mut()
639 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
640 .expect("should be an (Large)ListBuilder")
641 .values()
642 .as_any_mut()
643 .downcast_mut::<Int32Builder>()
644 .expect("should be an Int32Builder")
645 .append_value(3);
646 builder
647 .values()
648 .as_any_mut()
649 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
650 .expect("should be an (Large)ListBuilder")
651 .values()
652 .as_any_mut()
653 .downcast_mut::<Int32Builder>()
654 .expect("should be an Int32Builder")
655 .append_value(4);
656 builder
657 .values()
658 .as_any_mut()
659 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
660 .expect("should be an (Large)ListBuilder")
661 .append(true);
662 builder.append(true);
663
664 builder
665 .values()
666 .as_any_mut()
667 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
668 .expect("should be an (Large)ListBuilder")
669 .values()
670 .as_any_mut()
671 .downcast_mut::<Int32Builder>()
672 .expect("should be an Int32Builder")
673 .append_value(5);
674 builder
675 .values()
676 .as_any_mut()
677 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
678 .expect("should be an (Large)ListBuilder")
679 .values()
680 .as_any_mut()
681 .downcast_mut::<Int32Builder>()
682 .expect("should be an Int32Builder")
683 .append_value(6);
684 builder
685 .values()
686 .as_any_mut()
687 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
688 .expect("should be an (Large)ListBuilder")
689 .values()
690 .as_any_mut()
691 .downcast_mut::<Int32Builder>()
692 .expect("should be an (Large)ListBuilder")
693 .append_value(7);
694 builder
695 .values()
696 .as_any_mut()
697 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
698 .expect("should be an (Large)ListBuilder")
699 .append(true);
700 builder
701 .values()
702 .as_any_mut()
703 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
704 .expect("should be an (Large)ListBuilder")
705 .append(false);
706 builder
707 .values()
708 .as_any_mut()
709 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
710 .expect("should be an (Large)ListBuilder")
711 .values()
712 .as_any_mut()
713 .downcast_mut::<Int32Builder>()
714 .expect("should be an Int32Builder")
715 .append_value(8);
716 builder
717 .values()
718 .as_any_mut()
719 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
720 .expect("should be an (Large)ListBuilder")
721 .append(true);
722 builder.append(true);
723
724 builder.append(false);
725
726 builder
727 .values()
728 .as_any_mut()
729 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
730 .expect("should be an (Large)ListBuilder")
731 .values()
732 .as_any_mut()
733 .downcast_mut::<Int32Builder>()
734 .expect("should be an Int32Builder")
735 .append_value(9);
736 builder
737 .values()
738 .as_any_mut()
739 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
740 .expect("should be an (Large)ListBuilder")
741 .values()
742 .as_any_mut()
743 .downcast_mut::<Int32Builder>()
744 .expect("should be an Int32Builder")
745 .append_value(10);
746 builder
747 .values()
748 .as_any_mut()
749 .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
750 .expect("should be an (Large)ListBuilder")
751 .append(true);
752 builder.append(true);
753
754 let l1 = builder.finish();
755
756 assert_eq!(4, l1.len());
757 assert_eq!(1, l1.null_count());
758
759 assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as));
760 let l2 = l1.values().as_list::<O>();
761
762 assert_eq!(6, l2.len());
763 assert_eq!(1, l2.null_count());
764 assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as));
765
766 let i1 = l2.values().as_primitive::<Int32Type>();
767 assert_eq!(10, i1.len());
768 assert_eq!(0, i1.null_count());
769 assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
770 }
771
772 #[test]
773 fn test_with_field() {
774 let field = Arc::new(Field::new("bar", DataType::Int32, false));
775 let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
776 builder.append_value([Some(1), Some(2), Some(3)]);
777 builder.append_null(); builder.append_value([Some(4)]);
779 let array = builder.finish();
780 assert_eq!(array.len(), 3);
781 assert_eq!(array.data_type(), &DataType::List(field.clone()));
782
783 builder.append_value([Some(4), Some(5)]);
784 let array = builder.finish();
785 assert_eq!(array.data_type(), &DataType::List(field));
786 assert_eq!(array.len(), 1);
787 }
788
789 #[test]
790 #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
791 fn test_checks_nullability() {
792 let field = Arc::new(Field::new_list_field(DataType::Int32, false));
793 let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
794 builder.append_value([Some(1), None]);
795 builder.finish();
796 }
797
798 #[test]
799 #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
800 fn test_checks_data_type() {
801 let field = Arc::new(Field::new_list_field(DataType::Int64, false));
802 let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
803 builder.append_value([Some(1)]);
804 builder.finish();
805 }
806}