arrow_array/array/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21    iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22    ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
/// A type that can be used within a variable-size array to encode offset information
///
/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`],
/// [`StringArray`] and [`LargeStringArray`]
///
/// This file implements the trait for `i32` (32 bit offsets) and `i64`
/// (64 bit offsets).
///
/// [`BinaryArray`]: crate::array::BinaryArray
/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
/// [`StringArray`]: crate::array::StringArray
/// [`LargeStringArray`]: crate::array::LargeStringArray
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
    /// True for 64 bit offset size and false for 32 bit offset size
    const IS_LARGE: bool;
    /// Prefix for the offset size
    ///
    /// Prepended to `ListArray` when a type name is rendered in error and
    /// debug output, e.g. `""` gives `ListArray` and `"Large"` gives
    /// `LargeListArray`.
    const PREFIX: &'static str;
}
46
// 32 bit offsets: the "small" variant, rendered without a name prefix.
impl OffsetSizeTrait for i32 {
    const IS_LARGE: bool = false;
    const PREFIX: &'static str = "";
}
51
// 64 bit offsets: the "large" variant, rendered with a `Large` name prefix.
impl OffsetSizeTrait for i64 {
    const IS_LARGE: bool = true;
    const PREFIX: &'static str = "Large";
}
56
57/// An array of [variable length lists], similar to JSON arrays
58/// (e.g. `["A", "B", "C"]`). This struct specifically represents
59/// the [list layout]. Refer to [`GenericListViewArray`] for the
60/// [list-view layout].
61///
/// Lists are represented using `offsets` into a `values` child
/// array. The start and end offsets of each list are stored in two
/// adjacent entries of an [`OffsetBuffer`].
65///
66/// Arrow defines [`ListArray`] with `i32` offsets and
67/// [`LargeListArray`] with `i64` offsets.
68///
69/// Use [`GenericListBuilder`] to construct a [`GenericListArray`].
70///
71/// # Representation
72///
73/// A [`ListArray`] can represent a list of values of any other
74/// supported Arrow type. Each element of the `ListArray` itself is
75/// a list which may be empty, may contain NULL and non-null values,
76/// or may itself be NULL.
77///
78/// For example, the `ListArray` shown in the following diagram stores
79/// lists of strings. Note that `[]` represents an empty (length
80/// 0), but non NULL list.
81///
82/// ```text
83/// ┌─────────────┐
84/// │   [A,B,C]   │
85/// ├─────────────┤
86/// │     []      │
87/// ├─────────────┤
88/// │    NULL     │
89/// ├─────────────┤
90/// │     [D]     │
91/// ├─────────────┤
92/// │  [NULL, F]  │
93/// └─────────────┘
94/// ```
95///
96/// The `values` are stored in a child [`StringArray`] and the offsets
97/// are stored in an [`OffsetBuffer`] as shown in the following
98/// diagram. The logical values and offsets are shown on the left, and
99/// the actual `ListArray` encoding on the right.
100///
101/// ```text
102///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
103///                                                                 ┌ ─ ─ ─ ─ ─ ─ ┐    │
104///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
105///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
106///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
107///  │      []     │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
108///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
109///  │    NULL     │  │ (3,4) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
110///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
111///  │     [D]     │  │ (4,5) │                   │ 1 │   │ 4 │     │ │ ? │ │ ? │ │ 3  │
112///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
113///  │  [NULL, F]  │  │ (5,7) │                   │ 1 │   │ 5 │     │ │ 1 │ │ D │ │ 4  │
114///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
115///                                                       │ 7 │     │ │ 0 │ │ ? │ │ 5  │
116///                                         │  Validity   └───┘       ├───┤ ├───┤
117///     Logical       Logical                  (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
118///      Values       Offsets               │                         └───┘ └───┘
119///                                                                 │    Values   │    │
120///                 (offsets[i],            │   ListArray               (Array)
121///                offsets[i+1])                                    └ ─ ─ ─ ─ ─ ─ ┘    │
122///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
123/// ```
124///
125/// # Slicing
126///
/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data.
///
/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
/// would result in the following. Note:
132///
133/// 1. `Values` array is unchanged
134/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
135///
136/// ```text
137///                                 ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
138///                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │  ╔═══╗
139///                                 │                         ╔═══╗ ╔═══╗         ║   ║  Not used
140///                                                         │ ║ 1 ║ ║ A ║ │ 0  │  ╚═══╝
141///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
142///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
143///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
144///  │    NULL     │  │ (3,4) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
145///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠───╣ ╠───╣
146///  │     [D]     │  │ (4,5) │           │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 3  │
147///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ├───┤ ├───┤
148///                                               │ 5 │     │ │ 1 │ │ D │ │ 4  │
149///                                 │             └───┘       ├───┤ ├───┤
150///                                                         │ │ 0 │ │ ? │ │ 5  │
151///                                 │  Validity               ╠═══╣ ╠═══╣
152///     Logical       Logical          (nulls)   Offsets    │ ║ 1 ║ ║ F ║ │ 6  │
153///      Values       Offsets       │                         ╚═══╝ ╚═══╝
154///                                                         │    Values   │    │
155///                 (offsets[i],    │   ListArray               (Array)
156///                offsets[i+1])                            └ ─ ─ ─ ─ ─ ─ ┘    │
157///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
158/// ```
159///
160/// [`StringArray`]: crate::array::StringArray
161/// [`GenericListViewArray`]: crate::array::GenericListViewArray
162/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
163/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
164/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
    // `DataType::List` or `DataType::LargeList` wrapping the child field
    data_type: DataType,
    // Optional validity of each list slot
    nulls: Option<NullBuffer>,
    // Child array holding the elements of the lists
    values: ArrayRef,
    // Offsets into `values`: list `i` spans `value_offsets[i]..value_offsets[i + 1]`
    value_offsets: OffsetBuffer<OffsetSize>,
}
171
172impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
173    fn clone(&self) -> Self {
174        Self {
175            data_type: self.data_type.clone(),
176            nulls: self.nulls.clone(),
177            values: self.values.clone(),
178            value_offsets: self.value_offsets.clone(),
179        }
180    }
181}
182
183impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
184    /// The data type constructor of list array.
185    /// The input is the schema of the child array and
186    /// the output is the [`DataType`], List or LargeList.
187    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
188        DataType::LargeList
189    } else {
190        DataType::List
191    };
192
193    /// Create a new [`GenericListArray`] from the provided parts
194    ///
195    /// # Errors
196    ///
197    /// Errors if
198    ///
199    /// * `offsets.len() - 1 != nulls.len()`
200    /// * `offsets.last() > values.len()`
201    /// * `!field.is_nullable() && values.is_nullable()`
202    /// * `field.data_type() != values.data_type()`
203    pub fn try_new(
204        field: FieldRef,
205        offsets: OffsetBuffer<OffsetSize>,
206        values: ArrayRef,
207        nulls: Option<NullBuffer>,
208    ) -> Result<Self, ArrowError> {
209        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
210        let end_offset = offsets.last().unwrap().as_usize();
211        // don't need to check other values of `offsets` because they are checked
212        // during construction of `OffsetBuffer`
213        if end_offset > values.len() {
214            return Err(ArrowError::InvalidArgumentError(format!(
215                "Max offset of {end_offset} exceeds length of values {}",
216                values.len()
217            )));
218        }
219
220        if let Some(n) = nulls.as_ref() {
221            if n.len() != len {
222                return Err(ArrowError::InvalidArgumentError(format!(
223                    "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
224                    OffsetSize::PREFIX,
225                    n.len(),
226                )));
227            }
228        }
229        if !field.is_nullable() && values.is_nullable() {
230            return Err(ArrowError::InvalidArgumentError(format!(
231                "Non-nullable field of {}ListArray {:?} cannot contain nulls",
232                OffsetSize::PREFIX,
233                field.name()
234            )));
235        }
236
237        if field.data_type() != values.data_type() {
238            return Err(ArrowError::InvalidArgumentError(format!(
239                "{}ListArray expected data type {} got {} for {:?}",
240                OffsetSize::PREFIX,
241                field.data_type(),
242                values.data_type(),
243                field.name()
244            )));
245        }
246
247        Ok(Self {
248            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
249            nulls,
250            values,
251            value_offsets: offsets,
252        })
253    }
254
255    /// Create a new [`GenericListArray`] from the provided parts
256    ///
257    /// # Panics
258    ///
259    /// Panics if [`Self::try_new`] returns an error
260    pub fn new(
261        field: FieldRef,
262        offsets: OffsetBuffer<OffsetSize>,
263        values: ArrayRef,
264        nulls: Option<NullBuffer>,
265    ) -> Self {
266        Self::try_new(field, offsets, values, nulls).unwrap()
267    }
268
269    /// Create a new [`GenericListArray`] of length `len` where all values are null
270    pub fn new_null(field: FieldRef, len: usize) -> Self {
271        let values = new_empty_array(field.data_type());
272        Self {
273            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
274            nulls: Some(NullBuffer::new_null(len)),
275            value_offsets: OffsetBuffer::new_zeroed(len),
276            values,
277        }
278    }
279
280    /// Deconstruct this array into its constituent parts
281    pub fn into_parts(
282        self,
283    ) -> (
284        FieldRef,
285        OffsetBuffer<OffsetSize>,
286        ArrayRef,
287        Option<NullBuffer>,
288    ) {
289        let f = match self.data_type {
290            DataType::List(f) | DataType::LargeList(f) => f,
291            _ => unreachable!(),
292        };
293        (f, self.value_offsets, self.values, self.nulls)
294    }
295
    /// Returns a reference to the offsets of this list
    ///
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
    /// allowing for zero-copy cloning. [`Self::value_offsets`] exposes the
    /// same offsets as a plain slice.
    ///
    /// Notes: The `offsets` may not start at 0 and may not cover all values in
    /// [`Self::values`]. This can happen when the list array was sliced via
    /// [`Self::slice`]. See documentation for [`Self`] for more details.
    #[inline]
    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
        &self.value_offsets
    }
308
    /// Returns a reference to the values of this list
    ///
    /// Note: The list array may not refer to all values in the `values` array.
    /// For example if the list array was sliced via [`Self::slice`] values will
    /// still contain values both before and after the slice. See documentation
    /// for [`Self`] for more details.
    ///
    /// Use [`Self::value`] to obtain only the elements of a single list.
    #[inline]
    pub fn values(&self) -> &ArrayRef {
        &self.values
    }
319
320    /// Returns a clone of the value type of this list.
321    pub fn value_type(&self) -> DataType {
322        self.values.data_type().clone()
323    }
324
325    /// Returns ith value of this list array.
326    /// # Safety
327    /// Caller must ensure that the index is within the array bounds
328    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
329        let end = self.value_offsets().get_unchecked(i + 1).as_usize();
330        let start = self.value_offsets().get_unchecked(i).as_usize();
331        self.values.slice(start, end - start)
332    }
333
334    /// Returns ith value of this list array.
335    pub fn value(&self, i: usize) -> ArrayRef {
336        let end = self.value_offsets()[i + 1].as_usize();
337        let start = self.value_offsets()[i].as_usize();
338        self.values.slice(start, end - start)
339    }
340
    /// Returns the offset values in the offsets buffer.
    ///
    /// The slice has one more entry than the array has lists: list `i` spans
    /// `value_offsets()[i]..value_offsets()[i + 1]`.
    ///
    /// See [`Self::offsets`] for more details.
    #[inline]
    pub fn value_offsets(&self) -> &[OffsetSize] {
        &self.value_offsets
    }
348
349    /// Returns the length for value at index `i`.
350    #[inline]
351    pub fn value_length(&self, i: usize) -> OffsetSize {
352        let offsets = self.value_offsets();
353        offsets[i + 1] - offsets[i]
354    }
355
356    /// constructs a new iterator
357    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
358        GenericListArrayIter::<'a, OffsetSize>::new(self)
359    }
360
361    #[inline]
362    fn get_type(data_type: &DataType) -> Option<&DataType> {
363        match (OffsetSize::IS_LARGE, data_type) {
364            (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
365                Some(child.data_type())
366            }
367            _ => None,
368        }
369    }
370
371    /// Returns a zero-copy slice of this array with the indicated offset and length.
372    ///
373    /// Notes: this method does *NOT* slice the underlying values array or modify
374    /// the values in the offsets buffer. See [`Self::values`] and
375    /// [`Self::offsets`] for more information.
376    pub fn slice(&self, offset: usize, length: usize) -> Self {
377        Self {
378            data_type: self.data_type.clone(),
379            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
380            values: self.values.clone(),
381            value_offsets: self.value_offsets.slice(offset, length),
382        }
383    }
384
385    /// Creates a [`GenericListArray`] from an iterator of primitive values
386    /// # Example
387    /// ```
388    /// # use arrow_array::ListArray;
389    /// # use arrow_array::types::Int32Type;
390    ///
391    /// let data = vec![
392    ///    Some(vec![Some(0), Some(1), Some(2)]),
393    ///    None,
394    ///    Some(vec![Some(3), None, Some(5)]),
395    ///    Some(vec![Some(6), Some(7)]),
396    /// ];
397    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
398    /// println!("{:?}", list_array);
399    /// ```
400    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
401    where
402        T: ArrowPrimitiveType,
403        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
404        I: IntoIterator<Item = Option<P>>,
405    {
406        let iter = iter.into_iter();
407        let size_hint = iter.size_hint().0;
408        let mut builder =
409            GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
410
411        for i in iter {
412            match i {
413                Some(p) => {
414                    for t in p {
415                        builder.values().append_option(t);
416                    }
417                    builder.append(true);
418                }
419                None => builder.append(false),
420            }
421        }
422        builder.finish()
423    }
424}
425
426impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
427    fn from(data: ArrayData) -> Self {
428        Self::try_new_from_array_data(data)
429            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
430    }
431}
432
433impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
434    fn from(array: GenericListArray<OffsetSize>) -> Self {
435        let len = array.len();
436        let builder = ArrayDataBuilder::new(array.data_type)
437            .len(len)
438            .nulls(array.nulls)
439            .buffers(vec![array.value_offsets.into_inner().into_inner()])
440            .child_data(vec![array.values.to_data()]);
441
442        unsafe { builder.build_unchecked() }
443    }
444}
445
446impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
447    fn from(value: FixedSizeListArray) -> Self {
448        let (field, size) = match value.data_type() {
449            DataType::FixedSizeList(f, size) => (f, *size as usize),
450            _ => unreachable!(),
451        };
452
453        let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
454
455        Self {
456            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
457            nulls: value.nulls().cloned(),
458            values: value.values().clone(),
459            value_offsets: offsets,
460        }
461    }
462}
463
464impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
465    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
466        if data.buffers().len() != 1 {
467            return Err(ArrowError::InvalidArgumentError(format!(
468                "ListArray data should contain a single buffer only (value offsets), had {}",
469                data.buffers().len()
470            )));
471        }
472
473        if data.child_data().len() != 1 {
474            return Err(ArrowError::InvalidArgumentError(format!(
475                "ListArray should contain a single child array (values array), had {}",
476                data.child_data().len()
477            )));
478        }
479
480        let values = data.child_data()[0].clone();
481
482        if let Some(child_data_type) = Self::get_type(data.data_type()) {
483            if values.data_type() != child_data_type {
484                return Err(ArrowError::InvalidArgumentError(format!(
485                    "[Large]ListArray's child datatype {:?} does not \
486                             correspond to the List's datatype {:?}",
487                    values.data_type(),
488                    child_data_type
489                )));
490            }
491        } else {
492            return Err(ArrowError::InvalidArgumentError(format!(
493                "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
494                data.data_type()
495            )));
496        }
497
498        let values = make_array(values);
499        // SAFETY:
500        // ArrayData is valid, and verified type above
501        let value_offsets = unsafe { get_offsets(&data) };
502
503        Ok(Self {
504            data_type: data.data_type().clone(),
505            nulls: data.nulls().cloned(),
506            values,
507            value_offsets,
508        })
509    }
510}
511
512impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
513    fn as_any(&self) -> &dyn Any {
514        self
515    }
516
517    fn to_data(&self) -> ArrayData {
518        self.clone().into()
519    }
520
521    fn into_data(self) -> ArrayData {
522        self.into()
523    }
524
525    fn data_type(&self) -> &DataType {
526        &self.data_type
527    }
528
529    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
530        Arc::new(self.slice(offset, length))
531    }
532
533    fn len(&self) -> usize {
534        self.value_offsets.len() - 1
535    }
536
537    fn is_empty(&self) -> bool {
538        self.value_offsets.len() <= 1
539    }
540
541    fn shrink_to_fit(&mut self) {
542        if let Some(nulls) = &mut self.nulls {
543            nulls.shrink_to_fit();
544        }
545        self.values.shrink_to_fit();
546        self.value_offsets.shrink_to_fit();
547    }
548
549    fn offset(&self) -> usize {
550        0
551    }
552
553    fn nulls(&self) -> Option<&NullBuffer> {
554        self.nulls.as_ref()
555    }
556
557    fn logical_null_count(&self) -> usize {
558        // More efficient that the default implementation
559        self.null_count()
560    }
561
562    fn get_buffer_memory_size(&self) -> usize {
563        let mut size = self.values.get_buffer_memory_size();
564        size += self.value_offsets.inner().inner().capacity();
565        if let Some(n) = self.nulls.as_ref() {
566            size += n.buffer().capacity();
567        }
568        size
569    }
570
571    fn get_array_memory_size(&self) -> usize {
572        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
573        size += self.value_offsets.inner().inner().capacity();
574        if let Some(n) = self.nulls.as_ref() {
575            size += n.buffer().capacity();
576        }
577        size
578    }
579}
580
581impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
582    type Item = ArrayRef;
583
584    fn value(&self, index: usize) -> Self::Item {
585        GenericListArray::value(self, index)
586    }
587
588    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
589        GenericListArray::value(self, index)
590    }
591}
592
593impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
594    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
595        let prefix = OffsetSize::PREFIX;
596
597        write!(f, "{prefix}ListArray\n[\n")?;
598        print_long_array(self, f, |array, index, f| {
599            std::fmt::Debug::fmt(&array.value(index), f)
600        })?;
601        write!(f, "]")
602    }
603}
604
/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
///
/// Its data type is [`DataType::List`].
///
/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
pub type ListArray = GenericListArray<i32>;
609
/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
///
/// Its data type is [`DataType::LargeList`].
///
/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
pub type LargeListArray = GenericListArray<i64>;
614
615#[cfg(test)]
616mod tests {
617    use super::*;
618    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
619    use crate::cast::AsArray;
620    use crate::types::Int32Type;
621    use crate::{Int32Array, Int64Array};
622    use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
623    use arrow_schema::Field;
624
625    fn create_from_buffers() -> ListArray {
626        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
627        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
628        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
629        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
630        ListArray::new(field, offsets, Arc::new(values), None)
631    }
632
633    #[test]
634    fn test_from_iter_primitive() {
635        let data = vec![
636            Some(vec![Some(0), Some(1), Some(2)]),
637            Some(vec![Some(3), Some(4), Some(5)]),
638            Some(vec![Some(6), Some(7)]),
639        ];
640        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
641
642        let another = create_from_buffers();
643        assert_eq!(list_array, another)
644    }
645
646    #[test]
647    fn test_empty_list_array() {
648        // Construct an empty value array
649        let value_data = ArrayData::builder(DataType::Int32)
650            .len(0)
651            .add_buffer(Buffer::from([]))
652            .build()
653            .unwrap();
654
655        // Construct an empty offset buffer
656        let value_offsets = Buffer::from([]);
657
658        // Construct a list array from the above two
659        let list_data_type =
660            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
661        let list_data = ArrayData::builder(list_data_type)
662            .len(0)
663            .add_buffer(value_offsets)
664            .add_child_data(value_data)
665            .build()
666            .unwrap();
667
668        let list_array = ListArray::from(list_data);
669        assert_eq!(list_array.len(), 0)
670    }
671
672    #[test]
673    fn test_list_array() {
674        // Construct a value array
675        let value_data = ArrayData::builder(DataType::Int32)
676            .len(8)
677            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
678            .build()
679            .unwrap();
680
681        // Construct a buffer for value offsets, for the nested array:
682        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
683        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
684
685        // Construct a list array from the above two
686        let list_data_type =
687            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
688        let list_data = ArrayData::builder(list_data_type.clone())
689            .len(3)
690            .add_buffer(value_offsets.clone())
691            .add_child_data(value_data.clone())
692            .build()
693            .unwrap();
694        let list_array = ListArray::from(list_data);
695
696        let values = list_array.values();
697        assert_eq!(value_data, values.to_data());
698        assert_eq!(DataType::Int32, list_array.value_type());
699        assert_eq!(3, list_array.len());
700        assert_eq!(0, list_array.null_count());
701        assert_eq!(6, list_array.value_offsets()[2]);
702        assert_eq!(2, list_array.value_length(2));
703        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
704        assert_eq!(
705            0,
706            unsafe { list_array.value_unchecked(0) }
707                .as_primitive::<Int32Type>()
708                .value(0)
709        );
710        for i in 0..3 {
711            assert!(list_array.is_valid(i));
712            assert!(!list_array.is_null(i));
713        }
714
715        // Now test with a non-zero offset (skip first element)
716        //  [[3, 4, 5], [6, 7]]
717        let list_data = ArrayData::builder(list_data_type)
718            .len(2)
719            .offset(1)
720            .add_buffer(value_offsets)
721            .add_child_data(value_data.clone())
722            .build()
723            .unwrap();
724        let list_array = ListArray::from(list_data);
725
726        let values = list_array.values();
727        assert_eq!(value_data, values.to_data());
728        assert_eq!(DataType::Int32, list_array.value_type());
729        assert_eq!(2, list_array.len());
730        assert_eq!(0, list_array.null_count());
731        assert_eq!(6, list_array.value_offsets()[1]);
732        assert_eq!(2, list_array.value_length(1));
733        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
734        assert_eq!(
735            3,
736            unsafe { list_array.value_unchecked(0) }
737                .as_primitive::<Int32Type>()
738                .value(0)
739        );
740    }
741
742    #[test]
743    fn test_large_list_array() {
744        // Construct a value array
745        let value_data = ArrayData::builder(DataType::Int32)
746            .len(8)
747            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
748            .build()
749            .unwrap();
750
751        // Construct a buffer for value offsets, for the nested array:
752        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
753        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
754
755        // Construct a list array from the above two
756        let list_data_type = DataType::new_large_list(DataType::Int32, false);
757        let list_data = ArrayData::builder(list_data_type.clone())
758            .len(3)
759            .add_buffer(value_offsets.clone())
760            .add_child_data(value_data.clone())
761            .build()
762            .unwrap();
763        let list_array = LargeListArray::from(list_data);
764
765        let values = list_array.values();
766        assert_eq!(value_data, values.to_data());
767        assert_eq!(DataType::Int32, list_array.value_type());
768        assert_eq!(3, list_array.len());
769        assert_eq!(0, list_array.null_count());
770        assert_eq!(6, list_array.value_offsets()[2]);
771        assert_eq!(2, list_array.value_length(2));
772        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
773        assert_eq!(
774            0,
775            unsafe { list_array.value_unchecked(0) }
776                .as_primitive::<Int32Type>()
777                .value(0)
778        );
779        for i in 0..3 {
780            assert!(list_array.is_valid(i));
781            assert!(!list_array.is_null(i));
782        }
783
784        // Now test with a non-zero offset
785        //  [[3, 4, 5], [6, 7]]
786        let list_data = ArrayData::builder(list_data_type)
787            .len(2)
788            .offset(1)
789            .add_buffer(value_offsets)
790            .add_child_data(value_data.clone())
791            .build()
792            .unwrap();
793        let list_array = LargeListArray::from(list_data);
794
795        let values = list_array.values();
796        assert_eq!(value_data, values.to_data());
797        assert_eq!(DataType::Int32, list_array.value_type());
798        assert_eq!(2, list_array.len());
799        assert_eq!(0, list_array.null_count());
800        assert_eq!(6, list_array.value_offsets()[1]);
801        assert_eq!(2, list_array.value_length(1));
802        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
803        assert_eq!(
804            3,
805            unsafe { list_array.value_unchecked(0) }
806                .as_primitive::<Int32Type>()
807                .value(0)
808        );
809    }
810
#[test]
fn test_list_array_slice() {
    // Slicing a nullable `ListArray` must keep validity, offsets and lengths consistent.

    // Child values 0..=9 backing every list slot.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Offsets for the nested array:
    //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
    let offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap 01011001 00000001: only bits 0, 3, 4, 6 and 8 are set.
    let mut null_bits: [u8; 2] = [0; 2];
    for slot in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, slot);
    }

    // Assemble the list array from the offsets, the child values and the bitmap.
    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list = ListArray::from(
        ArrayData::builder(list_type)
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child_data.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );

    assert_eq!(child_data, list.values().to_data());
    assert_eq!(DataType::Int32, list.value_type());
    assert_eq!(9, list.len());
    assert_eq!(4, list.null_count());
    assert_eq!(2, list.value_offsets()[3]);
    assert_eq!(2, list.value_length(3));

    // Keep six slots starting at slot 1:
    //  [null, null, [2, 3], [4, 5], null, [6, 7, 8]]
    let sliced = list.slice(1, 6);
    assert_eq!(6, sliced.len());
    assert_eq!(3, sliced.null_count());

    // Slot `i` of the slice corresponds to bit `1 + i` of the original bitmap.
    for i in 0..sliced.len() {
        let expect_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expect_valid {
            assert!(sliced.is_valid(i));
        } else {
            assert!(sliced.is_null(i));
        }
    }

    // (slot, expected start offset, expected length) for each non-null slot of the slice.
    let sliced_list = sliced.as_any().downcast_ref::<ListArray>().unwrap();
    for (slot, start, length) in [(2_usize, 2, 2), (3, 4, 2), (5, 6, 3)] {
        assert_eq!(start, sliced_list.value_offsets()[slot]);
        assert_eq!(length, sliced_list.value_length(slot));
    }
}
872
#[test]
fn test_large_list_array_slice() {
    // Same scenario as the `ListArray` slice test, using 64-bit offsets.

    // Child values 0..=9 backing every list slot.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Offsets for the nested array:
    //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap 01011001 00000001: only bits 0, 3, 4, 6 and 8 are set.
    let mut null_bits: [u8; 2] = [0; 2];
    for slot in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, slot);
    }

    // Assemble the large list array from the offsets, the child values and the bitmap.
    let list = LargeListArray::from(
        ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child_data.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );

    assert_eq!(child_data, list.values().to_data());
    assert_eq!(DataType::Int32, list.value_type());
    assert_eq!(9, list.len());
    assert_eq!(4, list.null_count());
    assert_eq!(2, list.value_offsets()[3]);
    assert_eq!(2, list.value_length(3));

    // Keep six slots starting at slot 1:
    //  [null, null, [2, 3], [4, 5], null, [6, 7, 8]]
    let sliced = list.slice(1, 6);
    assert_eq!(6, sliced.len());
    assert_eq!(3, sliced.null_count());

    // Slot `i` of the slice corresponds to bit `1 + i` of the original bitmap.
    for i in 0..sliced.len() {
        let expect_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expect_valid {
            assert!(sliced.is_valid(i));
        } else {
            assert!(sliced.is_null(i));
        }
    }

    // (slot, expected start offset, expected length) for each non-null slot of the slice.
    let sliced_list = sliced
        .as_any()
        .downcast_ref::<LargeListArray>()
        .unwrap();
    for (slot, start, length) in [(2_usize, 2, 2), (3, 4, 2), (5, 6, 3)] {
        assert_eq!(start, sliced_list.value_offsets()[slot]);
        assert_eq!(length, sliced_list.value_length(slot));
    }
}
936
#[test]
#[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
fn test_list_array_index_out_of_bound() {
    // Reading a slot past the end of a nine-slot list array must panic.

    // Child values 0..=9 backing every list slot.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Offsets (ten entries) for the nested array:
    //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap 01011001 00000001: only bits 0, 3, 4, 6 and 8 are set.
    let mut null_bits: [u8; 2] = [0; 2];
    for slot in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, slot);
    }

    // Assemble the large list array from the offsets, the child values and the bitmap.
    let list = LargeListArray::from(
        ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child_data)
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );
    assert_eq!(9, list.len());

    // Slot 10 does not exist; the expected panic reports index 11 against a length
    // of 10, which matches the ten-entry offsets buffer built above.
    list.value(10);
}
#[test]
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
    // Converting list data that carries no offsets buffer must panic.

    // Built without validation; the declared length of 8 matches the eight values
    // in the single buffer.
    let child_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build_unchecked()
    };

    // Validation is skipped on purpose: no buffer is added, and the data is only
    // handed to `ListArray::from`, which is expected to panic.
    let malformed = unsafe {
        ArrayData::builder(DataType::List(Arc::new(Field::new_list_field(
            DataType::Int32,
            false,
        ))))
        .len(3)
        .add_child_data(child_data)
        .build_unchecked()
    };
    drop(ListArray::from(malformed));
}
994
#[test]
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
    // Converting list data that carries no child array must panic.
    let offsets = Buffer::from_slice_ref([0, 2, 5, 7]);

    // Validation is skipped on purpose: no child data is added, and the data is
    // only handed to `ListArray::from`, which is expected to panic.
    let malformed = unsafe {
        ArrayData::builder(DataType::List(Arc::new(Field::new_list_field(
            DataType::Int32,
            false,
        ))))
        .len(3)
        .add_buffer(offsets)
        .build_unchecked()
    };
    drop(ListArray::from(malformed));
}
1012
#[test]
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
fn test_from_array_data_validation() {
    // Build the single-slot list [[1]] with the regular list builder.
    let mut list_builder = ListBuilder::new(Int32Builder::new());
    list_builder.values().append_value(1);
    list_builder.append(true);
    let list_data = list_builder.finish().into_data();

    // Converting that data into a `LargeListArray` must be rejected.
    let _ = LargeListArray::from(list_data);
}
1022
#[test]
fn test_list_array_offsets_need_not_start_at_zero() {
    // A list array whose first offset is 2 rather than 0 still builds and reports
    // per-slot lengths from consecutive offset differences.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list = ListArray::from(
        ArrayData::builder(list_type)
            .len(3)
            .add_buffer(Buffer::from_slice_ref([2, 2, 5, 7]))
            .add_child_data(child_data)
            .build()
            .unwrap(),
    );

    // (slot, expected length): 2 - 2, 5 - 2 and 7 - 5.
    for (slot, expected_len) in [(0, 0), (1, 3), (2, 2)] {
        assert_eq!(list.value_length(slot), expected_len);
    }
}
1047
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_alignment() {
    // Slice one byte into a `u64`-backed buffer to use as the Int32 values buffer.
    let shifted = Buffer::from_slice_ref([0_u64]).slice(1);

    // Validation is skipped on purpose; the data is only handed to
    // `Int32Array::from`, which is expected to panic.
    let shifted_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(shifted)
            .build_unchecked()
    };
    drop(Int32Array::from(shifted_data));
}
1063
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
    // Slice one byte into a `u64`-backed buffer to use as the list offsets buffer.
    let shifted_offsets = Buffer::from_slice_ref([0_u64]).slice(1);

    // Child data: eight zeroed i32 values, built without validation.
    let child_data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(Buffer::from_slice_ref([0_i32; 8]))
            .build_unchecked()
    };

    // Validation is skipped on purpose; the data is only handed to
    // `ListArray::from`, which is expected to panic.
    let shifted_list = unsafe {
        ArrayData::builder(DataType::List(Arc::new(Field::new_list_field(
            DataType::Int32,
            false,
        ))))
        .add_buffer(shifted_offsets)
        .add_child_data(child_data)
        .build_unchecked()
    };
    drop(ListArray::from(shifted_list));
}
1090
#[test]
fn list_array_equality() {
    // Nested optional rows from which both list flavours are built.
    type Rows = Vec<Option<Vec<Option<i32>>>>;

    // Builds both sides first as `ListArray`, then as `LargeListArray`, and checks
    // that `==` agrees with `should_equal` in each case.
    fn do_comparison(lhs_data: Rows, rhs_data: Rows, should_equal: bool) {
        let small_lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
        let small_rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
        assert_eq!(small_lhs == small_rhs, should_equal);

        let large_lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
        let large_rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
        assert_eq!(large_lhs == large_rhs, should_equal);
    }

    // Baseline rows.
    let reference: Rows = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    // Baseline with the first row replaced by null.
    let nulled_head: Rows = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    // Like `nulled_head`, but with different values in the last row.
    let nulled_head_zero_tail: Rows = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(0), Some(0)]),
    ];

    // Identical contents compare equal.
    do_comparison(reference.clone(), reference.clone(), true);

    // A null row versus a populated row differs.
    do_comparison(nulled_head.clone(), reference, false);

    // Same null layout, different values in the last row.
    do_comparison(nulled_head, nulled_head_zero_tail, false);

    // Single differing value after leading nulls.
    do_comparison(
        vec![None, None, Some(vec![Some(1)])],
        vec![None, None, Some(vec![Some(2)])],
        false,
    );
}
1162
#[test]
fn test_empty_offsets() {
    // List data built with an empty offsets buffer must still yield an empty array
    // that exposes a single zero offset, for both 32-bit and 64-bit offsets.
    let f = Arc::new(Field::new("element", DataType::Int32, true));

    let list = ListArray::from(
        ArrayData::builder(DataType::List(f.clone()))
            .buffers(vec![Buffer::from(&[])])
            .add_child_data(ArrayData::new_empty(&DataType::Int32))
            .build()
            .unwrap(),
    );
    // Check the length here as well, mirroring the `LargeListArray` case below.
    assert_eq!(list.len(), 0);
    assert_eq!(list.value_offsets(), &[0]);

    let large_list = LargeListArray::from(
        ArrayData::builder(DataType::LargeList(f))
            .buffers(vec![Buffer::from(&[])])
            .add_child_data(ArrayData::new_empty(&DataType::Int32))
            .build()
            .unwrap(),
    );
    assert_eq!(large_list.len(), 0);
    assert_eq!(large_list.value_offsets(), &[0]);
}
1184
#[test]
fn test_try_new() {
    // Unwraps the error of a failed construction and compares its rendered text.
    fn assert_error<T: std::fmt::Debug, E: std::fmt::Display>(
        result: Result<T, E>,
        expected: &str,
    ) {
        assert_eq!(result.unwrap_err().to_string(), expected);
    }

    let int32_values = Arc::new(Int32Array::new(vec![1, 2, 3, 4, 5].into(), None)) as ArrayRef;
    let int32_field = Arc::new(Field::new("element", DataType::Int32, false));

    // Three-slot offsets: accepted without nulls and with a three-entry null buffer.
    let three_slots = OffsetBuffer::new(vec![0, 1, 4, 5].into());
    ListArray::new(
        int32_field.clone(),
        three_slots.clone(),
        int32_values.clone(),
        None,
    );
    ListArray::new(
        int32_field.clone(),
        three_slots,
        int32_values.clone(),
        Some(NullBuffer::new_null(3)),
    );

    // Four-slot offsets paired with a three-entry null buffer.
    let four_slots = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
    assert_error(
        LargeListArray::try_new(
            int32_field,
            four_slots.clone(),
            int32_values.clone(),
            Some(NullBuffer::new_null(3)),
        ),
        "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3",
    );

    // The field declares Int64 while the values are Int32.
    let int64_field = Arc::new(Field::new("element", DataType::Int64, false));
    assert_error(
        LargeListArray::try_new(
            int64_field.clone(),
            four_slots.clone(),
            int32_values.clone(),
            None,
        ),
        "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\"",
    );

    // All-null Int64 values under a non-nullable field.
    let null_int64_values = Arc::new(Int64Array::new(
        vec![0; 7].into(),
        Some(NullBuffer::new_null(7)),
    ));
    assert_error(
        LargeListArray::try_new(
            int64_field,
            four_slots.clone(),
            null_int64_values.clone(),
            None,
        ),
        "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls",
    );

    // The same values are accepted once the field is nullable.
    let nullable_int64_field = Arc::new(Field::new("element", DataType::Int64, true));
    LargeListArray::new(
        nullable_int64_field.clone(),
        four_slots.clone(),
        null_int64_values,
        None,
    );

    // Only two values are available while the last offset is 5.
    let short_values = Int64Array::new(vec![0; 2].into(), None);
    assert_error(
        LargeListArray::try_new(nullable_int64_field, four_slots, Arc::new(short_values), None),
        "Invalid argument error: Max offset of 5 exceeds length of values 2",
    );
}
1239
#[test]
fn test_from_fixed_size_list() {
    // Converting a fixed-size list (width 3) into a `ListArray` keeps the values
    // of valid slots and the null slot.
    let mut fixed_builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);

    // (child values, slot validity) appended in order.
    for (triple, is_valid) in [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)] {
        fixed_builder.values().append_slice(&triple);
        fixed_builder.append(is_valid);
    }
    let list: ListArray = fixed_builder.finish().into();

    // Materialise every slot as an optional vector of its Int32 values.
    let rows: Vec<_> = list
        .iter()
        .map(|slot| slot.map(|child| child.as_primitive::<Int32Type>().values().to_vec()))
        .collect();
    assert_eq!(rows, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
}
1257
#[test]
fn test_nullable_union() {
    // A dense union array can serve as the values of a list whose element field
    // is declared non-nullable; construction must not panic.
    let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());

    // (union field name, Int32 value) appended in order.
    let mut union_builder = UnionBuilder::new_dense();
    for (type_name, value) in [("a", 1), ("b", 2), ("b", 3), ("a", 4), ("a", 5)] {
        union_builder.append::<Int32Type>(type_name, value).unwrap();
    }
    let union_values = union_builder.build().unwrap();

    let element_field = Arc::new(Field::new(
        "element",
        union_values.data_type().clone(),
        false,
    ));
    ListArray::new(element_field.clone(), offsets, Arc::new(union_values), None);
}
1271}