arrow_array/array/
list_view_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_buffer::{NullBuffer, ScalarBuffer};
19use arrow_data::{ArrayData, ArrayDataBuilder};
20use arrow_schema::{ArrowError, DataType, FieldRef};
21use std::any::Any;
22use std::ops::Add;
23use std::sync::Arc;
24
25use crate::array::{make_array, print_long_array};
26use crate::iterator::GenericListViewArrayIter;
27use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};
28
29/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
30pub type ListViewArray = GenericListViewArray<i32>;
31
32/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i64`.
33pub type LargeListViewArray = GenericListViewArray<i64>;
34
35/// An array of [variable length lists], specifically in the [list-view layout].
36///
37/// Differs from [`GenericListArray`] (which represents the [list layout]) in that
38/// the sizes of the child arrays are explicitly encoded in a separate buffer, instead
39/// of being derived from the difference between subsequent offsets in the offset buffer.
40///
41/// This allows the offsets (and subsequently child data) to be out of order. It also
42/// allows take / filter operations to be implemented without copying the underlying data.
43///
44/// # Representation
45///
46/// Given the same example array from [`GenericListArray`], it would be represented
47/// as such via a list-view layout array:
48///
49/// ```text
50///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
51///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
52///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
53///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │   │ 3 │     │ │ 1 │ │ A │ │ 0  │
54///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
55///  │      []     │  │ (3,0) │                   │ 1 │   │ 3 │   │ 0 │     │ │ 1 │ │ B │ │ 1  │
56///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
57///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ C │ │ 2  │
58///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
59///  │     [D]     │  │ (4,1) │                   │ 1 │   │ 4 │   │ 1 │     │ │ ? │ │ ? │ │ 3  │
60///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
61///  │  [NULL, F]  │  │ (5,2) │                   │ 1 │   │ 5 │   │ 2 │     │ │ 1 │ │ D │ │ 4  │
62///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
63///                                                                         │ │ 0 │ │ ? │ │ 5  │
64///     Logical       Logical               │  Validity  Offsets  Sizes       ├───┤ ├───┤
65///      Values       Offset                   (nulls)                      │ │ 1 │ │ F │ │ 6  │
66///                   & Size                │                                 └───┘ └───┘
67///                                                                         │    Values   │    │
68///                 (offsets[i],            │   ListViewArray                   (Array)
69///                  sizes[i])                                              └ ─ ─ ─ ─ ─ ─ ┘    │
70///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
71/// ```
72///
73/// Another way of representing the same array but taking advantage of the offsets being out of order:
74///
75/// ```text
76///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
77///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
78///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
79///  │   [A,B,C]   │  │ (2,3) │                   │ 1 │   │ 2 │   │ 3 │     │ │ 0 │ │ ? │ │ 0  │
80///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
81///  │      []     │  │ (0,0) │                   │ 1 │   │ 0 │   │ 0 │     │ │ 1 │ │ F │ │ 1  │
82///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
83///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ A │ │ 2  │
84///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
85///  │     [D]     │  │ (5,1) │                   │ 1 │   │ 5 │   │ 1 │     │ │ 1 │ │ B │ │ 3  │
86///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
87///  │  [NULL, F]  │  │ (0,2) │                   │ 1 │   │ 0 │   │ 2 │     │ │ 1 │ │ C │ │ 4  │
88///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
89///                                                                         │ │ 1 │ │ D │ │ 5  │
90///     Logical       Logical               │  Validity  Offsets  Sizes       └───┘ └───┘
91///      Values       Offset                   (nulls)                      │    Values   │    │
92///                   & Size                │                                   (Array)  
93///                                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
94///                 (offsets[i],            │   ListViewArray                          
95///                  sizes[i])                                                                 │
96///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
97/// ```
98///
99/// [`GenericListArray`]: crate::array::GenericListArray
100/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
101/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
102/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
103#[derive(Clone)]
104pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
105    data_type: DataType,
106    nulls: Option<NullBuffer>,
107    values: ArrayRef,
108    // Unlike GenericListArray, we do not use OffsetBuffer here as offsets are not
109    // guaranteed to be monotonically increasing.
110    value_offsets: ScalarBuffer<OffsetSize>,
111    value_sizes: ScalarBuffer<OffsetSize>,
112}
113
114impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
115    /// The data type constructor of listview array.
116    /// The input is the schema of the child array and
117    /// the output is the [`DataType`], ListView or LargeListView.
118    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
119        DataType::LargeListView
120    } else {
121        DataType::ListView
122    };
123
124    /// Create a new [`GenericListViewArray`] from the provided parts
125    ///
126    /// # Errors
127    ///
128    /// Errors if
129    ///
130    /// * `offsets.len() != sizes.len()`
131    /// * `offsets.len() != nulls.len()`
132    /// * `offsets[i] > values.len()`
133    /// * `!field.is_nullable() && values.is_nullable()`
134    /// * `field.data_type() != values.data_type()`
135    /// * `0 <= offsets[i] <= length of the child array`
136    /// * `0 <= offsets[i] + size[i] <= length of the child array`
137    pub fn try_new(
138        field: FieldRef,
139        offsets: ScalarBuffer<OffsetSize>,
140        sizes: ScalarBuffer<OffsetSize>,
141        values: ArrayRef,
142        nulls: Option<NullBuffer>,
143    ) -> Result<Self, ArrowError> {
144        let len = offsets.len();
145        if let Some(n) = nulls.as_ref() {
146            if n.len() != len {
147                return Err(ArrowError::InvalidArgumentError(format!(
148                    "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}",
149                    OffsetSize::PREFIX,
150                    n.len(),
151                )));
152            }
153        }
154        if len != sizes.len() {
155            return Err(ArrowError::InvalidArgumentError(format!(
156                "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
157                OffsetSize::PREFIX, sizes.len()
158            )));
159        }
160
161        for (offset, size) in offsets.iter().zip(sizes.iter()) {
162            let offset = offset.as_usize();
163            let size = size.as_usize();
164            if offset.checked_add(size).ok_or_else(|| {
165                ArrowError::InvalidArgumentError(format!(
166                    "Overflow in offset + size for {}ListViewArray",
167                    OffsetSize::PREFIX
168                ))
169            })? > values.len()
170            {
171                return Err(ArrowError::InvalidArgumentError(format!(
172                    "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}",
173                    OffsetSize::PREFIX,
174                    values.len()
175                )));
176            }
177        }
178
179        if !field.is_nullable() && values.is_nullable() {
180            return Err(ArrowError::InvalidArgumentError(format!(
181                "Non-nullable field of {}ListViewArray {:?} cannot contain nulls",
182                OffsetSize::PREFIX,
183                field.name()
184            )));
185        }
186
187        if field.data_type() != values.data_type() {
188            return Err(ArrowError::InvalidArgumentError(format!(
189                "{}ListViewArray expected data type {} got {} for {:?}",
190                OffsetSize::PREFIX,
191                field.data_type(),
192                values.data_type(),
193                field.name()
194            )));
195        }
196
197        Ok(Self {
198            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
199            nulls,
200            values,
201            value_offsets: offsets,
202            value_sizes: sizes,
203        })
204    }
205
206    /// Create a new [`GenericListViewArray`] from the provided parts
207    ///
208    /// # Panics
209    ///
210    /// Panics if [`Self::try_new`] returns an error
211    pub fn new(
212        field: FieldRef,
213        offsets: ScalarBuffer<OffsetSize>,
214        sizes: ScalarBuffer<OffsetSize>,
215        values: ArrayRef,
216        nulls: Option<NullBuffer>,
217    ) -> Self {
218        Self::try_new(field, offsets, sizes, values, nulls).unwrap()
219    }
220
221    /// Create a new [`GenericListViewArray`] of length `len` where all values are null
222    pub fn new_null(field: FieldRef, len: usize) -> Self {
223        let values = new_empty_array(field.data_type());
224        Self {
225            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
226            nulls: Some(NullBuffer::new_null(len)),
227            value_offsets: ScalarBuffer::from(vec![]),
228            value_sizes: ScalarBuffer::from(vec![]),
229            values,
230        }
231    }
232
233    /// Deconstruct this array into its constituent parts
234    pub fn into_parts(
235        self,
236    ) -> (
237        FieldRef,
238        ScalarBuffer<OffsetSize>,
239        ScalarBuffer<OffsetSize>,
240        ArrayRef,
241        Option<NullBuffer>,
242    ) {
243        let f = match self.data_type {
244            DataType::ListView(f) | DataType::LargeListView(f) => f,
245            _ => unreachable!(),
246        };
247        (
248            f,
249            self.value_offsets,
250            self.value_sizes,
251            self.values,
252            self.nulls,
253        )
254    }
255
256    /// Returns a reference to the offsets of this list
257    ///
258    /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`]
259    /// allowing for zero-copy cloning
260    #[inline]
261    pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
262        &self.value_offsets
263    }
264
265    /// Returns a reference to the values of this list
266    #[inline]
267    pub fn values(&self) -> &ArrayRef {
268        &self.values
269    }
270
271    /// Returns a reference to the sizes of this list
272    ///
273    /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`]
274    /// allowing for zero-copy cloning
275    #[inline]
276    pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
277        &self.value_sizes
278    }
279
280    /// Returns a clone of the value type of this list.
281    pub fn value_type(&self) -> DataType {
282        self.values.data_type().clone()
283    }
284
285    /// Returns ith value of this list view array.
286    /// # Safety
287    /// Caller must ensure that the index is within the array bounds
288    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
289        let offset = self.value_offsets().get_unchecked(i).as_usize();
290        let length = self.value_sizes().get_unchecked(i).as_usize();
291        self.values.slice(offset, length)
292    }
293
294    /// Returns ith value of this list view array.
295    /// # Panics
296    /// Panics if the index is out of bounds
297    pub fn value(&self, i: usize) -> ArrayRef {
298        let offset = self.value_offsets()[i].as_usize();
299        let length = self.value_sizes()[i].as_usize();
300        self.values.slice(offset, length)
301    }
302
303    /// Returns the offset values in the offsets buffer
304    #[inline]
305    pub fn value_offsets(&self) -> &[OffsetSize] {
306        &self.value_offsets
307    }
308
309    /// Returns the sizes values in the offsets buffer
310    #[inline]
311    pub fn value_sizes(&self) -> &[OffsetSize] {
312        &self.value_sizes
313    }
314
315    /// Returns the size for value at index `i`.
316    #[inline]
317    pub fn value_size(&self, i: usize) -> OffsetSize {
318        self.value_sizes[i]
319    }
320
321    /// Returns the offset for value at index `i`.
322    pub fn value_offset(&self, i: usize) -> OffsetSize {
323        self.value_offsets[i]
324    }
325
326    /// Constructs a new iterator
327    pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> {
328        GenericListViewArrayIter::<'_, OffsetSize>::new(self)
329    }
330
331    #[inline]
332    fn get_type(data_type: &DataType) -> Option<&DataType> {
333        match (OffsetSize::IS_LARGE, data_type) {
334            (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => {
335                Some(child.data_type())
336            }
337            _ => None,
338        }
339    }
340
341    /// Returns a zero-copy slice of this array with the indicated offset and length.
342    pub fn slice(&self, offset: usize, length: usize) -> Self {
343        Self {
344            data_type: self.data_type.clone(),
345            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
346            values: self.values.clone(),
347            value_offsets: self.value_offsets.slice(offset, length),
348            value_sizes: self.value_sizes.slice(offset, length),
349        }
350    }
351}
352
353impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
354    type Item = ArrayRef;
355
356    fn value(&self, index: usize) -> Self::Item {
357        GenericListViewArray::value(self, index)
358    }
359
360    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
361        GenericListViewArray::value_unchecked(self, index)
362    }
363}
364
365impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
366    fn as_any(&self) -> &dyn Any {
367        self
368    }
369
370    fn to_data(&self) -> ArrayData {
371        self.clone().into()
372    }
373
374    fn into_data(self) -> ArrayData {
375        self.into()
376    }
377
378    fn data_type(&self) -> &DataType {
379        &self.data_type
380    }
381
382    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
383        Arc::new(self.slice(offset, length))
384    }
385
386    fn len(&self) -> usize {
387        self.sizes().len()
388    }
389
390    fn is_empty(&self) -> bool {
391        self.value_sizes.is_empty()
392    }
393
394    fn shrink_to_fit(&mut self) {
395        if let Some(nulls) = &mut self.nulls {
396            nulls.shrink_to_fit();
397        }
398        self.values.shrink_to_fit();
399        self.value_offsets.shrink_to_fit();
400        self.value_sizes.shrink_to_fit();
401    }
402
403    fn offset(&self) -> usize {
404        0
405    }
406
407    fn nulls(&self) -> Option<&NullBuffer> {
408        self.nulls.as_ref()
409    }
410
411    fn logical_null_count(&self) -> usize {
412        // More efficient that the default implementation
413        self.null_count()
414    }
415
416    fn get_buffer_memory_size(&self) -> usize {
417        let mut size = self.values.get_buffer_memory_size();
418        size += self.value_offsets.inner().capacity();
419        size += self.value_sizes.inner().capacity();
420        if let Some(n) = self.nulls.as_ref() {
421            size += n.buffer().capacity();
422        }
423        size
424    }
425
426    fn get_array_memory_size(&self) -> usize {
427        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
428        size += self.value_offsets.inner().capacity();
429        size += self.value_sizes.inner().capacity();
430        if let Some(n) = self.nulls.as_ref() {
431            size += n.buffer().capacity();
432        }
433        size
434    }
435}
436
437impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> {
438    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
439        let prefix = OffsetSize::PREFIX;
440        write!(f, "{prefix}ListViewArray\n[\n")?;
441        print_long_array(self, f, |array, index, f| {
442            std::fmt::Debug::fmt(&array.value(index), f)
443        })?;
444        write!(f, "]")
445    }
446}
447
448impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
449    fn from(array: GenericListViewArray<OffsetSize>) -> Self {
450        let len = array.len();
451        let builder = ArrayDataBuilder::new(array.data_type)
452            .len(len)
453            .nulls(array.nulls)
454            .buffers(vec![
455                array.value_offsets.into_inner(),
456                array.value_sizes.into_inner(),
457            ])
458            .child_data(vec![array.values.to_data()]);
459
460        unsafe { builder.build_unchecked() }
461    }
462}
463
464impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> {
465    fn from(data: ArrayData) -> Self {
466        Self::try_new_from_array_data(data)
467            .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed")
468    }
469}
470
471impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> {
472    fn from(value: FixedSizeListArray) -> Self {
473        let (field, size) = match value.data_type() {
474            DataType::FixedSizeList(f, size) => (f, *size as usize),
475            _ => unreachable!(),
476        };
477        let mut acc = 0_usize;
478        let iter = std::iter::repeat(size).take(value.len());
479        let mut sizes = Vec::with_capacity(iter.size_hint().0);
480        let mut offsets = Vec::with_capacity(iter.size_hint().0);
481
482        for size in iter {
483            offsets.push(OffsetSize::usize_as(acc));
484            acc = acc.add(size);
485            sizes.push(OffsetSize::usize_as(size));
486        }
487        let sizes = ScalarBuffer::from(sizes);
488        let offsets = ScalarBuffer::from(offsets);
489        Self {
490            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
491            nulls: value.nulls().cloned(),
492            values: value.values().clone(),
493            value_offsets: offsets,
494            value_sizes: sizes,
495        }
496    }
497}
498
499impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
500    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
501        if data.buffers().len() != 2 {
502            return Err(ArrowError::InvalidArgumentError(format!(
503                "ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
504                data.buffers().len()
505            )));
506        }
507
508        if data.child_data().len() != 1 {
509            return Err(ArrowError::InvalidArgumentError(format!(
510                "ListViewArray should contain a single child array (values array), had {}",
511                data.child_data().len()
512            )));
513        }
514
515        let values = data.child_data()[0].clone();
516
517        if let Some(child_data_type) = Self::get_type(data.data_type()) {
518            if values.data_type() != child_data_type {
519                return Err(ArrowError::InvalidArgumentError(format!(
520                    "{}ListViewArray's child datatype {:?} does not \
521                             correspond to the List's datatype {:?}",
522                    OffsetSize::PREFIX,
523                    values.data_type(),
524                    child_data_type
525                )));
526            }
527        } else {
528            return Err(ArrowError::InvalidArgumentError(format!(
529                "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
530                OffsetSize::PREFIX,
531                OffsetSize::PREFIX,
532                data.data_type()
533            )));
534        }
535
536        let values = make_array(values);
537        // ArrayData is valid, and verified type above
538        let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
539        let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
540
541        Ok(Self {
542            data_type: data.data_type().clone(),
543            nulls: data.nulls().cloned(),
544            values,
545            value_offsets,
546            value_sizes,
547        })
548    }
549}
550
551#[cfg(test)]
552mod tests {
553    use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer};
554    use arrow_schema::Field;
555
556    use crate::builder::{FixedSizeListBuilder, Int32Builder};
557    use crate::cast::AsArray;
558    use crate::types::Int32Type;
559    use crate::{Int32Array, Int64Array};
560
561    use super::*;
562
563    #[test]
564    fn test_empty_list_view_array() {
565        // Construct an empty value array
566        let vec: Vec<i32> = vec![];
567        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
568        let sizes = ScalarBuffer::from(vec![]);
569        let offsets = ScalarBuffer::from(vec![]);
570        let values = Int32Array::from(vec);
571        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
572
573        assert_eq!(list_array.len(), 0)
574    }
575
576    #[test]
577    fn test_list_view_array() {
578        // Construct a value array
579        let value_data = ArrayData::builder(DataType::Int32)
580            .len(8)
581            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
582            .build()
583            .unwrap();
584
585        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
586        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
587        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
588        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
589        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
590
591        let values = list_array.values();
592        assert_eq!(value_data, values.to_data());
593        assert_eq!(DataType::Int32, list_array.value_type());
594        assert_eq!(3, list_array.len());
595        assert_eq!(0, list_array.null_count());
596        assert_eq!(6, list_array.value_offsets()[2]);
597        assert_eq!(2, list_array.value_sizes()[2]);
598        assert_eq!(2, list_array.value_size(2));
599        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
600        assert_eq!(
601            0,
602            unsafe { list_array.value_unchecked(0) }
603                .as_primitive::<Int32Type>()
604                .value(0)
605        );
606        for i in 0..3 {
607            assert!(list_array.is_valid(i));
608            assert!(!list_array.is_null(i));
609        }
610    }
611
612    #[test]
613    fn test_large_list_view_array() {
614        // Construct a value array
615        let value_data = ArrayData::builder(DataType::Int32)
616            .len(8)
617            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
618            .build()
619            .unwrap();
620
621        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
622        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
623        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
624        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
625        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
626
627        let values = list_array.values();
628        assert_eq!(value_data, values.to_data());
629        assert_eq!(DataType::Int32, list_array.value_type());
630        assert_eq!(3, list_array.len());
631        assert_eq!(0, list_array.null_count());
632        assert_eq!(6, list_array.value_offsets()[2]);
633        assert_eq!(2, list_array.value_sizes()[2]);
634        assert_eq!(2, list_array.value_size(2));
635        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
636        assert_eq!(
637            0,
638            unsafe { list_array.value_unchecked(0) }
639                .as_primitive::<Int32Type>()
640                .value(0)
641        );
642        for i in 0..3 {
643            assert!(list_array.is_valid(i));
644            assert!(!list_array.is_null(i));
645        }
646    }
647
648    #[test]
649    fn test_list_view_array_slice() {
650        // Construct a value array
651        let value_data = ArrayData::builder(DataType::Int32)
652            .len(10)
653            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
654            .build()
655            .unwrap();
656
657        // 01011001 00000001
658        let mut null_bits: [u8; 2] = [0; 2];
659        bit_util::set_bit(&mut null_bits, 0);
660        bit_util::set_bit(&mut null_bits, 3);
661        bit_util::set_bit(&mut null_bits, 4);
662        bit_util::set_bit(&mut null_bits, 6);
663        bit_util::set_bit(&mut null_bits, 8);
664        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
665        let null_buffer = NullBuffer::new(buffer);
666
667        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
668        let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]);
669        let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]);
670        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
671        let list_array =
672            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
673
674        let values = list_array.values();
675        assert_eq!(value_data, values.to_data());
676        assert_eq!(DataType::Int32, list_array.value_type());
677        assert_eq!(9, list_array.len());
678        assert_eq!(4, list_array.null_count());
679        assert_eq!(2, list_array.value_offsets()[3]);
680        assert_eq!(2, list_array.value_sizes()[3]);
681        assert_eq!(2, list_array.value_size(3));
682
683        let sliced_array = list_array.slice(1, 6);
684        assert_eq!(6, sliced_array.len());
685        assert_eq!(3, sliced_array.null_count());
686
687        for i in 0..sliced_array.len() {
688            if bit_util::get_bit(&null_bits, 1 + i) {
689                assert!(sliced_array.is_valid(i));
690            } else {
691                assert!(sliced_array.is_null(i));
692            }
693        }
694
695        // Check offset and length for each non-null value.
696        let sliced_list_array = sliced_array
697            .as_any()
698            .downcast_ref::<ListViewArray>()
699            .unwrap();
700        assert_eq!(2, sliced_list_array.value_offsets()[2]);
701        assert_eq!(2, sliced_list_array.value_sizes()[2]);
702        assert_eq!(2, sliced_list_array.value_size(2));
703
704        assert_eq!(4, sliced_list_array.value_offsets()[3]);
705        assert_eq!(2, sliced_list_array.value_sizes()[3]);
706        assert_eq!(2, sliced_list_array.value_size(3));
707
708        assert_eq!(6, sliced_list_array.value_offsets()[5]);
709        assert_eq!(3, sliced_list_array.value_sizes()[5]);
710        assert_eq!(3, sliced_list_array.value_size(5));
711    }
712
713    #[test]
714    fn test_large_list_view_array_slice() {
715        // Construct a value array
716        let value_data = ArrayData::builder(DataType::Int32)
717            .len(10)
718            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
719            .build()
720            .unwrap();
721
722        // 01011001 00000001
723        let mut null_bits: [u8; 2] = [0; 2];
724        bit_util::set_bit(&mut null_bits, 0);
725        bit_util::set_bit(&mut null_bits, 3);
726        bit_util::set_bit(&mut null_bits, 4);
727        bit_util::set_bit(&mut null_bits, 6);
728        bit_util::set_bit(&mut null_bits, 8);
729        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
730        let null_buffer = NullBuffer::new(buffer);
731
732        // Construct a large list view array from the above two
733        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
734        let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]);
735        let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]);
736        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
737        let list_array =
738            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
739
740        let values = list_array.values();
741        assert_eq!(value_data, values.to_data());
742        assert_eq!(DataType::Int32, list_array.value_type());
743        assert_eq!(9, list_array.len());
744        assert_eq!(4, list_array.null_count());
745        assert_eq!(2, list_array.value_offsets()[3]);
746        assert_eq!(2, list_array.value_sizes()[3]);
747        assert_eq!(2, list_array.value_size(3));
748
749        let sliced_array = list_array.slice(1, 6);
750        assert_eq!(6, sliced_array.len());
751        assert_eq!(3, sliced_array.null_count());
752
753        for i in 0..sliced_array.len() {
754            if bit_util::get_bit(&null_bits, 1 + i) {
755                assert!(sliced_array.is_valid(i));
756            } else {
757                assert!(sliced_array.is_null(i));
758            }
759        }
760
761        // Check offset and length for each non-null value.
762        let sliced_list_array = sliced_array
763            .as_any()
764            .downcast_ref::<LargeListViewArray>()
765            .unwrap();
766        assert_eq!(2, sliced_list_array.value_offsets()[2]);
767        assert_eq!(2, sliced_list_array.value_size(2));
768        assert_eq!(2, sliced_list_array.value_sizes()[2]);
769
770        assert_eq!(4, sliced_list_array.value_offsets()[3]);
771        assert_eq!(2, sliced_list_array.value_size(3));
772        assert_eq!(2, sliced_list_array.value_sizes()[3]);
773
774        assert_eq!(6, sliced_list_array.value_offsets()[5]);
775        assert_eq!(3, sliced_list_array.value_size(5));
776        assert_eq!(2, sliced_list_array.value_sizes()[3]);
777    }
778
779    #[test]
780    #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")]
781    fn test_list_view_array_index_out_of_bound() {
782        // 01011001 00000001
783        let mut null_bits: [u8; 2] = [0; 2];
784        bit_util::set_bit(&mut null_bits, 0);
785        bit_util::set_bit(&mut null_bits, 3);
786        bit_util::set_bit(&mut null_bits, 4);
787        bit_util::set_bit(&mut null_bits, 6);
788        bit_util::set_bit(&mut null_bits, 8);
789        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
790        let null_buffer = NullBuffer::new(buffer);
791
792        // Construct a buffer for value offsets, for the nested array:
793        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
794        // Construct a list array from the above two
795        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
796        let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]);
797        let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]);
798        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
799        let list_array =
800            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
801
802        assert_eq!(9, list_array.len());
803        list_array.value(10);
804    }
805    #[test]
806    #[should_panic(
807        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0"
808    )]
809    #[cfg(not(feature = "force_validate"))]
810    fn test_list_view_array_invalid_buffer_len() {
811        let value_data = unsafe {
812            ArrayData::builder(DataType::Int32)
813                .len(8)
814                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
815                .build_unchecked()
816        };
817        let list_data_type =
818            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
819        let list_data = unsafe {
820            ArrayData::builder(list_data_type)
821                .len(3)
822                .add_child_data(value_data)
823                .build_unchecked()
824        };
825        drop(ListViewArray::from(list_data));
826    }
827
828    #[test]
829    #[should_panic(
830        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1"
831    )]
832    #[cfg(not(feature = "force_validate"))]
833    fn test_list_view_array_invalid_child_array_len() {
834        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
835        let list_data_type =
836            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
837        let list_data = unsafe {
838            ArrayData::builder(list_data_type)
839                .len(3)
840                .add_buffer(value_offsets)
841                .build_unchecked()
842        };
843        drop(ListViewArray::from(list_data));
844    }
845
846    #[test]
847    fn test_list_view_array_offsets_need_not_start_at_zero() {
848        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
849        let sizes = ScalarBuffer::from(vec![0i32, 0, 3]);
850        let offsets = ScalarBuffer::from(vec![2i32, 2, 5]);
851        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
852        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
853
854        assert_eq!(list_array.value_size(0), 0);
855        assert_eq!(list_array.value_size(1), 0);
856        assert_eq!(list_array.value_size(2), 3);
857    }
858
859    #[test]
860    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
861    #[cfg(not(feature = "force_validate"))]
862    fn test_list_view_array_alignment() {
863        let offset_buf = Buffer::from_slice_ref([0_u64]);
864        let offset_buf2 = offset_buf.slice(1);
865
866        let size_buf = Buffer::from_slice_ref([0_u64]);
867        let size_buf2 = size_buf.slice(1);
868
869        let values: [i32; 8] = [0; 8];
870        let value_data = unsafe {
871            ArrayData::builder(DataType::Int32)
872                .add_buffer(Buffer::from_slice_ref(values))
873                .build_unchecked()
874        };
875
876        let list_data_type =
877            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
878        let list_data = unsafe {
879            ArrayData::builder(list_data_type)
880                .add_buffer(offset_buf2)
881                .add_buffer(size_buf2)
882                .add_child_data(value_data)
883                .build_unchecked()
884        };
885        drop(ListViewArray::from(list_data));
886    }
887
888    #[test]
889    fn test_empty_offsets() {
890        let f = Arc::new(Field::new("element", DataType::Int32, true));
891        let string = ListViewArray::from(
892            ArrayData::builder(DataType::ListView(f.clone()))
893                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
894                .add_child_data(ArrayData::new_empty(&DataType::Int32))
895                .build()
896                .unwrap(),
897        );
898        assert_eq!(string.value_offsets(), &[] as &[i32; 0]);
899        assert_eq!(string.value_sizes(), &[] as &[i32; 0]);
900
901        let string = LargeListViewArray::from(
902            ArrayData::builder(DataType::LargeListView(f))
903                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
904                .add_child_data(ArrayData::new_empty(&DataType::Int32))
905                .build()
906                .unwrap(),
907        );
908        assert_eq!(string.len(), 0);
909        assert_eq!(string.value_offsets(), &[] as &[i64; 0]);
910        assert_eq!(string.value_sizes(), &[] as &[i64; 0]);
911    }
912
913    #[test]
914    fn test_try_new() {
915        let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
916        let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
917        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
918        let values = Arc::new(values) as ArrayRef;
919
920        let field = Arc::new(Field::new("element", DataType::Int32, false));
921        ListViewArray::new(
922            field.clone(),
923            offsets.clone(),
924            sizes.clone(),
925            values.clone(),
926            None,
927        );
928
929        let nulls = NullBuffer::new_null(4);
930        ListViewArray::new(
931            field.clone(),
932            offsets,
933            sizes.clone(),
934            values.clone(),
935            Some(nulls),
936        );
937
938        let nulls = NullBuffer::new_null(4);
939        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
940        let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
941        let err = LargeListViewArray::try_new(
942            field,
943            offsets.clone(),
944            sizes.clone(),
945            values.clone(),
946            Some(nulls),
947        )
948        .unwrap_err();
949
950        assert_eq!(
951            err.to_string(),
952            "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4"
953        );
954
955        let field = Arc::new(Field::new("element", DataType::Int64, false));
956        let err = LargeListViewArray::try_new(
957            field.clone(),
958            offsets.clone(),
959            sizes.clone(),
960            values.clone(),
961            None,
962        )
963        .unwrap_err();
964
965        assert_eq!(
966            err.to_string(),
967            "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\""
968        );
969
970        let nulls = NullBuffer::new_null(7);
971        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
972        let values = Arc::new(values);
973
974        let err = LargeListViewArray::try_new(
975            field,
976            offsets.clone(),
977            sizes.clone(),
978            values.clone(),
979            None,
980        )
981        .unwrap_err();
982
983        assert_eq!(
984            err.to_string(),
985            "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls"
986        );
987    }
988
989    #[test]
990    fn test_from_fixed_size_list() {
991        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
992        builder.values().append_slice(&[1, 2, 3]);
993        builder.append(true);
994        builder.values().append_slice(&[0, 0, 0]);
995        builder.append(false);
996        builder.values().append_slice(&[4, 5, 6]);
997        builder.append(true);
998        let list: ListViewArray = builder.finish().into();
999        let values: Vec<_> = list
1000            .iter()
1001            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1002            .collect();
1003        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
1004        let offsets = list.value_offsets();
1005        assert_eq!(offsets, &[0, 3, 6]);
1006        let sizes = list.value_sizes();
1007        assert_eq!(sizes, &[3, 3, 3]);
1008    }
1009
1010    #[test]
1011    fn test_list_view_array_overlap_lists() {
1012        let value_data = unsafe {
1013            ArrayData::builder(DataType::Int32)
1014                .len(8)
1015                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1016                .build_unchecked()
1017        };
1018        let list_data_type =
1019            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1020        let list_data = unsafe {
1021            ArrayData::builder(list_data_type)
1022                .len(2)
1023                .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets
1024                .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes
1025                .add_child_data(value_data)
1026                .build_unchecked()
1027        };
1028        let array = ListViewArray::from(list_data);
1029
1030        assert_eq!(array.len(), 2);
1031        assert_eq!(array.value_size(0), 5);
1032        assert_eq!(array.value_size(1), 5);
1033
1034        let values: Vec<_> = array
1035            .iter()
1036            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1037            .collect();
1038        assert_eq!(
1039            values,
1040            vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])]
1041        );
1042    }
1043
1044    #[test]
1045    fn test_list_view_array_incomplete_offsets() {
1046        let value_data = unsafe {
1047            ArrayData::builder(DataType::Int32)
1048                .len(50)
1049                .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>()))
1050                .build_unchecked()
1051        };
1052        let list_data_type =
1053            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1054        let list_data = unsafe {
1055            ArrayData::builder(list_data_type)
1056                .len(3)
1057                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets
1058                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes
1059                .add_child_data(value_data)
1060                .build_unchecked()
1061        };
1062        let array = ListViewArray::from(list_data);
1063
1064        assert_eq!(array.len(), 3);
1065        assert_eq!(array.value_size(0), 0);
1066        assert_eq!(array.value_size(1), 5);
1067        assert_eq!(array.value_size(2), 10);
1068
1069        let values: Vec<_> = array
1070            .iter()
1071            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1072            .collect();
1073        assert_eq!(
1074            values,
1075            vec![
1076                Some(vec![]),
1077                Some(vec![5, 6, 7, 8, 9]),
1078                Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
1079            ]
1080        );
1081    }
1082
1083    #[test]
1084    fn test_list_view_array_empty_lists() {
1085        let value_data = unsafe {
1086            ArrayData::builder(DataType::Int32)
1087                .len(0)
1088                .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[]))
1089                .build_unchecked()
1090        };
1091        let list_data_type =
1092            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1093        let list_data = unsafe {
1094            ArrayData::builder(list_data_type)
1095                .len(3)
1096                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets
1097                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes
1098                .add_child_data(value_data)
1099                .build_unchecked()
1100        };
1101        let array = ListViewArray::from(list_data);
1102
1103        assert_eq!(array.len(), 3);
1104        assert_eq!(array.value_size(0), 0);
1105        assert_eq!(array.value_size(1), 0);
1106        assert_eq!(array.value_size(2), 0);
1107
1108        let values: Vec<_> = array
1109            .iter()
1110            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1111            .collect();
1112        assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
1113    }
1114}