arrow_array/builder/
fixed_size_list_builder.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::{ArrayRef, FixedSizeListArray};
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Field, FieldRef};
22use std::any::Any;
23use std::sync::Arc;
24
25///  Builder for [`FixedSizeListArray`]
26/// ```
27/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array, Int32Array};
28/// let values_builder = Int32Builder::new();
29/// let mut builder = FixedSizeListBuilder::new(values_builder, 3);
30///
31/// //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
32/// builder.values().append_value(0);
33/// builder.values().append_value(1);
34/// builder.values().append_value(2);
35/// builder.append(true);
36/// builder.values().append_null();
37/// builder.values().append_null();
38/// builder.values().append_null();
39/// builder.append(false);
40/// builder.values().append_value(3);
41/// builder.values().append_null();
42/// builder.values().append_value(5);
43/// builder.append(true);
44/// builder.values().append_value(6);
45/// builder.values().append_value(7);
46/// builder.values().append_null();
47/// builder.append(true);
48/// let list_array = builder.finish();
49/// assert_eq!(
50///     *list_array.value(0),
51///     Int32Array::from(vec![Some(0), Some(1), Some(2)])
52/// );
53/// assert!(list_array.is_null(1));
54/// assert_eq!(
55///     *list_array.value(2),
56///     Int32Array::from(vec![Some(3), None, Some(5)])
57/// );
58/// assert_eq!(
59///     *list_array.value(3),
60///     Int32Array::from(vec![Some(6), Some(7), None])
61/// )
62/// ```
63///
64#[derive(Debug)]
65pub struct FixedSizeListBuilder<T: ArrayBuilder> {
66    null_buffer_builder: NullBufferBuilder,
67    values_builder: T,
68    list_len: i32,
69    field: Option<FieldRef>,
70}
71
72impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
73    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
74    /// `value_length` is the number of values within each array
75    pub fn new(values_builder: T, value_length: i32) -> Self {
76        let capacity = values_builder
77            .len()
78            .checked_div(value_length as _)
79            .unwrap_or_default();
80
81        Self::with_capacity(values_builder, value_length, capacity)
82    }
83
84    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
85    /// `value_length` is the number of values within each array
86    /// `capacity` is the number of items to pre-allocate space for in this builder
87    pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
88        Self {
89            null_buffer_builder: NullBufferBuilder::new(capacity),
90            values_builder,
91            list_len: value_length,
92            field: None,
93        }
94    }
95
96    /// Override the field passed to [`FixedSizeListArray::new`]
97    ///
98    /// By default, a nullable field is created with the name `item`
99    ///
100    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
101    /// field's data type does not match that of `T`
102    pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
103        Self {
104            field: Some(field.into()),
105            ..self
106        }
107    }
108}
109
110impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
111where
112    T: 'static,
113{
114    /// Returns the builder as a non-mutable `Any` reference.
115    fn as_any(&self) -> &dyn Any {
116        self
117    }
118
119    /// Returns the builder as a mutable `Any` reference.
120    fn as_any_mut(&mut self) -> &mut dyn Any {
121        self
122    }
123
124    /// Returns the boxed builder as a box of `Any`.
125    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
126        self
127    }
128
129    /// Returns the number of array slots in the builder
130    fn len(&self) -> usize {
131        self.null_buffer_builder.len()
132    }
133
134    /// Builds the array and reset this builder.
135    fn finish(&mut self) -> ArrayRef {
136        Arc::new(self.finish())
137    }
138
139    /// Builds the array without resetting the builder.
140    fn finish_cloned(&self) -> ArrayRef {
141        Arc::new(self.finish_cloned())
142    }
143}
144
145impl<T: ArrayBuilder> FixedSizeListBuilder<T>
146where
147    T: 'static,
148{
149    /// Returns the child array builder as a mutable reference.
150    ///
151    /// This mutable reference can be used to append values into the child array builder,
152    /// but you must call [`append`](#method.append) to delimit each distinct list value.
153    pub fn values(&mut self) -> &mut T {
154        &mut self.values_builder
155    }
156
157    /// Returns the length of the list
158    pub fn value_length(&self) -> i32 {
159        self.list_len
160    }
161
162    /// Finish the current fixed-length list array slot
163    #[inline]
164    pub fn append(&mut self, is_valid: bool) {
165        self.null_buffer_builder.append(is_valid);
166    }
167
168    /// Builds the [`FixedSizeListBuilder`] and reset this builder.
169    pub fn finish(&mut self) -> FixedSizeListArray {
170        let len = self.len();
171        let values = self.values_builder.finish();
172        let nulls = self.null_buffer_builder.finish();
173
174        assert_eq!(
175            values.len(), len * self.list_len as usize,
176            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
177            values.len(),
178            self.list_len,
179            len,
180        );
181
182        let field = self
183            .field
184            .clone()
185            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
186
187        FixedSizeListArray::new(field, self.list_len, values, nulls)
188    }
189
190    /// Builds the [`FixedSizeListBuilder`] without resetting the builder.
191    pub fn finish_cloned(&self) -> FixedSizeListArray {
192        let len = self.len();
193        let values = self.values_builder.finish_cloned();
194        let nulls = self.null_buffer_builder.finish_cloned();
195
196        assert_eq!(
197            values.len(), len * self.list_len as usize,
198            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
199            values.len(),
200            self.list_len,
201            len,
202        );
203
204        let field = self
205            .field
206            .clone()
207            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
208
209        FixedSizeListArray::new(field, self.list_len, values, nulls)
210    }
211
212    /// Returns the current null buffer as a slice
213    pub fn validity_slice(&self) -> Option<&[u8]> {
214        self.null_buffer_builder.as_slice()
215    }
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::DataType;

    use crate::builder::Int32Builder;
    use crate::Array;
    use crate::Int32Array;

    /// Builds a four-slot builder of three-element `Int32` lists.
    ///
    /// * `include_null_element`: slot 2 is a null list (with null child values)
    ///   instead of `[2, 3, 4]`
    /// * `include_null_in_values`: slot 3 is `[3, null, 5]` instead of `[3, 4, 5]`
    fn make_list_builder(
        include_null_element: bool,
        include_null_in_values: bool,
    ) -> FixedSizeListBuilder<Int32Builder> {
        let values_builder = Int32Builder::new();
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_value(2);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.append(true);

        if include_null_element {
            builder.values().append_null();
            builder.values().append_null();
            builder.values().append_null();
            builder.append(false);
        } else {
            builder.values().append_value(2);
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.append(true);
        }

        if include_null_in_values {
            builder.values().append_value(3);
            builder.values().append_null();
            builder.values().append_value(5);
            builder.append(true);
        } else {
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.values().append_value(5);
            builder.append(true);
        }

        builder
    }

    #[test]
    fn test_fixed_size_list_array_builder() {
        let mut builder = make_list_builder(true, true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field() {
        let builder = make_list_builder(false, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish();
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_with_field_type_panic() {
        let values_builder = Int32Builder::new();
        let builder = FixedSizeListBuilder::new(values_builder, 3);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        //  [[0, 1, 2], null, [3, 4, 5]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);

        builder.finish();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));

        let list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        // Exercise `finish_cloned` (not `finish`): the child nulls are masked by
        // the null list slot, so a non-nullable field must be accepted here too.
        let list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
        let builder = make_list_builder(false, false);
        let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish_cloned() {
        let mut builder = make_list_builder(true, true);

        let mut list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(3, list_array.value_length());

        // The builder keeps its contents after `finish_cloned`, so more slots
        // can be appended on top of the existing four.
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.values().append_null();
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(6, list_array.len());
        assert_eq!(2, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish_cloned();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_empty() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert_eq!(0, builder.len());

        // `finish` resets the builder, so it can be reused for a new array
        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    #[should_panic(
        expected = "Length of the child array (10) must be the multiple of the value length (3) and the array length (3)."
    )]
    fn test_fixed_size_list_array_builder_fail() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);
        // Four values for a three-element slot: 10 child values vs 3 * 3 expected
        builder.values().append_slice(&[7, 8, 9, 10]);
        builder.append(true);

        builder.finish();
    }
}