arrow_array/builder/
struct_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::StructArray;
19use crate::{
20    builder::*,
21    types::{Int16Type, Int32Type, Int64Type, Int8Type},
22};
23use arrow_buffer::NullBufferBuilder;
24use arrow_schema::{DataType, Fields, IntervalUnit, SchemaBuilder, TimeUnit};
25use std::sync::Arc;
26
/// Builder for [`StructArray`]
///
/// A `StructBuilder` holds one child builder per field plus the validity bitmap of the
/// struct slots themselves. [`StructBuilder::append`] only records the validity of a
/// slot, so callers must also append exactly one value (or null) to every child field
/// builder for each struct slot. The lengths are checked when the array is built:
/// [`StructBuilder::finish`] and [`StructBuilder::finish_cloned`] panic if a child
/// builder's length differs from the number of struct slots.
///
/// # Nested builders
///
/// Handling arrays with complex layouts, such as `List<Struct<List<Struct>>>`, can be
/// challenging in Rust because of its strong typing system.
///
/// [`StructBuilder::from_fields`] creates every child builder through [`make_builder`],
/// which returns a type-erased `Box<dyn ArrayBuilder>`. A collection builder
/// ([`ListBuilder`], [`LargeListBuilder`], or [`MapBuilder`]) created this way is
/// therefore generic over `Box<dyn ArrayBuilder>` and not over the concrete builder of
/// its values, so reaching the inner builder takes two steps:
///
/// 1. Call [`StructBuilder::field_builder`] with the boxed form of the collection
///    builder, e.g. `field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)`.
/// 2. Downcast the `Box<dyn ArrayBuilder>` returned by `values()` to the concrete
///    builder you need, e.g. a [`StructBuilder`].
///
/// For a practical example see the code below:
///
/// ```rust
/// use arrow_array::builder::{ArrayBuilder, ListBuilder, StringBuilder, StructBuilder};
/// use arrow_schema::{DataType, Field, Fields};
/// use std::sync::Arc;
///
/// // This is an example column that has a List<Struct<List<Struct>>> layout
/// let mut example_col = ListBuilder::new(StructBuilder::from_fields(
///     vec![Field::new(
///         "value_list",
///         DataType::List(Arc::new(Field::new_list_field(
///             DataType::Struct(Fields::from(vec![
///                 Field::new("key", DataType::Utf8, true),
///                 Field::new("value", DataType::Utf8, true),
///             ])), // In this example we are trying to get to this builder and insert key/value pairs
///             true,
///         ))),
///         true,
///     )],
///     0,
/// ));
///
/// // We can obtain the StructBuilder without issues, because example_col was created with StructBuilder
/// let col_struct_builder: &mut StructBuilder = example_col.values();
///
/// // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood
/// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
///
/// // This panics at runtime, even though we know that the builder is a ListBuilder<StructBuilder>.
/// // let sb = col_struct_builder
/// //     .field_builder::<ListBuilder<StructBuilder>>(0)
/// //     .as_mut()
/// //     .unwrap();
///
/// // To keep in line with Rust's strong typing, we fetch a ListBuilder<Box<dyn ArrayBuilder>> from the column StructBuilder first...
/// let mut list_builder_option =
///     col_struct_builder.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(0);
///
/// let list_builder = list_builder_option.as_mut().unwrap();
///
/// // ... and then downcast the key/value pair values to a StructBuilder
/// let struct_builder = list_builder
///     .values()
///     .as_any_mut()
///     .downcast_mut::<StructBuilder>()
///     .unwrap();
///
/// // We can now append values to the StructBuilder
/// let key_builder = struct_builder.field_builder::<StringBuilder>(0).unwrap();
/// key_builder.append_value("my key");
///
/// let value_builder = struct_builder.field_builder::<StringBuilder>(1).unwrap();
/// value_builder.append_value("my value");
///
/// struct_builder.append(true);
/// list_builder.append(true);
/// col_struct_builder.append(true);
/// example_col.append(true);
///
/// let array = example_col.finish();
///
/// println!("My array: {:?}", array);
/// ```
pub struct StructBuilder {
    /// Schema of the struct being built; one entry per child builder
    fields: Fields,
    /// Child builders, in the same order as `fields`
    field_builders: Vec<Box<dyn ArrayBuilder>>,
    /// Validity bitmap of the struct slots themselves (not of the children)
    null_buffer_builder: NullBufferBuilder,
}
110
111impl std::fmt::Debug for StructBuilder {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        f.debug_struct("StructBuilder")
114            .field("fields", &self.fields)
115            .field("bitmap_builder", &self.null_buffer_builder)
116            .field("len", &self.len())
117            .finish()
118    }
119}
120
121impl ArrayBuilder for StructBuilder {
122    /// Returns the number of array slots in the builder.
123    ///
124    /// Note that this always return the first child field builder's length, and it is
125    /// the caller's responsibility to maintain the consistency that all the child field
126    /// builder should have the equal number of elements.
127    fn len(&self) -> usize {
128        self.null_buffer_builder.len()
129    }
130
131    /// Builds the array.
132    fn finish(&mut self) -> ArrayRef {
133        Arc::new(self.finish())
134    }
135
136    /// Builds the array without resetting the builder.
137    fn finish_cloned(&self) -> ArrayRef {
138        Arc::new(self.finish_cloned())
139    }
140
141    /// Returns the builder as a non-mutable `Any` reference.
142    ///
143    /// This is most useful when one wants to call non-mutable APIs on a specific builder
144    /// type. In this case, one can first cast this into a `Any`, and then use
145    /// `downcast_ref` to get a reference on the specific builder.
146    fn as_any(&self) -> &dyn Any {
147        self
148    }
149
150    /// Returns the builder as a mutable `Any` reference.
151    ///
152    /// This is most useful when one wants to call mutable APIs on a specific builder
153    /// type. In this case, one can first cast this into a `Any`, and then use
154    /// `downcast_mut` to get a reference on the specific builder.
155    fn as_any_mut(&mut self) -> &mut dyn Any {
156        self
157    }
158
159    /// Returns the boxed builder as a box of `Any`.
160    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
161        self
162    }
163}
164
165/// Returns a builder with capacity for `capacity` elements of datatype
166/// `DataType`.
167///
168/// This function is useful to construct arrays from an arbitrary vectors with
169/// known/expected schema.
170///
171/// See comments on [StructBuilder] for retrieving collection builders built by
172/// make_builder.
173pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
174    use crate::builder::*;
175    match datatype {
176        DataType::Null => Box::new(NullBuilder::new()),
177        DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
178        DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
179        DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),
180        DataType::Int32 => Box::new(Int32Builder::with_capacity(capacity)),
181        DataType::Int64 => Box::new(Int64Builder::with_capacity(capacity)),
182        DataType::UInt8 => Box::new(UInt8Builder::with_capacity(capacity)),
183        DataType::UInt16 => Box::new(UInt16Builder::with_capacity(capacity)),
184        DataType::UInt32 => Box::new(UInt32Builder::with_capacity(capacity)),
185        DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)),
186        DataType::Float16 => Box::new(Float16Builder::with_capacity(capacity)),
187        DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
188        DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
189        DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
190        DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
191        DataType::FixedSizeBinary(len) => {
192            Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
193        }
194        DataType::Decimal128(p, s) => Box::new(
195            Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
196        ),
197        DataType::Decimal256(p, s) => Box::new(
198            Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
199        ),
200        DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
201        DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
202        DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
203        DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
204        DataType::Time32(TimeUnit::Second) => {
205            Box::new(Time32SecondBuilder::with_capacity(capacity))
206        }
207        DataType::Time32(TimeUnit::Millisecond) => {
208            Box::new(Time32MillisecondBuilder::with_capacity(capacity))
209        }
210        DataType::Time64(TimeUnit::Microsecond) => {
211            Box::new(Time64MicrosecondBuilder::with_capacity(capacity))
212        }
213        DataType::Time64(TimeUnit::Nanosecond) => {
214            Box::new(Time64NanosecondBuilder::with_capacity(capacity))
215        }
216        DataType::Timestamp(TimeUnit::Second, tz) => Box::new(
217            TimestampSecondBuilder::with_capacity(capacity)
218                .with_data_type(DataType::Timestamp(TimeUnit::Second, tz.clone())),
219        ),
220        DataType::Timestamp(TimeUnit::Millisecond, tz) => Box::new(
221            TimestampMillisecondBuilder::with_capacity(capacity)
222                .with_data_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone())),
223        ),
224        DataType::Timestamp(TimeUnit::Microsecond, tz) => Box::new(
225            TimestampMicrosecondBuilder::with_capacity(capacity)
226                .with_data_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone())),
227        ),
228        DataType::Timestamp(TimeUnit::Nanosecond, tz) => Box::new(
229            TimestampNanosecondBuilder::with_capacity(capacity)
230                .with_data_type(DataType::Timestamp(TimeUnit::Nanosecond, tz.clone())),
231        ),
232        DataType::Interval(IntervalUnit::YearMonth) => {
233            Box::new(IntervalYearMonthBuilder::with_capacity(capacity))
234        }
235        DataType::Interval(IntervalUnit::DayTime) => {
236            Box::new(IntervalDayTimeBuilder::with_capacity(capacity))
237        }
238        DataType::Interval(IntervalUnit::MonthDayNano) => {
239            Box::new(IntervalMonthDayNanoBuilder::with_capacity(capacity))
240        }
241        DataType::Duration(TimeUnit::Second) => {
242            Box::new(DurationSecondBuilder::with_capacity(capacity))
243        }
244        DataType::Duration(TimeUnit::Millisecond) => {
245            Box::new(DurationMillisecondBuilder::with_capacity(capacity))
246        }
247        DataType::Duration(TimeUnit::Microsecond) => {
248            Box::new(DurationMicrosecondBuilder::with_capacity(capacity))
249        }
250        DataType::Duration(TimeUnit::Nanosecond) => {
251            Box::new(DurationNanosecondBuilder::with_capacity(capacity))
252        }
253        DataType::List(field) => {
254            let builder = make_builder(field.data_type(), capacity);
255            Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
256        }
257        DataType::LargeList(field) => {
258            let builder = make_builder(field.data_type(), capacity);
259            Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
260        }
261        DataType::FixedSizeList(field, size) => {
262            let size = *size;
263            let values_builder_capacity = {
264                let size: usize = size.try_into().unwrap();
265                capacity * size
266            };
267            let builder = make_builder(field.data_type(), values_builder_capacity);
268            Box::new(
269                FixedSizeListBuilder::with_capacity(builder, size, capacity)
270                    .with_field(field.clone()),
271            )
272        }
273        DataType::ListView(field) => {
274            let builder = make_builder(field.data_type(), capacity);
275            Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()))
276        }
277        DataType::LargeListView(field) => {
278            let builder = make_builder(field.data_type(), capacity);
279            Box::new(
280                LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()),
281            )
282        }
283        DataType::Map(field, _) => match field.data_type() {
284            DataType::Struct(fields) => {
285                let map_field_names = MapFieldNames {
286                    key: fields[0].name().clone(),
287                    value: fields[1].name().clone(),
288                    entry: field.name().clone(),
289                };
290                let key_builder = make_builder(fields[0].data_type(), capacity);
291                let value_builder = make_builder(fields[1].data_type(), capacity);
292                Box::new(
293                    MapBuilder::with_capacity(
294                        Some(map_field_names),
295                        key_builder,
296                        value_builder,
297                        capacity,
298                    )
299                    .with_keys_field(fields[0].clone())
300                    .with_values_field(fields[1].clone()),
301                )
302            }
303            t => panic!("The field of Map data type {t:?} should have a child Struct field"),
304        },
305        DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
306        t @ DataType::Dictionary(key_type, value_type) => {
307            macro_rules! dict_builder {
308                ($key_type:ty) => {
309                    match &**value_type {
310                        DataType::Utf8 => {
311                            let dict_builder: StringDictionaryBuilder<$key_type> =
312                                StringDictionaryBuilder::with_capacity(capacity, 256, 1024);
313                            Box::new(dict_builder)
314                        }
315                        DataType::LargeUtf8 => {
316                            let dict_builder: LargeStringDictionaryBuilder<$key_type> =
317                                LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024);
318                            Box::new(dict_builder)
319                        }
320                        DataType::Binary => {
321                            let dict_builder: BinaryDictionaryBuilder<$key_type> =
322                                BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
323                            Box::new(dict_builder)
324                        }
325                        DataType::LargeBinary => {
326                            let dict_builder: LargeBinaryDictionaryBuilder<$key_type> =
327                                LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
328                            Box::new(dict_builder)
329                        }
330                        t => panic!("Dictionary value type {t:?} is not currently supported"),
331                    }
332                };
333            }
334            match &**key_type {
335                DataType::Int8 => dict_builder!(Int8Type),
336                DataType::Int16 => dict_builder!(Int16Type),
337                DataType::Int32 => dict_builder!(Int32Type),
338                DataType::Int64 => dict_builder!(Int64Type),
339                _ => {
340                    panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
341                }
342            }
343        }
344        t => panic!("Data type {t:?} is not currently supported"),
345    }
346}
347
348impl StructBuilder {
349    /// Creates a new `StructBuilder`
350    pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
351        Self {
352            field_builders,
353            fields: fields.into(),
354            null_buffer_builder: NullBufferBuilder::new(0),
355        }
356    }
357
358    /// Creates a new `StructBuilder` from [`Fields`] and `capacity`
359    pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
360        let fields = fields.into();
361        let mut builders = Vec::with_capacity(fields.len());
362        for field in &fields {
363            builders.push(make_builder(field.data_type(), capacity));
364        }
365        Self::new(fields, builders)
366    }
367
368    /// Returns a mutable reference to the child field builder at index `i`.
369    /// Result will be `None` if the input type `T` provided doesn't match the actual
370    /// field builder's type.
371    pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
372        self.field_builders[i].as_any_mut().downcast_mut::<T>()
373    }
374
375    /// Returns the number of fields for the struct this builder is building.
376    pub fn num_fields(&self) -> usize {
377        self.field_builders.len()
378    }
379
380    /// Appends an element (either null or non-null) to the struct. The actual elements
381    /// should be appended for each child sub-array in a consistent way.
382    #[inline]
383    pub fn append(&mut self, is_valid: bool) {
384        self.null_buffer_builder.append(is_valid);
385    }
386
387    /// Appends a null element to the struct.
388    #[inline]
389    pub fn append_null(&mut self) {
390        self.append(false)
391    }
392
393    /// Builds the `StructArray` and reset this builder.
394    pub fn finish(&mut self) -> StructArray {
395        self.validate_content();
396        if self.fields.is_empty() {
397            return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
398        }
399
400        let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
401        let nulls = self.null_buffer_builder.finish();
402        StructArray::new(self.fields.clone(), arrays, nulls)
403    }
404
405    /// Builds the `StructArray` without resetting the builder.
406    pub fn finish_cloned(&self) -> StructArray {
407        self.validate_content();
408
409        if self.fields.is_empty() {
410            return StructArray::new_empty_fields(
411                self.len(),
412                self.null_buffer_builder.finish_cloned(),
413            );
414        }
415
416        let arrays = self
417            .field_builders
418            .iter()
419            .map(|f| f.finish_cloned())
420            .collect();
421
422        let nulls = self.null_buffer_builder.finish_cloned();
423
424        StructArray::new(self.fields.clone(), arrays, nulls)
425    }
426
427    /// Constructs and validates contents in the builder to ensure that
428    /// - fields and field_builders are of equal length
429    /// - the number of items in individual field_builders are equal to self.len()
430    fn validate_content(&self) {
431        if self.fields.len() != self.field_builders.len() {
432            panic!("Number of fields is not equal to the number of field_builders.");
433        }
434        self.field_builders.iter().enumerate().for_each(|(idx, x)| {
435            if x.len() != self.len() {
436                let builder = SchemaBuilder::from(&self.fields);
437                let schema = builder.finish();
438
439                panic!("{}", format!(
440                    "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
441                    schema,
442                    idx,
443                    self.fields[idx].data_type(),
444                    self.len(),
445                    x.len()
446                ));
447            }
448        });
449    }
450
451    /// Returns the current null buffer as a slice
452    pub fn validity_slice(&self) -> Option<&[u8]> {
453        self.null_buffer_builder.as_slice()
454    }
455}
456
457#[cfg(test)]
458mod tests {
459    use std::any::type_name;
460
461    use super::*;
462    use arrow_buffer::Buffer;
463    use arrow_data::ArrayData;
464    use arrow_schema::Field;
465
466    use crate::{array::Array, types::ArrowDictionaryKeyType};
467
468    #[test]
469    fn test_struct_array_builder() {
470        let string_builder = StringBuilder::new();
471        let int_builder = Int32Builder::new();
472
473        let fields = vec![
474            Field::new("f1", DataType::Utf8, true),
475            Field::new("f2", DataType::Int32, true),
476        ];
477        let field_builders = vec![
478            Box::new(string_builder) as Box<dyn ArrayBuilder>,
479            Box::new(int_builder) as Box<dyn ArrayBuilder>,
480        ];
481
482        let mut builder = StructBuilder::new(fields, field_builders);
483        assert_eq!(2, builder.num_fields());
484
485        let string_builder = builder
486            .field_builder::<StringBuilder>(0)
487            .expect("builder at field 0 should be string builder");
488        string_builder.append_value("joe");
489        string_builder.append_null();
490        string_builder.append_null();
491        string_builder.append_value("mark");
492
493        let int_builder = builder
494            .field_builder::<Int32Builder>(1)
495            .expect("builder at field 1 should be int builder");
496        int_builder.append_value(1);
497        int_builder.append_value(2);
498        int_builder.append_null();
499        int_builder.append_value(4);
500
501        builder.append(true);
502        builder.append(true);
503        builder.append_null();
504        builder.append(true);
505
506        let struct_data = builder.finish().into_data();
507
508        assert_eq!(4, struct_data.len());
509        assert_eq!(1, struct_data.null_count());
510        assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity());
511
512        let expected_string_data = ArrayData::builder(DataType::Utf8)
513            .len(4)
514            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
515            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7]))
516            .add_buffer(Buffer::from_slice_ref(b"joemark"))
517            .build()
518            .unwrap();
519
520        let expected_int_data = ArrayData::builder(DataType::Int32)
521            .len(4)
522            .null_bit_buffer(Some(Buffer::from_slice_ref([11_u8])))
523            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4]))
524            .build()
525            .unwrap();
526
527        assert_eq!(expected_string_data, struct_data.child_data()[0]);
528        assert_eq!(expected_int_data, struct_data.child_data()[1]);
529    }
530
531    #[test]
532    fn test_struct_array_builder_finish() {
533        let int_builder = Int32Builder::new();
534        let bool_builder = BooleanBuilder::new();
535
536        let fields = vec![
537            Field::new("f1", DataType::Int32, false),
538            Field::new("f2", DataType::Boolean, false),
539        ];
540        let field_builders = vec![
541            Box::new(int_builder) as Box<dyn ArrayBuilder>,
542            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
543        ];
544
545        let mut builder = StructBuilder::new(fields, field_builders);
546        builder
547            .field_builder::<Int32Builder>(0)
548            .unwrap()
549            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
550        builder
551            .field_builder::<BooleanBuilder>(1)
552            .unwrap()
553            .append_slice(&[
554                false, true, false, true, false, true, false, true, false, true,
555            ]);
556
557        // Append slot values - all are valid.
558        for _ in 0..10 {
559            builder.append(true);
560        }
561
562        assert_eq!(10, builder.len());
563
564        let arr = builder.finish();
565
566        assert_eq!(10, arr.len());
567        assert_eq!(0, builder.len());
568
569        builder
570            .field_builder::<Int32Builder>(0)
571            .unwrap()
572            .append_slice(&[1, 3, 5, 7, 9]);
573        builder
574            .field_builder::<BooleanBuilder>(1)
575            .unwrap()
576            .append_slice(&[false, true, false, true, false]);
577
578        // Append slot values - all are valid.
579        for _ in 0..5 {
580            builder.append(true);
581        }
582
583        assert_eq!(5, builder.len());
584
585        let arr = builder.finish();
586
587        assert_eq!(5, arr.len());
588        assert_eq!(0, builder.len());
589    }
590
591    #[test]
592    fn test_build_fixed_size_list() {
593        const LIST_LENGTH: i32 = 4;
594        let fixed_size_list_dtype =
595            DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
596        let mut builder = make_builder(&fixed_size_list_dtype, 10);
597        let builder = builder
598            .as_any_mut()
599            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>();
600        match builder {
601            Some(builder) => {
602                assert_eq!(builder.value_length(), LIST_LENGTH);
603                assert!(builder
604                    .values()
605                    .as_any_mut()
606                    .downcast_mut::<Int32Builder>()
607                    .is_some());
608            }
609            None => panic!("expected FixedSizeListBuilder, got a different builder type"),
610        }
611    }
612
613    #[test]
614    fn test_struct_array_builder_finish_cloned() {
615        let int_builder = Int32Builder::new();
616        let bool_builder = BooleanBuilder::new();
617
618        let fields = vec![
619            Field::new("f1", DataType::Int32, false),
620            Field::new("f2", DataType::Boolean, false),
621        ];
622        let field_builders = vec![
623            Box::new(int_builder) as Box<dyn ArrayBuilder>,
624            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
625        ];
626
627        let mut builder = StructBuilder::new(fields, field_builders);
628        builder
629            .field_builder::<Int32Builder>(0)
630            .unwrap()
631            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
632        builder
633            .field_builder::<BooleanBuilder>(1)
634            .unwrap()
635            .append_slice(&[
636                false, true, false, true, false, true, false, true, false, true,
637            ]);
638
639        // Append slot values - all are valid.
640        for _ in 0..10 {
641            builder.append(true);
642        }
643
644        assert_eq!(10, builder.len());
645
646        let mut arr = builder.finish_cloned();
647
648        assert_eq!(10, arr.len());
649        assert_eq!(10, builder.len());
650
651        builder
652            .field_builder::<Int32Builder>(0)
653            .unwrap()
654            .append_slice(&[1, 3, 5, 7, 9]);
655        builder
656            .field_builder::<BooleanBuilder>(1)
657            .unwrap()
658            .append_slice(&[false, true, false, true, false]);
659
660        // Append slot values - all are valid.
661        for _ in 0..5 {
662            builder.append(true);
663        }
664
665        assert_eq!(15, builder.len());
666
667        arr = builder.finish();
668
669        assert_eq!(15, arr.len());
670        assert_eq!(0, builder.len());
671    }
672
673    #[test]
674    fn test_struct_array_builder_from_schema() {
675        let mut fields = vec![
676            Field::new("f1", DataType::Float32, false),
677            Field::new("f2", DataType::Utf8, false),
678        ];
679        let sub_fields = vec![
680            Field::new("g1", DataType::Int32, false),
681            Field::new("g2", DataType::Boolean, false),
682        ];
683        let struct_type = DataType::Struct(sub_fields.into());
684        fields.push(Field::new("f3", struct_type, false));
685
686        let mut builder = StructBuilder::from_fields(fields, 5);
687        assert_eq!(3, builder.num_fields());
688        assert!(builder.field_builder::<Float32Builder>(0).is_some());
689        assert!(builder.field_builder::<StringBuilder>(1).is_some());
690        assert!(builder.field_builder::<StructBuilder>(2).is_some());
691    }
692
693    #[test]
694    fn test_datatype_properties() {
695        let fields = Fields::from(vec![
696            Field::new("f1", DataType::Decimal128(1, 2), false),
697            Field::new(
698                "f2",
699                DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
700                false,
701            ),
702        ]);
703        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
704        builder
705            .field_builder::<Decimal128Builder>(0)
706            .unwrap()
707            .append_value(1);
708        builder
709            .field_builder::<TimestampMillisecondBuilder>(1)
710            .unwrap()
711            .append_value(1);
712        builder.append(true);
713        let array = builder.finish();
714
715        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
716        assert_eq!(array.column(0).data_type(), fields[0].data_type());
717        assert_eq!(array.column(1).data_type(), fields[1].data_type());
718    }
719
    /// Runs the shared dictionary-field check with `Int8` dictionary keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int8_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8);
    }
724
    /// Runs the shared dictionary-field check with `Int16` dictionary keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int16_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16);
    }
729
    /// Runs the shared dictionary-field check with `Int32` dictionary keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int32_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32);
    }
734
    /// Runs the shared dictionary-field check with `Int64` dictionary keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int64_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64);
    }
739
740    fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
741        key_type: DataType,
742    ) {
743        let dict_field = Field::new(
744            "f1",
745            DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)),
746            false,
747        );
748        let fields = vec![dict_field.clone()];
749        let expected_dtype = DataType::Struct(fields.into());
750        let cloned_dict_field = dict_field.clone();
751        let expected_child_dtype = dict_field.data_type();
752        let mut struct_builder = StructBuilder::from_fields(vec![cloned_dict_field], 5);
753        let Some(dict_builder) = struct_builder.field_builder::<StringDictionaryBuilder<K>>(0)
754        else {
755            panic!(
756                "Builder should be StringDictionaryBuilder<{}>",
757                type_name::<K>()
758            )
759        };
760        dict_builder.append_value("dict string");
761        struct_builder.append(true);
762        let array = struct_builder.finish();
763
764        assert_eq!(array.data_type(), &expected_dtype);
765        assert_eq!(array.column(0).data_type(), expected_child_dtype);
766        assert_eq!(array.column(0).len(), 1);
767    }
768
769    #[test]
770    #[should_panic(
771        expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
772    )]
773    fn test_struct_array_builder_from_schema_unsupported_type() {
774        let fields = vec![
775            Field::new("f1", DataType::UInt64, false),
776            Field::new(
777                "f2",
778                DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
779                false,
780            ),
781        ];
782
783        let _ = StructBuilder::from_fields(fields, 5);
784    }
785
786    #[test]
787    #[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
788    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
789        let fields = vec![Field::new(
790            "f1",
791            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32)),
792            false,
793        )];
794
795        let _ = StructBuilder::from_fields(fields, 5);
796    }
797
798    #[test]
799    fn test_struct_array_builder_field_builder_type_mismatch() {
800        let int_builder = Int32Builder::with_capacity(10);
801
802        let fields = vec![Field::new("f1", DataType::Int32, false)];
803        let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];
804
805        let mut builder = StructBuilder::new(fields, field_builders);
806        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
807    }
808
809    #[test]
810    #[should_panic(
811        expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
812    )]
813    fn test_struct_array_builder_unequal_field_builders_lengths() {
814        let mut int_builder = Int32Builder::with_capacity(10);
815        let mut bool_builder = BooleanBuilder::new();
816
817        int_builder.append_value(1);
818        int_builder.append_value(2);
819        bool_builder.append_value(true);
820
821        let fields = vec![
822            Field::new("f1", DataType::Int32, false),
823            Field::new("f2", DataType::Boolean, false),
824        ];
825        let field_builders = vec![
826            Box::new(int_builder) as Box<dyn ArrayBuilder>,
827            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
828        ];
829
830        let mut builder = StructBuilder::new(fields, field_builders);
831        builder.append(true);
832        builder.append(true);
833        builder.finish();
834    }
835
836    #[test]
837    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
838    fn test_struct_array_builder_unequal_field_field_builders() {
839        let int_builder = Int32Builder::with_capacity(10);
840
841        let fields = vec![
842            Field::new("f1", DataType::Int32, false),
843            Field::new("f2", DataType::Boolean, false),
844        ];
845        let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];
846
847        let mut builder = StructBuilder::new(fields, field_builders);
848        builder.finish();
849    }
850
851    #[test]
852    #[should_panic(
853        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)"
854    )]
855    fn test_struct_array_mismatch_builder() {
856        let fields = vec![Field::new(
857            "timestamp",
858            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into())),
859            false,
860        )];
861
862        let field_builders: Vec<Box<dyn ArrayBuilder>> =
863            vec![Box::new(TimestampNanosecondBuilder::new())];
864
865        let mut sa = StructBuilder::new(fields, field_builders);
866        sa.finish();
867    }
868
869    #[test]
870    fn test_empty() {
871        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
872        builder.append(true);
873        builder.append(false);
874
875        let a1 = builder.finish_cloned();
876        let a2 = builder.finish();
877        assert_eq!(a1, a2);
878        assert_eq!(a1.len(), 2);
879        assert_eq!(a1.null_count(), 1);
880        assert!(a1.is_valid(0));
881        assert!(a1.is_null(1));
882    }
883}