// arrow_array/builder/mod.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Defines push-based APIs for constructing arrays
//!
//! # Basic Usage
//!
//! Builders can be used to build simple, non-nested arrays:
//!
//! ```
//! # use arrow_array::builder::Int32Builder;
//! # use arrow_array::PrimitiveArray;
//! let mut a = Int32Builder::new();
//! a.append_value(1);
//! a.append_null();
//! a.append_value(2);
//! let a = a.finish();
//!
//! assert_eq!(a, PrimitiveArray::from(vec![Some(1), None, Some(2)]));
//! ```
//!
//! ```
//! # use arrow_array::builder::StringBuilder;
//! # use arrow_array::{Array, StringArray};
//! let mut a = StringBuilder::new();
//! a.append_value("foo");
//! a.append_value("bar");
//! a.append_null();
//! let a = a.finish();
//!
//! assert_eq!(a, StringArray::from_iter([Some("foo"), Some("bar"), None]));
//! ```
//!
//! # Nested Usage
//!
//! Builders can also be used to build more complex nested arrays, such as lists:
//!
//! ```
//! # use arrow_array::builder::{Int32Builder, ListBuilder};
//! # use arrow_array::ListArray;
//! # use arrow_array::types::Int32Type;
//! let mut a = ListBuilder::new(Int32Builder::new());
//! // [1, 2]
//! a.values().append_value(1);
//! a.values().append_value(2);
//! a.append(true);
//! // null
//! a.append(false);
//! // []
//! a.append(true);
//! // [3, null]
//! a.values().append_value(3);
//! a.values().append_null();
//! a.append(true);
//!
//! // [[1, 2], null, [], [3, null]]
//! let a = a.finish();
//!
//! assert_eq!(a, ListArray::from_iter_primitive::<Int32Type, _, _>([
//!     Some(vec![Some(1), Some(2)]),
//!     None,
//!     Some(vec![]),
//!     Some(vec![Some(3), None])]
//! ));
//! ```
//!
//! # Using the [`Extend`] trait to append values from an iterable:
//!
//! ```
//! # use arrow_array::{Array};
//! # use arrow_array::builder::{ArrayBuilder, StringBuilder};
//!
//! let mut builder = StringBuilder::new();
//! builder.extend(vec![Some("🍐"), Some("🍎"), None]);
//! assert_eq!(builder.finish().len(), 3);
//! ```
//!
//! # Using the [`Extend`] trait to write generic functions:
//!
//! ```
//! # use arrow_array::{Array, ArrayRef, StringArray};
//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
//!
//! // For generic methods that fill a list of values for an [`ArrayBuilder`], use the [`Extend`] trait.
//! fn filter_and_fill<V, I: IntoIterator<Item = V>>(builder: &mut impl Extend<V>, values: I, filter: V)
//! where V: PartialEq
//! {
//!     builder.extend(values.into_iter().filter(|v| *v == filter));
//! }
//! let mut string_builder = StringBuilder::new();
//! filter_and_fill(
//!     &mut string_builder,
//!     vec![Some("🍐"), Some("🍎"), None],
//!     Some("🍎"),
//! );
//! assert_eq!(string_builder.finish().len(), 1);
//!
//! let mut int_builder = Int32Builder::new();
//! filter_and_fill(
//!     &mut int_builder,
//!     vec![Some(11), Some(42), None],
//!     Some(42),
//! );
//! assert_eq!(int_builder.finish().len(), 1);
//!
//! // For generic methods that fill lists-of-lists for an [`ArrayBuilder`], use the [`Extend`] trait.
//! fn filter_and_fill_if_contains<T, V, I: IntoIterator<Item = Option<V>>>(
//!     list_builder: &mut impl Extend<Option<V>>,
//!     values: I,
//!     filter: Option<T>,
//! ) where
//!     T: PartialEq,
//!     for<'a> &'a V: IntoIterator<Item = &'a Option<T>>,
//! {
//!     list_builder.extend(values.into_iter().filter(|string: &Option<V>| {
//!         string
//!             .as_ref()
//!             .map(|str: &V| str.into_iter().any(|ch: &Option<T>| ch == &filter))
//!             .unwrap_or(false)
//!     }));
//! }
//! let builder = StringBuilder::new();
//! let mut list_builder = ListBuilder::new(builder);
//! let pear_pear = vec![Some("🍐"),Some("🍐")];
//! let pear_app = vec![Some("🍐"),Some("🍎")];
//! filter_and_fill_if_contains(
//!     &mut list_builder,
//!     vec![Some(pear_pear), Some(pear_app), None],
//!     Some("🍎"),
//! );
//! assert_eq!(list_builder.finish().len(), 1);
//! ```
//!
//! # Custom Builders
//!
//! It is common to have a collection of statically defined Rust types that
//! you want to convert to Arrow arrays.
//!
//! An example of doing so is below:
//!
//! ```
//! # use std::any::Any;
//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
//! # use arrow_array::{ArrayRef, RecordBatch, StructArray};
//! # use arrow_schema::{DataType, Field};
//! # use std::sync::Arc;
//! /// A custom row representation
//! struct MyRow {
//!     i32: i32,
//!     optional_i32: Option<i32>,
//!     string: Option<String>,
//!     i32_list: Option<Vec<Option<i32>>>,
//! }
//!
//! /// Converts `Vec<MyRow>` into `StructArray`
//! #[derive(Debug, Default)]
//! struct MyRowBuilder {
//!     i32: Int32Builder,
//!     string: StringBuilder,
//!     i32_list: ListBuilder<Int32Builder>,
//! }
//!
//! impl MyRowBuilder {
//!     fn append(&mut self, row: &MyRow) {
//!         self.i32.append_value(row.i32);
//!         self.string.append_option(row.string.as_ref());
//!         self.i32_list.append_option(row.i32_list.as_ref().map(|x| x.iter().copied()));
//!     }
//!
//!     /// Note: returns StructArray to allow nesting within another array if desired
//!     fn finish(&mut self) -> StructArray {
//!         let i32 = Arc::new(self.i32.finish()) as ArrayRef;
//!         let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));
//!
//!         // `string` is filled with `append_option`, so the column may contain nulls
//!         let string = Arc::new(self.string.finish()) as ArrayRef;
//!         let string_field = Arc::new(Field::new("string", DataType::Utf8, true));
//!
//!         let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
//!         let value_field = Arc::new(Field::new_list_field(DataType::Int32, true));
//!         let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));
//!
//!         StructArray::from(vec![
//!             (i32_field, i32),
//!             (string_field, string),
//!             (i32_list_field, i32_list),
//!         ])
//!     }
//! }
//!
//! /// For building arrays in generic code, use Extend instead of the append_* methods
//! /// e.g. append_value, append_option, append_null
//! impl<'a> Extend<&'a MyRow> for MyRowBuilder {
//!     fn extend<T: IntoIterator<Item = &'a MyRow>>(&mut self, iter: T) {
//!         iter.into_iter().for_each(|row| self.append(row));
//!     }
//! }
//!
//! /// Converts a slice of [`MyRow`] to a [`RecordBatch`]
//! fn rows_to_batch(rows: &[MyRow]) -> RecordBatch {
//!     let mut builder = MyRowBuilder::default();
//!     builder.extend(rows);
//!     RecordBatch::from(&builder.finish())
//! }
//! ```
//!
//! # Null / Validity Masks
//!
//! The [`NullBufferBuilder`] is optimized for creating the null mask for an array.
//!
//! ```
//! # use arrow_array::builder::NullBufferBuilder;
//! let mut builder = NullBufferBuilder::new(8);
//! builder.append_n_non_nulls(7);
//! builder.append_null();
//! let buffer = builder.finish().unwrap();
//! assert_eq!(buffer.len(), 8);
//! assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
//! ```

235pub use arrow_buffer::BooleanBufferBuilder;
236pub use arrow_buffer::NullBufferBuilder;
237
238mod boolean_builder;
239pub use boolean_builder::*;
240mod buffer_builder;
241pub use buffer_builder::*;
242mod fixed_size_binary_builder;
243pub use fixed_size_binary_builder::*;
244mod fixed_size_list_builder;
245pub use fixed_size_list_builder::*;
246mod generic_bytes_builder;
247pub use generic_bytes_builder::*;
248mod generic_list_builder;
249pub use generic_list_builder::*;
250mod map_builder;
251pub use map_builder::*;
252mod null_builder;
253pub use null_builder::*;
254mod primitive_builder;
255pub use primitive_builder::*;
256mod primitive_dictionary_builder;
257pub use primitive_dictionary_builder::*;
258mod primitive_run_builder;
259pub use primitive_run_builder::*;
260mod struct_builder;
261pub use struct_builder::*;
262mod generic_bytes_dictionary_builder;
263pub use generic_bytes_dictionary_builder::*;
264mod generic_byte_run_builder;
265pub use generic_byte_run_builder::*;
266mod generic_bytes_view_builder;
267pub use generic_bytes_view_builder::*;
268mod generic_list_view_builder;
269pub use generic_list_view_builder::*;
270mod union_builder;
271
272pub use union_builder::*;
273
274use crate::ArrayRef;
275use std::any::Any;
276
277/// Trait for dealing with different array builders at runtime
278///
279/// # Example
280///
281/// ```
282/// // Create
283/// # use arrow_array::{ArrayRef, StringArray};
284/// # use arrow_array::builder::{ArrayBuilder, Float64Builder, Int64Builder, StringBuilder};
285///
286/// let mut data_builders: Vec<Box<dyn ArrayBuilder>> = vec![
287///     Box::new(Float64Builder::new()),
288///     Box::new(Int64Builder::new()),
289///     Box::new(StringBuilder::new()),
290/// ];
291///
292/// // Fill
293/// data_builders[0]
294///     .as_any_mut()
295///     .downcast_mut::<Float64Builder>()
296///     .unwrap()
297///     .append_value(3.14);
298/// data_builders[1]
299///     .as_any_mut()
300///     .downcast_mut::<Int64Builder>()
301///     .unwrap()
302///     .append_value(-1);
303/// data_builders[2]
304///     .as_any_mut()
305///     .downcast_mut::<StringBuilder>()
306///     .unwrap()
307///     .append_value("🍎");
308///
309/// // Finish
310/// let array_refs: Vec<ArrayRef> = data_builders
311///     .iter_mut()
312///     .map(|builder| builder.finish())
313///     .collect();
314/// assert_eq!(array_refs[0].len(), 1);
315/// assert_eq!(array_refs[1].is_null(0), false);
316/// assert_eq!(
317///     array_refs[2]
318///         .as_any()
319///         .downcast_ref::<StringArray>()
320///         .unwrap()
321///         .value(0),
322///     "🍎"
323/// );
324/// ```
325pub trait ArrayBuilder: Any + Send + Sync {
326    /// Returns the number of array slots in the builder
327    fn len(&self) -> usize;
328
329    /// Returns whether number of array slots is zero
330    fn is_empty(&self) -> bool {
331        self.len() == 0
332    }
333
334    /// Builds the array
335    fn finish(&mut self) -> ArrayRef;
336
337    /// Builds the array without resetting the underlying builder.
338    fn finish_cloned(&self) -> ArrayRef;
339
340    /// Returns the builder as a non-mutable `Any` reference.
341    ///
342    /// This is most useful when one wants to call non-mutable APIs on a specific builder
343    /// type. In this case, one can first cast this into a `Any`, and then use
344    /// `downcast_ref` to get a reference on the specific builder.
345    fn as_any(&self) -> &dyn Any;
346
347    /// Returns the builder as a mutable `Any` reference.
348    ///
349    /// This is most useful when one wants to call mutable APIs on a specific builder
350    /// type. In this case, one can first cast this into a `Any`, and then use
351    /// `downcast_mut` to get a reference on the specific builder.
352    fn as_any_mut(&mut self) -> &mut dyn Any;
353
354    /// Returns the boxed builder as a box of `Any`.
355    fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
356}
357
358impl ArrayBuilder for Box<dyn ArrayBuilder> {
359    fn len(&self) -> usize {
360        (**self).len()
361    }
362
363    fn is_empty(&self) -> bool {
364        (**self).is_empty()
365    }
366
367    fn finish(&mut self) -> ArrayRef {
368        (**self).finish()
369    }
370
371    fn finish_cloned(&self) -> ArrayRef {
372        (**self).finish_cloned()
373    }
374
375    fn as_any(&self) -> &dyn Any {
376        (**self).as_any()
377    }
378
379    fn as_any_mut(&mut self) -> &mut dyn Any {
380        (**self).as_any_mut()
381    }
382
383    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
384        self
385    }
386}
387
388/// Builder for [`ListArray`](crate::array::ListArray)
389pub type ListBuilder<T> = GenericListBuilder<i32, T>;
390
391/// Builder for [`LargeListArray`](crate::array::LargeListArray)
392pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
393
394/// Builder for [`ListViewArray`](crate::array::ListViewArray)
395pub type ListViewBuilder<T> = GenericListViewBuilder<i32, T>;
396
397/// Builder for [`LargeListViewArray`](crate::array::LargeListViewArray)
398pub type LargeListViewBuilder<T> = GenericListViewBuilder<i64, T>;
399
400/// Builder for [`BinaryArray`](crate::array::BinaryArray)
401///
402/// See examples on [`GenericBinaryBuilder`]
403pub type BinaryBuilder = GenericBinaryBuilder<i32>;
404
405/// Builder for [`LargeBinaryArray`](crate::array::LargeBinaryArray)
406///
407/// See examples on [`GenericBinaryBuilder`]
408pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
409
410/// Builder for [`StringArray`](crate::array::StringArray)
411///
412/// See examples on [`GenericStringBuilder`]
413pub type StringBuilder = GenericStringBuilder<i32>;
414
415/// Builder for [`LargeStringArray`](crate::array::LargeStringArray)
416///
417/// See examples on [`GenericStringBuilder`]
418pub type LargeStringBuilder = GenericStringBuilder<i64>;