arrow_array/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines push-based APIs for constructing arrays
19//!
20//! # Basic Usage
21//!
22//! Builders can be used to build simple, non-nested arrays
23//!
24//! ```
25//! # use arrow_array::builder::Int32Builder;
26//! # use arrow_array::PrimitiveArray;
27//! let mut a = Int32Builder::new();
28//! a.append_value(1);
29//! a.append_null();
30//! a.append_value(2);
31//! let a = a.finish();
32//!
33//! assert_eq!(a, PrimitiveArray::from(vec![Some(1), None, Some(2)]));
34//! ```
35//!
36//! ```
37//! # use arrow_array::builder::StringBuilder;
38//! # use arrow_array::{Array, StringArray};
39//! let mut a = StringBuilder::new();
40//! a.append_value("foo");
41//! a.append_value("bar");
42//! a.append_null();
43//! let a = a.finish();
44//!
45//! assert_eq!(a, StringArray::from_iter([Some("foo"), Some("bar"), None]));
46//! ```
47//!
48//! # Nested Usage
49//!
50//! Builders can also be used to build more complex nested arrays, such as lists
51//!
52//! ```
53//! # use arrow_array::builder::{Int32Builder, ListBuilder};
54//! # use arrow_array::ListArray;
55//! # use arrow_array::types::Int32Type;
56//! let mut a = ListBuilder::new(Int32Builder::new());
57//! // [1, 2]
58//! a.values().append_value(1);
59//! a.values().append_value(2);
60//! a.append(true);
61//! // null
62//! a.append(false);
63//! // []
64//! a.append(true);
65//! // [3, null]
66//! a.values().append_value(3);
67//! a.values().append_null();
68//! a.append(true);
69//!
70//! // [[1, 2], null, [], [3, null]]
71//! let a = a.finish();
72//!
73//! assert_eq!(a, ListArray::from_iter_primitive::<Int32Type, _, _>([
74//! Some(vec![Some(1), Some(2)]),
75//! None,
76//! Some(vec![]),
77//! Some(vec![Some(3), None])]
78//! ))
79//! ```
80//!
81//! # Using the [`Extend`] trait to append values from an iterable:
82//!
83//! ```
84//! # use arrow_array::{Array};
85//! # use arrow_array::builder::{ArrayBuilder, StringBuilder};
86//!
87//! let mut builder = StringBuilder::new();
88//! builder.extend(vec![Some("🍐"), Some("🍎"), None]);
89//! assert_eq!(builder.finish().len(), 3);
90//! ```
91//!
92//! # Using the [`Extend`] trait to write generic functions:
93//!
94//! ```
95//! # use arrow_array::{Array, ArrayRef, StringArray};
96//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
97//!
98//! // For generic methods that fill a list of values for an [`ArrayBuilder`], use the [`Extend`] trait.
99//! fn filter_and_fill<V, I: IntoIterator<Item = V>>(builder: &mut impl Extend<V>, values: I, filter: V)
100//! where V: PartialEq
101//! {
102//! builder.extend(values.into_iter().filter(|v| *v == filter));
103//! }
104//! let mut string_builder = StringBuilder::new();
105//! filter_and_fill(
106//! &mut string_builder,
107//! vec![Some("🍐"), Some("🍎"), None],
108//! Some("🍎"),
109//! );
110//! assert_eq!(string_builder.finish().len(), 1);
111//!
112//! let mut int_builder = Int32Builder::new();
113//! filter_and_fill(
114//! &mut int_builder,
115//! vec![Some(11), Some(42), None],
116//! Some(42),
117//! );
118//! assert_eq!(int_builder.finish().len(), 1);
119//!
120//! // For generic methods that fill lists-of-lists for an [`ArrayBuilder`], use the [`Extend`] trait.
121//! fn filter_and_fill_if_contains<T, V, I: IntoIterator<Item = Option<V>>>(
122//! list_builder: &mut impl Extend<Option<V>>,
123//! values: I,
124//! filter: Option<T>,
125//! ) where
126//! T: PartialEq,
127//! for<'a> &'a V: IntoIterator<Item = &'a Option<T>>,
128//! {
129//! list_builder.extend(values.into_iter().filter(|string: &Option<V>| {
130//! string
131//! .as_ref()
132//! .map(|str: &V| str.into_iter().any(|ch: &Option<T>| ch == &filter))
133//! .unwrap_or(false)
134//! }));
135//! }
136//! let builder = StringBuilder::new();
137//! let mut list_builder = ListBuilder::new(builder);
138//! let pear_pear = vec![Some("🍐"),Some("🍐")];
139//! let pear_app = vec![Some("🍐"),Some("🍎")];
140//! filter_and_fill_if_contains(
141//! &mut list_builder,
142//! vec![Some(pear_pear), Some(pear_app), None],
143//! Some("🍎"),
144//! );
145//! assert_eq!(list_builder.finish().len(), 1);
146//! ```
147//!
148//! # Custom Builders
149//!
150//! It is common to have a collection of statically defined Rust types that
151//! you want to convert to Arrow arrays.
152//!
153//! An example of doing so is below
154//!
155//! ```
156//! # use std::any::Any;
157//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
158//! # use arrow_array::{ArrayRef, RecordBatch, StructArray};
159//! # use arrow_schema::{DataType, Field};
160//! # use std::sync::Arc;
161//! /// A custom row representation
162//! struct MyRow {
163//! i32: i32,
164//! optional_i32: Option<i32>,
165//! string: Option<String>,
166//! i32_list: Option<Vec<Option<i32>>>,
167//! }
168//!
//! /// Converts a sequence of `MyRow` values into a `StructArray`
170//! #[derive(Debug, Default)]
171//! struct MyRowBuilder {
172//! i32: Int32Builder,
173//! string: StringBuilder,
174//! i32_list: ListBuilder<Int32Builder>,
175//! }
176//!
177//! impl MyRowBuilder {
178//! fn append(&mut self, row: &MyRow) {
179//! self.i32.append_value(row.i32);
180//! self.string.append_option(row.string.as_ref());
181//! self.i32_list.append_option(row.i32_list.as_ref().map(|x| x.iter().copied()));
182//! }
183//!
184//! /// Note: returns StructArray to allow nesting within another array if desired
185//! fn finish(&mut self) -> StructArray {
186//! let i32 = Arc::new(self.i32.finish()) as ArrayRef;
187//! let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));
188//!
189//! let string = Arc::new(self.string.finish()) as ArrayRef;
//! let string_field = Arc::new(Field::new("string", DataType::Utf8, true));
191//!
192//! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
193//! let value_field = Arc::new(Field::new_list_field(DataType::Int32, true));
194//! let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));
195//!
196//! StructArray::from(vec![
197//! (i32_field, i32),
198//! (string_field, string),
199//! (i32_list_field, i32_list),
200//! ])
201//! }
202//! }
203//!
204//! /// For building arrays in generic code, use Extend instead of the append_* methods
205//! /// e.g. append_value, append_option, append_null
206//! impl<'a> Extend<&'a MyRow> for MyRowBuilder {
207//! fn extend<T: IntoIterator<Item = &'a MyRow>>(&mut self, iter: T) {
208//! iter.into_iter().for_each(|row| self.append(row));
209//! }
210//! }
211//!
212//! /// Converts a slice of [`MyRow`] to a [`RecordBatch`]
213//! fn rows_to_batch(rows: &[MyRow]) -> RecordBatch {
214//! let mut builder = MyRowBuilder::default();
215//! builder.extend(rows);
216//! RecordBatch::from(&builder.finish())
217//! }
218//! ```
219//!
220//! # Null / Validity Masks
221//!
222//! The [`NullBufferBuilder`] is optimized for creating the null mask for an array.
223//!
224//! ```
225//! # use arrow_array::builder::NullBufferBuilder;
//! let mut builder = NullBufferBuilder::new(8);
228//! builder.append_n_non_nulls(7);
229//! builder.append_null();
230//! let buffer = builder.finish().unwrap();
231//! assert_eq!(buffer.len(), 8);
232//! assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
233//! ```
234
235pub use arrow_buffer::BooleanBufferBuilder;
236pub use arrow_buffer::NullBufferBuilder;
237
238mod boolean_builder;
239pub use boolean_builder::*;
240mod buffer_builder;
241pub use buffer_builder::*;
242mod fixed_size_binary_builder;
243pub use fixed_size_binary_builder::*;
244mod fixed_size_list_builder;
245pub use fixed_size_list_builder::*;
246mod generic_bytes_builder;
247pub use generic_bytes_builder::*;
248mod generic_list_builder;
249pub use generic_list_builder::*;
250mod map_builder;
251pub use map_builder::*;
252mod null_builder;
253pub use null_builder::*;
254mod primitive_builder;
255pub use primitive_builder::*;
256mod primitive_dictionary_builder;
257pub use primitive_dictionary_builder::*;
258mod primitive_run_builder;
259pub use primitive_run_builder::*;
260mod struct_builder;
261pub use struct_builder::*;
262mod generic_bytes_dictionary_builder;
263pub use generic_bytes_dictionary_builder::*;
264mod generic_byte_run_builder;
265pub use generic_byte_run_builder::*;
266mod generic_bytes_view_builder;
267pub use generic_bytes_view_builder::*;
268mod generic_list_view_builder;
269pub use generic_list_view_builder::*;
270mod union_builder;
271
272pub use union_builder::*;
273
274use crate::ArrayRef;
275use std::any::Any;
276
277/// Trait for dealing with different array builders at runtime
278///
279/// # Example
280///
281/// ```
282/// // Create
283/// # use arrow_array::{ArrayRef, StringArray};
284/// # use arrow_array::builder::{ArrayBuilder, Float64Builder, Int64Builder, StringBuilder};
285///
286/// let mut data_builders: Vec<Box<dyn ArrayBuilder>> = vec![
287/// Box::new(Float64Builder::new()),
288/// Box::new(Int64Builder::new()),
289/// Box::new(StringBuilder::new()),
290/// ];
291///
292/// // Fill
293/// data_builders[0]
294/// .as_any_mut()
295/// .downcast_mut::<Float64Builder>()
296/// .unwrap()
297/// .append_value(3.14);
298/// data_builders[1]
299/// .as_any_mut()
300/// .downcast_mut::<Int64Builder>()
301/// .unwrap()
302/// .append_value(-1);
303/// data_builders[2]
304/// .as_any_mut()
305/// .downcast_mut::<StringBuilder>()
306/// .unwrap()
307/// .append_value("🍎");
308///
309/// // Finish
310/// let array_refs: Vec<ArrayRef> = data_builders
311/// .iter_mut()
312/// .map(|builder| builder.finish())
313/// .collect();
314/// assert_eq!(array_refs[0].len(), 1);
315/// assert_eq!(array_refs[1].is_null(0), false);
316/// assert_eq!(
317/// array_refs[2]
318/// .as_any()
319/// .downcast_ref::<StringArray>()
320/// .unwrap()
321/// .value(0),
322/// "🍎"
323/// );
324/// ```
325pub trait ArrayBuilder: Any + Send + Sync {
326 /// Returns the number of array slots in the builder
327 fn len(&self) -> usize;
328
329 /// Returns whether number of array slots is zero
330 fn is_empty(&self) -> bool {
331 self.len() == 0
332 }
333
334 /// Builds the array
335 fn finish(&mut self) -> ArrayRef;
336
337 /// Builds the array without resetting the underlying builder.
338 fn finish_cloned(&self) -> ArrayRef;
339
340 /// Returns the builder as a non-mutable `Any` reference.
341 ///
342 /// This is most useful when one wants to call non-mutable APIs on a specific builder
343 /// type. In this case, one can first cast this into a `Any`, and then use
344 /// `downcast_ref` to get a reference on the specific builder.
345 fn as_any(&self) -> &dyn Any;
346
347 /// Returns the builder as a mutable `Any` reference.
348 ///
349 /// This is most useful when one wants to call mutable APIs on a specific builder
350 /// type. In this case, one can first cast this into a `Any`, and then use
351 /// `downcast_mut` to get a reference on the specific builder.
352 fn as_any_mut(&mut self) -> &mut dyn Any;
353
354 /// Returns the boxed builder as a box of `Any`.
355 fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
356}
357
358impl ArrayBuilder for Box<dyn ArrayBuilder> {
359 fn len(&self) -> usize {
360 (**self).len()
361 }
362
363 fn is_empty(&self) -> bool {
364 (**self).is_empty()
365 }
366
367 fn finish(&mut self) -> ArrayRef {
368 (**self).finish()
369 }
370
371 fn finish_cloned(&self) -> ArrayRef {
372 (**self).finish_cloned()
373 }
374
375 fn as_any(&self) -> &dyn Any {
376 (**self).as_any()
377 }
378
379 fn as_any_mut(&mut self) -> &mut dyn Any {
380 (**self).as_any_mut()
381 }
382
383 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
384 self
385 }
386}
387
/// Builder for [`ListArray`](crate::array::ListArray)
///
/// Alias of [`GenericListBuilder`] with `i32` as its first type parameter;
/// `T` is the builder used for the list's child values.
// NOTE(review): the `i32`/`i64` parameter is presumably the offset width — confirm against `GenericListBuilder`.
pub type ListBuilder<T> = GenericListBuilder<i32, T>;

/// Builder for [`LargeListArray`](crate::array::LargeListArray)
///
/// Alias of [`GenericListBuilder`] with `i64` as its first type parameter;
/// `T` is the builder used for the list's child values.
pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;

/// Builder for [`ListViewArray`](crate::array::ListViewArray)
///
/// Alias of [`GenericListViewBuilder`] with `i32` as its first type parameter.
pub type ListViewBuilder<T> = GenericListViewBuilder<i32, T>;

/// Builder for [`LargeListViewArray`](crate::array::LargeListViewArray)
///
/// Alias of [`GenericListViewBuilder`] with `i64` as its first type parameter.
pub type LargeListViewBuilder<T> = GenericListViewBuilder<i64, T>;
399
/// Builder for [`BinaryArray`](crate::array::BinaryArray)
///
/// Alias of [`GenericBinaryBuilder`] instantiated with `i32`.
///
/// See examples on [`GenericBinaryBuilder`]
pub type BinaryBuilder = GenericBinaryBuilder<i32>;

/// Builder for [`LargeBinaryArray`](crate::array::LargeBinaryArray)
///
/// Alias of [`GenericBinaryBuilder`] instantiated with `i64`.
///
/// See examples on [`GenericBinaryBuilder`]
pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;

/// Builder for [`StringArray`](crate::array::StringArray)
///
/// Alias of [`GenericStringBuilder`] instantiated with `i32`.
///
/// See examples on [`GenericStringBuilder`]
pub type StringBuilder = GenericStringBuilder<i32>;

/// Builder for [`LargeStringArray`](crate::array::LargeStringArray)
///
/// Alias of [`GenericStringBuilder`] instantiated with `i64`.
///
/// See examples on [`GenericStringBuilder`]
pub type LargeStringBuilder = GenericStringBuilder<i64>;