Module builder

Source
Expand description

Defines push-based APIs for constructing arrays

§Basic Usage

Builders can be used to build simple, non-nested arrays

// Push two values with a null in between, then turn the builder into an array.
let mut builder = Int32Builder::new();
builder.append_value(1);
builder.append_null();
builder.append_value(2);
let array = builder.finish();

let expected = PrimitiveArray::from(vec![Some(1), None, Some(2)]);
assert_eq!(array, expected);
// Push two strings followed by a null, then turn the builder into an array.
let mut builder = StringBuilder::new();
builder.append_value("foo");
builder.append_value("bar");
builder.append_null();
let array = builder.finish();

let expected = StringArray::from_iter([Some("foo"), Some("bar"), None]);
assert_eq!(array, expected);

§Nested Usage

Builders can also be used to build more complex nested arrays, such as lists

let mut list_builder = ListBuilder::new(Int32Builder::new());

// First entry: [1, 2]
list_builder.values().append_value(1);
list_builder.values().append_value(2);
list_builder.append(true);

// Second entry: null
list_builder.append(false);

// Third entry: []
list_builder.append(true);

// Fourth entry: [3, null]
list_builder.values().append_value(3);
list_builder.values().append_null();
list_builder.append(true);

// Resulting array: [[1, 2], null, [], [3, null]]
let list_array = list_builder.finish();

let expected = ListArray::from_iter_primitive::<Int32Type, _, _>([
    Some(vec![Some(1), Some(2)]),
    None,
    Some(vec![]),
    Some(vec![Some(3), None]),
]);
assert_eq!(list_array, expected);

§Using the Extend trait to append values from an iterable:


// A whole collection of optional strings is appended with a single `extend` call.
let fruits = vec![Some("🍐"), Some("🍎"), None];
let mut string_builder = StringBuilder::new();
string_builder.extend(fruits);
let array = string_builder.finish();
assert_eq!(array.len(), 3);

§Using the Extend trait to write generic functions:


// Generic helper written against [`Extend`]: forwards to `builder` only those
// items of `values` that compare equal to `filter`, in their original order.
fn filter_and_fill<V, I: IntoIterator<Item = V>>(builder: &mut impl Extend<V>, values: I, filter: V)
where V: PartialEq
{
    // Keep the iterator lazy and hand it over in one `extend` call.
    let matching = values.into_iter().filter(|candidate| candidate == &filter);
    builder.extend(matching);
}
// Only the single entry equal to Some("🍎") is forwarded to the string builder.
let fruits = vec![Some("🍐"), Some("🍎"), None];
let mut string_builder = StringBuilder::new();
filter_and_fill(&mut string_builder, fruits, Some("🍎"));
let strings = string_builder.finish();
assert_eq!(strings.len(), 1);

// The same helper works unchanged with an integer builder.
let numbers = vec![Some(11), Some(42), None];
let mut int_builder = Int32Builder::new();
filter_and_fill(&mut int_builder, numbers, Some(42));
let ints = int_builder.finish();
assert_eq!(ints.len(), 1);

// Generic helper for lists-of-lists written against [`Extend`]: forwards to
// `list_builder` only those entries of `values` that are `Some` and contain at
// least one element equal to `filter`. `None` entries are never forwarded.
fn filter_and_fill_if_contains<T, V, I: IntoIterator<Item = Option<V>>>(
    list_builder: &mut impl Extend<Option<V>>,
    values: I,
    filter: Option<T>,
) where
    T: PartialEq,
    for<'a> &'a V: IntoIterator<Item = &'a Option<T>>,
{
    // Predicate: does this (optional) inner list hold an element equal to `filter`?
    let contains_filter = |entry: &Option<V>| match entry {
        Some(items) => items.into_iter().any(|item: &Option<T>| item == &filter),
        None => false,
    };
    list_builder.extend(values.into_iter().filter(contains_filter));
}
// Of the three entries, only the second list contains "🍎", so one list is kept.
let mut list_builder = ListBuilder::new(StringBuilder::new());
let pear_pear = vec![Some("🍐"), Some("🍐")];
let pear_app = vec![Some("🍐"), Some("🍎")];
let candidates = vec![Some(pear_pear), Some(pear_app), None];
filter_and_fill_if_contains(&mut list_builder, candidates, Some("🍎"));
let lists = list_builder.finish();
assert_eq!(lists.len(), 1);

§Custom Builders

It is common to have a collection of statically defined Rust types that you want to convert to Arrow arrays.

An example of doing so is below

/// A custom row representation
///
/// Each instance holds the values of one logical row that the example
/// converts into Arrow arrays.
struct MyRow {
    /// Required 32-bit integer value
    i32: i32,
    /// Optional 32-bit integer value
    // NOTE(review): `MyRowBuilder` in this example has no builder for this
    // field — confirm whether leaving it out is intentional.
    optional_i32: Option<i32>,
    /// Optional string value
    string: Option<String>,
    /// Optional list whose individual elements are themselves optional
    i32_list: Option<Vec<Option<i32>>>,
}

/// Converts a collection of `MyRow` into a `StructArray`
///
/// Holds one child builder per output column; rows are appended to every
/// child builder in lockstep.
#[derive(Debug, Default)]
struct MyRowBuilder {
    /// Builder for the `i32` column
    i32: Int32Builder,
    /// Builder for the `string` column
    string: StringBuilder,
    /// Builder for the `i32_list` column
    i32_list: ListBuilder<Int32Builder>,
}

impl MyRowBuilder {
    /// Appends the values of `row` to the per-column builders.
    fn append(&mut self, row: &MyRow) {
        self.i32.append_value(row.i32);
        self.string.append_option(row.string.as_ref());
        self.i32_list.append_option(row.i32_list.as_ref().map(|x| x.iter().copied()));
    }

    /// Finishes every column builder and assembles the columns into a `StructArray`.
    ///
    /// Note: returns StructArray to allow nesting within another array if desired
    fn finish(&mut self) -> StructArray {
        let i32 = Arc::new(self.i32.finish()) as ArrayRef;
        let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));

        // `row.string` is an `Option` appended via `append_option`, so this
        // column may contain nulls: the field is named after the column and
        // declared nullable.
        let string = Arc::new(self.string.finish()) as ArrayRef;
        let string_field = Arc::new(Field::new("string", DataType::Utf8, true));

        // Both the list itself and its elements are optional in `MyRow`.
        let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
        let value_field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));

        StructArray::from(vec![
            (i32_field, i32),
            (string_field, string),
            (i32_list_field, i32_list),
        ])
    }
}

/// Implementing `Extend` lets generic code fill the builder from any iterable of rows,
/// instead of calling the append_* methods (e.g. append_value, append_option, append_null)
impl<'a> Extend<&'a MyRow> for MyRowBuilder {
    fn extend<T: IntoIterator<Item = &'a MyRow>>(&mut self, iter: T) {
        // Append the rows one at a time, in iteration order.
        for row in iter {
            self.append(row);
        }
    }
}

/// Builds a [`RecordBatch`] from a slice of [`MyRow`]
///
/// The rows are first collected into a `StructArray` by a fresh
/// `MyRowBuilder`, which is then converted into the batch.
fn rows_to_batch(rows: &[MyRow]) -> RecordBatch {
    let mut row_builder = MyRowBuilder::default();
    row_builder.extend(rows);
    let columns = row_builder.finish();
    RecordBatch::from(&columns)
}

§Null / Validity Masks

The NullBufferBuilder is optimized for creating the null mask for an array.

// Record validity for eight slots: seven non-null entries followed by one null.
let mut builder = NullBufferBuilder::new(8);
builder.append_n_non_nulls(7);
builder.append_null();
let buffer = builder.finish().unwrap();
assert_eq!(buffer.len(), 8);
// `true` marks a valid (non-null) slot, `false` marks a null slot.
assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);

Structs§

BooleanBufferBuilder
Builder for BooleanBuffer
BooleanBuilder
Builder for BooleanArray
BufferBuilder
Builder for creating a Buffer object.
FixedSizeBinaryBuilder
Builder for FixedSizeBinaryArray
FixedSizeListBuilder
Builder for FixedSizeListArray
GenericByteBuilder
Builder for GenericByteArray
GenericByteDictionaryBuilder
Builder for DictionaryArray of GenericByteArray
GenericByteRunBuilder
Builder for RunArray of GenericByteArray
GenericByteViewBuilder
A builder for GenericByteViewArray
GenericListBuilder
Builder for GenericListArray
GenericListViewBuilder
Builder for GenericListViewArray
MapBuilder
Builder for MapArray
MapFieldNames
The Field names for a MapArray
NullBufferBuilder
Builder for creating NullBuffer
NullBuilder
Builder for NullArray
PrimitiveBuilder
Builder for PrimitiveArray
PrimitiveDictionaryBuilder
Builder for DictionaryArray of PrimitiveArray
PrimitiveRunBuilder
Builder for RunArray of PrimitiveArray
StructBuilder
Builder for StructArray
UnionBuilder
Builder for UnionArray

Traits§

ArrayBuilder
Trait for dealing with different array builders at runtime

Functions§

make_builder
Returns a builder with capacity for capacity elements of datatype DataType.
make_view
Create a view based on the given data, block id and offset.

Type Aliases§

BinaryBuilder
Builder for BinaryArray
BinaryDictionaryBuilder
Builder for DictionaryArray of BinaryArray
BinaryRunBuilder
Builder for RunArray of BinaryArray
BinaryViewBuilder
Array builder for BinaryViewArray
Date32BufferBuilder
Buffer builder for 32-bit date type.
Date32Builder
A 32-bit date array builder.
Date64BufferBuilder
Buffer builder for 64-bit date type.
Date64Builder
A 64-bit date array builder.
Decimal128BufferBuilder
Buffer builder for 128-bit decimal type.
Decimal128Builder
A decimal 128 array builder
Decimal256BufferBuilder
Buffer builder for 256-bit decimal type.
Decimal256Builder
A decimal 256 array builder
DurationMicrosecondBufferBuilder
Buffer builder for elapsed time of microseconds unit.
DurationMicrosecondBuilder
An elapsed time in microseconds array builder.
DurationMillisecondBufferBuilder
Buffer builder for elapsed time of milliseconds unit.
DurationMillisecondBuilder
An elapsed time in milliseconds array builder.
DurationNanosecondBufferBuilder
Buffer builder for elapsed time of nanoseconds unit.
DurationNanosecondBuilder
An elapsed time in nanoseconds array builder.
DurationSecondBufferBuilder
Buffer builder for elapsed time of second unit.
DurationSecondBuilder
An elapsed time in seconds array builder.
Float16BufferBuilder
Buffer builder for 16-bit floating point type.
Float16Builder
A 16-bit floating point array builder.
Float32BufferBuilder
Buffer builder for 32-bit floating point type.
Float32Builder
A 32-bit floating point array builder.
Float64BufferBuilder
Buffer builder for 64-bit floating point type.
Float64Builder
A 64-bit floating point array builder.
GenericBinaryBuilder
Array builder for GenericBinaryArray
GenericStringBuilder
Array builder for GenericStringArray
Int8BufferBuilder
Buffer builder for signed 8-bit integer type.
Int8Builder
A signed 8-bit integer array builder.
Int16BufferBuilder
Buffer builder for signed 16-bit integer type.
Int16Builder
A signed 16-bit integer array builder.
Int32BufferBuilder
Buffer builder for signed 32-bit integer type.
Int32Builder
A signed 32-bit integer array builder.
Int64BufferBuilder
Buffer builder for signed 64-bit integer type.
Int64Builder
A signed 64-bit integer array builder.
IntervalDayTimeBufferBuilder
Buffer builder for “calendar” interval in days and milliseconds.
IntervalDayTimeBuilder
A “calendar” interval in days and milliseconds array builder.
IntervalMonthDayNanoBufferBuilder
Buffer builder for “calendar” interval in months, days, and nanoseconds.
IntervalMonthDayNanoBuilder
A “calendar” interval in months, days, and nanoseconds array builder.
IntervalYearMonthBufferBuilder
Buffer builder for “calendar” interval in months.
IntervalYearMonthBuilder
A “calendar” interval in months array builder.
LargeBinaryBuilder
Builder for LargeBinaryArray
LargeBinaryDictionaryBuilder
Builder for DictionaryArray of LargeBinaryArray
LargeBinaryRunBuilder
Builder for RunArray of LargeBinaryArray
LargeListBuilder
Builder for LargeListArray
LargeListViewBuilder
Builder for LargeListViewArray
LargeStringBuilder
Builder for LargeStringArray
LargeStringDictionaryBuilder
Builder for DictionaryArray of LargeStringArray
LargeStringRunBuilder
Builder for RunArray of LargeStringArray
ListBuilder
Builder for ListArray
ListViewBuilder
Builder for ListViewArray
StringBuilder
Builder for StringArray
StringDictionaryBuilder
Builder for DictionaryArray of StringArray
StringRunBuilder
Builder for RunArray of StringArray
StringViewBuilder
Array builder for StringViewArray
Time32MillisecondBufferBuilder
Buffer builder for 32-bit elapsed time since midnight of millisecond unit.
Time32MillisecondBuilder
A 32-bit elapsed time in milliseconds array builder.
Time32SecondBufferBuilder
Buffer builder for 32-bit elapsed time since midnight of second unit.
Time32SecondBuilder
A 32-bit elapsed time in seconds array builder.
Time64MicrosecondBufferBuilder
Buffer builder for 64-bit elapsed time since midnight of microsecond unit.
Time64MicrosecondBuilder
A 64-bit elapsed time in microseconds array builder.
Time64NanosecondBufferBuilder
Buffer builder for 64-bit elapsed time since midnight of nanosecond unit.
Time64NanosecondBuilder
A 64-bit elapsed time in nanoseconds array builder.
TimestampMicrosecondBufferBuilder
Buffer builder for timestamp type of microsecond unit.
TimestampMicrosecondBuilder
A timestamp microsecond array builder.
TimestampMillisecondBufferBuilder
Buffer builder for timestamp type of millisecond unit.
TimestampMillisecondBuilder
A timestamp millisecond array builder.
TimestampNanosecondBufferBuilder
Buffer builder for timestamp type of nanosecond unit.
TimestampNanosecondBuilder
A timestamp nanosecond array builder.
TimestampSecondBufferBuilder
Buffer builder for timestamp type of second unit.
TimestampSecondBuilder
A timestamp second array builder.
UInt8BufferBuilder
Buffer builder for unsigned 8-bit integer type.
UInt8Builder
An unsigned 8-bit integer array builder.
UInt16BufferBuilder
Buffer builder for unsigned 16-bit integer type.
UInt16Builder
An unsigned 16-bit integer array builder.
UInt32BufferBuilder
Buffer builder for unsigned 32-bit integer type.
UInt32Builder
An unsigned 32-bit integer array builder.
UInt64BufferBuilder
Buffer builder for unsigned 64-bit integer type.
UInt64Builder
An unsigned 64-bit integer array builder.