use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_schema::{Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
/// Builder for a [`FixedSizeListArray`] whose child values are produced by a
/// nested [`ArrayBuilder`] of type `T`.
///
/// Child values are pushed through the nested builder and each list slot is
/// then closed with a validity flag. When the array is finished, the child
/// length must equal the number of slots multiplied by the value length.
#[derive(Debug)]
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
/// One validity bit per list slot; its length is reported as the builder length.
null_buffer_builder: NullBufferBuilder,
/// Nested builder that accumulates the child values of every list slot.
values_builder: T,
/// Number of child values per list slot, handed to the finished array.
list_len: i32,
/// Optional child field override; when `None`, a nullable field named
/// `"item"` with the child's data type is used when finishing.
field: Option<FieldRef>,
}
impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
    /// Creates a builder around `values_builder` where every list slot holds
    /// `value_length` child values.
    ///
    /// The initial slot capacity is derived from the number of values already
    /// present in `values_builder`; it is `0` when `value_length` is `0`.
    pub fn new(values_builder: T, value_length: i32) -> Self {
        let child_len = values_builder.len();
        // Same cast as the division it replaces; a zero divisor yields zero capacity.
        let initial_capacity = match value_length as usize {
            0 => 0,
            width => child_len / width,
        };
        Self::with_capacity(values_builder, value_length, initial_capacity)
    }

    /// Creates a builder around `values_builder` with room reserved for
    /// `capacity` validity bits (one per list slot).
    ///
    /// No child field override is set; see [`Self::with_field`].
    pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
        let null_buffer_builder = NullBufferBuilder::new(capacity);
        Self {
            null_buffer_builder,
            values_builder,
            list_len: value_length,
            field: None,
        }
    }

    /// Returns the builder with the child field replaced by `field`.
    ///
    /// The field is handed to the finished array as-is instead of the default
    /// nullable `"item"` field.
    pub fn with_field(mut self, field: impl Into<FieldRef>) -> Self {
        self.field = Some(field.into());
        self
    }
}
impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
where
T: 'static,
{
fn as_any(&self) -> &dyn Any {
self
}
fn as_any_mut(&mut self) -> &mut dyn Any {
self
}
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
self
}
fn len(&self) -> usize {
self.null_buffer_builder.len()
}
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
}
impl<T: ArrayBuilder> FixedSizeListBuilder<T>
where
T: 'static,
{
pub fn values(&mut self) -> &mut T {
&mut self.values_builder
}
pub fn value_length(&self) -> i32 {
self.list_len
}
#[inline]
pub fn append(&mut self, is_valid: bool) {
self.null_buffer_builder.append(is_valid);
}
pub fn finish(&mut self) -> FixedSizeListArray {
let len = self.len();
let values = self.values_builder.finish();
let nulls = self.null_buffer_builder.finish();
assert_eq!(
values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
values.len(),
self.list_len,
len,
);
let field = self
.field
.clone()
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));
FixedSizeListArray::new(field, self.list_len, values, nulls)
}
pub fn finish_cloned(&self) -> FixedSizeListArray {
let len = self.len();
let values = self.values_builder.finish_cloned();
let nulls = self.null_buffer_builder.finish_cloned();
assert_eq!(
values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
values.len(),
self.list_len,
len,
);
let field = self
.field
.clone()
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));
FixedSizeListArray::new(field, self.list_len, values, nulls)
}
pub fn validity_slice(&self) -> Option<&[u8]> {
self.null_buffer_builder.as_slice()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::DataType;

    use crate::builder::Int32Builder;
    use crate::Array;
    use crate::Int32Array;

    /// Builds a four-slot builder of `Int32` lists with three values each.
    ///
    /// * `include_null_element` - the third slot is a null list (with null
    ///   child values) instead of `[2, 3, 4]`.
    /// * `include_null_in_values` - the fourth slot is `[3, null, 5]` instead
    ///   of `[3, 4, 5]`.
    fn make_list_builder(
        include_null_element: bool,
        include_null_in_values: bool,
    ) -> FixedSizeListBuilder<Int32Builder> {
        let values_builder = Int32Builder::new();
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_value(2);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.append(true);

        if include_null_element {
            builder.values().append_null();
            builder.values().append_null();
            builder.values().append_null();
            builder.append(false);
        } else {
            builder.values().append_value(2);
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.append(true);
        }

        if include_null_in_values {
            builder.values().append_value(3);
            builder.values().append_null();
            builder.values().append_value(5);
            builder.append(true);
        } else {
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.values().append_value(5);
            builder.append(true);
        }

        builder
    }

    #[test]
    fn test_fixed_size_list_array_builder() {
        let mut builder = make_list_builder(true, true);

        let list_array = builder.finish();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field() {
        let builder = make_list_builder(false, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));

        let list_array = builder.finish();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_and_null() {
        // Null child values are acceptable for a non-nullable field as long as
        // they sit under a null list slot.
        let builder = make_list_builder(true, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));

        let list_array = builder.finish();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish();
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_with_field_type_panic() {
        let values_builder = Int32Builder::new();
        let builder = FixedSizeListBuilder::new(values_builder, 3);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);

        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);

        builder.finish();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));

        let list_array = builder.finish_cloned();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, false));

        // Exercise the cloning path: this test previously called `finish()`
        // and merely duplicated the non-cloned variant above.
        let list_array = builder.finish_cloned();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());

        // `finish_cloned` must leave the builder contents untouched.
        assert_eq!(4, builder.len());
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
        let builder = make_list_builder(false, false);
        let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish_cloned() {
        let mut builder = make_list_builder(true, true);

        let mut list_array = builder.finish_cloned();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(3, list_array.value_length());

        // The builder keeps its contents after `finish_cloned`, so further
        // slots extend the same array.
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.values().append_null();
        builder.append(true);

        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);

        list_array = builder.finish();
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(6, list_array.len());
        assert_eq!(2, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());

        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());

        let arr = builder.finish_cloned();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_empty() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
        assert!(builder.is_empty());

        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert_eq!(0, builder.len());

        // The builder is reusable after `finish` and starts from empty.
        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    #[should_panic(
        expected = "Length of the child array (10) must be the multiple of the value length (3) and the array length (3)."
    )]
    fn test_fixed_size_list_array_builder_fail() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);
        // Four values in the last slot break the 3-per-slot invariant.
        builder.values().append_slice(&[7, 8, 9, 10]);
        builder.append(true);

        builder.finish();
    }
}