// polars_arrow/array/fixed_size_list/mod.rs
use super::{new_empty_array, new_null_array, Array, ArrayRef, Splitable};
use crate::bitmap::Bitmap;
use crate::datatypes::{ArrowDataType, Field};
mod ffi;
pub(super) mod fmt;
mod iterator;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, polars_ensure, PolarsResult};
use polars_utils::format_tuple;
use polars_utils::pl_str::PlSmallStr;
use crate::datatypes::reshape::{Dimension, ReshapeDimension};
/// An array whose every slot is a list holding the same number (`size`) of child values.
///
/// The child values of all slots are stored back to back in `values`.
#[derive(Clone)]
pub struct FixedSizeListArray {
    /// Number of child values per slot, taken from the `FixedSizeList` data type.
    size: usize,
    /// Number of list slots in this array.
    length: usize,
    /// Data type of the array; its logical type is `ArrowDataType::FixedSizeList`.
    dtype: ArrowDataType,
    /// Child values of all slots.
    values: Box<dyn Array>,
    /// Optional validity bitmap with one bit per list slot.
    validity: Option<Bitmap>,
}
impl FixedSizeListArray {
/// Creates a new [`FixedSizeListArray`] after validating all arguments.
///
/// # Errors
/// Returns a `ComputeError` when:
/// * the logical type of `dtype` is not `ArrowDataType::FixedSizeList`;
/// * the data type of the child field differs from `values.dtype()`;
/// * the list width is non-zero and `values.len()` is not a multiple of it, or
///   `values.len() / width` differs from `length`;
/// * the list width is zero and `values` is not empty;
/// * `validity` is present and its length differs from `length`.
pub fn try_new(
    dtype: ArrowDataType,
    length: usize,
    values: Box<dyn Array>,
    validity: Option<Bitmap>,
) -> PolarsResult<Self> {
    let (child, size) = Self::try_child_and_size(&dtype)?;

    let child_dtype = &child.dtype;
    let values_dtype = values.dtype();
    polars_ensure!(
        child_dtype == values_dtype,
        ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}."
    );

    if size == 0 {
        // A zero-width list cannot own any child values.
        polars_ensure!(values.len() == 0, ComputeError:
            "zero width FixedSizeListArray has values (length = {}).",
            values.len(),
        );
    } else {
        // The child values must split evenly into exactly `length` lists.
        polars_ensure!(values.len() % size == 0, ComputeError:
            "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
            values.len(),
            size
        );
        polars_ensure!(values.len() / size == length, ComputeError:
            "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
            values.len() / size,
            length,
        );
    }

    if let Some(mask) = validity.as_ref() {
        polars_ensure!(
            mask.len() == length,
            ComputeError: "validity mask length must be equal to the number of values divided by size"
        );
    }

    Ok(Self {
        size,
        length,
        dtype,
        values,
        validity,
    })
}
/// Checks the structural invariants that [`Self::try_new`] establishes.
///
/// Returns `true` when the child values split into exactly `length` lists of `size`
/// elements (or are empty for a zero-width list) and the validity bitmap, if any,
/// has one bit per list slot.
#[inline]
fn has_invariants(&self) -> bool {
    let n_values = self.values.len();

    let lengths_consistent = if self.size == 0 {
        n_values == 0
    } else {
        n_values % self.size == 0 && n_values / self.size == self.length
    };

    let validity_consistent = match &self.validity {
        Some(mask) => mask.len() == self.length,
        None => true,
    };

    lengths_consistent && validity_consistent
}
/// Creates a new [`FixedSizeListArray`]; the panicking counterpart of [`Self::try_new`].
///
/// # Panics
/// Panics whenever [`Self::try_new`] would return an error for the same arguments.
#[track_caller]
pub fn new(
    dtype: ArrowDataType,
    length: usize,
    values: Box<dyn Array>,
    validity: Option<Bitmap>,
) -> Self {
    let checked = Self::try_new(dtype, length, values, validity);
    checked.unwrap()
}
/// Returns the number of child values stored per list slot (the fixed list width).
pub const fn size(&self) -> usize {
self.size
}
/// Returns a zero-length [`FixedSizeListArray`] of the given `dtype`, backed by an
/// empty child array and carrying no validity bitmap.
///
/// # Panics
/// Panics if the logical type of `dtype` is not `ArrowDataType::FixedSizeList`.
pub fn new_empty(dtype: ArrowDataType) -> Self {
    let (child, _size) = Self::get_child_and_size(&dtype);
    let values = new_empty_array(child.dtype().clone());
    Self::new(dtype, 0, values, None)
}
/// Returns a [`FixedSizeListArray`] with `length` slots whose validity bitmap is all
/// zeros, backed by `length * size` child values produced by `new_null_array`.
///
/// # Panics
/// Panics if the logical type of `dtype` is not `ArrowDataType::FixedSizeList`.
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
    let (child, size) = Self::get_child_and_size(&dtype);
    let child_dtype = child.dtype().clone();
    let values = new_null_array(child_dtype, length * size);
    let validity = Bitmap::new_zeroed(length);
    Self::new(dtype, length, values, Some(validity))
}
pub fn from_shape(
leaf_array: ArrayRef,
dimensions: &[ReshapeDimension],
) -> PolarsResult<ArrayRef> {
polars_ensure!(
!dimensions.is_empty(),
InvalidOperation: "at least one dimension must be specified"
);
let size = leaf_array.len();
let mut total_dim_size = 1;
let mut num_infers = 0;
for &dim in dimensions {
match dim {
ReshapeDimension::Infer => num_infers += 1,
ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
}
}
polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
if size == 0 {
polars_ensure!(
num_infers > 0 || total_dim_size == 0,
InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
format_tuple!(dimensions),
);
let mut prev_arrow_dtype = leaf_array.dtype().clone();
let mut prev_array = leaf_array;
let mut current_length = dimensions[0].get_or_infer(0);
let len_iter = dimensions[1..]
.iter()
.map(|d| {
let length = current_length as usize;
current_length *= d.get_or_infer(0);
length
})
.collect::<Vec<_>>();
for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
let dim = dim.get_or_infer(0);
prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
prev_array =
FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
.boxed();
}
return Ok(prev_array);
}
polars_ensure!(
total_dim_size > 0,
InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
format_tuple!(dimensions)
);
polars_ensure!(
size % total_dim_size == 0,
InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
);
let mut prev_arrow_dtype = leaf_array.dtype().clone();
let mut prev_array = leaf_array;
for dim in dimensions[1..].iter().rev() {
let dim = dim.get_or_infer((size / total_dim_size) as u64);
prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
prev_array = FixedSizeListArray::new(
prev_arrow_dtype.clone(),
prev_array.len() / dim as usize,
prev_array,
None,
)
.boxed();
}
Ok(prev_array)
}
/// Returns the shape of this array: its length, its width, and then the width of every
/// directly nested [`FixedSizeListArray`] found by following the child values.
pub fn get_dims(&self) -> Vec<Dimension> {
    let mut shape = Vec::with_capacity(2);
    shape.push(Dimension::new(self.length as _));
    shape.push(Dimension::new(self.size as _));

    // Descend for as long as the child values are themselves fixed-size lists.
    let mut child = &self.values;
    loop {
        let Some(nested) = child.as_any().downcast_ref::<FixedSizeListArray>() else {
            break;
        };
        shape.push(Dimension::new(nested.size as _));
        child = &nested.values;
    }

    shape
}
/// Returns a copy of this array in which the validity of each list slot is combined
/// (bitwise AND) into the validity of the child values belonging to that slot.
///
/// If this array has no validity bitmap, a plain clone is returned.
pub fn propagate_nulls(&self) -> Self {
    let outer = match self.validity() {
        Some(mask) => mask,
        None => return self.clone(),
    };

    let width = self.size;
    let expanded = if width == 1 {
        // One child value per slot: the slot bitmap already has the right length.
        outer.clone()
    } else {
        let n_child = width * outer.len();
        // SAFETY: `i < width * outer.len()`, hence `i / width < outer.len()`.
        Bitmap::from_trusted_len_iter(
            (0..n_child).map(|i| unsafe { outer.get_bit_unchecked(i / width) }),
        )
    };

    let child_validity = match self.values.validity() {
        Some(existing) => existing & &expanded,
        None => expanded,
    };

    Self::new(
        self.dtype().clone(),
        self.length,
        self.values.with_validity(Some(child_validity)),
        self.validity.clone(),
    )
}
}
impl FixedSizeListArray {
    /// Slices this array in place so that it keeps the `length` list slots starting at
    /// `offset`.
    ///
    /// # Panics
    /// Panics if `offset + length` overflows `usize` or is greater than `self.len()`.
    pub fn slice(&mut self, offset: usize, length: usize) {
        // `checked_add` keeps a wrapping `offset + length` (release builds) from slipping
        // past the bounds check and reaching `slice_unchecked` with out-of-range values.
        assert!(
            offset
                .checked_add(length)
                .is_some_and(|end| end <= self.len()),
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: `offset + length <= self.len()` was verified just above.
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Slices this array in place without bounds checks.
    ///
    /// The validity bitmap is sliced to the same range and dropped entirely when the
    /// sliced range contains no unset bits; the child values are sliced to the
    /// corresponding `length * size` elements starting at `offset * size`.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        debug_assert!(offset + length <= self.len());
        self.validity = self
            .validity
            .take()
            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
            .filter(|bitmap| bitmap.unset_bits() > 0);
        self.values
            .slice_unchecked(offset * self.size, length * self.size);
        self.length = length;
    }

    // Project macros defined outside this file. NOTE(review): presumably they generate the
    // by-value `sliced`, the validity setters (`with_validity`, ...) and the
    // `boxed`/`arced` conversions used elsewhere in this module -- confirm against the
    // macro definitions.
    impl_sliced!();
    impl_mut_validity!();
    impl_into_array!();
}
impl FixedSizeListArray {
    /// Returns the number of list slots in this array.
    #[inline]
    pub fn len(&self) -> usize {
        debug_assert!(self.has_invariants());
        self.length
    }

    /// Returns the validity bitmap (one bit per list slot), if there is one.
    #[inline]
    pub fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    /// Returns the child array holding the values of all list slots back to back.
    pub fn values(&self) -> &Box<dyn Array> {
        &self.values
    }

    /// Returns the child values of slot `i`: the `size` elements starting at `i * size`.
    #[inline]
    pub fn value(&self, i: usize) -> Box<dyn Array> {
        let width = self.size;
        self.values.sliced(i * width, width)
    }

    /// Returns the child values of slot `i` without bounds checks.
    ///
    /// # Safety
    /// The caller must ensure that `i < self.len()`.
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
        let width = self.size;
        self.values.sliced_unchecked(i * width, width)
    }

    /// Returns the child values of slot `i`, or `None` when that slot is null.
    #[inline]
    pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
        if self.is_null(i) {
            return None;
        }
        // SAFETY: relies on `is_null(i)` above having bounds-checked `i`.
        // NOTE(review): confirm against the `Array::is_null` definition.
        Some(unsafe { self.value_unchecked(i) })
    }
}
impl FixedSizeListArray {
    /// Extracts the child field and list width from a `FixedSizeList` data type.
    ///
    /// # Errors
    /// Returns a `ComputeError` if the logical type of `dtype` is not
    /// `ArrowDataType::FixedSizeList`.
    pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
        let ArrowDataType::FixedSizeList(child, size) = dtype.to_logical_type() else {
            polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList")
        };
        Ok((child.as_ref(), *size))
    }

    /// Panicking counterpart of [`Self::try_child_and_size`].
    ///
    /// # Panics
    /// Panics if the logical type of `dtype` is not `ArrowDataType::FixedSizeList`.
    pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
        let child_and_size = Self::try_child_and_size(dtype);
        child_and_size.unwrap()
    }

    /// Builds a `FixedSizeList` data type of width `size` whose child is a nullable
    /// field named `"item"` of type `dtype`.
    pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
        let item = Field::new(PlSmallStr::from_static("item"), dtype, true);
        ArrowDataType::FixedSizeList(Box::new(item), size)
    }
}
// `Array` trait implementation for `FixedSizeListArray`.
impl Array for FixedSizeListArray {
// Project macro defined outside this file. NOTE(review): presumably it supplies the
// remaining `Array` methods (e.g. `as_any`, `len`, `dtype`) -- confirm against the
// macro definition.
impl_common_array!();
/// Returns the validity bitmap (one bit per list slot), if there is one.
fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}
/// Returns a boxed clone of this array with its validity replaced by `validity`.
#[inline]
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
// Calls `with_validity` on the owned clone, not on `&self`.
// NOTE(review): presumably the by-value inherent method generated by
// `impl_mut_validity!` -- confirm.
Box::new(self.clone().with_validity(validity))
}
}
impl Splitable for FixedSizeListArray {
    /// Returns whether `offset` is a valid split point, i.e. not past the last slot.
    fn check_bound(&self, offset: usize) -> bool {
        self.len() >= offset
    }

    /// Splits this array into the first `offset` list slots and the remaining ones.
    ///
    /// The child values are split at `offset * size` and the validity at `offset`.
    ///
    /// # Safety
    /// NOTE(review): presumably the caller must guarantee `self.check_bound(offset)`;
    /// confirm against the `Splitable` trait contract.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let width = self.size;
        let right_len = self.length - offset;

        // SAFETY: forwarded to the caller, see the `# Safety` section above.
        let (left_values, right_values) =
            unsafe { self.values.split_at_boxed_unchecked(offset * width) };
        // SAFETY: forwarded to the caller, see the `# Safety` section above.
        let (left_validity, right_validity) =
            unsafe { self.validity.split_at_unchecked(offset) };

        let left = Self {
            size: width,
            length: offset,
            dtype: self.dtype.clone(),
            values: left_values,
            validity: left_validity,
        };
        let right = Self {
            size: width,
            length: right_len,
            dtype: self.dtype.clone(),
            values: right_values,
            validity: right_validity,
        };

        (left, right)
    }
}