// polars_arrow/array/map/mod.rs
use super::specification::try_check_offsets_bounds;
use super::{new_empty_array, Array, Splitable};
use crate::bitmap::Bitmap;
use crate::datatypes::{ArrowDataType, Field};
use crate::offset::OffsetsBuffer;
mod ffi;
pub(super) mod fmt;
mod iterator;
use polars_error::{polars_bail, PolarsResult};
/// An array of map values: each slot is a range of entries in the inner
/// `field` array, delimited by consecutive `offsets`.
///
/// Invariants enforced by `MapArray::try_new`:
/// * the logical type of `dtype` is `ArrowDataType::Map`;
/// * the map's inner field is an `ArrowDataType::Struct` with exactly 2 fields;
/// * `field.dtype()` equals the dtype of that inner field;
/// * `validity`, when present, has length `offsets.len_proxy()`.
#[derive(Clone)]
pub struct MapArray {
// The (map) data type of this array.
dtype: ArrowDataType,
// Start/end positions of each slot's entries inside `field`.
offsets: OffsetsBuffer<i32>,
// The inner entries array; its dtype is the map's inner `Struct` dtype.
field: Box<dyn Array>,
// Optional validity bitmap with one bit per slot.
validity: Option<Bitmap>,
}
impl MapArray {
    /// Builds a [`MapArray`] after validating its components.
    ///
    /// # Errors
    /// Returns an error when, checked in this order:
    /// * `try_check_offsets_bounds(&offsets, field.len())` fails;
    /// * the logical type of `dtype` is not `ArrowDataType::Map`;
    /// * the map's inner field is not an `ArrowDataType::Struct` with exactly 2 fields;
    /// * `field.dtype()` differs from the dtype of the map's inner field;
    /// * `validity` is `Some` and its length differs from `offsets.len_proxy()`.
    pub fn try_new(
        dtype: ArrowDataType,
        offsets: OffsetsBuffer<i32>,
        field: Box<dyn Array>,
        validity: Option<Bitmap>,
    ) -> PolarsResult<Self> {
        try_check_offsets_bounds(&offsets, field.len())?;

        let entries = Self::try_get_field(&dtype)?;
        let entries_dtype = entries.dtype();

        // The entries must be a two-field struct.
        match entries_dtype {
            ArrowDataType::Struct(children) if children.len() == 2 => {},
            ArrowDataType::Struct(_) => {
                polars_bail!(ComputeError: "MapArray's inner `Struct` must have 2 fields (keys and maps)")
            },
            _ => {
                polars_bail!(ComputeError: "MapArray expects `DataType::Struct` as its inner logical type")
            },
        }

        // The provided values must carry exactly the dtype declared by the map.
        if field.dtype() != entries_dtype {
            polars_bail!(ComputeError: "MapArray expects `field.dtype` to match its inner DataType")
        }

        // One validity bit per slot.
        if let Some(mask) = &validity {
            if mask.len() != offsets.len_proxy() {
                polars_bail!(ComputeError: "validity mask length must match the number of values")
            }
        }

        Ok(Self {
            dtype,
            field,
            offsets,
            validity,
        })
    }

    /// Builds a [`MapArray`], panicking where [`MapArray::try_new`] would
    /// return an error.
    ///
    /// # Panics
    /// Panics under every condition listed in the `# Errors` section of
    /// [`MapArray::try_new`].
    pub fn new(
        dtype: ArrowDataType,
        offsets: OffsetsBuffer<i32>,
        field: Box<dyn Array>,
        validity: Option<Bitmap>,
    ) -> Self {
        let built = Self::try_new(dtype, offsets, field, validity);
        built.unwrap()
    }

    /// Builds a [`MapArray`] of `length` slots backed by an empty inner array,
    /// all-zero offsets and a validity bitmap from `Bitmap::new_zeroed(length)`.
    ///
    /// # Panics
    /// Panics if the logical type of `dtype` is not `ArrowDataType::Map`, or
    /// if [`MapArray::new`] rejects the resulting components.
    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
        let inner_dtype = Self::get_field(&dtype).dtype().clone();
        let entries = new_empty_array(inner_dtype);
        // `length` slots need `length + 1` offsets; all zero means every slot is empty.
        let zero_offsets: OffsetsBuffer<i32> = vec![0i32; 1 + length].try_into().unwrap();
        let all_unset = Bitmap::new_zeroed(length);
        Self::new(dtype, zero_offsets, entries, Some(all_unset))
    }

    /// Builds a [`MapArray`] with default offsets, an empty inner array and no
    /// validity bitmap.
    ///
    /// # Panics
    /// Panics if the logical type of `dtype` is not `ArrowDataType::Map`, or
    /// if [`MapArray::new`] rejects the resulting components.
    pub fn new_empty(dtype: ArrowDataType) -> Self {
        let inner_dtype = Self::get_field(&dtype).dtype().clone();
        let entries = new_empty_array(inner_dtype);
        Self::new(dtype, OffsetsBuffer::default(), entries, None)
    }
}
impl MapArray {
    /// Slices this [`MapArray`] in place so that it covers `length` slots
    /// starting at `offset`.
    ///
    /// # Panics
    /// Panics if `offset + length > self.len()`, including when that sum
    /// overflows `usize`.
    pub fn slice(&mut self, offset: usize, length: usize) {
        // A plain `offset + length` wraps around when overflow checks are
        // disabled, which would let out-of-range arguments pass this check and
        // reach the unchecked slice below. `checked_add` rejects them instead.
        assert!(
            matches!(offset.checked_add(length), Some(end) if end <= self.len()),
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: the assertion above guarantees `offset + length <= self.len()`.
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Slices this [`MapArray`] in place without bounds checks.
    ///
    /// The validity bitmap, if any, is sliced to the same window and dropped
    /// when the sliced bitmap has no unset bits. The offsets are sliced to
    /// `length + 1` entries starting at `offset`.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    #[inline]
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        self.validity = self
            .validity
            .take()
            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
            .filter(|bitmap| bitmap.unset_bits() > 0);
        // `length` slots are delimited by `length + 1` offsets.
        self.offsets.slice_unchecked(offset, length + 1);
    }

    // Macro-generated helpers; their definitions live elsewhere in the crate.
    impl_sliced!();
    impl_mut_validity!();
    impl_into_array!();

    /// Returns the inner [`Field`] of a map `dtype`.
    ///
    /// # Errors
    /// Returns an error if the logical type of `dtype` is not
    /// `ArrowDataType::Map`.
    pub(crate) fn try_get_field(dtype: &ArrowDataType) -> PolarsResult<&Field> {
        if let ArrowDataType::Map(field, _) = dtype.to_logical_type() {
            Ok(field.as_ref())
        } else {
            polars_bail!(ComputeError: "The dtype's logical type must be DataType::Map")
        }
    }

    /// Returns the inner [`Field`] of a map `dtype`.
    ///
    /// # Panics
    /// Panics if the logical type of `dtype` is not `ArrowDataType::Map`.
    pub(crate) fn get_field(dtype: &ArrowDataType) -> &Field {
        Self::try_get_field(dtype).unwrap()
    }
}
impl MapArray {
    /// Returns the length of this array, as reported by the offsets buffer's
    /// `len_proxy`.
    #[inline]
    pub fn len(&self) -> usize {
        self.offsets().len_proxy()
    }

    /// Returns the offsets that delimit each slot inside the inner array.
    #[inline]
    pub fn offsets(&self) -> &OffsetsBuffer<i32> {
        &self.offsets
    }

    /// Returns the inner array that holds the entries of every slot.
    #[inline]
    pub fn field(&self) -> &Box<dyn Array> {
        &self.field
    }

    /// Returns the entries of slot `i` as a slice of the inner array.
    ///
    /// # Panics
    /// Panics if `i >= self.len()`.
    #[inline]
    pub fn value(&self, i: usize) -> Box<dyn Array> {
        assert!(i < self.len());
        // SAFETY: `i` was just checked to be within bounds.
        unsafe { self.value_unchecked(i) }
    }

    /// Returns the entries of slot `i` as a slice of the inner array, without
    /// bounds checks.
    ///
    /// # Safety
    /// The caller must ensure that `i < self.len()`.
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
        let (first, past_last) = self.offsets.start_end_unchecked(i);
        self.field.sliced_unchecked(first, past_last - first)
    }
}
impl Array for MapArray {
    // Macro-generated trait methods; the macro is defined elsewhere in the crate.
    impl_common_array!();

    /// Returns the validity bitmap, or `None` when the array has none.
    fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    /// Clones this array, applies the by-value `with_validity` to the clone
    /// (presumably the one generated by `impl_mut_validity!`) and boxes the
    /// result.
    #[inline]
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        let updated = self.clone().with_validity(validity);
        Box::new(updated)
    }
}
impl Splitable for MapArray {
fn check_bound(&self, offset: usize) -> bool {
offset <= self.len()
}
unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
(
Self {
dtype: self.dtype.clone(),
offsets: lhs_offsets,
field: self.field.clone(),
validity: lhs_validity,
},
Self {
dtype: self.dtype.clone(),
offsets: rhs_offsets,
field: self.field.clone(),
validity: rhs_validity,
},
)
}
}