use std::sync::Arc;
use polars_error::{polars_bail, PolarsResult};
use super::{MutableUtf8ValuesArray, MutableUtf8ValuesIter, StrAsBytes, Utf8Array};
use crate::array::physical_binary::*;
use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::datatypes::ArrowDataType;
use crate::offset::{Offset, Offsets};
use crate::trusted_len::TrustedLen;
/// A growable builder for nullable UTF-8 string arrays that converts into a [`Utf8Array`].
///
/// It pairs a [`MutableUtf8ValuesArray`] (which holds the strings themselves) with an
/// optional validity bitmap recording which slots are null.
#[derive(Debug, Clone)]
pub struct MutableUtf8Array<O: Offset> {
// The string slots (offsets + bytes). Null slots still occupy one entry here.
values: MutableUtf8ValuesArray<O>,
// One bit per slot: `true` for a pushed `Some`, `false` for a pushed `None`.
// Stays `None` until it is needed; `try_push` creates it on the first null.
validity: Option<MutableBitmap>,
}
impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O> {
fn from(other: MutableUtf8Array<O>) -> Self {
let validity = other.validity.and_then(|x| {
let validity: Option<Bitmap> = x.into();
validity
});
let array: Utf8Array<O> = other.values.into();
array.with_validity(validity)
}
}
impl<O: Offset> Default for MutableUtf8Array<O> {
fn default() -> Self {
Self::new()
}
}
impl<O: Offset> MutableUtf8Array<O> {
    /// Creates an empty builder with default values storage and no validity bitmap.
    pub fn new() -> Self {
        let values = Default::default();
        Self {
            values,
            validity: None,
        }
    }

    /// Builds a [`MutableUtf8Array`] from its raw parts, validating them.
    ///
    /// # Errors
    /// * Propagates any error returned by [`MutableUtf8ValuesArray::try_new`] for
    ///   `dtype`, `offsets` and `values`.
    /// * Returns a `ComputeError` when `validity` is `Some` and its length differs
    ///   from the number of values.
    pub fn try_new(
        dtype: ArrowDataType,
        offsets: Offsets<O>,
        values: Vec<u8>,
        validity: Option<MutableBitmap>,
    ) -> PolarsResult<Self> {
        let values = MutableUtf8ValuesArray::try_new(dtype, offsets, values)?;

        if let Some(bitmap) = validity.as_ref() {
            if bitmap.len() != values.len() {
                polars_bail!(ComputeError: "validity's length must be equal to the number of values");
            }
        }

        Ok(Self { values, validity })
    }

    /// Builds a [`MutableUtf8Array`] from its raw parts without validating the values.
    ///
    /// # Safety
    /// The caller must uphold whatever [`MutableUtf8ValuesArray::new_unchecked`]
    /// requires of `dtype`, `offsets` and `values`; none of that is checked here.
    ///
    /// # Panics
    /// Panics when `validity` is `Some` and its length differs from the number of
    /// values (this one check is still performed).
    pub unsafe fn new_unchecked(
        dtype: ArrowDataType,
        offsets: Offsets<O>,
        values: Vec<u8>,
        validity: Option<MutableBitmap>,
    ) -> Self {
        let values = MutableUtf8ValuesArray::new_unchecked(dtype, offsets, values);
        match validity.as_ref() {
            Some(bitmap) => assert_eq!(values.len(), bitmap.len()),
            None => {},
        }
        Self { values, validity }
    }

    /// Builds an array from a slice of optional strings.
    pub fn from<T: AsRef<str>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
        let items = slice.as_ref();
        Self::from_trusted_len_iter(items.iter().map(|item| item.as_ref()))
    }

    /// The data type used when none is supplied; delegates to [`Utf8Array::default_dtype`].
    fn default_dtype() -> ArrowDataType {
        Utf8Array::<O>::default_dtype()
    }

    /// Creates an empty builder, forwarding `capacity` and a second capacity of `0`
    /// to [`MutableUtf8Array::with_capacities`].
    pub fn with_capacity(capacity: usize) -> Self {
        Self::with_capacities(capacity, 0)
    }

    /// Creates an empty builder whose values storage is pre-sized by
    /// [`MutableUtf8ValuesArray::with_capacities`]. No validity bitmap is allocated.
    pub fn with_capacities(capacity: usize, values: usize) -> Self {
        let values = MutableUtf8ValuesArray::with_capacities(capacity, values);
        Self {
            values,
            validity: None,
        }
    }

    /// Reserves room for `additional` more slots and `additional_values` more values
    /// storage; the validity bitmap (when present) reserves `additional` bits.
    pub fn reserve(&mut self, additional: usize, additional_values: usize) {
        self.values.reserve(additional, additional_values);
        if let Some(bitmap) = &mut self.validity {
            bitmap.reserve(additional);
        }
    }

    /// Returns the capacity reported by the underlying values array.
    pub fn capacity(&self) -> usize {
        self.values.capacity()
    }

    /// Returns the number of slots (null or not) in the array.
    #[inline]
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Appends an optional string.
    ///
    /// # Panics
    /// Panics if the fallible [`TryPush::try_push`] returns an error.
    #[inline]
    pub fn push<T: AsRef<str>>(&mut self, value: Option<T>) {
        self.try_push(value).unwrap()
    }

    /// Returns the string stored in slot `i`.
    ///
    /// The validity bitmap is not consulted, so this returns whatever is stored
    /// for a null slot as well. Bounds handling is that of
    /// [`MutableUtf8ValuesArray::value`].
    #[inline]
    pub fn value(&self, i: usize) -> &str {
        self.values.value(i)
    }

    /// Returns the string stored in slot `i` without the checks of [`Self::value`].
    ///
    /// # Safety
    /// The caller must satisfy the requirements of
    /// [`MutableUtf8ValuesArray::value_unchecked`] (presumably `i < self.len()`;
    /// confirm against that method).
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> &str {
        self.values.value_unchecked(i)
    }

    /// Removes the last slot and returns its string.
    ///
    /// Returns `None` when the array is empty, and also when the removed slot was
    /// null (its validity bit was unset); in the latter case the slot is still
    /// removed.
    pub fn pop(&mut self) -> Option<String> {
        let value = self.values.pop()?;
        match self.validity.as_mut() {
            // No bitmap: every slot is valid.
            None => Some(value),
            Some(bitmap) => match bitmap.pop() {
                Some(true) => Some(value),
                Some(false) | None => None,
            },
        }
    }

    /// Creates the validity bitmap after the first null has been pushed to `values`:
    /// every existing slot is marked valid except the last one.
    ///
    /// Must only be called when the array is non-empty (`len - 1` is computed).
    fn init_validity(&mut self) {
        let len = self.len();
        let mut bitmap = MutableBitmap::with_capacity(self.values.capacity());
        bitmap.extend_constant(len, true);
        bitmap.set(len - 1, false);
        self.validity = Some(bitmap);
    }

    /// Returns an iterator of `Option<&str>` that zips the values with the validity.
    pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>, BitmapIter> {
        let values = self.values_iter();
        let validity = self.validity.as_ref().map(|bitmap| bitmap.iter());
        ZipValidity::new(values, validity)
    }

    /// Consumes the builder and returns the frozen [`Utf8Array`] behind an `Arc<dyn Array>`.
    pub fn into_arc(self) -> Arc<dyn Array> {
        Arc::new(Utf8Array::<O>::from(self))
    }

    /// Shrinks the values storage and, when present, the validity bitmap.
    pub fn shrink_to_fit(&mut self) {
        self.values.shrink_to_fit();
        if let Some(bitmap) = self.validity.as_mut() {
            bitmap.shrink_to_fit();
        }
    }

    /// Decomposes the builder into `(dtype, offsets, value bytes, validity)`.
    pub fn into_data(self) -> (ArrowDataType, Offsets<O>, Vec<u8>, Option<MutableBitmap>) {
        let Self { values, validity } = self;
        let (dtype, offsets, bytes) = values.into_inner();
        (dtype, offsets, bytes, validity)
    }

    /// Returns an iterator over the stored strings, ignoring validity.
    pub fn values_iter(&self) -> MutableUtf8ValuesIter<O> {
        self.values.iter()
    }

    /// Replaces the validity bitmap.
    ///
    /// # Panics
    /// Panics when `validity` is `Some` and its length differs from the number of values.
    pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
        if let Some(bitmap) = validity.as_ref() {
            assert_eq!(self.values.len(), bitmap.len());
        }
        self.validity = validity;
    }

    /// Transforms the current validity bitmap with `f`; does nothing when there is none.
    ///
    /// # Panics
    /// Panics (via [`Self::set_validity`]) if `f` returns a bitmap of a different length.
    pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
        // The bitmap is moved out first, so `self.validity` is `None` while `f` runs.
        if let Some(current) = self.validity.take() {
            let updated = f(current);
            self.set_validity(Some(updated));
        }
    }
}
impl<O: Offset> MutableUtf8Array<O> {
    /// Returns the raw byte buffer held by the underlying values array.
    pub fn values(&self) -> &Vec<u8> {
        let inner = &self.values;
        inner.values()
    }

    /// Returns the offsets held by the underlying values array.
    pub fn offsets(&self) -> &Offsets<O> {
        let inner = &self.values;
        inner.offsets()
    }
}
impl<O: Offset> MutableArray for MutableUtf8Array<O> {
fn len(&self) -> usize {
self.len()
}
fn validity(&self) -> Option<&MutableBitmap> {
self.validity.as_ref()
}
fn as_box(&mut self) -> Box<dyn Array> {
let array: Utf8Array<O> = std::mem::take(self).into();
array.boxed()
}
fn as_arc(&mut self) -> Arc<dyn Array> {
let array: Utf8Array<O> = std::mem::take(self).into();
array.arced()
}
fn dtype(&self) -> &ArrowDataType {
if O::IS_LARGE {
&ArrowDataType::LargeUtf8
} else {
&ArrowDataType::Utf8
}
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self
}
#[inline]
fn push_null(&mut self) {
self.push::<&str>(None)
}
fn reserve(&mut self, additional: usize) {
self.reserve(additional, 0)
}
fn shrink_to_fit(&mut self) {
self.shrink_to_fit()
}
}
impl<O: Offset, P: AsRef<str>> FromIterator<Option<P>> for MutableUtf8Array<O> {
    /// Collects optional strings into a new array.
    ///
    /// # Panics
    /// Panics if the fallible `try_from_iter` returns an error.
    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
        let built = Self::try_from_iter(iter);
        built.unwrap()
    }
}
impl<O: Offset> MutableUtf8Array<O> {
    /// Appends every string of a [`TrustedLen`] iterator as a valid slot.
    #[inline]
    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
    where
        P: AsRef<str>,
        I: TrustedLen<Item = P>,
    {
        // SAFETY: the `TrustedLen` bound stands in for the guarantee the unchecked
        // variant asks of its caller.
        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
    }

    /// Appends every string of an arbitrary iterator as a valid slot.
    ///
    /// When a validity bitmap exists it is padded with one set bit per appended slot.
    #[inline]
    pub fn extend_values<I, P>(&mut self, iterator: I)
    where
        P: AsRef<str>,
        I: Iterator<Item = P>,
    {
        let before = self.values.len();
        self.values.extend(iterator);
        let added = self.values.len() - before;

        match &mut self.validity {
            Some(bitmap) => bitmap.extend_constant(added, true),
            None => {},
        }
    }

    /// Same as [`Self::extend_trusted_len_values`] but without the `TrustedLen` bound.
    ///
    /// # Safety
    /// The caller must guarantee that `iterator` upholds the [`TrustedLen`] contract,
    /// as required by `MutableUtf8ValuesArray::extend_trusted_len_unchecked`.
    #[inline]
    pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
    where
        P: AsRef<str>,
        I: Iterator<Item = P>,
    {
        let before = self.values.len();
        self.values.extend_trusted_len_unchecked(iterator);
        let added = self.values.len() - before;

        match &mut self.validity {
            Some(bitmap) => bitmap.extend_constant(added, true),
            None => {},
        }
    }

    /// Appends every optional string of a [`TrustedLen`] iterator.
    #[inline]
    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
    where
        P: AsRef<str>,
        I: TrustedLen<Item = Option<P>>,
    {
        // SAFETY: the `TrustedLen` bound stands in for the guarantee the unchecked
        // variant asks of its caller.
        unsafe { self.extend_trusted_len_unchecked(iterator) }
    }

    /// Same as [`Self::extend_trusted_len`] but without the `TrustedLen` bound.
    ///
    /// A validity bitmap is always present afterwards: when none existed, one is
    /// created with a set bit for every slot already in the array.
    ///
    /// # Safety
    /// The caller must guarantee that `iterator` upholds the [`TrustedLen`] contract.
    #[inline]
    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
    where
        P: AsRef<str>,
        I: Iterator<Item = Option<P>>,
    {
        let len = self.len();
        let validity = self.validity.get_or_insert_with(|| {
            let mut bitmap = MutableBitmap::new();
            bitmap.extend_constant(len, true);
            bitmap
        });

        self.values.extend_from_trusted_len_iter(validity, iterator);
    }

    /// Builds an array from an iterator of optional strings without the `TrustedLen` bound.
    ///
    /// # Safety
    /// The caller must guarantee that `iterator` upholds the [`TrustedLen`] contract;
    /// the parts produced by `trusted_len_unzip` are passed to
    /// [`Self::new_unchecked`] without validation.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
    where
        P: AsRef<str>,
        I: Iterator<Item = Option<P>>,
    {
        let as_bytes = iterator.map(|item| item.map(StrAsBytes));
        let (validity, offsets, values) = trusted_len_unzip(as_bytes);
        Self::new_unchecked(Self::default_dtype(), offsets, values, validity)
    }

    /// Builds an array from a [`TrustedLen`] iterator of optional strings.
    #[inline]
    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
    where
        P: AsRef<str>,
        I: TrustedLen<Item = Option<P>>,
    {
        // SAFETY: the `TrustedLen` bound stands in for the guarantee the unchecked
        // variant asks of its caller.
        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
    }

    /// Builds an all-valid array from an iterator of strings without the `TrustedLen` bound.
    ///
    /// # Safety
    /// The caller must guarantee that `iterator` upholds the [`TrustedLen`] contract.
    #[inline]
    pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<str>, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        let values = MutableUtf8ValuesArray::from_trusted_len_iter_unchecked(iterator);
        values.into()
    }

    /// Builds an all-valid array from a [`TrustedLen`] iterator of strings.
    #[inline]
    pub fn from_trusted_len_values_iter<T: AsRef<str>, I: TrustedLen<Item = T>>(
        iterator: I,
    ) -> Self {
        // SAFETY: the `TrustedLen` bound stands in for the guarantee the unchecked
        // variant asks of its caller.
        unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
    }

    /// Builds an array by pushing each item of `iter`, stopping at the first push error.
    ///
    /// Capacity is pre-sized from the lower bound of the iterator's `size_hint`.
    fn try_from_iter<P: AsRef<str>, I: IntoIterator<Item = Option<P>>>(
        iter: I,
    ) -> PolarsResult<Self> {
        let mut items = iter.into_iter();
        let mut array = Self::with_capacity(items.size_hint().0);
        items.try_for_each(|item| array.try_push(item))?;
        Ok(array)
    }

    /// Builds an array from a fallible iterator without the `TrustedLen` bound,
    /// returning the first error the iterator yields.
    ///
    /// # Safety
    /// The caller must guarantee that `iterator` upholds the [`TrustedLen`] contract;
    /// the parts produced by `try_trusted_len_unzip` are passed to
    /// [`Self::new_unchecked`] without validation.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
        iterator: I,
    ) -> std::result::Result<Self, E>
    where
        P: AsRef<str>,
        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
    {
        let as_bytes = iterator
            .into_iter()
            .map(|item| item.map(|maybe| maybe.map(StrAsBytes)));
        let (validity, offsets, values) = try_trusted_len_unzip(as_bytes)?;

        let array = Self::new_unchecked(Self::default_dtype(), offsets, values, validity);
        Ok(array)
    }

    /// Builds an array from a fallible [`TrustedLen`] iterator, returning the first
    /// error the iterator yields.
    #[inline]
    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
    where
        P: AsRef<str>,
        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
    {
        // SAFETY: the `TrustedLen` bound stands in for the guarantee the unchecked
        // variant asks of its caller.
        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
    }

    /// Builds an all-valid array from an arbitrary iterator of strings.
    pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(iterator: I) -> Self {
        let values = MutableUtf8ValuesArray::from_iter(iterator);
        values.into()
    }

    /// Appends the items of a fallible iterator, stopping at (and returning) the
    /// first `Err` it yields. Items pushed before the error stay in the array.
    ///
    /// # Panics
    /// Panics if pushing a value fails, because [`Self::push`] unwraps the push result.
    pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
    where
        E: std::error::Error,
        I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
        T: AsRef<str>,
    {
        let items = iter.into_iter();
        let (lower, _) = items.size_hint();
        self.reserve(lower, 0);

        for item in items {
            self.push(item?);
        }
        Ok(())
    }
}
impl<O: Offset, T: AsRef<str>> Extend<Option<T>> for MutableUtf8Array<O> {
    /// Appends every optional string of `iter`.
    ///
    /// # Panics
    /// Panics if [`TryExtend::try_extend`] returns an error.
    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
        let outcome = self.try_extend(iter);
        outcome.unwrap();
    }
}
impl<O: Offset, T: AsRef<str>> TryExtend<Option<T>> for MutableUtf8Array<O> {
    /// Appends every optional string of `iter`, stopping at the first push error.
    ///
    /// Slots are reserved up front from the lower bound of the iterator's
    /// `size_hint`. Items pushed before an error remain in the array.
    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
        let items = iter.into_iter();
        let (lower, _) = items.size_hint();
        self.reserve(lower, 0);

        for item in items {
            self.try_push(item)?;
        }
        Ok(())
    }
}
impl<O: Offset, T: AsRef<str>> TryPush<Option<T>> for MutableUtf8Array<O> {
    /// Appends one optional string.
    ///
    /// * `Some(s)` stores `s` and, when a validity bitmap exists, marks the slot valid.
    /// * `None` stores an empty string and marks the slot null, creating the
    ///   validity bitmap first if the array did not have one yet.
    ///
    /// # Errors
    /// Propagates the error of `MutableUtf8ValuesArray::try_push` for a `Some` value.
    #[inline]
    fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {
        if let Some(text) = value {
            self.values.try_push(text.as_ref())?;
            if let Some(bitmap) = self.validity.as_mut() {
                bitmap.push(true);
            }
            return Ok(());
        }

        // Null slot: an empty placeholder string keeps the values aligned with the validity.
        self.values.push("");
        match self.validity.as_mut() {
            Some(bitmap) => bitmap.push(false),
            // First null ever: build a bitmap that is valid everywhere but the last slot.
            None => self.init_validity(),
        }
        Ok(())
    }
}
impl<O: Offset> PartialEq for MutableUtf8Array<O> {
    /// Two arrays are equal when their `Option<&str>` sequences (as produced by
    /// `iter`) are equal element by element.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.iter();
        let rhs = other.iter();
        Iterator::eq(lhs, rhs)
    }
}
impl<O: Offset> TryExtendFromSelf for MutableUtf8Array<O> {
    /// Appends all slots of `other` to `self`.
    ///
    /// The fallible values extension runs first, and the validity is only merged once
    /// it has succeeded. An `Err` therefore returns before `self.validity` is touched,
    /// instead of leaving a validity bitmap longer than the values.
    ///
    /// # Errors
    /// Propagates the error of `MutableUtf8ValuesArray::try_extend_from_self`.
    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
        // `extend_validity` needs the length *before* the new slots were appended.
        let len_before = self.len();

        self.values.try_extend_from_self(&other.values)?;

        // NOTE(review): `extend_validity` is defined elsewhere; confirm it also pads
        // `self.validity` with set bits when `other` carries no validity.
        extend_validity(len_before, &mut self.validity, &other.validity);
        Ok(())
    }
}