polars_arrow::array

Struct BinaryArray

Source
pub struct BinaryArray<O: Offset> { /* private fields */ }
Expand description

A BinaryArray is Arrow’s semantic equivalent of an immutable Vec<Option<Vec<u8>>>. It implements Array.

The size of this struct is O(1), as all data is stored behind an std::sync::Arc.

§Example

use polars_arrow::array::BinaryArray;
use polars_arrow::bitmap::Bitmap;
use polars_arrow::buffer::Buffer;

let array = BinaryArray::<i32>::from([Some([1, 2].as_ref()), None, Some([3].as_ref())]);
assert_eq!(array.value(0), &[1, 2]);
assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some([1, 2].as_ref()), None, Some([3].as_ref())]);
assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![[1, 2].as_ref(), &[], &[3]]);
// the underlying representation:
assert_eq!(array.values(), &Buffer::from(vec![1, 2, 3]));
assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 3]));
assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));

§Generic parameter

The generic parameter Offset can only be i32 or i64, and it trades off maximum array length against memory usage:

  • the sum of lengths of all elements cannot exceed Offset::MAX
  • the total size of the underlying data is (array.len() + 1) * size_of::<Offset>() + sum of lengths of all elements (there is one more offset than there are elements)

§Safety

The following invariants hold:

  • Any two consecutive offsets, cast (as) to usize, delimit a valid slice of values.
  • len is equal to validity.len(), when defined.

Implementations§

Source§

impl<O: Offset> BinaryArray<O>

Source

pub fn try_new( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> PolarsResult<Self>

Returns a BinaryArray created from its internal representation.

§Errors

This function returns an error iff:

  • The last offset is not equal to the values’ length.
  • The validity’s length is not equal to the array length (i.e. the number of offsets minus one).
  • The dtype’s crate::datatypes::PhysicalType is not equal to either Binary or LargeBinary.
§Implementation

This function is O(1)

Source

pub unsafe fn new_unchecked( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Self

Creates a new BinaryArray without checking invariants.

§Safety

The invariants must be valid (see try_new).

Source

pub fn from_slice<T: AsRef<[u8]>, P: AsRef<[T]>>(slice: P) -> Self

Creates a new BinaryArray from slices of &[u8].

Source

pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self

Creates a new BinaryArray from a slice of optional &[u8].

Source

pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<'_, O>, BitmapIter<'_>>

Returns an iterator of Option<&[u8]> over every element of this array.

Source

pub fn values_iter(&self) -> BinaryValueIter<'_, O>

Returns an iterator of &[u8] over every element of this array, ignoring the validity

Source

pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryArray<O>>

Returns an iterator of the non-null values.

Source

pub fn len(&self) -> usize

Returns the length of this array

Source

pub fn value(&self, i: usize) -> &[u8]

Returns the element at index i

§Panics

iff i >= self.len()

Source

pub unsafe fn value_unchecked(&self, i: usize) -> &[u8]

Returns the element at index i

§Safety

Assumes that i < self.len().

Source

pub fn get(&self, i: usize) -> Option<&[u8]>

Returns the element at index i or None if it is null

§Panics

iff i >= self.len()

Source

pub fn dtype(&self) -> &ArrowDataType

Returns the ArrowDataType of this array.

Source

pub fn values(&self) -> &Buffer<u8>

Returns the values of this BinaryArray.

Source

pub fn offsets(&self) -> &OffsetsBuffer<O>

Returns the offsets of this BinaryArray.

Source

pub fn validity(&self) -> Option<&Bitmap>

The optional validity.

Source

pub fn slice(&mut self, offset: usize, length: usize)

Slices this BinaryArray.

§Implementation

This function is O(1).

§Panics

iff offset + length > self.len().

Source

pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)

Slices this BinaryArray.

§Implementation

This function is O(1).

§Safety

The caller must ensure that offset + length <= self.len().

Source

pub fn sliced(self, offset: usize, length: usize) -> Self

Returns this array sliced.

§Implementation

This function is O(1).

§Panics

iff offset + length > self.len().

Source

pub unsafe fn sliced_unchecked(self, offset: usize, length: usize) -> Self

Returns this array sliced.

§Implementation

This function is O(1).

§Safety

The caller must ensure that offset + length <= self.len().

Source

pub fn with_validity(self, validity: Option<Bitmap>) -> Self

Returns this array with a new validity.

§Panics

Panics iff validity.len() != self.len().

Source

pub fn set_validity(&mut self, validity: Option<Bitmap>)

Sets the validity of this array.

§Panics

This function panics iff validity.len() != self.len().

Source

pub fn take_validity(&mut self) -> Option<Bitmap>

Takes the validity of this array, leaving it without a validity mask.

Source

pub fn boxed(self) -> Box<dyn Array>

Boxes this array into a Box<dyn Array>.

Source

pub fn arced(self) -> Arc<dyn Array>

Arcs this array into a std::sync::Arc<dyn Array>.

Source

pub fn into_inner( self, ) -> (ArrowDataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>)

Returns its internal representation

Source

pub fn into_mut(self) -> Either<Self, MutableBinaryArray<O>>

Try to convert this BinaryArray to a MutableBinaryArray

Source

pub fn new_empty(dtype: ArrowDataType) -> Self

Creates an empty BinaryArray, i.e. one whose .len() is zero.

Source

pub fn new_null(dtype: ArrowDataType, length: usize) -> Self

Creates a null BinaryArray, i.e. one whose .null_count() == .len().

Source

pub fn default_dtype() -> ArrowDataType

Returns the default ArrowDataType: ArrowDataType::Binary or ArrowDataType::LargeBinary.

Source

pub fn new( dtype: ArrowDataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Self

Alias for unwrapping Self::try_new

Source

pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>( iterator: I, ) -> Self

Returns a BinaryArray from an iterator of trusted length.

The BinaryArray is guaranteed to not have a validity

Source

pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>( iterator: I, ) -> Self

Returns a new BinaryArray from an Iterator of &[u8].

The BinaryArray is guaranteed to not have a validity

Source

pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
where P: AsRef<[u8]>, I: Iterator<Item = Option<P>>,

Creates a BinaryArray from an iterator of trusted length.

§Safety

The iterator must be TrustedLen, i.e. size_hint().1 must correctly report its length.

Source

pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
where P: AsRef<[u8]>, I: TrustedLen<Item = Option<P>>,

Creates a BinaryArray from a TrustedLen iterator.

Source

pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>( iterator: I, ) -> Result<Self, E>
where P: AsRef<[u8]>, I: IntoIterator<Item = Result<Option<P>, E>>,

Creates a BinaryArray from a fallible iterator of trusted length.

§Safety

The iterator must be TrustedLen, i.e. size_hint().1 must correctly report its length.

Source

pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Self, E>
where P: AsRef<[u8]>, I: TrustedLen<Item = Result<Option<P>, E>>,

Creates a BinaryArray from a fallible iterator of trusted length.

Trait Implementations§

Source§

impl<O: Offset> Array for BinaryArray<O>

Source§

fn as_any(&self) -> &dyn Any

Converts itself to a reference of Any, which enables downcasting to concrete types.
Source§

fn as_any_mut(&mut self) -> &mut dyn Any

Converts itself to a mutable reference of Any, which enables mutable downcasting to concrete types.
Source§

fn len(&self) -> usize

The length of the Array. Every array has a length corresponding to the number of elements (slots).
Source§

fn dtype(&self) -> &ArrowDataType

The ArrowDataType of the Array. In combination with Array::as_any, this can be used to downcast trait objects (dyn Array) to concrete arrays.
Source§

fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>)

Split Self at offset into two boxed Arrays where offset <= self.len().
Source§

unsafe fn split_at_boxed_unchecked( &self, offset: usize, ) -> (Box<dyn Array>, Box<dyn Array>)

Split Self at offset into two boxed Arrays without checking offset <= self.len(). Read more
Source§

fn slice(&mut self, offset: usize, length: usize)

Slices this Array. Read more
Source§

unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)

Slices the Array. Read more
Source§

fn to_boxed(&self) -> Box<dyn Array>

Clone a &dyn Array to an owned Box<dyn Array>.
Source§

fn validity(&self) -> Option<&Bitmap>

The validity of the Array: every array has an optional Bitmap that, when available specifies whether the array slot is valid or not (null). When the validity is None, all slots are valid.
Source§

fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>

Clones this Array with a new assigned bitmap. Read more
Source§

fn is_empty(&self) -> bool

whether the array is empty
Source§

fn null_count(&self) -> usize

The number of null slots on this Array. Read more
Source§

fn has_nulls(&self) -> bool

Source§

fn is_null(&self, i: usize) -> bool

Returns whether slot i is null. Read more
Source§

unsafe fn is_null_unchecked(&self, i: usize) -> bool

Returns whether slot i is null. Read more
Source§

fn is_valid(&self, i: usize) -> bool

Returns whether slot i is valid. Read more
Source§

fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array>

Returns a slice of this Array. Read more
Source§

unsafe fn sliced_unchecked( &self, offset: usize, length: usize, ) -> Box<dyn Array>

Returns a slice of this Array. Read more
Source§

impl<'a, O: Offset> ArrayAccessor<'a> for BinaryArray<O>

Source§

type Item = &'a [u8]

Source§

unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item

Safety Read more
Source§

fn len(&self) -> usize

Source§

impl<T: IntoBytes> ArrayFromIter<Option<T>> for BinaryArray<i64>

Source§

fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self

Source§

fn arr_from_iter_trusted<I>(iter: I) -> Self
where I: IntoIterator<Item = Option<T>>, I::IntoIter: TrustedLen,

Source§

fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>( iter: I, ) -> Result<Self, E>

Source§

fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
where I: IntoIterator<Item = Result<Option<T>, E>>, I::IntoIter: TrustedLen,

Source§

impl<T: IntoBytes> ArrayFromIter<T> for BinaryArray<i64>

Source§

fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self

Source§

fn arr_from_iter_trusted<I>(iter: I) -> Self
where I: IntoIterator<Item = T>, I::IntoIter: TrustedLen,

Source§

fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>( iter: I, ) -> Result<Self, E>

Source§

fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
where I: IntoIterator<Item = Result<T, E>>, I::IntoIter: TrustedLen,

Source§

impl BinaryFromIter for BinaryArray<i64>

Source§

fn from_values_iter<I, S>( iter: I, len: usize, value_cap: usize, ) -> BinaryArray<i64>
where S: AsRef<[u8]>, I: Iterator<Item = S>,

Source§

impl<O: Clone + Offset> Clone for BinaryArray<O>

Source§

fn clone(&self) -> BinaryArray<O>

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<O: Offset> Debug for BinaryArray<O>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'a, O: Offset> From<GrowableBinary<'a, O>> for BinaryArray<O>

Source§

fn from(val: GrowableBinary<'a, O>) -> Self

Converts to this type from the input type.
Source§

impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O>

Source§

fn from(other: MutableBinaryArray<O>) -> Self

Converts to this type from the input type.
Source§

impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O>

Source§

fn from(other: MutableBinaryValuesArray<O>) -> Self

Converts to this type from the input type.
Source§

impl FromDataBinary for BinaryArray<i64>

Source§

unsafe fn from_data_unchecked_default( offsets: Buffer<i64>, values: Buffer<u8>, validity: Option<Bitmap>, ) -> Self

Safety Read more
Source§

impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for BinaryArray<O>

Source§

fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self

Creates a value from an iterator. Read more
Source§

impl<O: Offset> GenericBinaryArray<O> for BinaryArray<O>

Source§

fn values(&self) -> &[u8]

The values of the array
Source§

fn offsets(&self) -> &[O]

The offsets of the array
Source§

impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O>

Source§

type Item = Option<&'a [u8]>

The type of the elements being iterated over.
Source§

type IntoIter = ZipValidity<&'a [u8], ArrayValuesIter<'a, BinaryArray<O>>, BitmapIter<'a>>

Which kind of iterator are we turning this into?
Source§

fn into_iter(self) -> Self::IntoIter

Creates an iterator from a value. Read more
Source§

impl ParameterFreeDtypeStaticArray for BinaryArray<i64>

Source§

impl<O: Offset> PartialEq<&(dyn Array + 'static)> for BinaryArray<O>

Source§

fn eq(&self, other: &&dyn Array) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<O: Offset> PartialEq<BinaryArray<O>> for &dyn Array

Source§

fn eq(&self, other: &BinaryArray<O>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<O: Offset> PartialEq for BinaryArray<O>

Source§

fn eq(&self, other: &Self) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<O: Offset> Splitable for BinaryArray<O>

Source§

fn check_bound(&self, offset: usize) -> bool

Source§

unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self)

Internal implementation of split_at_unchecked. For any usage, prefer using split_at or split_at_unchecked. Read more
Source§

fn split_at(&self, offset: usize) -> (Self, Self)

Split Self at offset where offset <= self.len().
Source§

unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self)

Split Self at offset without checking offset <= self.len(). Read more
Source§

impl StaticArray for BinaryArray<i64>

Source§

type ValueT<'a> = &'a [u8]

Source§

type ZeroableValueT<'a> = Option<&'a [u8]>

Source§

type ValueIterT<'a> = ArrayValuesIter<'a, BinaryArray<i64>>

Source§

unsafe fn value_unchecked(&self, idx: usize) -> Self::ValueT<'_>

Safety Read more
Source§

fn values_iter(&self) -> Self::ValueIterT<'_>

Source§

fn iter( &self, ) -> ZipValidity<Self::ValueT<'_>, Self::ValueIterT<'_>, BitmapIter<'_>>

Source§

fn with_validity_typed(self, validity: Option<Bitmap>) -> Self

Source§

fn full_null(length: usize, dtype: ArrowDataType) -> Self

Source§

fn get(&self, idx: usize) -> Option<Self::ValueT<'_>>

Source§

unsafe fn get_unchecked(&self, idx: usize) -> Option<Self::ValueT<'_>>

Safety Read more
Source§

fn last(&self) -> Option<Self::ValueT<'_>>

Source§

fn value(&self, idx: usize) -> Self::ValueT<'_>

Source§

fn as_slice(&self) -> Option<&[Self::ValueT<'_>]>

Source§

fn from_vec(v: Vec<Self::ValueT<'_>>, dtype: ArrowDataType) -> Self

Source§

fn from_zeroable_vec( v: Vec<Self::ZeroableValueT<'_>>, dtype: ArrowDataType, ) -> Self

Source§

fn full(length: usize, value: Self::ValueT<'_>, dtype: ArrowDataType) -> Self

Source§

impl<O: Offset> ValueSize for BinaryArray<O>

Source§

fn get_values_size(&self) -> usize

Get the values size that is still “visible” to the underlying array. E.g. take the offsets into account.
Source§

impl ArrowArray for BinaryArray<i64>

Auto Trait Implementations§

§

impl<O> !Freeze for BinaryArray<O>

§

impl<O> RefUnwindSafe for BinaryArray<O>

§

impl<O> Send for BinaryArray<O>

§

impl<O> Sync for BinaryArray<O>

§

impl<O> Unpin for BinaryArray<O>
where O: Unpin,

§

impl<O> UnwindSafe for BinaryArray<O>
where O: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T, A> ArrayFromIterDtype<T> for A

Source§

fn arr_from_iter_with_dtype<I>(dtype: ArrowDataType, iter: I) -> A
where I: IntoIterator<Item = T>,

Source§

fn arr_from_iter_trusted_with_dtype<I>(dtype: ArrowDataType, iter: I) -> A
where I: IntoIterator<Item = T>, <I as IntoIterator>::IntoIter: TrustedLen,

Source§

fn try_arr_from_iter_with_dtype<E, I>( dtype: ArrowDataType, iter: I, ) -> Result<A, E>
where I: IntoIterator<Item = Result<T, E>>,

Source§

fn try_arr_from_iter_trusted_with_dtype<E, I>( dtype: ArrowDataType, iter: I, ) -> Result<A, E>
where I: IntoIterator<Item = Result<T, E>>, <I as IntoIterator>::IntoIter: TrustedLen,

Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> DynClone for T
where T: Clone,

Source§

fn __clone_box(&self, _: Private) -> *mut ()

Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<A> IsValid for A
where A: ArrowArray,

Source§

unsafe fn is_valid_unchecked(&self, i: usize) -> bool

Safety Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize = _

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> SlicedArray for T
where T: Array + Clone,

Source§

fn slice_typed(&self, offset: usize, length: usize) -> T

Slices this Array. Read more
Source§

unsafe fn slice_typed_unchecked(&self, offset: usize, length: usize) -> T

Slices the Array. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V