Struct datafusion::common::arrow::array::DictionaryArray

source ·
pub struct DictionaryArray<K>{ /* private fields */ }
Expand description

An array of dictionary encoded values

This is mostly used to represent strings or a limited set of primitive types as integers, for example when doing NLP analysis or representing chromosomes by name.

A DictionaryArray is represented using a keys array and a values array, which may have different lengths. The keys array stores indexes into the values array, which holds the corresponding logical values, as shown here:

┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
  ┌─────────────────┐  ┌─────────┐ │     ┌─────────────────┐
│ │        A        │  │    0    │       │        A        │     values[keys[0]]
  ├─────────────────┤  ├─────────┤ │     ├─────────────────┤
│ │        D        │  │    2    │       │        B        │     values[keys[1]]
  ├─────────────────┤  ├─────────┤ │     ├─────────────────┤
│ │        B        │  │    2    │       │        B        │     values[keys[2]]
  └─────────────────┘  ├─────────┤ │     ├─────────────────┤
│                      │    1    │       │        D        │     values[keys[3]]
                       ├─────────┤ │     ├─────────────────┤
│                      │    1    │       │        D        │     values[keys[4]]
                       ├─────────┤ │     ├─────────────────┤
│                      │    0    │       │        A        │     values[keys[5]]
                       └─────────┘ │     └─────────────────┘
│       values            keys
 ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
                                            Logical array
                                               Contents
          DictionaryArray
             length = 6

§Example: From Nullable Data

let test = vec!["a", "a", "b", "c"];
let array : DictionaryArray<Int8Type> = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect();
assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)]));

§Example: From Non-Nullable Data

let test = vec!["a", "a", "b", "c"];
let array : DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));

§Example: From Existing Arrays

// You can form your own DictionaryArray by providing the
// values (dictionary) and keys (indexes into the dictionary):
let values = StringArray::from_iter_values(["a", "b", "c"]);
let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
let array = DictionaryArray::<Int8Type>::try_new(keys, Arc::new(values)).unwrap();
let expected: DictionaryArray::<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
assert_eq!(&array, &expected);

§Example: Using Builder

let mut builder = StringDictionaryBuilder::<Int32Type>::new();
builder.append_value("a");
builder.append_null();
builder.append_value("a");
builder.append_value("b");
let array = builder.finish();

let values: Vec<_> = array.downcast_dict::<StringArray>().unwrap().into_iter().collect();
assert_eq!(&values, &[Some("a"), None, Some("a"), Some("b")]);

Implementations§

source§

impl<K> DictionaryArray<K>

source

pub fn new( keys: PrimitiveArray<K>, values: Arc<dyn Array>, ) -> DictionaryArray<K>

Attempt to create a new DictionaryArray with the specified keys (indexes into the dictionary) and values (dictionary) arrays.

§Panics

Panics if Self::try_new returns an error

source

pub fn try_new( keys: PrimitiveArray<K>, values: Arc<dyn Array>, ) -> Result<DictionaryArray<K>, ArrowError>

Attempt to create a new DictionaryArray with the specified keys (indexes into the dictionary) and values (dictionary) arrays.

§Errors

Returns an error if any keys[i] >= values.len() || keys[i] < 0

source

pub fn new_scalar<T>(value: Scalar<T>) -> Scalar<DictionaryArray<K>>
where T: Array + 'static,

Create a new Scalar from value

source

pub unsafe fn new_unchecked( keys: PrimitiveArray<K>, values: Arc<dyn Array>, ) -> DictionaryArray<K>

Create a new DictionaryArray without performing validation

§Safety

Safe provided Self::try_new would not return an error

source

pub fn into_parts(self) -> (PrimitiveArray<K>, Arc<dyn Array>)

Deconstruct this array into its constituent parts

source

pub fn keys(&self) -> &PrimitiveArray<K>

Return an array view of the keys of this dictionary as a PrimitiveArray.

source

pub fn lookup_key( &self, value: &str, ) -> Option<<K as ArrowPrimitiveType>::Native>

If value is present in values (aka the dictionary), returns the corresponding key (index into the values array). Otherwise returns None.

Panics if values is not a StringArray.

source

pub fn values(&self) -> &Arc<dyn Array>

Returns a reference to the dictionary values array

source

pub fn value_type(&self) -> DataType

Returns a clone of the value type of this dictionary.

source

pub fn len(&self) -> usize

The length of the dictionary is the length of the keys array.

source

pub fn is_empty(&self) -> bool

Whether this dictionary is empty

source

pub fn is_ordered(&self) -> bool

Currently exists for compatibility purposes with Arrow IPC.

source

pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>>

Return an iterator over the keys (indexes into the dictionary)

source

pub fn key(&self, i: usize) -> Option<usize>

Return the value of keys (the dictionary key) at index i, cast to usize, or None if the value at i is NULL.

source

pub fn slice(&self, offset: usize, length: usize) -> DictionaryArray<K>

Returns a zero-copy slice of this array with the indicated offset and length.

source

pub fn downcast_dict<V>(&self) -> Option<TypedDictionaryArray<'_, K, V>>
where V: 'static,

Downcast this dictionary to a TypedDictionaryArray

use arrow_array::{Array, ArrayAccessor, DictionaryArray, StringArray, types::Int32Type};

let orig = [Some("a"), Some("b"), None];
let dictionary = DictionaryArray::<Int32Type>::from_iter(orig);
let typed = dictionary.downcast_dict::<StringArray>().unwrap();
assert_eq!(typed.value(0), "a");
assert_eq!(typed.value(1), "b");
assert!(typed.is_null(2));
source

pub fn with_values(&self, values: Arc<dyn Array>) -> DictionaryArray<K>

Returns a new dictionary with the same keys as the current instance but with a different set of dictionary values

This can be used to perform an operation on the values of a dictionary

§Panics

Panics if values has a length less than the current values


// Construct a Dict(Int32, Int8)
let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int8Type>::with_capacity(2, 200);
for i in 0..100 {
    builder.append(i % 2).unwrap();
}

let dictionary = builder.finish();

// Perform a widening cast of dictionary values
let typed_dictionary = dictionary.downcast_dict::<Int8Array>().unwrap();
let values: Int64Array = typed_dictionary.values().unary(|x| x as i64);

// Create a Dict(Int32, Int64)
let new = dictionary.with_values(Arc::new(values));

// Verify values are as expected
let new_typed = new.downcast_dict::<Int64Array>().unwrap();
for i in 0..100 {
    assert_eq!(new_typed.value(i), (i % 2) as i64)
}
source

pub fn into_primitive_dict_builder<V>( self, ) -> Result<PrimitiveDictionaryBuilder<K, V>, DictionaryArray<K>>

Returns a PrimitiveDictionaryBuilder of this dictionary array for mutating its keys and values if the underlying data buffer is not shared by others.

source

pub fn unary_mut<F, V>( self, op: F, ) -> Result<DictionaryArray<K>, DictionaryArray<K>>

Applies a unary and infallible function to a mutable dictionary array. A mutable dictionary array is one whose buffers are not shared with other arrays. As a result, this mutates the buffers directly without allocating new buffers.

§Implementation

This will apply the function for all dictionary values, including those on null slots. This implies that the operation must be infallible for any value of the corresponding type or this function may panic.

§Example
let values = Int32Array::from(vec![Some(10), Some(20), None]);
let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
let dictionary = DictionaryArray::<Int8Type>::try_new(keys, Arc::new(values)).unwrap();
let c = dictionary.unary_mut::<_, Int32Type>(|x| x + 1).unwrap();
let typed = c.downcast_dict::<Int32Array>().unwrap();
assert_eq!(typed.value(0), 11);
assert_eq!(typed.value(1), 11);
assert_eq!(typed.value(2), 21);
source

pub fn occupancy(&self) -> BooleanBuffer

Computes an occupancy mask for this dictionary’s values

For each value in Self::values the corresponding bit will be set in the returned mask if it is referenced by a key in this DictionaryArray

Trait Implementations§

source§

impl<K> AnyDictionaryArray for DictionaryArray<K>

source§

fn keys(&self) -> &dyn Array

Returns the primitive keys of this dictionary as an Array
source§

fn values(&self) -> &Arc<dyn Array>

Returns the values of this dictionary
source§

fn normalized_keys(&self) -> Vec<usize>

Returns the keys of this dictionary as usize Read more
source§

fn with_values(&self, values: Arc<dyn Array>) -> Arc<dyn Array>

Create a new DictionaryArray replacing values with the new values Read more
source§

impl<T> Array for DictionaryArray<T>

source§

fn as_any(&self) -> &(dyn Any + 'static)

Returns the array as Any so that it can be downcasted to a specific implementation. Read more
source§

fn to_data(&self) -> ArrayData

Returns the underlying data of this array
source§

fn into_data(self) -> ArrayData

Returns the underlying data of this array Read more
source§

fn data_type(&self) -> &DataType

Returns a reference to the DataType of this array. Read more
source§

fn slice(&self, offset: usize, length: usize) -> Arc<dyn Array>

Returns a zero-copy slice of this array with the indicated offset and length. Read more
source§

fn len(&self) -> usize

Returns the length (i.e., number of elements) of this array. Read more
source§

fn is_empty(&self) -> bool

Returns whether this array is empty. Read more
source§

fn offset(&self) -> usize

Returns the offset into the underlying data used by this array(-slice). Note that the underlying data can be shared by many arrays. This defaults to 0. Read more
source§

fn nulls(&self) -> Option<&NullBuffer>

Returns the null buffer of this array if any. Read more
source§

fn logical_nulls(&self) -> Option<NullBuffer>

Returns a potentially computed NullBuffer that represents the logical null values of this array, if any. Read more
source§

fn is_nullable(&self) -> bool

Returns false if the array is guaranteed to not contain any logical nulls Read more
source§

fn get_buffer_memory_size(&self) -> usize

Returns the total number of bytes of memory pointed to by this array. The buffers store bytes in the Arrow memory format, and include the data as well as the validity map. Note that this does not always correspond to the exact memory usage of an array, since multiple arrays can share the same buffers or slices thereof.
source§

fn get_array_memory_size(&self) -> usize

Returns the total number of bytes of memory occupied physically by this array. This value will always be greater than that returned by get_buffer_memory_size() and includes the overhead of the data structures that contain the pointers to the various buffers.
source§

fn is_null(&self, index: usize) -> bool

Returns whether the element at index is null according to Array::nulls Read more
source§

fn is_valid(&self, index: usize) -> bool

Returns whether the element at index is not null, the opposite of Self::is_null. Read more
source§

fn null_count(&self) -> usize

Returns the total number of physical null values in this array. Read more
source§

impl<K> Clone for DictionaryArray<K>

source§

fn clone(&self) -> DictionaryArray<K>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl<T> Debug for DictionaryArray<T>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
source§

impl<T> From<ArrayData> for DictionaryArray<T>

Constructs a DictionaryArray from an array data reference.

source§

fn from(data: ArrayData) -> DictionaryArray<T>

Converts to this type from the input type.
source§

impl<T> From<DictionaryArray<T>> for ArrayData

source§

fn from(array: DictionaryArray<T>) -> ArrayData

Converts to this type from the input type.
source§

impl<'a, T> FromIterator<&'a str> for DictionaryArray<T>

Constructs a DictionaryArray from an iterator of strings.

§Example:

use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type};

let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(
    "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  1,\n  2,\n] values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
    format!("{:?}", array)
);
source§

fn from_iter<I>(iter: I) -> DictionaryArray<T>
where I: IntoIterator<Item = &'a str>,

Creates a value from an iterator. Read more
source§

impl<'a, T> FromIterator<Option<&'a str>> for DictionaryArray<T>

Constructs a DictionaryArray from an iterator of optional strings.

§Example:

use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type};

let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test
    .iter()
    .map(|&x| if x == "b" { None } else { Some(x) })
    .collect();
assert_eq!(
    "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  null,\n  1,\n] values: StringArray\n[\n  \"a\",\n  \"c\",\n]}\n",
    format!("{:?}", array)
);
source§

fn from_iter<I>(iter: I) -> DictionaryArray<T>
where I: IntoIterator<Item = Option<&'a str>>,

Creates a value from an iterator. Read more
source§

impl<K> PartialEq for DictionaryArray<K>

source§

fn eq(&self, other: &DictionaryArray<K>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.

Auto Trait Implementations§

§

impl<K> Freeze for DictionaryArray<K>

§

impl<K> !RefUnwindSafe for DictionaryArray<K>

§

impl<K> Send for DictionaryArray<K>

§

impl<K> Sync for DictionaryArray<K>

§

impl<K> Unpin for DictionaryArray<K>

§

impl<K> !UnwindSafe for DictionaryArray<K>

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

default unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> Datum for T
where T: Array,

source§

fn get(&self) -> (&dyn Array, bool)

Returns the value for this Datum and a boolean indicating if the value is scalar
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

impl<T> Same for T

§

type Output = T

Should always be Self
source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V