polars_compute/unique/mod.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
use arrow::array::Array;
/// Kernel to calculate the number of unique elements where the elements are already sorted.
pub trait SortedUniqueKernel: Array {
/// Calculate the set of unique elements in `fst` and `others` and fold the result into one
/// array.
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;
/// Calculate the set of unique elements in [`Self`] where we have no further information about
/// `self`.
fn unique(&self) -> Self;
/// Calculate the number of unique elements in [`Self`]
///
/// A null is also considered a unique value
fn n_unique(&self) -> usize;
/// Calculate the number of unique non-null elements in [`Self`]
fn n_unique_non_null(&self) -> usize;
}
/// Optimized kernel to calculate the unique elements of an array.
///
/// This kernel is a specialized for where all values are known to be in some small range of
/// values. In this case, you can usually get by with a bitset and bit-arithmetic instead of using
/// vectors and hashsets. Consequently, this kernel is usually called when further information is
/// known about the underlying array.
///
/// This trait is not implemented directly on the `Array` as with many other kernels. Rather, it is
/// implemented on a `State` struct to which `Array`s can be appended. This allows for sharing of
/// `State` between many chunks and allows for different implementations for the same array (e.g. a
/// maintain order and no maintain-order variant).
pub trait RangedUniqueKernel {
type Array: Array;
/// Returns whether all the values in the whole range are in the state
fn has_seen_all(&self) -> bool;
/// Append an `Array`'s values to the `State`
fn append(&mut self, array: &Self::Array);
/// Consume the state to get the unique elements
fn finalize_unique(self) -> Self::Array;
/// Consume the state to get the number of unique elements including null
fn finalize_n_unique(self) -> usize;
/// Consume the state to get the number of unique elements excluding null
fn finalize_n_unique_non_null(self) -> usize;
}
/// A generic unique kernel that selects the generally applicable unique kernel for an `Array`.
pub trait GenericUniqueKernel {
/// Calculate the set of unique elements
fn unique(&self) -> Self;
/// Calculate the number of unique elements including null
fn n_unique(&self) -> usize;
/// Calculate the number of unique elements excluding null
fn n_unique_non_null(&self) -> usize;
}
mod boolean;
mod primitive;
pub use boolean::BooleanUniqueKernelState;
pub use primitive::PrimitiveRangedUniqueState;