// polars_expr/groups/mod.rs
use std::any::Any;
use std::path::Path;
use polars_core::prelude::*;
use polars_utils::aliases::PlRandomState;
use polars_utils::cardinality_sketch::CardinalitySketch;
use polars_utils::hashing::HashPartitioner;
use polars_utils::IdxSize;
mod row_encoded;
/// A Grouper assigns a group index to every key, such that duplicate keys
/// always map to the same group.
pub trait Grouper: Any + Send + Sync {
    /// Creates a new, empty Grouper similar to this one.
    fn new_empty(&self) -> Box<dyn Grouper>;

    /// Reserves space for `additional` more groups.
    fn reserve(&mut self, additional: usize);

    /// Returns the number of groups held by this Grouper.
    fn num_groups(&self) -> IdxSize;

    /// Inserts `keys` into this Grouper.
    ///
    /// `group_idxs` is mutated such that `group_idxs[i]` is the group index
    /// of row `i` of `keys` (that is, of `keys[..][i]`).
    fn insert_keys(&mut self, keys: &DataFrame, group_idxs: &mut Vec<IdxSize>);

    /// Adds all groups of `other` into this Grouper.
    ///
    /// `group_idxs` is mutated such that the `i`th group of `other` has group
    /// index `group_idxs[i]` in `self`.
    fn combine(&mut self, other: &dyn Grouper, group_idxs: &mut Vec<IdxSize>);

    /// Adds a subset of the groups of `other` into this Grouper.
    ///
    /// `group_idxs` is mutated such that group `subset[i]` of `other` has
    /// group index `group_idxs[i]` in `self`.
    ///
    /// # Safety
    /// Every `subset[i]` must be a valid group index of `other`, i.e.
    /// `subset[i] < other.num_groups()` for all `i`.
    unsafe fn gather_combine(
        &mut self,
        other: &dyn Grouper,
        subset: &[IdxSize],
        group_idxs: &mut Vec<IdxSize>,
    );

    /// Generates partition indices.
    ///
    /// Once this function returns, `partition_idxs[i]` contains the indices
    /// for partition `i`, and `sketches[i]` contains a cardinality sketch for
    /// partition `i`.
    fn gen_partition_idxs(
        &self,
        partitioner: &HashPartitioner,
        partition_idxs: &mut [Vec<IdxSize>],
        sketches: &mut [CardinalitySketch],
    );

    /// Returns the keys of this Grouper in group order: row `i` of the
    /// returned frame holds the key for group `i`.
    fn get_keys_in_group_order(&self) -> DataFrame;

    /// Stores this Grouper at the given path.
    ///
    /// # Panics
    /// The default implementation is a stub and always panics via
    /// `unimplemented!()`.
    fn store_ooc(&self, _path: &Path) {
        unimplemented!()
    }

    /// Loads this Grouper from the given path.
    ///
    /// # Panics
    /// The default implementation is a stub and always panics via
    /// `unimplemented!()`.
    fn load_ooc(&mut self, _path: &Path) {
        unimplemented!()
    }

    /// Exposes this Grouper as a `&dyn Any`, e.g. for downcasting to the
    /// concrete implementation.
    fn as_any(&self) -> &dyn Any;
}
/// Creates a new hash-based [`Grouper`] for keys with the given schema.
///
/// The returned grouper is a `row_encoded::RowEncodedHashGrouper` built from
/// `key_schema` and `random_state`, boxed as a trait object.
pub fn new_hash_grouper(key_schema: Arc<Schema>, random_state: PlRandomState) -> Box<dyn Grouper> {
    let grouper = row_encoded::RowEncodedHashGrouper::new(key_schema, random_state);
    Box::new(grouper)
}