// polars_expr/chunked_idx_table/mod.rs
use std::any::Any;

use polars_core::prelude::*;
use polars_utils::index::ChunkId;
use polars_utils::IdxSize;

use crate::hash_keys::HashKeys;

mod row_encoded;

/// Interface for a key table that is filled one key chunk at a time
/// (`insert_key_chunk`) and afterwards probed, reporting matches as
/// `ChunkId<32>` locations on the table side and `IdxSize` indices on the
/// probe side.
///
/// Implementations must be `Any + Send + Sync`; the trait is used as a boxed
/// trait object (`Box<dyn ChunkedIdxTable>`).
pub trait ChunkedIdxTable: Any + Send + Sync {
    /// Creates a new empty ChunkedIdxTable similar to this one.
    fn new_empty(&self) -> Box<dyn ChunkedIdxTable>;

    /// Reserves space for the given number of additional keys.
    fn reserve(&mut self, additional: usize);

    /// Returns the number of unique keys in this ChunkedIdxTable.
    fn num_keys(&self) -> IdxSize;

    /// Inserts the given key chunk into this ChunkedIdxTable.
    ///
    /// NOTE(review): `track_unmatchable` is not described anywhere in this
    /// file. Presumably it controls whether keys that can never match are
    /// still recorded so they can later be reported by `unmarked_keys` —
    /// confirm against the implementation.
    fn insert_key_chunk(&mut self, keys: HashKeys, track_unmatchable: bool);

    /// Probe the table, updating table_match and probe_match with
    /// (ChunkId, IdxSize) pairs for each match. Will stop processing new keys
    /// once limit matches have been generated, returning the number of keys
    /// processed.
    ///
    /// If mark_matches is true, matches are marked in the table as such.
    /// NOTE(review): this method takes `&self`, so marking presumably relies
    /// on interior mutability inside the implementation — confirm.
    ///
    /// If emit_unmatched is true, for keys that do not have a match we emit a
    /// match with ChunkId::null() on the table match.
    fn probe(
        &self,
        hash_keys: &HashKeys,
        table_match: &mut Vec<ChunkId<32>>,
        probe_match: &mut Vec<IdxSize>,
        mark_matches: bool,
        emit_unmatched: bool,
        limit: IdxSize,
    ) -> IdxSize;

    /// The same as probe, except it will only apply to the specified subset of keys.
    ///
    /// # Safety
    /// The provided subset indices must be in-bounds.
    // Eight parameters including `self`; the lint is silenced rather than
    // bundling the arguments into a struct.
    #[allow(clippy::too_many_arguments)]
    unsafe fn probe_subset(
        &self,
        hash_keys: &HashKeys,
        subset: &[IdxSize],
        table_match: &mut Vec<ChunkId<32>>,
        probe_match: &mut Vec<IdxSize>,
        mark_matches: bool,
        emit_unmatched: bool,
        limit: IdxSize,
    ) -> IdxSize;

    /// Get the ChunkIds for each key which was never marked during probing.
    ///
    /// The ChunkIds are written to `out`.
    ///
    /// NOTE(review): the meaning of `offset`, `limit` and the returned
    /// `IdxSize` is not stated in this file. Presumably scanning starts at
    /// key `offset`, stops once `limit` ids have been produced, and the
    /// return value is the number of keys processed (mirroring `probe`) —
    /// confirm against the implementation.
    fn unmarked_keys(&self, out: &mut Vec<ChunkId<32>>, offset: IdxSize, limit: IdxSize)
        -> IdxSize;
}

/// Builds the default [`ChunkedIdxTable`] implementation.
///
/// The key schema argument is currently unused: a row-encoded table is
/// returned regardless of the schema passed in.
pub fn new_chunked_idx_table(_key_schema: Arc<Schema>) -> Box<dyn ChunkedIdxTable> {
    let table = row_encoded::RowEncodedChunkedIdxTable::new();
    Box::new(table)
}