polars_compute/unique/
dictionary.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
use arrow::array::{Array, DictionaryArray};
use arrow::datatypes::ArrowDataType;

use super::{PrimitiveRangedUniqueState, RangedUniqueKernel};

/// A specialized unique kernel for [`DictionaryArray`] for when all values are in a small known
/// range.
///
/// Uniqueness is tracked on the `u32` dictionary keys only: every kernel operation is forwarded
/// to `key_state`, while `values` is carried along untouched and reattached to the keys when the
/// unique array is built.
pub struct DictionaryRangedUniqueState {
    /// Ranged unique state over the dictionary keys.
    key_state: PrimitiveRangedUniqueState<u32>,
    /// The dictionary values handed to `new`; used as the values of the finalized array.
    values: Box<dyn Array>,
}

impl DictionaryRangedUniqueState {
    /// Creates a state for dictionaries that share the given `values`.
    ///
    /// The key state is initialised with the bounds `0` and `values.len() + 1`.
    pub fn new(values: Box<dyn Array>) -> Self {
        // NOTE(review): `as u32` silently truncates if `values.len()` exceeds `u32::MAX`;
        // presumably a `u32`-keyed dictionary never holds that many values -- confirm.
        let upper_bound = values.len() as u32 + 1;
        let key_state = PrimitiveRangedUniqueState::new(0, upper_bound);

        Self { key_state, values }
    }

    /// Gives mutable access to the underlying key state.
    pub fn key_state(&mut self) -> &mut PrimitiveRangedUniqueState<u32> {
        let Self { key_state, .. } = self;
        key_state
    }
}

impl RangedUniqueKernel for DictionaryRangedUniqueState {
    type Array = DictionaryArray<u32>;

    /// Forwards to the key state's `has_seen_all`.
    fn has_seen_all(&self) -> bool {
        let Self { key_state, .. } = self;
        key_state.has_seen_all()
    }

    /// Feeds the keys of `array` into the key state.
    ///
    /// Only `array.keys()` is read; the values of `array` are not compared against the values
    /// stored in this state.
    fn append(&mut self, array: &Self::Array) {
        let incoming_keys = array.keys();
        self.key_state.append(incoming_keys);
    }

    /// Merges the key state of `other` into this one.
    ///
    /// In debug builds this asserts that both states hold equal dictionary values.
    fn append_state(&mut self, other: &Self) {
        debug_assert_eq!(self.values, other.values);

        let Self {
            key_state: other_keys,
            ..
        } = other;
        self.key_state.append_state(other_keys);
    }

    /// Consumes the state and builds a dictionary array from the unique keys and the stored
    /// values.
    ///
    /// # Panics
    ///
    /// Panics if `DictionaryArray::try_new` rejects the keys/values combination.
    fn finalize_unique(self) -> Self::Array {
        let Self { key_state, values } = self;

        let unique_keys = key_state.finalize_unique();

        // The result is typed as a `UInt32`-keyed dictionary over the dtype of the stored values.
        // NOTE(review): the trailing `false` is presumably the "is sorted" flag -- confirm.
        let value_dtype = Box::new(values.dtype().clone());
        let dictionary_dtype = ArrowDataType::Dictionary(
            arrow::datatypes::IntegerType::UInt32,
            value_dtype,
            false,
        );

        DictionaryArray::<u32>::try_new(dictionary_dtype, unique_keys, values).unwrap()
    }

    /// Forwards to the key state's `finalize_n_unique`.
    fn finalize_n_unique(&self) -> usize {
        let Self { key_state, .. } = self;
        key_state.finalize_n_unique()
    }

    /// Forwards to the key state's `finalize_n_unique_non_null`.
    fn finalize_n_unique_non_null(&self) -> usize {
        let Self { key_state, .. } = self;
        key_state.finalize_n_unique_non_null()
    }
}