polars_arrow/array/dictionary/
mutable.rs

1use std::hash::Hash;
2use std::sync::Arc;
3
4use polars_error::PolarsResult;
5
6use super::value_map::ValueMap;
7use super::{DictionaryArray, DictionaryKey};
8use crate::array::indexable::{AsIndexed, Indexable};
9use crate::array::primitive::MutablePrimitiveArray;
10use crate::array::{Array, MutableArray, TryExtend, TryPush};
11use crate::bitmap::MutableBitmap;
12use crate::datatypes::ArrowDataType;
13
14#[derive(Debug)]
15pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
16    dtype: ArrowDataType,
17    map: ValueMap<K, M>,
18    // invariant: `max(keys) < map.values().len()`
19    keys: MutablePrimitiveArray<K>,
20}
21
22impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
23    fn from(other: MutableDictionaryArray<K, M>) -> Self {
24        // SAFETY: the invariant of this struct ensures that this is up-held
25        unsafe {
26            DictionaryArray::<K>::try_new_unchecked(
27                other.dtype,
28                other.keys.into(),
29                other.map.into_values().as_box(),
30            )
31            .unwrap()
32        }
33    }
34}
35
36impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {
37    /// Creates an empty [`MutableDictionaryArray`].
38    pub fn new() -> Self {
39        Self::try_empty(M::default()).unwrap()
40    }
41}
42
43impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
50    /// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
51    /// # Errors
52    /// Errors if the array is non-empty.
53    pub fn try_empty(values: M) -> PolarsResult<Self> {
54        Ok(Self::from_value_map(ValueMap::<K, M>::try_empty(values)?))
55    }
56
57    /// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.
58    /// Indices associated with those values are automatically assigned based on the order of
59    /// the values.
60    /// # Errors
61    /// Errors if there's more values than the maximum value of `K` or if values are not unique.
62    pub fn from_values(values: M) -> PolarsResult<Self>
63    where
64        M: Indexable,
65        M::Type: Eq + Hash,
66    {
67        Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))
68    }
69
70    fn from_value_map(value_map: ValueMap<K, M>) -> Self {
71        let keys = MutablePrimitiveArray::<K>::new();
72        let dtype =
73            ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);
74        Self {
75            dtype,
76            map: value_map,
77            keys,
78        }
79    }
80
81    /// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current
82    /// mutable dictionary array, but with no data. This may come useful when serializing the
83    /// array into multiple chunks, where there's a requirement that the dictionary is the same.
84    /// No copying is performed, the value map is moved over to the new array.
85    pub fn into_empty(self) -> Self {
86        Self::from_value_map(self.map)
87    }
88
89    /// Same as `into_empty` but clones the inner value map instead of taking full ownership.
90    pub fn to_empty(&self) -> Self
91    where
92        M: Clone,
93    {
94        Self::from_value_map(self.map.clone())
95    }
96
97    /// pushes a null value
98    pub fn push_null(&mut self) {
99        self.keys.push(None)
100    }
101
102    /// returns a reference to the inner values.
103    pub fn values(&self) -> &M {
104        self.map.values()
105    }
106
107    /// converts itself into [`Arc<dyn Array>`]
108    pub fn into_arc(self) -> Arc<dyn Array> {
109        let a: DictionaryArray<K> = self.into();
110        Arc::new(a)
111    }
112
113    /// converts itself into [`Box<dyn Array>`]
114    pub fn into_box(self) -> Box<dyn Array> {
115        let a: DictionaryArray<K> = self.into();
116        Box::new(a)
117    }
118
119    /// Reserves `additional` slots.
120    pub fn reserve(&mut self, additional: usize) {
121        self.keys.reserve(additional);
122    }
123
124    /// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
125    pub fn shrink_to_fit(&mut self) {
126        self.map.shrink_to_fit();
127        self.keys.shrink_to_fit();
128    }
129
130    /// Returns the dictionary keys
131    pub fn keys(&self) -> &MutablePrimitiveArray<K> {
132        &self.keys
133    }
134
135    fn take_into(&mut self) -> DictionaryArray<K> {
136        DictionaryArray::<K>::try_new(
137            self.dtype.clone(),
138            std::mem::take(&mut self.keys).into(),
139            self.map.take_into(),
140        )
141        .unwrap()
142    }
143}
144
145impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
146    fn len(&self) -> usize {
147        self.keys.len()
148    }
149
150    fn validity(&self) -> Option<&MutableBitmap> {
151        self.keys.validity()
152    }
153
154    fn as_box(&mut self) -> Box<dyn Array> {
155        Box::new(self.take_into())
156    }
157
158    fn as_arc(&mut self) -> Arc<dyn Array> {
159        Arc::new(self.take_into())
160    }
161
162    fn dtype(&self) -> &ArrowDataType {
163        &self.dtype
164    }
165
166    fn as_any(&self) -> &dyn std::any::Any {
167        self
168    }
169
170    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
171        self
172    }
173
174    fn push_null(&mut self) {
175        self.keys.push(None)
176    }
177
178    fn reserve(&mut self, additional: usize) {
179        self.reserve(additional)
180    }
181
182    fn shrink_to_fit(&mut self) {
183        self.shrink_to_fit()
184    }
185}
186
187impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
188where
189    K: DictionaryKey,
190    M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,
191    T: AsIndexed<M>,
192    M::Type: Eq + Hash,
193{
194    fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {
195        for value in iter {
196            if let Some(value) = value {
197                let key = self
198                    .map
199                    .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
200                self.keys.try_push(Some(key))?;
201            } else {
202                self.push_null();
203            }
204        }
205        Ok(())
206    }
207}
208
209impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
210where
211    K: DictionaryKey,
212    M: MutableArray + Indexable + TryPush<Option<T>>,
213    T: AsIndexed<M>,
214    M::Type: Eq + Hash,
215{
216    fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
217        if let Some(value) = item {
218            let key = self
219                .map
220                .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
221            self.keys.try_push(Some(key))?;
222        } else {
223            self.push_null();
224        }
225        Ok(())
226    }
227}