polars_arrow/array/binary/
mutable.rs

1use std::sync::Arc;
2
3use polars_error::{polars_bail, PolarsResult};
4
5use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
6use crate::array::physical_binary::*;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::utils::{BitmapIter, ZipValidity};
9use crate::bitmap::{Bitmap, MutableBitmap};
10use crate::datatypes::ArrowDataType;
11use crate::offset::{Offset, Offsets};
12use crate::trusted_len::TrustedLen;
13
/// The Arrow equivalent of `Vec<Option<Vec<u8>>>`: a growable array of optional byte strings.
/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (e.g. when a null is pushed).
#[derive(Debug, Clone)]
pub struct MutableBinaryArray<O: Offset> {
    // Offsets and bytes of every slot; null slots are stored as empty values.
    values: MutableBinaryValuesArray<O>,
    // Per-slot validity bits; `None` while no bitmap has been allocated.
    validity: Option<MutableBitmap>,
}
23
24impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {
25    fn from(other: MutableBinaryArray<O>) -> Self {
26        let validity = other.validity.and_then(|x| {
27            let validity: Option<Bitmap> = x.into();
28            validity
29        });
30        let array: BinaryArray<O> = other.values.into();
31        array.with_validity(validity)
32    }
33}
34
35impl<O: Offset> Default for MutableBinaryArray<O> {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl<O: Offset> MutableBinaryArray<O> {
42    /// Creates a new empty [`MutableBinaryArray`].
43    /// # Implementation
44    /// This allocates a [`Vec`] of one element
45    pub fn new() -> Self {
46        Self::with_capacity(0)
47    }
48
49    /// Returns a [`MutableBinaryArray`] created from its internal representation.
50    ///
51    /// # Errors
52    /// This function returns an error iff:
53    /// * The last offset is not equal to the values' length.
54    /// * the validity's length is not equal to `offsets.len()`.
55    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
56    /// # Implementation
57    /// This function is `O(1)`
58    pub fn try_new(
59        dtype: ArrowDataType,
60        offsets: Offsets<O>,
61        values: Vec<u8>,
62        validity: Option<MutableBitmap>,
63    ) -> PolarsResult<Self> {
64        let values = MutableBinaryValuesArray::try_new(dtype, offsets, values)?;
65
66        if validity
67            .as_ref()
68            .is_some_and(|validity| validity.len() != values.len())
69        {
70            polars_bail!(ComputeError: "validity's length must be equal to the number of values")
71        }
72
73        Ok(Self { values, validity })
74    }
75
76    /// Creates a new [`MutableBinaryArray`] from a slice of optional `&[u8]`.
77    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
78    pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
79        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
80    }
81
82    fn default_dtype() -> ArrowDataType {
83        BinaryArray::<O>::default_dtype()
84    }
85
86    /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots.
87    pub fn with_capacity(capacity: usize) -> Self {
88        Self::with_capacities(capacity, 0)
89    }
90
91    /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots and values.
92    /// # Implementation
93    /// This does not allocate the validity.
94    pub fn with_capacities(capacity: usize, values: usize) -> Self {
95        Self {
96            values: MutableBinaryValuesArray::with_capacities(capacity, values),
97            validity: None,
98        }
99    }
100
101    /// Reserves `additional` elements and `additional_values` on the values buffer.
102    pub fn reserve(&mut self, additional: usize, additional_values: usize) {
103        self.values.reserve(additional, additional_values);
104        if let Some(x) = self.validity.as_mut() {
105            x.reserve(additional)
106        }
107    }
108
109    /// Pushes a new element to the array.
110    /// # Panic
111    /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
112    pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {
113        self.try_push(value).unwrap()
114    }
115
116    /// Pop the last entry from [`MutableBinaryArray`].
117    /// This function returns `None` iff this array is empty
118    pub fn pop(&mut self) -> Option<Vec<u8>> {
119        let value = self.values.pop()?;
120        self.validity
121            .as_mut()
122            .map(|x| x.pop()?.then(|| ()))
123            .unwrap_or_else(|| Some(()))
124            .map(|_| value)
125    }
126
127    fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
128        iter: I,
129    ) -> PolarsResult<Self> {
130        let iterator = iter.into_iter();
131        let (lower, _) = iterator.size_hint();
132        let mut primitive = Self::with_capacity(lower);
133        for item in iterator {
134            primitive.try_push(item.as_ref())?
135        }
136        Ok(primitive)
137    }
138
139    fn init_validity(&mut self) {
140        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
141        validity.extend_constant(self.len(), true);
142        validity.set(self.len() - 1, false);
143        self.validity = Some(validity);
144    }
145
146    /// Converts itself into an [`Array`].
147    pub fn into_arc(self) -> Arc<dyn Array> {
148        let a: BinaryArray<O> = self.into();
149        Arc::new(a)
150    }
151
152    /// Shrinks the capacity of the [`MutableBinaryArray`] to fit its current length.
153    pub fn shrink_to_fit(&mut self) {
154        self.values.shrink_to_fit();
155        if let Some(validity) = &mut self.validity {
156            validity.shrink_to_fit()
157        }
158    }
159
    // Macro defined outside this file; presumably generates the shared helpers for
    // mutating the validity bitmap — confirm against the macro definition.
    impl_mutable_array_mut_validity!();
161}
162
163impl<O: Offset> MutableBinaryArray<O> {
164    /// returns its values.
165    pub fn values(&self) -> &Vec<u8> {
166        self.values.values()
167    }
168
169    /// returns its offsets.
170    pub fn offsets(&self) -> &Offsets<O> {
171        self.values.offsets()
172    }
173
174    /// Returns an iterator of `Option<&[u8]>`
175    pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<O>, BitmapIter> {
176        ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
177    }
178
179    /// Returns an iterator over the values of this array
180    pub fn values_iter(&self) -> MutableBinaryValuesIter<O> {
181        self.values.iter()
182    }
183}
184
185impl<O: Offset> MutableArray for MutableBinaryArray<O> {
186    fn len(&self) -> usize {
187        self.values.len()
188    }
189
190    fn validity(&self) -> Option<&MutableBitmap> {
191        self.validity.as_ref()
192    }
193
194    fn as_box(&mut self) -> Box<dyn Array> {
195        let array: BinaryArray<O> = std::mem::take(self).into();
196        array.boxed()
197    }
198
199    fn as_arc(&mut self) -> Arc<dyn Array> {
200        let array: BinaryArray<O> = std::mem::take(self).into();
201        array.arced()
202    }
203
204    fn dtype(&self) -> &ArrowDataType {
205        self.values.dtype()
206    }
207
208    fn as_any(&self) -> &dyn std::any::Any {
209        self
210    }
211
212    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
213        self
214    }
215
216    #[inline]
217    fn push_null(&mut self) {
218        self.push::<&[u8]>(None)
219    }
220
221    fn reserve(&mut self, additional: usize) {
222        self.reserve(additional, 0)
223    }
224
225    fn shrink_to_fit(&mut self) {
226        self.shrink_to_fit()
227    }
228}
229
230impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {
231    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
232        Self::try_from_iter(iter).unwrap()
233    }
234}
235
236impl<O: Offset> MutableBinaryArray<O> {
237    /// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
238    ///
239    /// # Safety
240    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
241    /// I.e. that `size_hint().1` correctly reports its length.
242    #[inline]
243    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
244    where
245        P: AsRef<[u8]>,
246        I: Iterator<Item = Option<P>>,
247    {
248        let (validity, offsets, values) = trusted_len_unzip(iterator);
249
250        Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap()
251    }
252
253    /// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
254    #[inline]
255    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
256    where
257        P: AsRef<[u8]>,
258        I: TrustedLen<Item = Option<P>>,
259    {
260        // soundness: I is `TrustedLen`
261        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
262    }
263
264    /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
265    ///
266    /// # Safety
267    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
268    /// I.e. that `size_hint().1` correctly reports its length.
269    #[inline]
270    pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(
271        iterator: I,
272    ) -> Self {
273        let (offsets, values) = trusted_len_values_iter(iterator);
274        Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
275    }
276
277    /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
278    #[inline]
279    pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
280        iterator: I,
281    ) -> Self {
282        // soundness: I is `TrustedLen`
283        unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
284    }
285
286    /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
287    ///
288    /// # Safety
289    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
290    /// I.e. that `size_hint().1` correctly reports its length.
291    #[inline]
292    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
293        iterator: I,
294    ) -> std::result::Result<Self, E>
295    where
296        P: AsRef<[u8]>,
297        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
298    {
299        let iterator = iterator.into_iter();
300
301        // soundness: assumed trusted len
302        let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;
303        Ok(Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap())
304    }
305
306    /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
307    #[inline]
308    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
309    where
310        P: AsRef<[u8]>,
311        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
312    {
313        // soundness: I: TrustedLen
314        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
315    }
316
317    /// Extends the [`MutableBinaryArray`] from an iterator of trusted length.
318    /// This differs from `extend_trusted_len` which accepts iterator of optional values.
319    #[inline]
320    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
321    where
322        P: AsRef<[u8]>,
323        I: TrustedLen<Item = P>,
324    {
325        // SAFETY: The iterator is `TrustedLen`
326        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
327    }
328
329    /// Extends the [`MutableBinaryArray`] from an iterator of values.
330    /// This differs from `extended_trusted_len` which accepts iterator of optional values.
331    #[inline]
332    pub fn extend_values<I, P>(&mut self, iterator: I)
333    where
334        P: AsRef<[u8]>,
335        I: Iterator<Item = P>,
336    {
337        let length = self.values.len();
338        self.values.extend(iterator);
339        let additional = self.values.len() - length;
340
341        if let Some(validity) = self.validity.as_mut() {
342            validity.extend_constant(additional, true);
343        }
344    }
345
346    /// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.
347    /// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
348    /// values.
349    ///
350    /// # Safety
351    /// The `iterator` must be [`TrustedLen`]
352    #[inline]
353    pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
354    where
355        P: AsRef<[u8]>,
356        I: Iterator<Item = P>,
357    {
358        let length = self.values.len();
359        self.values.extend_trusted_len_unchecked(iterator);
360        let additional = self.values.len() - length;
361
362        if let Some(validity) = self.validity.as_mut() {
363            validity.extend_constant(additional, true);
364        }
365    }
366
367    /// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
368    #[inline]
369    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
370    where
371        P: AsRef<[u8]>,
372        I: TrustedLen<Item = Option<P>>,
373    {
374        // SAFETY: The iterator is `TrustedLen`
375        unsafe { self.extend_trusted_len_unchecked(iterator) }
376    }
377
378    /// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
379    ///
380    /// # Safety
381    /// The `iterator` must be [`TrustedLen`]
382    #[inline]
383    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
384    where
385        P: AsRef<[u8]>,
386        I: Iterator<Item = Option<P>>,
387    {
388        if self.validity.is_none() {
389            let mut validity = MutableBitmap::new();
390            validity.extend_constant(self.len(), true);
391            self.validity = Some(validity);
392        }
393
394        self.values
395            .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
396    }
397
398    /// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`.
399    pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
400        let (offsets, values) = values_iter(iterator);
401        Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
402    }
403
404    /// Extend with a fallible iterator
405    pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
406    where
407        E: std::error::Error,
408        I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
409        T: AsRef<[u8]>,
410    {
411        let mut iter = iter.into_iter();
412        self.reserve(iter.size_hint().0, 0);
413        iter.try_for_each(|x| {
414            self.push(x?);
415            Ok(())
416        })
417    }
418}
419
420impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {
421    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
422        self.try_extend(iter).unwrap();
423    }
424}
425
426impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {
427    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
428        let mut iter = iter.into_iter();
429        self.reserve(iter.size_hint().0, 0);
430        iter.try_for_each(|x| self.try_push(x))
431    }
432}
433
434impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
435    fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {
436        match value {
437            Some(value) => {
438                self.values.try_push(value.as_ref())?;
439
440                if let Some(validity) = &mut self.validity {
441                    validity.push(true)
442                }
443            },
444            None => {
445                self.values.push("");
446                match &mut self.validity {
447                    Some(validity) => validity.push(false),
448                    None => self.init_validity(),
449                }
450            },
451        }
452        Ok(())
453    }
454}
455
456impl<O: Offset> PartialEq for MutableBinaryArray<O> {
457    fn eq(&self, other: &Self) -> bool {
458        self.iter().eq(other.iter())
459    }
460}
461
462impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
463    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
464        extend_validity(self.len(), &mut self.validity, &other.validity);
465
466        self.values.try_extend_from_self(&other.values)
467    }
468}