polars_arrow/array/binary/
mutable_values.rs

1use std::sync::Arc;
2
3use polars_error::{polars_bail, PolarsResult};
4
5use super::{BinaryArray, MutableBinaryArray};
6use crate::array::physical_binary::*;
7use crate::array::specification::try_check_offsets_bounds;
8use crate::array::{
9    Array, ArrayAccessor, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush,
10};
11use crate::bitmap::MutableBitmap;
12use crate::datatypes::ArrowDataType;
13use crate::offset::{Offset, Offsets};
14use crate::trusted_len::TrustedLen;
15
16/// A [`MutableArray`] that builds a [`BinaryArray`]. It differs
17/// from [`MutableBinaryArray`] in that it builds non-null [`BinaryArray`].
18#[derive(Debug, Clone)]
19pub struct MutableBinaryValuesArray<O: Offset> {
20    dtype: ArrowDataType,
21    offsets: Offsets<O>,
22    values: Vec<u8>,
23}
24
25impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O> {
26    fn from(other: MutableBinaryValuesArray<O>) -> Self {
27        BinaryArray::<O>::new(other.dtype, other.offsets.into(), other.values.into(), None)
28    }
29}
30
31impl<O: Offset> From<MutableBinaryValuesArray<O>> for MutableBinaryArray<O> {
32    fn from(other: MutableBinaryValuesArray<O>) -> Self {
33        MutableBinaryArray::<O>::try_new(other.dtype, other.offsets, other.values, None)
34            .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray")
35    }
36}
37
38impl<O: Offset> Default for MutableBinaryValuesArray<O> {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl<O: Offset> MutableBinaryValuesArray<O> {
45    /// Returns an empty [`MutableBinaryValuesArray`].
46    pub fn new() -> Self {
47        Self {
48            dtype: Self::default_dtype(),
49            offsets: Offsets::new(),
50            values: Vec::<u8>::new(),
51        }
52    }
53
54    /// Returns a [`MutableBinaryValuesArray`] created from its internal representation.
55    ///
56    /// # Errors
57    /// This function returns an error iff:
58    /// * The last offset is not equal to the values' length.
59    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
60    /// # Implementation
61    /// This function is `O(1)`
62    pub fn try_new(
63        dtype: ArrowDataType,
64        offsets: Offsets<O>,
65        values: Vec<u8>,
66    ) -> PolarsResult<Self> {
67        try_check_offsets_bounds(&offsets, values.len())?;
68
69        if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {
70            polars_bail!(ComputeError: "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",)
71        }
72
73        Ok(Self {
74            dtype,
75            offsets,
76            values,
77        })
78    }
79
80    /// Returns the default [`ArrowDataType`] of this container: [`ArrowDataType::Utf8`] or [`ArrowDataType::LargeUtf8`]
81    /// depending on the generic [`Offset`].
82    pub fn default_dtype() -> ArrowDataType {
83        BinaryArray::<O>::default_dtype()
84    }
85
86    /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items.
87    pub fn with_capacity(capacity: usize) -> Self {
88        Self::with_capacities(capacity, 0)
89    }
90
91    /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items and values.
92    pub fn with_capacities(capacity: usize, values: usize) -> Self {
93        Self {
94            dtype: Self::default_dtype(),
95            offsets: Offsets::<O>::with_capacity(capacity),
96            values: Vec::<u8>::with_capacity(values),
97        }
98    }
99
100    /// returns its values.
101    #[inline]
102    pub fn values(&self) -> &Vec<u8> {
103        &self.values
104    }
105
106    /// returns its offsets.
107    #[inline]
108    pub fn offsets(&self) -> &Offsets<O> {
109        &self.offsets
110    }
111
112    /// Reserves `additional` elements and `additional_values` on the values.
113    #[inline]
114    pub fn reserve(&mut self, additional: usize, additional_values: usize) {
115        self.offsets.reserve(additional);
116        self.values.reserve(additional_values);
117    }
118
119    /// Returns the capacity in number of items
120    pub fn capacity(&self) -> usize {
121        self.offsets.capacity()
122    }
123
124    /// Returns the length of this array
125    #[inline]
126    pub fn len(&self) -> usize {
127        self.offsets.len_proxy()
128    }
129
130    /// Pushes a new item to the array.
131    /// # Panic
132    /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
133    #[inline]
134    pub fn push<T: AsRef<[u8]>>(&mut self, value: T) {
135        self.try_push(value).unwrap()
136    }
137
138    /// Pop the last entry from [`MutableBinaryValuesArray`].
139    /// This function returns `None` iff this array is empty.
140    pub fn pop(&mut self) -> Option<Vec<u8>> {
141        if self.len() == 0 {
142            return None;
143        }
144        self.offsets.pop()?;
145        let start = self.offsets.last().to_usize();
146        let value = self.values.split_off(start);
147        Some(value.to_vec())
148    }
149
150    /// Returns the value of the element at index `i`.
151    /// # Panic
152    /// This function panics iff `i >= self.len`.
153    #[inline]
154    pub fn value(&self, i: usize) -> &[u8] {
155        assert!(i < self.len());
156        unsafe { self.value_unchecked(i) }
157    }
158
159    /// Returns the value of the element at index `i`.
160    ///
161    /// # Safety
162    /// This function is safe iff `i < self.len`.
163    #[inline]
164    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
165        // soundness: the invariant of the function
166        let (start, end) = self.offsets.start_end(i);
167
168        // soundness: the invariant of the struct
169        self.values.get_unchecked(start..end)
170    }
171
172    /// Returns an iterator of `&[u8]`
173    pub fn iter(&self) -> ArrayValuesIter<Self> {
174        ArrayValuesIter::new(self)
175    }
176
177    /// Shrinks the capacity of the [`MutableBinaryValuesArray`] to fit its current length.
178    pub fn shrink_to_fit(&mut self) {
179        self.values.shrink_to_fit();
180        self.offsets.shrink_to_fit();
181    }
182
183    /// Extract the low-end APIs from the [`MutableBinaryValuesArray`].
184    pub fn into_inner(self) -> (ArrowDataType, Offsets<O>, Vec<u8>) {
185        (self.dtype, self.offsets, self.values)
186    }
187}
188
189impl<O: Offset> MutableArray for MutableBinaryValuesArray<O> {
190    fn len(&self) -> usize {
191        self.len()
192    }
193
194    fn validity(&self) -> Option<&MutableBitmap> {
195        None
196    }
197
198    fn as_box(&mut self) -> Box<dyn Array> {
199        let (dtype, offsets, values) = std::mem::take(self).into_inner();
200        BinaryArray::new(dtype, offsets.into(), values.into(), None).boxed()
201    }
202
203    fn as_arc(&mut self) -> Arc<dyn Array> {
204        let (dtype, offsets, values) = std::mem::take(self).into_inner();
205        BinaryArray::new(dtype, offsets.into(), values.into(), None).arced()
206    }
207
208    fn dtype(&self) -> &ArrowDataType {
209        &self.dtype
210    }
211
212    fn as_any(&self) -> &dyn std::any::Any {
213        self
214    }
215
216    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
217        self
218    }
219
220    #[inline]
221    fn push_null(&mut self) {
222        self.push::<&[u8]>(b"")
223    }
224
225    fn reserve(&mut self, additional: usize) {
226        self.reserve(additional, 0)
227    }
228
229    fn shrink_to_fit(&mut self) {
230        self.shrink_to_fit()
231    }
232}
233
234impl<O: Offset, P: AsRef<[u8]>> FromIterator<P> for MutableBinaryValuesArray<O> {
235    fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
236        let (offsets, values) = values_iter(iter.into_iter());
237        Self::try_new(Self::default_dtype(), offsets, values).unwrap()
238    }
239}
240
241impl<O: Offset> MutableBinaryValuesArray<O> {
242    pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
243        &mut self,
244        validity: &mut MutableBitmap,
245        iterator: I,
246    ) where
247        P: AsRef<[u8]>,
248        I: Iterator<Item = Option<P>>,
249    {
250        extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
251    }
252
253    /// Extends the [`MutableBinaryValuesArray`] from a [`TrustedLen`]
254    #[inline]
255    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
256    where
257        P: AsRef<[u8]>,
258        I: TrustedLen<Item = P>,
259    {
260        unsafe { self.extend_trusted_len_unchecked(iterator) }
261    }
262
263    /// Extends [`MutableBinaryValuesArray`] from an iterator of trusted len.
264    ///
265    /// # Safety
266    /// The iterator must be trusted len.
267    #[inline]
268    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
269    where
270        P: AsRef<[u8]>,
271        I: Iterator<Item = P>,
272    {
273        extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
274    }
275
276    /// Creates a [`MutableBinaryValuesArray`] from a [`TrustedLen`]
277    #[inline]
278    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
279    where
280        P: AsRef<[u8]>,
281        I: TrustedLen<Item = P>,
282    {
283        // soundness: I is `TrustedLen`
284        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
285    }
286
287    /// Returns a new [`MutableBinaryValuesArray`] from an iterator of trusted length.
288    ///
289    /// # Safety
290    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
291    /// I.e. that `size_hint().1` correctly reports its length.
292    #[inline]
293    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
294    where
295        P: AsRef<[u8]>,
296        I: Iterator<Item = P>,
297    {
298        let (offsets, values) = trusted_len_values_iter(iterator);
299        Self::try_new(Self::default_dtype(), offsets, values).unwrap()
300    }
301
302    /// Returns a new [`MutableBinaryValuesArray`] from an iterator.
303    /// # Error
304    /// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value.
305    /// (`i32::MAX` or `i64::MAX` respectively).
306    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = P>>(iter: I) -> PolarsResult<Self> {
307        let iterator = iter.into_iter();
308        let (lower, _) = iterator.size_hint();
309        let mut array = Self::with_capacity(lower);
310        for item in iterator {
311            array.try_push(item)?;
312        }
313        Ok(array)
314    }
315
316    /// Extend with a fallible iterator
317    pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
318    where
319        E: std::error::Error,
320        I: IntoIterator<Item = std::result::Result<T, E>>,
321        T: AsRef<[u8]>,
322    {
323        let mut iter = iter.into_iter();
324        self.reserve(iter.size_hint().0, 0);
325        iter.try_for_each(|x| {
326            self.push(x?);
327            Ok(())
328        })
329    }
330}
331
332impl<O: Offset, T: AsRef<[u8]>> Extend<T> for MutableBinaryValuesArray<O> {
333    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
334        extend_from_values_iter(&mut self.offsets, &mut self.values, iter.into_iter());
335    }
336}
337
338impl<O: Offset, T: AsRef<[u8]>> TryExtend<T> for MutableBinaryValuesArray<O> {
339    fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> PolarsResult<()> {
340        let mut iter = iter.into_iter();
341        self.reserve(iter.size_hint().0, 0);
342        iter.try_for_each(|x| self.try_push(x))
343    }
344}
345
346impl<O: Offset, T: AsRef<[u8]>> TryPush<T> for MutableBinaryValuesArray<O> {
347    #[inline]
348    fn try_push(&mut self, value: T) -> PolarsResult<()> {
349        let bytes = value.as_ref();
350        self.values.extend_from_slice(bytes);
351        self.offsets.try_push(bytes.len())
352    }
353}
354
355unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
356    type Item = &'a [u8];
357
358    #[inline]
359    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
360        self.value_unchecked(index)
361    }
362
363    #[inline]
364    fn len(&self) -> usize {
365        self.len()
366    }
367}
368
369impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
370    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
371        self.values.extend_from_slice(&other.values);
372        self.offsets.try_extend_from_self(&other.offsets)
373    }
374}