polars_arrow/array/binary/
mutable.rs1use std::sync::Arc;
2
3use polars_error::{polars_bail, PolarsResult};
4
5use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
6use crate::array::physical_binary::*;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::utils::{BitmapIter, ZipValidity};
9use crate::bitmap::{Bitmap, MutableBitmap};
10use crate::datatypes::ArrowDataType;
11use crate::offset::{Offset, Offsets};
12use crate::trusted_len::TrustedLen;
13
14#[derive(Debug, Clone)]
19pub struct MutableBinaryArray<O: Offset> {
20 values: MutableBinaryValuesArray<O>,
21 validity: Option<MutableBitmap>,
22}
23
24impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {
25 fn from(other: MutableBinaryArray<O>) -> Self {
26 let validity = other.validity.and_then(|x| {
27 let validity: Option<Bitmap> = x.into();
28 validity
29 });
30 let array: BinaryArray<O> = other.values.into();
31 array.with_validity(validity)
32 }
33}
34
35impl<O: Offset> Default for MutableBinaryArray<O> {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl<O: Offset> MutableBinaryArray<O> {
42 pub fn new() -> Self {
46 Self::with_capacity(0)
47 }
48
49 pub fn try_new(
59 dtype: ArrowDataType,
60 offsets: Offsets<O>,
61 values: Vec<u8>,
62 validity: Option<MutableBitmap>,
63 ) -> PolarsResult<Self> {
64 let values = MutableBinaryValuesArray::try_new(dtype, offsets, values)?;
65
66 if validity
67 .as_ref()
68 .is_some_and(|validity| validity.len() != values.len())
69 {
70 polars_bail!(ComputeError: "validity's length must be equal to the number of values")
71 }
72
73 Ok(Self { values, validity })
74 }
75
76 pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
79 Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
80 }
81
82 fn default_dtype() -> ArrowDataType {
83 BinaryArray::<O>::default_dtype()
84 }
85
86 pub fn with_capacity(capacity: usize) -> Self {
88 Self::with_capacities(capacity, 0)
89 }
90
91 pub fn with_capacities(capacity: usize, values: usize) -> Self {
95 Self {
96 values: MutableBinaryValuesArray::with_capacities(capacity, values),
97 validity: None,
98 }
99 }
100
101 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
103 self.values.reserve(additional, additional_values);
104 if let Some(x) = self.validity.as_mut() {
105 x.reserve(additional)
106 }
107 }
108
109 pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {
113 self.try_push(value).unwrap()
114 }
115
116 pub fn pop(&mut self) -> Option<Vec<u8>> {
119 let value = self.values.pop()?;
120 self.validity
121 .as_mut()
122 .map(|x| x.pop()?.then(|| ()))
123 .unwrap_or_else(|| Some(()))
124 .map(|_| value)
125 }
126
127 fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
128 iter: I,
129 ) -> PolarsResult<Self> {
130 let iterator = iter.into_iter();
131 let (lower, _) = iterator.size_hint();
132 let mut primitive = Self::with_capacity(lower);
133 for item in iterator {
134 primitive.try_push(item.as_ref())?
135 }
136 Ok(primitive)
137 }
138
139 fn init_validity(&mut self) {
140 let mut validity = MutableBitmap::with_capacity(self.values.capacity());
141 validity.extend_constant(self.len(), true);
142 validity.set(self.len() - 1, false);
143 self.validity = Some(validity);
144 }
145
146 pub fn into_arc(self) -> Arc<dyn Array> {
148 let a: BinaryArray<O> = self.into();
149 Arc::new(a)
150 }
151
152 pub fn shrink_to_fit(&mut self) {
154 self.values.shrink_to_fit();
155 if let Some(validity) = &mut self.validity {
156 validity.shrink_to_fit()
157 }
158 }
159
160 impl_mutable_array_mut_validity!();
161}
162
163impl<O: Offset> MutableBinaryArray<O> {
164 pub fn values(&self) -> &Vec<u8> {
166 self.values.values()
167 }
168
169 pub fn offsets(&self) -> &Offsets<O> {
171 self.values.offsets()
172 }
173
174 pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<O>, BitmapIter> {
176 ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
177 }
178
179 pub fn values_iter(&self) -> MutableBinaryValuesIter<O> {
181 self.values.iter()
182 }
183}
184
185impl<O: Offset> MutableArray for MutableBinaryArray<O> {
186 fn len(&self) -> usize {
187 self.values.len()
188 }
189
190 fn validity(&self) -> Option<&MutableBitmap> {
191 self.validity.as_ref()
192 }
193
194 fn as_box(&mut self) -> Box<dyn Array> {
195 let array: BinaryArray<O> = std::mem::take(self).into();
196 array.boxed()
197 }
198
199 fn as_arc(&mut self) -> Arc<dyn Array> {
200 let array: BinaryArray<O> = std::mem::take(self).into();
201 array.arced()
202 }
203
204 fn dtype(&self) -> &ArrowDataType {
205 self.values.dtype()
206 }
207
208 fn as_any(&self) -> &dyn std::any::Any {
209 self
210 }
211
212 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
213 self
214 }
215
216 #[inline]
217 fn push_null(&mut self) {
218 self.push::<&[u8]>(None)
219 }
220
221 fn reserve(&mut self, additional: usize) {
222 self.reserve(additional, 0)
223 }
224
225 fn shrink_to_fit(&mut self) {
226 self.shrink_to_fit()
227 }
228}
229
230impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {
231 fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
232 Self::try_from_iter(iter).unwrap()
233 }
234}
235
236impl<O: Offset> MutableBinaryArray<O> {
237 #[inline]
243 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
244 where
245 P: AsRef<[u8]>,
246 I: Iterator<Item = Option<P>>,
247 {
248 let (validity, offsets, values) = trusted_len_unzip(iterator);
249
250 Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap()
251 }
252
253 #[inline]
255 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
256 where
257 P: AsRef<[u8]>,
258 I: TrustedLen<Item = Option<P>>,
259 {
260 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
262 }
263
264 #[inline]
270 pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(
271 iterator: I,
272 ) -> Self {
273 let (offsets, values) = trusted_len_values_iter(iterator);
274 Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
275 }
276
277 #[inline]
279 pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
280 iterator: I,
281 ) -> Self {
282 unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
284 }
285
286 #[inline]
292 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
293 iterator: I,
294 ) -> std::result::Result<Self, E>
295 where
296 P: AsRef<[u8]>,
297 I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
298 {
299 let iterator = iterator.into_iter();
300
301 let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;
303 Ok(Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap())
304 }
305
306 #[inline]
308 pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
309 where
310 P: AsRef<[u8]>,
311 I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
312 {
313 unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
315 }
316
317 #[inline]
320 pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
321 where
322 P: AsRef<[u8]>,
323 I: TrustedLen<Item = P>,
324 {
325 unsafe { self.extend_trusted_len_values_unchecked(iterator) }
327 }
328
329 #[inline]
332 pub fn extend_values<I, P>(&mut self, iterator: I)
333 where
334 P: AsRef<[u8]>,
335 I: Iterator<Item = P>,
336 {
337 let length = self.values.len();
338 self.values.extend(iterator);
339 let additional = self.values.len() - length;
340
341 if let Some(validity) = self.validity.as_mut() {
342 validity.extend_constant(additional, true);
343 }
344 }
345
346 #[inline]
353 pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
354 where
355 P: AsRef<[u8]>,
356 I: Iterator<Item = P>,
357 {
358 let length = self.values.len();
359 self.values.extend_trusted_len_unchecked(iterator);
360 let additional = self.values.len() - length;
361
362 if let Some(validity) = self.validity.as_mut() {
363 validity.extend_constant(additional, true);
364 }
365 }
366
367 #[inline]
369 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
370 where
371 P: AsRef<[u8]>,
372 I: TrustedLen<Item = Option<P>>,
373 {
374 unsafe { self.extend_trusted_len_unchecked(iterator) }
376 }
377
378 #[inline]
383 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
384 where
385 P: AsRef<[u8]>,
386 I: Iterator<Item = Option<P>>,
387 {
388 if self.validity.is_none() {
389 let mut validity = MutableBitmap::new();
390 validity.extend_constant(self.len(), true);
391 self.validity = Some(validity);
392 }
393
394 self.values
395 .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
396 }
397
398 pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
400 let (offsets, values) = values_iter(iterator);
401 Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
402 }
403
404 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
406 where
407 E: std::error::Error,
408 I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
409 T: AsRef<[u8]>,
410 {
411 let mut iter = iter.into_iter();
412 self.reserve(iter.size_hint().0, 0);
413 iter.try_for_each(|x| {
414 self.push(x?);
415 Ok(())
416 })
417 }
418}
419
420impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {
421 fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
422 self.try_extend(iter).unwrap();
423 }
424}
425
426impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {
427 fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
428 let mut iter = iter.into_iter();
429 self.reserve(iter.size_hint().0, 0);
430 iter.try_for_each(|x| self.try_push(x))
431 }
432}
433
434impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
435 fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {
436 match value {
437 Some(value) => {
438 self.values.try_push(value.as_ref())?;
439
440 if let Some(validity) = &mut self.validity {
441 validity.push(true)
442 }
443 },
444 None => {
445 self.values.push("");
446 match &mut self.validity {
447 Some(validity) => validity.push(false),
448 None => self.init_validity(),
449 }
450 },
451 }
452 Ok(())
453 }
454}
455
456impl<O: Offset> PartialEq for MutableBinaryArray<O> {
457 fn eq(&self, other: &Self) -> bool {
458 self.iter().eq(other.iter())
459 }
460}
461
462impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
463 fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
464 extend_validity(self.len(), &mut self.validity, &other.validity);
465
466 self.values.try_extend_from_self(&other.values)
467 }
468}