1use std::sync::Arc;
2
3use polars_error::{polars_bail, PolarsResult};
4
5use super::{MutableUtf8ValuesArray, MutableUtf8ValuesIter, StrAsBytes, Utf8Array};
6use crate::array::physical_binary::*;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::utils::{BitmapIter, ZipValidity};
9use crate::bitmap::{Bitmap, MutableBitmap};
10use crate::datatypes::ArrowDataType;
11use crate::offset::{Offset, Offsets};
12use crate::trusted_len::TrustedLen;
13
14#[derive(Debug, Clone)]
17pub struct MutableUtf8Array<O: Offset> {
18 values: MutableUtf8ValuesArray<O>,
19 validity: Option<MutableBitmap>,
20}
21
22impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O> {
23 fn from(other: MutableUtf8Array<O>) -> Self {
24 let validity = other.validity.and_then(|x| {
25 let validity: Option<Bitmap> = x.into();
26 validity
27 });
28 let array: Utf8Array<O> = other.values.into();
29 array.with_validity(validity)
30 }
31}
32
33impl<O: Offset> Default for MutableUtf8Array<O> {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl<O: Offset> MutableUtf8Array<O> {
40 pub fn new() -> Self {
42 Self {
43 values: Default::default(),
44 validity: None,
45 }
46 }
47
48 pub fn try_new(
59 dtype: ArrowDataType,
60 offsets: Offsets<O>,
61 values: Vec<u8>,
62 validity: Option<MutableBitmap>,
63 ) -> PolarsResult<Self> {
64 let values = MutableUtf8ValuesArray::try_new(dtype, offsets, values)?;
65
66 if validity
67 .as_ref()
68 .is_some_and(|validity| validity.len() != values.len())
69 {
70 polars_bail!(ComputeError: "validity's length must be equal to the number of values")
71 }
72
73 Ok(Self { values, validity })
74 }
75
76 pub unsafe fn new_unchecked(
85 dtype: ArrowDataType,
86 offsets: Offsets<O>,
87 values: Vec<u8>,
88 validity: Option<MutableBitmap>,
89 ) -> Self {
90 let values = MutableUtf8ValuesArray::new_unchecked(dtype, offsets, values);
91 if let Some(ref validity) = validity {
92 assert_eq!(values.len(), validity.len());
93 }
94 Self { values, validity }
95 }
96
97 pub fn from<T: AsRef<str>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
100 Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
101 }
102
103 fn default_dtype() -> ArrowDataType {
104 Utf8Array::<O>::default_dtype()
105 }
106
107 pub fn with_capacity(capacity: usize) -> Self {
109 Self::with_capacities(capacity, 0)
110 }
111
112 pub fn with_capacities(capacity: usize, values: usize) -> Self {
114 Self {
115 values: MutableUtf8ValuesArray::with_capacities(capacity, values),
116 validity: None,
117 }
118 }
119
120 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
122 self.values.reserve(additional, additional_values);
123 if let Some(x) = self.validity.as_mut() {
124 x.reserve(additional)
125 }
126 }
127
128 pub fn capacity(&self) -> usize {
130 self.values.capacity()
131 }
132
133 #[inline]
135 pub fn len(&self) -> usize {
136 self.values.len()
137 }
138
139 #[inline]
143 pub fn push<T: AsRef<str>>(&mut self, value: Option<T>) {
144 self.try_push(value).unwrap()
145 }
146
147 #[inline]
149 pub fn value(&self, i: usize) -> &str {
150 self.values.value(i)
151 }
152
153 #[inline]
158 pub unsafe fn value_unchecked(&self, i: usize) -> &str {
159 self.values.value_unchecked(i)
160 }
161
162 pub fn pop(&mut self) -> Option<String> {
165 let value = self.values.pop()?;
166 self.validity
167 .as_mut()
168 .map(|x| x.pop()?.then(|| ()))
169 .unwrap_or_else(|| Some(()))
170 .map(|_| value)
171 }
172
173 fn init_validity(&mut self) {
174 let mut validity = MutableBitmap::with_capacity(self.values.capacity());
175 validity.extend_constant(self.len(), true);
176 validity.set(self.len() - 1, false);
177 self.validity = Some(validity);
178 }
179
180 pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>, BitmapIter> {
182 ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
183 }
184
185 pub fn into_arc(self) -> Arc<dyn Array> {
187 let a: Utf8Array<O> = self.into();
188 Arc::new(a)
189 }
190
191 pub fn shrink_to_fit(&mut self) {
193 self.values.shrink_to_fit();
194 if let Some(validity) = &mut self.validity {
195 validity.shrink_to_fit()
196 }
197 }
198
199 pub fn into_data(self) -> (ArrowDataType, Offsets<O>, Vec<u8>, Option<MutableBitmap>) {
201 let (dtype, offsets, values) = self.values.into_inner();
202 (dtype, offsets, values, self.validity)
203 }
204
205 pub fn values_iter(&self) -> MutableUtf8ValuesIter<O> {
207 self.values.iter()
208 }
209
210 pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
214 if let Some(validity) = &validity {
215 assert_eq!(self.values.len(), validity.len())
216 }
217 self.validity = validity;
218 }
219
220 pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
226 if let Some(validity) = std::mem::take(&mut self.validity) {
227 self.set_validity(Some(f(validity)))
228 }
229 }
230}
231
232impl<O: Offset> MutableUtf8Array<O> {
233 pub fn values(&self) -> &Vec<u8> {
235 self.values.values()
236 }
237
238 pub fn offsets(&self) -> &Offsets<O> {
240 self.values.offsets()
241 }
242}
243
244impl<O: Offset> MutableArray for MutableUtf8Array<O> {
245 fn len(&self) -> usize {
246 self.len()
247 }
248
249 fn validity(&self) -> Option<&MutableBitmap> {
250 self.validity.as_ref()
251 }
252
253 fn as_box(&mut self) -> Box<dyn Array> {
254 let array: Utf8Array<O> = std::mem::take(self).into();
255 array.boxed()
256 }
257
258 fn as_arc(&mut self) -> Arc<dyn Array> {
259 let array: Utf8Array<O> = std::mem::take(self).into();
260 array.arced()
261 }
262
263 fn dtype(&self) -> &ArrowDataType {
264 if O::IS_LARGE {
265 &ArrowDataType::LargeUtf8
266 } else {
267 &ArrowDataType::Utf8
268 }
269 }
270
271 fn as_any(&self) -> &dyn std::any::Any {
272 self
273 }
274
275 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
276 self
277 }
278
279 #[inline]
280 fn push_null(&mut self) {
281 self.push::<&str>(None)
282 }
283
284 fn reserve(&mut self, additional: usize) {
285 self.reserve(additional, 0)
286 }
287
288 fn shrink_to_fit(&mut self) {
289 self.shrink_to_fit()
290 }
291}
292
293impl<O: Offset, P: AsRef<str>> FromIterator<Option<P>> for MutableUtf8Array<O> {
294 fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
295 Self::try_from_iter(iter).unwrap()
296 }
297}
298
299impl<O: Offset> MutableUtf8Array<O> {
300 #[inline]
303 pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
304 where
305 P: AsRef<str>,
306 I: TrustedLen<Item = P>,
307 {
308 unsafe { self.extend_trusted_len_values_unchecked(iterator) }
309 }
310
311 #[inline]
314 pub fn extend_values<I, P>(&mut self, iterator: I)
315 where
316 P: AsRef<str>,
317 I: Iterator<Item = P>,
318 {
319 let length = self.values.len();
320 self.values.extend(iterator);
321 let additional = self.values.len() - length;
322
323 if let Some(validity) = self.validity.as_mut() {
324 validity.extend_constant(additional, true);
325 }
326 }
327
328 #[inline]
335 pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
336 where
337 P: AsRef<str>,
338 I: Iterator<Item = P>,
339 {
340 let length = self.values.len();
341 self.values.extend_trusted_len_unchecked(iterator);
342 let additional = self.values.len() - length;
343
344 if let Some(validity) = self.validity.as_mut() {
345 validity.extend_constant(additional, true);
346 }
347 }
348
349 #[inline]
351 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
352 where
353 P: AsRef<str>,
354 I: TrustedLen<Item = Option<P>>,
355 {
356 unsafe { self.extend_trusted_len_unchecked(iterator) }
357 }
358
359 #[inline]
364 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
365 where
366 P: AsRef<str>,
367 I: Iterator<Item = Option<P>>,
368 {
369 if self.validity.is_none() {
370 let mut validity = MutableBitmap::new();
371 validity.extend_constant(self.len(), true);
372 self.validity = Some(validity);
373 }
374
375 self.values
376 .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
377 }
378
379 #[inline]
385 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
386 where
387 P: AsRef<str>,
388 I: Iterator<Item = Option<P>>,
389 {
390 let iterator = iterator.map(|x| x.map(StrAsBytes));
391 let (validity, offsets, values) = trusted_len_unzip(iterator);
392
393 Self::new_unchecked(Self::default_dtype(), offsets, values, validity)
395 }
396
397 #[inline]
399 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
400 where
401 P: AsRef<str>,
402 I: TrustedLen<Item = Option<P>>,
403 {
404 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
406 }
407
408 #[inline]
414 pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<str>, I: Iterator<Item = T>>(
415 iterator: I,
416 ) -> Self {
417 MutableUtf8ValuesArray::from_trusted_len_iter_unchecked(iterator).into()
418 }
419
420 #[inline]
422 pub fn from_trusted_len_values_iter<T: AsRef<str>, I: TrustedLen<Item = T>>(
423 iterator: I,
424 ) -> Self {
425 unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
427 }
428
429 fn try_from_iter<P: AsRef<str>, I: IntoIterator<Item = Option<P>>>(
434 iter: I,
435 ) -> PolarsResult<Self> {
436 let iterator = iter.into_iter();
437 let (lower, _) = iterator.size_hint();
438 let mut array = Self::with_capacity(lower);
439 for item in iterator {
440 array.try_push(item)?;
441 }
442 Ok(array)
443 }
444
445 #[inline]
451 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
452 iterator: I,
453 ) -> std::result::Result<Self, E>
454 where
455 P: AsRef<str>,
456 I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
457 {
458 let iterator = iterator.into_iter();
459
460 let iterator = iterator.map(|x| x.map(|x| x.map(StrAsBytes)));
461 let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;
462
463 Ok(Self::new_unchecked(
465 Self::default_dtype(),
466 offsets,
467 values,
468 validity,
469 ))
470 }
471
472 #[inline]
474 pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
475 where
476 P: AsRef<str>,
477 I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
478 {
479 unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
481 }
482
483 pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(iterator: I) -> Self {
485 MutableUtf8ValuesArray::from_iter(iterator).into()
486 }
487
488 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
490 where
491 E: std::error::Error,
492 I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
493 T: AsRef<str>,
494 {
495 let mut iter = iter.into_iter();
496 self.reserve(iter.size_hint().0, 0);
497 iter.try_for_each(|x| {
498 self.push(x?);
499 Ok(())
500 })
501 }
502}
503
504impl<O: Offset, T: AsRef<str>> Extend<Option<T>> for MutableUtf8Array<O> {
505 fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
506 self.try_extend(iter).unwrap();
507 }
508}
509
510impl<O: Offset, T: AsRef<str>> TryExtend<Option<T>> for MutableUtf8Array<O> {
511 fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
512 let mut iter = iter.into_iter();
513 self.reserve(iter.size_hint().0, 0);
514 iter.try_for_each(|x| self.try_push(x))
515 }
516}
517
518impl<O: Offset, T: AsRef<str>> TryPush<Option<T>> for MutableUtf8Array<O> {
519 #[inline]
520 fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {
521 match value {
522 Some(value) => {
523 self.values.try_push(value.as_ref())?;
524
525 if let Some(validity) = &mut self.validity {
526 validity.push(true)
527 }
528 },
529 None => {
530 self.values.push("");
531 match &mut self.validity {
532 Some(validity) => validity.push(false),
533 None => self.init_validity(),
534 }
535 },
536 }
537 Ok(())
538 }
539}
540
541impl<O: Offset> PartialEq for MutableUtf8Array<O> {
542 fn eq(&self, other: &Self) -> bool {
543 self.iter().eq(other.iter())
544 }
545}
546
547impl<O: Offset> TryExtendFromSelf for MutableUtf8Array<O> {
548 fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
549 extend_validity(self.len(), &mut self.validity, &other.validity);
550
551 self.values.try_extend_from_self(&other.values)
552 }
553}