polars_arrow/array/binary/
mutable_values.rs1use std::sync::Arc;
2
3use polars_error::{polars_bail, PolarsResult};
4
5use super::{BinaryArray, MutableBinaryArray};
6use crate::array::physical_binary::*;
7use crate::array::specification::try_check_offsets_bounds;
8use crate::array::{
9 Array, ArrayAccessor, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush,
10};
11use crate::bitmap::MutableBitmap;
12use crate::datatypes::ArrowDataType;
13use crate::offset::{Offset, Offsets};
14use crate::trusted_len::TrustedLen;
15
16#[derive(Debug, Clone)]
19pub struct MutableBinaryValuesArray<O: Offset> {
20 dtype: ArrowDataType,
21 offsets: Offsets<O>,
22 values: Vec<u8>,
23}
24
25impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O> {
26 fn from(other: MutableBinaryValuesArray<O>) -> Self {
27 BinaryArray::<O>::new(other.dtype, other.offsets.into(), other.values.into(), None)
28 }
29}
30
31impl<O: Offset> From<MutableBinaryValuesArray<O>> for MutableBinaryArray<O> {
32 fn from(other: MutableBinaryValuesArray<O>) -> Self {
33 MutableBinaryArray::<O>::try_new(other.dtype, other.offsets, other.values, None)
34 .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray")
35 }
36}
37
38impl<O: Offset> Default for MutableBinaryValuesArray<O> {
39 fn default() -> Self {
40 Self::new()
41 }
42}
43
44impl<O: Offset> MutableBinaryValuesArray<O> {
45 pub fn new() -> Self {
47 Self {
48 dtype: Self::default_dtype(),
49 offsets: Offsets::new(),
50 values: Vec::<u8>::new(),
51 }
52 }
53
54 pub fn try_new(
63 dtype: ArrowDataType,
64 offsets: Offsets<O>,
65 values: Vec<u8>,
66 ) -> PolarsResult<Self> {
67 try_check_offsets_bounds(&offsets, values.len())?;
68
69 if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {
70 polars_bail!(ComputeError: "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",)
71 }
72
73 Ok(Self {
74 dtype,
75 offsets,
76 values,
77 })
78 }
79
80 pub fn default_dtype() -> ArrowDataType {
83 BinaryArray::<O>::default_dtype()
84 }
85
86 pub fn with_capacity(capacity: usize) -> Self {
88 Self::with_capacities(capacity, 0)
89 }
90
91 pub fn with_capacities(capacity: usize, values: usize) -> Self {
93 Self {
94 dtype: Self::default_dtype(),
95 offsets: Offsets::<O>::with_capacity(capacity),
96 values: Vec::<u8>::with_capacity(values),
97 }
98 }
99
100 #[inline]
102 pub fn values(&self) -> &Vec<u8> {
103 &self.values
104 }
105
106 #[inline]
108 pub fn offsets(&self) -> &Offsets<O> {
109 &self.offsets
110 }
111
112 #[inline]
114 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
115 self.offsets.reserve(additional);
116 self.values.reserve(additional_values);
117 }
118
119 pub fn capacity(&self) -> usize {
121 self.offsets.capacity()
122 }
123
124 #[inline]
126 pub fn len(&self) -> usize {
127 self.offsets.len_proxy()
128 }
129
130 #[inline]
134 pub fn push<T: AsRef<[u8]>>(&mut self, value: T) {
135 self.try_push(value).unwrap()
136 }
137
138 pub fn pop(&mut self) -> Option<Vec<u8>> {
141 if self.len() == 0 {
142 return None;
143 }
144 self.offsets.pop()?;
145 let start = self.offsets.last().to_usize();
146 let value = self.values.split_off(start);
147 Some(value.to_vec())
148 }
149
150 #[inline]
154 pub fn value(&self, i: usize) -> &[u8] {
155 assert!(i < self.len());
156 unsafe { self.value_unchecked(i) }
157 }
158
159 #[inline]
164 pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
165 let (start, end) = self.offsets.start_end(i);
167
168 self.values.get_unchecked(start..end)
170 }
171
172 pub fn iter(&self) -> ArrayValuesIter<Self> {
174 ArrayValuesIter::new(self)
175 }
176
177 pub fn shrink_to_fit(&mut self) {
179 self.values.shrink_to_fit();
180 self.offsets.shrink_to_fit();
181 }
182
183 pub fn into_inner(self) -> (ArrowDataType, Offsets<O>, Vec<u8>) {
185 (self.dtype, self.offsets, self.values)
186 }
187}
188
189impl<O: Offset> MutableArray for MutableBinaryValuesArray<O> {
190 fn len(&self) -> usize {
191 self.len()
192 }
193
194 fn validity(&self) -> Option<&MutableBitmap> {
195 None
196 }
197
198 fn as_box(&mut self) -> Box<dyn Array> {
199 let (dtype, offsets, values) = std::mem::take(self).into_inner();
200 BinaryArray::new(dtype, offsets.into(), values.into(), None).boxed()
201 }
202
203 fn as_arc(&mut self) -> Arc<dyn Array> {
204 let (dtype, offsets, values) = std::mem::take(self).into_inner();
205 BinaryArray::new(dtype, offsets.into(), values.into(), None).arced()
206 }
207
208 fn dtype(&self) -> &ArrowDataType {
209 &self.dtype
210 }
211
212 fn as_any(&self) -> &dyn std::any::Any {
213 self
214 }
215
216 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
217 self
218 }
219
220 #[inline]
221 fn push_null(&mut self) {
222 self.push::<&[u8]>(b"")
223 }
224
225 fn reserve(&mut self, additional: usize) {
226 self.reserve(additional, 0)
227 }
228
229 fn shrink_to_fit(&mut self) {
230 self.shrink_to_fit()
231 }
232}
233
234impl<O: Offset, P: AsRef<[u8]>> FromIterator<P> for MutableBinaryValuesArray<O> {
235 fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
236 let (offsets, values) = values_iter(iter.into_iter());
237 Self::try_new(Self::default_dtype(), offsets, values).unwrap()
238 }
239}
240
241impl<O: Offset> MutableBinaryValuesArray<O> {
242 pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
243 &mut self,
244 validity: &mut MutableBitmap,
245 iterator: I,
246 ) where
247 P: AsRef<[u8]>,
248 I: Iterator<Item = Option<P>>,
249 {
250 extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
251 }
252
253 #[inline]
255 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
256 where
257 P: AsRef<[u8]>,
258 I: TrustedLen<Item = P>,
259 {
260 unsafe { self.extend_trusted_len_unchecked(iterator) }
261 }
262
263 #[inline]
268 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
269 where
270 P: AsRef<[u8]>,
271 I: Iterator<Item = P>,
272 {
273 extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
274 }
275
276 #[inline]
278 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
279 where
280 P: AsRef<[u8]>,
281 I: TrustedLen<Item = P>,
282 {
283 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
285 }
286
287 #[inline]
293 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
294 where
295 P: AsRef<[u8]>,
296 I: Iterator<Item = P>,
297 {
298 let (offsets, values) = trusted_len_values_iter(iterator);
299 Self::try_new(Self::default_dtype(), offsets, values).unwrap()
300 }
301
302 pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = P>>(iter: I) -> PolarsResult<Self> {
307 let iterator = iter.into_iter();
308 let (lower, _) = iterator.size_hint();
309 let mut array = Self::with_capacity(lower);
310 for item in iterator {
311 array.try_push(item)?;
312 }
313 Ok(array)
314 }
315
316 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
318 where
319 E: std::error::Error,
320 I: IntoIterator<Item = std::result::Result<T, E>>,
321 T: AsRef<[u8]>,
322 {
323 let mut iter = iter.into_iter();
324 self.reserve(iter.size_hint().0, 0);
325 iter.try_for_each(|x| {
326 self.push(x?);
327 Ok(())
328 })
329 }
330}
331
332impl<O: Offset, T: AsRef<[u8]>> Extend<T> for MutableBinaryValuesArray<O> {
333 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
334 extend_from_values_iter(&mut self.offsets, &mut self.values, iter.into_iter());
335 }
336}
337
338impl<O: Offset, T: AsRef<[u8]>> TryExtend<T> for MutableBinaryValuesArray<O> {
339 fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> PolarsResult<()> {
340 let mut iter = iter.into_iter();
341 self.reserve(iter.size_hint().0, 0);
342 iter.try_for_each(|x| self.try_push(x))
343 }
344}
345
346impl<O: Offset, T: AsRef<[u8]>> TryPush<T> for MutableBinaryValuesArray<O> {
347 #[inline]
348 fn try_push(&mut self, value: T) -> PolarsResult<()> {
349 let bytes = value.as_ref();
350 self.values.extend_from_slice(bytes);
351 self.offsets.try_push(bytes.len())
352 }
353}
354
355unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
356 type Item = &'a [u8];
357
358 #[inline]
359 unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
360 self.value_unchecked(index)
361 }
362
363 #[inline]
364 fn len(&self) -> usize {
365 self.len()
366 }
367}
368
369impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
370 fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
371 self.values.extend_from_slice(&other.values);
372 self.offsets.try_extend_from_self(&other.offsets)
373 }
374}