polars_arrow/array/growable/
binview.rs1use std::ops::Deref;
2use std::sync::Arc;
3
4use polars_utils::aliases::{InitHashMaps, PlHashSet};
5use polars_utils::itertools::Itertools;
6
7use super::Growable;
8use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
9use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
10use crate::array::{Array, MutableBinaryViewArray, View};
11use crate::bitmap::BitmapBuilder;
12use crate::buffer::Buffer;
13use crate::datatypes::ArrowDataType;
14
15pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
17 arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
18 dtype: ArrowDataType,
19 validity: Option<BitmapBuilder>,
20 inner: MutableBinaryViewArray<T>,
21 same_buffers: Option<&'a Arc<[Buffer<u8>]>>,
22 total_same_buffers_len: usize, has_duplicate_buffers: bool,
24}
25
26impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
27 pub fn new(
31 arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
32 mut use_validity: bool,
33 capacity: usize,
34 ) -> Self {
35 let dtype = arrays[0].dtype().clone();
36
37 if !use_validity & arrays.iter().any(|array| array.null_count() > 0) {
40 use_validity = true;
41 };
42
43 let all_same_buffer = arrays
48 .iter()
49 .map(|array| array.data_buffers().as_ptr())
50 .all_equal()
51 && !arrays.is_empty();
52 let same_buffers = all_same_buffer.then(|| arrays[0].data_buffers());
53 let total_same_buffers_len = all_same_buffer
54 .then(|| arrays[0].total_buffer_len())
55 .unwrap_or_default();
56
57 let mut duplicates = PlHashSet::new();
58 let mut has_duplicate_buffers = false;
59 for arr in arrays.iter() {
60 if !duplicates.insert(arr.data_buffers().as_ptr()) {
61 has_duplicate_buffers = true;
62 break;
63 }
64 }
65 Self {
66 arrays,
67 dtype,
68 validity: prepare_validity(use_validity, capacity),
69 inner: MutableBinaryViewArray::<T>::with_capacity(capacity),
70 same_buffers,
71 total_same_buffers_len,
72 has_duplicate_buffers,
73 }
74 }
75
76 fn to(&mut self) -> BinaryViewArrayGeneric<T> {
77 let arr = std::mem::take(&mut self.inner);
78 if let Some(buffers) = self.same_buffers {
79 unsafe {
80 BinaryViewArrayGeneric::<T>::new_unchecked(
81 self.dtype.clone(),
82 arr.views.into(),
83 buffers.clone(),
84 self.validity.take().map(BitmapBuilder::freeze),
85 arr.total_bytes_len,
86 self.total_same_buffers_len,
87 )
88 }
89 } else {
90 arr.freeze_with_dtype(self.dtype.clone())
91 .with_validity(self.validity.take().map(BitmapBuilder::freeze))
92 }
93 }
94}
95
96impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> {
97 unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
98 let array = *self.arrays.get_unchecked(index);
99 let local_buffers = array.data_buffers();
100
101 extend_validity(&mut self.validity, array, start, len);
102
103 let range = start..start + len;
104
105 let views_iter = array.views().get_unchecked(range).iter().cloned();
106
107 if self.same_buffers.is_some() {
108 let mut total_len = 0;
109 self.inner
110 .views
111 .extend(views_iter.inspect(|v| total_len += v.length as usize));
112 self.inner.total_bytes_len += total_len;
113 } else if self.has_duplicate_buffers {
114 self.inner
115 .extend_non_null_views_unchecked_dedupe(views_iter, local_buffers.deref());
116 } else {
117 self.inner
118 .extend_non_null_views_unchecked(views_iter, local_buffers.deref());
119 }
120 }
121
122 unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) {
123 let orig_view_start = self.inner.views.len();
124 let orig_total_bytes_len = self.inner.total_bytes_len;
125 if copies > 0 {
126 self.extend(index, start, len);
127 }
128 if copies > 1 {
129 let array = *self.arrays.get_unchecked(index);
130 extend_validity_copies(&mut self.validity, array, start, len, copies - 1);
131 let extended_view_end = self.inner.views.len();
132 let total_bytes_len_end = self.inner.total_bytes_len;
133 for _ in 0..copies - 1 {
134 self.inner
135 .views
136 .extend_from_within(orig_view_start..extended_view_end);
137 self.inner.total_bytes_len += total_bytes_len_end - orig_total_bytes_len;
138 }
139 }
140 }
141
142 fn extend_validity(&mut self, additional: usize) {
143 self.inner
144 .views
145 .extend(std::iter::repeat(View::default()).take(additional));
146 if let Some(validity) = &mut self.validity {
147 validity.extend_constant(additional, false);
148 }
149 }
150
151 #[inline]
152 fn len(&self) -> usize {
153 self.inner.len()
154 }
155
156 fn as_arc(&mut self) -> Arc<dyn Array> {
157 self.to().arced()
158 }
159
160 fn as_box(&mut self) -> Box<dyn Array> {
161 self.to().boxed()
162 }
163}
164
165impl<'a, T: ViewType + ?Sized> From<GrowableBinaryViewArray<'a, T>> for BinaryViewArrayGeneric<T> {
166 fn from(mut val: GrowableBinaryViewArray<'a, T>) -> Self {
167 val.to()
168 }
169}