// polars_arrow/array/growable/binview.rs

1use std::ops::Deref;
2use std::sync::Arc;
3
4use polars_utils::aliases::{InitHashMaps, PlHashSet};
5use polars_utils::itertools::Itertools;
6
7use super::Growable;
8use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
9use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
10use crate::array::{Array, MutableBinaryViewArray, View};
11use crate::bitmap::BitmapBuilder;
12use crate::buffer::Buffer;
13use crate::datatypes::ArrowDataType;
14
15/// Concrete [`Growable`] for the [`BinaryArray`].
16pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
17    arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
18    dtype: ArrowDataType,
19    validity: Option<BitmapBuilder>,
20    inner: MutableBinaryViewArray<T>,
21    same_buffers: Option<&'a Arc<[Buffer<u8>]>>,
22    total_same_buffers_len: usize, // Only valid if same_buffers is Some.
23    has_duplicate_buffers: bool,
24}
25
26impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
27    /// Creates a new [`GrowableBinaryViewArray`] bound to `arrays` with a pre-allocated `capacity`.
28    /// # Panics
29    /// If `arrays` is empty.
30    pub fn new(
31        arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
32        mut use_validity: bool,
33        capacity: usize,
34    ) -> Self {
35        let dtype = arrays[0].dtype().clone();
36
37        // if any of the arrays has nulls, insertions from any array requires setting bits
38        // as there is at least one array with nulls.
39        if !use_validity & arrays.iter().any(|array| array.null_count() > 0) {
40            use_validity = true;
41        };
42
43        // Fast case.
44        // This happens in group-by's
45        // And prevents us to push `M` buffers insert in the buffers
46        // #15615
47        let all_same_buffer = arrays
48            .iter()
49            .map(|array| array.data_buffers().as_ptr())
50            .all_equal()
51            && !arrays.is_empty();
52        let same_buffers = all_same_buffer.then(|| arrays[0].data_buffers());
53        let total_same_buffers_len = all_same_buffer
54            .then(|| arrays[0].total_buffer_len())
55            .unwrap_or_default();
56
57        let mut duplicates = PlHashSet::new();
58        let mut has_duplicate_buffers = false;
59        for arr in arrays.iter() {
60            if !duplicates.insert(arr.data_buffers().as_ptr()) {
61                has_duplicate_buffers = true;
62                break;
63            }
64        }
65        Self {
66            arrays,
67            dtype,
68            validity: prepare_validity(use_validity, capacity),
69            inner: MutableBinaryViewArray::<T>::with_capacity(capacity),
70            same_buffers,
71            total_same_buffers_len,
72            has_duplicate_buffers,
73        }
74    }
75
76    fn to(&mut self) -> BinaryViewArrayGeneric<T> {
77        let arr = std::mem::take(&mut self.inner);
78        if let Some(buffers) = self.same_buffers {
79            unsafe {
80                BinaryViewArrayGeneric::<T>::new_unchecked(
81                    self.dtype.clone(),
82                    arr.views.into(),
83                    buffers.clone(),
84                    self.validity.take().map(BitmapBuilder::freeze),
85                    arr.total_bytes_len,
86                    self.total_same_buffers_len,
87                )
88            }
89        } else {
90            arr.freeze_with_dtype(self.dtype.clone())
91                .with_validity(self.validity.take().map(BitmapBuilder::freeze))
92        }
93    }
94}
95
96impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> {
97    unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
98        let array = *self.arrays.get_unchecked(index);
99        let local_buffers = array.data_buffers();
100
101        extend_validity(&mut self.validity, array, start, len);
102
103        let range = start..start + len;
104
105        let views_iter = array.views().get_unchecked(range).iter().cloned();
106
107        if self.same_buffers.is_some() {
108            let mut total_len = 0;
109            self.inner
110                .views
111                .extend(views_iter.inspect(|v| total_len += v.length as usize));
112            self.inner.total_bytes_len += total_len;
113        } else if self.has_duplicate_buffers {
114            self.inner
115                .extend_non_null_views_unchecked_dedupe(views_iter, local_buffers.deref());
116        } else {
117            self.inner
118                .extend_non_null_views_unchecked(views_iter, local_buffers.deref());
119        }
120    }
121
122    unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) {
123        let orig_view_start = self.inner.views.len();
124        let orig_total_bytes_len = self.inner.total_bytes_len;
125        if copies > 0 {
126            self.extend(index, start, len);
127        }
128        if copies > 1 {
129            let array = *self.arrays.get_unchecked(index);
130            extend_validity_copies(&mut self.validity, array, start, len, copies - 1);
131            let extended_view_end = self.inner.views.len();
132            let total_bytes_len_end = self.inner.total_bytes_len;
133            for _ in 0..copies - 1 {
134                self.inner
135                    .views
136                    .extend_from_within(orig_view_start..extended_view_end);
137                self.inner.total_bytes_len += total_bytes_len_end - orig_total_bytes_len;
138            }
139        }
140    }
141
142    fn extend_validity(&mut self, additional: usize) {
143        self.inner
144            .views
145            .extend(std::iter::repeat(View::default()).take(additional));
146        if let Some(validity) = &mut self.validity {
147            validity.extend_constant(additional, false);
148        }
149    }
150
151    #[inline]
152    fn len(&self) -> usize {
153        self.inner.len()
154    }
155
156    fn as_arc(&mut self) -> Arc<dyn Array> {
157        self.to().arced()
158    }
159
160    fn as_box(&mut self) -> Box<dyn Array> {
161        self.to().boxed()
162    }
163}
164
165impl<'a, T: ViewType + ?Sized> From<GrowableBinaryViewArray<'a, T>> for BinaryViewArrayGeneric<T> {
166    fn from(mut val: GrowableBinaryViewArray<'a, T>) -> Self {
167        val.to()
168    }
169}