polars_arrow/array/growable/
primitive.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
use std::sync::Arc;

use super::Growable;
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
use crate::array::{Array, PrimitiveArray};
use crate::bitmap::MutableBitmap;
use crate::datatypes::ArrowDataType;
use crate::types::NativeType;

/// Concrete [`Growable`] for the [`PrimitiveArray`].
pub struct GrowablePrimitive<'a, T: NativeType> {
    dtype: ArrowDataType,
    arrays: Vec<&'a PrimitiveArray<T>>,
    validity: Option<MutableBitmap>,
    values: Vec<T>,
}

impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
    /// Creates a new [`GrowablePrimitive`] bound to `arrays` with a pre-allocated `capacity`.
    /// # Panics
    /// If `arrays` is empty.
    pub fn new(
        arrays: Vec<&'a PrimitiveArray<T>>,
        mut use_validity: bool,
        capacity: usize,
    ) -> Self {
        // if any of the arrays has nulls, insertions from any array requires setting bits
        // as there is at least one array with nulls.
        if !use_validity & arrays.iter().any(|array| array.null_count() > 0) {
            use_validity = true;
        };

        let dtype = arrays[0].dtype().clone();

        Self {
            dtype,
            arrays,
            values: Vec::with_capacity(capacity),
            validity: prepare_validity(use_validity, capacity),
        }
    }

    #[inline]
    fn to(&mut self) -> PrimitiveArray<T> {
        let validity = std::mem::take(&mut self.validity);
        let values = std::mem::take(&mut self.values);

        PrimitiveArray::<T>::new(
            self.dtype.clone(),
            values.into(),
            validity.map(|v| v.into()),
        )
    }
}

impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
    #[inline]
    unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
        let array = *self.arrays.get_unchecked(index);
        extend_validity(&mut self.validity, array, start, len);

        let values = array.values().as_slice();
        self.values
            .extend_from_slice(values.get_unchecked(start..start + len));
    }

    #[inline]
    unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) {
        let array = *self.arrays.get_unchecked(index);
        extend_validity_copies(&mut self.validity, array, start, len, copies);

        let values = array.values().as_slice();
        self.values.reserve(len * copies);
        for _ in 0..copies {
            self.values
                .extend_from_slice(values.get_unchecked(start..start + len));
        }
    }

    #[inline]
    fn extend_validity(&mut self, additional: usize) {
        self.values
            .resize(self.values.len() + additional, T::default());
        if let Some(validity) = &mut self.validity {
            validity.extend_constant(additional, false);
        }
    }

    #[inline]
    fn len(&self) -> usize {
        self.values.len()
    }

    #[inline]
    fn as_arc(&mut self) -> Arc<dyn Array> {
        Arc::new(self.to())
    }

    #[inline]
    fn as_box(&mut self) -> Box<dyn Array> {
        Box::new(self.to())
    }
}

impl<'a, T: NativeType> From<GrowablePrimitive<'a, T>> for PrimitiveArray<T> {
    #[inline]
    fn from(val: GrowablePrimitive<'a, T>) -> Self {
        PrimitiveArray::<T>::new(val.dtype, val.values.into(), val.validity.map(|v| v.into()))
    }
}