polars_arrow/array/growable/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
//! Contains the trait [`Growable`] and corresponding concreate implementations, one per concrete array,
//! that offer the ability to create a new [`Array`] out of slices of existing [`Array`]s.

use crate::array::*;
use crate::datatypes::*;

mod binary;
pub use binary::GrowableBinary;
mod boolean;
pub use boolean::GrowableBoolean;
mod fixed_binary;
pub use fixed_binary::GrowableFixedSizeBinary;
mod null;
pub use null::GrowableNull;
mod primitive;
pub use primitive::GrowablePrimitive;
mod list;
pub use list::GrowableList;
mod structure;
pub use structure::GrowableStruct;
mod fixed_size_list;
pub use fixed_size_list::GrowableFixedSizeList;
mod utf8;
pub use utf8::GrowableUtf8;
mod dictionary;
pub use dictionary::GrowableDictionary;

mod binview;
pub use binview::GrowableBinaryViewArray;

mod utils;

/// Describes a struct that can be extended from slices of other pre-existing [`Array`]s.
/// This is used in operations where a new array is built out of other arrays, such
/// as filter and concatenation.
pub trait Growable<'a> {
    /// Extends this [`Growable`] with elements from the bounded [`Array`] at index `index` from
    /// a slice starting at `start` and length `len`.
    ///
    /// # Safety
    /// Doesn't do any bound checks.
    unsafe fn extend(&mut self, index: usize, start: usize, len: usize);

    /// Same as extend, except it repeats the extension `copies` times.
    ///
    /// # Safety
    /// Doesn't do any bound checks.
    unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) {
        for _ in 0..copies {
            self.extend(index, start, len)
        }
    }

    /// Extends this [`Growable`] with null elements, disregarding the bound arrays
    ///
    /// # Safety
    /// Doesn't do any bound checks
    fn extend_validity(&mut self, additional: usize);

    /// The current length of the [`Growable`].
    fn len(&self) -> usize;

    /// Converts this [`Growable`] to an [`Arc<dyn Array>`], thereby finishing the mutation.
    /// Self will be empty after such operation.
    fn as_arc(&mut self) -> Arc<dyn Array> {
        self.as_box().into()
    }

    /// Converts this [`Growable`] to an [`Box<dyn Array>`], thereby finishing the mutation.
    /// Self will be empty after such operation
    fn as_box(&mut self) -> Box<dyn Array>;
}

macro_rules! dyn_growable {
    ($ty:ty, $arrays:expr, $use_validity:expr, $capacity:expr) => {{
        let arrays = $arrays
            .iter()
            .map(|array| array.as_any().downcast_ref().unwrap())
            .collect::<Vec<_>>();
        Box::new(<$ty>::new(arrays, $use_validity, $capacity))
    }};
}

/// Creates a new [`Growable`] from an arbitrary number of [`Array`]s.
/// # Panics
/// This function panics iff
/// * the arrays do not have the same [`ArrowDataType`].
/// * `arrays.is_empty()`.
pub fn make_growable<'a>(
    arrays: &[&'a dyn Array],
    use_validity: bool,
    capacity: usize,
) -> Box<dyn Growable<'a> + 'a> {
    assert!(!arrays.is_empty());
    let dtype = arrays[0].dtype();

    use PhysicalType::*;
    match dtype.to_physical_type() {
        Null => Box::new(null::GrowableNull::new(dtype.clone())),
        Boolean => dyn_growable!(boolean::GrowableBoolean, arrays, use_validity, capacity),
        Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
            dyn_growable!(primitive::GrowablePrimitive::<$T>, arrays, use_validity, capacity)
        }),
        Binary => dyn_growable!(
            binary::GrowableBinary::<i32>,
            arrays,
            use_validity,
            capacity
        ),
        LargeBinary => dyn_growable!(
            binary::GrowableBinary::<i64>,
            arrays,
            use_validity,
            capacity
        ),
        FixedSizeBinary => dyn_growable!(
            fixed_binary::GrowableFixedSizeBinary,
            arrays,
            use_validity,
            capacity
        ),
        LargeList => dyn_growable!(list::GrowableList::<i64>, arrays, use_validity, capacity),
        Struct => dyn_growable!(structure::GrowableStruct, arrays, use_validity, capacity),
        FixedSizeList => dyn_growable!(
            fixed_size_list::GrowableFixedSizeList,
            arrays,
            use_validity,
            capacity
        ),
        BinaryView => {
            dyn_growable!(
                binview::GrowableBinaryViewArray::<[u8]>,
                arrays,
                use_validity,
                capacity
            )
        },
        Utf8View => {
            dyn_growable!(
                binview::GrowableBinaryViewArray::<str>,
                arrays,
                use_validity,
                capacity
            )
        },
        Dictionary(key_type) => {
            match_integer_type!(key_type, |$T| {
                let arrays = arrays
                    .iter()
                    .map(|array| {
                        array
                            .as_any()
                            .downcast_ref::<DictionaryArray<$T>>()
                            .unwrap()
                    })
                    .collect::<Vec<_>>();
                Box::new(dictionary::GrowableDictionary::<$T>::new(
                    &arrays,
                    use_validity,
                    capacity,
                ))
            })
        },
        Union | Map | Utf8 | LargeUtf8 | List => unimplemented!(),
    }
}