polars_arrow/legacy/array/
mod.rs

1use crate::array::{
2    new_null_array, Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray,
3    PrimitiveArray, StructArray, ViewType,
4};
5use crate::bitmap::BitmapBuilder;
6use crate::datatypes::ArrowDataType;
7use crate::legacy::utils::CustomIterTools;
8use crate::offset::Offsets;
9use crate::types::NativeType;
10
11pub mod default_arrays;
12#[cfg(feature = "dtype-array")]
13pub mod fixed_size_list;
14pub mod list;
15pub mod null;
16pub mod slice;
17pub mod utf8;
18
19pub use slice::*;
20
21use crate::legacy::prelude::LargeListArray;
22
// Flattens an iterator of optional sub-iterators into one collected value.
//
// While the outer iterator is consumed, each outer item appends one flag to
// `$validity` (`true` for `Some`, `false` for `None`) and one end offset to
// `$offsets`. `$length_so_far` is advanced by the *lower bound* of every
// sub-iterator's `size_hint`, so the recorded offsets are only right when that
// bound is exact.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|maybe_sublist| {
                let sub_iter = maybe_sublist.map(IntoIterator::into_iter);
                if let Some(it) = &sub_iter {
                    // Only present sub-lists contribute to the running length.
                    $length_so_far += it.size_hint().0 as i64;
                }
                $validity.push(sub_iter.is_some());
                // A missing sub-list repeats the previous offset (zero-length slot).
                $offsets.push($length_so_far);
                sub_iter
            })
            .flatten()
            .collect()
    }};
}
44
45pub trait ListFromIter {
46    /// Create a list-array from an iterator.
47    /// Used in group_by agg-list
48    ///
49    /// # Safety
50    /// Will produce incorrect arrays if size hint is incorrect.
51    unsafe fn from_iter_primitive_trusted_len<T, P, I>(
52        iter: I,
53        dtype: ArrowDataType,
54    ) -> ListArray<i64>
55    where
56        T: NativeType,
57        P: IntoIterator<Item = Option<T>>,
58        I: IntoIterator<Item = Option<P>>,
59    {
60        let iterator = iter.into_iter();
61        let (lower, _) = iterator.size_hint();
62
63        let mut validity = BitmapBuilder::with_capacity(lower);
64        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
65        let mut length_so_far = 0i64;
66        offsets.push(length_so_far);
67
68        let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
69
70        // SAFETY:
71        // offsets are monotonically increasing
72        ListArray::new(
73            ListArray::<i64>::default_datatype(dtype.clone()),
74            Offsets::new_unchecked(offsets).into(),
75            Box::new(values.to(dtype)),
76            validity.into_opt_validity(),
77        )
78    }
79
80    /// Create a list-array from an iterator.
81    /// Used in group_by agg-list
82    ///
83    /// # Safety
84    /// Will produce incorrect arrays if size hint is incorrect.
85    unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
86    where
87        I: IntoIterator<Item = Option<P>>,
88        P: IntoIterator<Item = Option<bool>>,
89    {
90        let iterator = iter.into_iter();
91        let (lower, _) = iterator.size_hint();
92
93        let mut validity = Vec::with_capacity(lower);
94        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
95        let mut length_so_far = 0i64;
96        offsets.push(length_so_far);
97
98        let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
99
100        // SAFETY:
101        // Offsets are monotonically increasing.
102        ListArray::new(
103            ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
104            Offsets::new_unchecked(offsets).into(),
105            Box::new(values),
106            Some(validity.into()),
107        )
108    }
109
110    /// # Safety
111    /// Will produce incorrect arrays if size hint is incorrect.
112    unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
113        iter: I,
114        n_elements: usize,
115    ) -> ListArray<i64>
116    where
117        I: IntoIterator<Item = Option<P>>,
118        P: IntoIterator<Item = Option<Ref>>,
119        Ref: AsRef<T>,
120    {
121        let iterator = iter.into_iter();
122        let (lower, _) = iterator.size_hint();
123
124        let mut validity = BitmapBuilder::with_capacity(lower);
125        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
126        let mut length_so_far = 0i64;
127        offsets.push(length_so_far);
128
129        let values: MutableBinaryViewArray<T> = iterator
130            .filter_map(|opt_iter| match opt_iter {
131                Some(x) => {
132                    let it = x.into_iter();
133                    length_so_far += it.size_hint().0 as i64;
134                    validity.push(true);
135                    offsets.push(length_so_far);
136                    Some(it)
137                },
138                None => {
139                    validity.push(false);
140                    offsets.push(length_so_far);
141                    None
142                },
143            })
144            .flatten()
145            .trust_my_length(n_elements)
146            .collect();
147
148        // SAFETY:
149        // offsets are monotonically increasing
150        ListArray::new(
151            ListArray::<i64>::default_datatype(T::DATA_TYPE),
152            Offsets::new_unchecked(offsets).into(),
153            values.freeze().boxed(),
154            validity.into_opt_validity(),
155        )
156    }
157
158    /// Create a list-array from an iterator.
159    /// Used in group_by agg-list
160    ///
161    /// # Safety
162    /// Will produce incorrect arrays if size hint is incorrect.
163    unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
164    where
165        I: IntoIterator<Item = Option<P>>,
166        P: IntoIterator<Item = Option<Ref>>,
167        Ref: AsRef<str>,
168    {
169        Self::from_iter_binview_trusted_len(iter, n_elements)
170    }
171
172    /// Create a list-array from an iterator.
173    /// Used in group_by agg-list
174    ///
175    /// # Safety
176    /// Will produce incorrect arrays if size hint is incorrect.
177    unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
178    where
179        I: IntoIterator<Item = Option<P>>,
180        P: IntoIterator<Item = Option<Ref>>,
181        Ref: AsRef<[u8]>,
182    {
183        Self::from_iter_binview_trusted_len(iter, n_elements)
184    }
185}
186impl ListFromIter for ListArray<i64> {}
187
188fn is_nested_null(dtype: &ArrowDataType) -> bool {
189    match dtype {
190        ArrowDataType::Null => true,
191        ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
192        ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
193        ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
194        _ => false,
195    }
196}
197
198/// Cast null arrays to inner type and ensure that all offsets remain correct
199pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
200    match dtype {
201        ArrowDataType::LargeList(field) => {
202            let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
203            let inner = array.values();
204            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
205            let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
206            LargeListArray::new(
207                dtype,
208                array.offsets().clone(),
209                new_values,
210                array.validity().cloned(),
211            )
212            .boxed()
213        },
214        ArrowDataType::FixedSizeList(field, width) => {
215            let width = *width;
216
217            let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
218            let inner = array.values();
219            let length = if width == array.size() {
220                array.len()
221            } else {
222                assert!(array.values().len() > 0 || width != 0);
223                if width == 0 {
224                    0
225                } else {
226                    array.values().len() / width
227                }
228            };
229            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
230            let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
231            FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
232        },
233        ArrowDataType::Struct(fields) => {
234            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
235            let inner = array.values();
236            let new_values = inner
237                .iter()
238                .zip(fields)
239                .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
240                .collect::<Vec<_>>();
241            StructArray::new(
242                dtype.clone(),
243                array.len(),
244                new_values,
245                array.validity().cloned(),
246            )
247            .boxed()
248        },
249        _ => new_null_array(dtype.clone(), array.len()),
250    }
251}