polars_arrow/legacy/array/
mod.rs1use crate::array::{
2 new_null_array, Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray,
3 PrimitiveArray, StructArray, ViewType,
4};
5use crate::bitmap::BitmapBuilder;
6use crate::datatypes::ArrowDataType;
7use crate::legacy::utils::CustomIterTools;
8use crate::offset::Offsets;
9use crate::types::NativeType;
10
11pub mod default_arrays;
12#[cfg(feature = "dtype-array")]
13pub mod fixed_size_list;
14pub mod list;
15pub mod null;
16pub mod slice;
17pub mod utf8;
18
19pub use slice::*;
20
21use crate::legacy::prelude::LargeListArray;
22
/// Flattens an iterator of optional sub-iterables into one collected value
/// container while recording per-row bookkeeping.
///
/// For every outer item, exactly one entry is pushed onto `$validity`
/// (`true` for `Some`, `false` for `None`) and one entry onto `$offsets`
/// (the running total held in `$length_so_far`). The running total is
/// advanced by the *lower bound* of the inner iterator's `size_hint`, not by
/// counting the items actually yielded, so the recorded offsets are only
/// right when that lower bound is exact.
///
/// The caller is expected to have seeded `$offsets` with the initial offset
/// and annotates the target type of the final `collect()`.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|maybe_sublist| {
                // Turn the optional iterable into an optional iterator up front
                // so both the valid and the null case share the bookkeeping below.
                let sublist = maybe_sublist.map(|items| items.into_iter());
                if let Some(items) = &sublist {
                    $length_so_far += items.size_hint().0 as i64;
                }
                $validity.push(sublist.is_some());
                $offsets.push($length_so_far);
                // `None` rows are dropped here; they contribute no values.
                sublist
            })
            .flatten()
            .collect()
    }};
}
44
45pub trait ListFromIter {
46 unsafe fn from_iter_primitive_trusted_len<T, P, I>(
52 iter: I,
53 dtype: ArrowDataType,
54 ) -> ListArray<i64>
55 where
56 T: NativeType,
57 P: IntoIterator<Item = Option<T>>,
58 I: IntoIterator<Item = Option<P>>,
59 {
60 let iterator = iter.into_iter();
61 let (lower, _) = iterator.size_hint();
62
63 let mut validity = BitmapBuilder::with_capacity(lower);
64 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
65 let mut length_so_far = 0i64;
66 offsets.push(length_so_far);
67
68 let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
69
70 ListArray::new(
73 ListArray::<i64>::default_datatype(dtype.clone()),
74 Offsets::new_unchecked(offsets).into(),
75 Box::new(values.to(dtype)),
76 validity.into_opt_validity(),
77 )
78 }
79
80 unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
86 where
87 I: IntoIterator<Item = Option<P>>,
88 P: IntoIterator<Item = Option<bool>>,
89 {
90 let iterator = iter.into_iter();
91 let (lower, _) = iterator.size_hint();
92
93 let mut validity = Vec::with_capacity(lower);
94 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
95 let mut length_so_far = 0i64;
96 offsets.push(length_so_far);
97
98 let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
99
100 ListArray::new(
103 ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
104 Offsets::new_unchecked(offsets).into(),
105 Box::new(values),
106 Some(validity.into()),
107 )
108 }
109
110 unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
113 iter: I,
114 n_elements: usize,
115 ) -> ListArray<i64>
116 where
117 I: IntoIterator<Item = Option<P>>,
118 P: IntoIterator<Item = Option<Ref>>,
119 Ref: AsRef<T>,
120 {
121 let iterator = iter.into_iter();
122 let (lower, _) = iterator.size_hint();
123
124 let mut validity = BitmapBuilder::with_capacity(lower);
125 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
126 let mut length_so_far = 0i64;
127 offsets.push(length_so_far);
128
129 let values: MutableBinaryViewArray<T> = iterator
130 .filter_map(|opt_iter| match opt_iter {
131 Some(x) => {
132 let it = x.into_iter();
133 length_so_far += it.size_hint().0 as i64;
134 validity.push(true);
135 offsets.push(length_so_far);
136 Some(it)
137 },
138 None => {
139 validity.push(false);
140 offsets.push(length_so_far);
141 None
142 },
143 })
144 .flatten()
145 .trust_my_length(n_elements)
146 .collect();
147
148 ListArray::new(
151 ListArray::<i64>::default_datatype(T::DATA_TYPE),
152 Offsets::new_unchecked(offsets).into(),
153 values.freeze().boxed(),
154 validity.into_opt_validity(),
155 )
156 }
157
158 unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
164 where
165 I: IntoIterator<Item = Option<P>>,
166 P: IntoIterator<Item = Option<Ref>>,
167 Ref: AsRef<str>,
168 {
169 Self::from_iter_binview_trusted_len(iter, n_elements)
170 }
171
172 unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
178 where
179 I: IntoIterator<Item = Option<P>>,
180 P: IntoIterator<Item = Option<Ref>>,
181 Ref: AsRef<[u8]>,
182 {
183 Self::from_iter_binview_trusted_len(iter, n_elements)
184 }
185}
186impl ListFromIter for ListArray<i64> {}
187
188fn is_nested_null(dtype: &ArrowDataType) -> bool {
189 match dtype {
190 ArrowDataType::Null => true,
191 ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
192 ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
193 ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
194 _ => false,
195 }
196}
197
198pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
200 match dtype {
201 ArrowDataType::LargeList(field) => {
202 let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
203 let inner = array.values();
204 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
205 let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
206 LargeListArray::new(
207 dtype,
208 array.offsets().clone(),
209 new_values,
210 array.validity().cloned(),
211 )
212 .boxed()
213 },
214 ArrowDataType::FixedSizeList(field, width) => {
215 let width = *width;
216
217 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
218 let inner = array.values();
219 let length = if width == array.size() {
220 array.len()
221 } else {
222 assert!(array.values().len() > 0 || width != 0);
223 if width == 0 {
224 0
225 } else {
226 array.values().len() / width
227 }
228 };
229 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
230 let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
231 FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
232 },
233 ArrowDataType::Struct(fields) => {
234 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
235 let inner = array.values();
236 let new_values = inner
237 .iter()
238 .zip(fields)
239 .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
240 .collect::<Vec<_>>();
241 StructArray::new(
242 dtype.clone(),
243 array.len(),
244 new_values,
245 array.validity().cloned(),
246 )
247 .boxed()
248 },
249 _ => new_null_array(dtype.clone(), array.len()),
250 }
251}