arrow_array/
iterator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Idiomatic iterators for [`Array`](crate::Array)
19
20use crate::array::{
21    ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray,
22    GenericStringArray, PrimitiveArray,
23};
24use crate::{FixedSizeListArray, GenericListViewArray, MapArray};
25use arrow_buffer::NullBuffer;
26
27/// An iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
28///
29/// # Performance
30///
31/// [`ArrayIter`] provides an idiomatic way to iterate over an array, however, this
32/// comes at the cost of performance. In particular the interleaved handling of
33/// the null mask is often sub-optimal.
34///
35/// If performing an infallible operation, it is typically faster to perform the operation
36/// on every index of the array, and handle the null mask separately. For [`PrimitiveArray`]
37/// this functionality is provided by [`compute::unary`]
38///
39/// If performing a fallible operation, it isn't possible to perform the operation independently
40/// of the null mask, as this might result in a spurious failure on a null index. However,
41/// there are more efficient ways to iterate over just the non-null indices, this functionality
42/// is provided by [`compute::try_unary`]
43///
44/// [`PrimitiveArray`]: crate::PrimitiveArray
45/// [`compute::unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.unary.html
46/// [`compute::try_unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.try_unary.html
47#[derive(Debug)]
48pub struct ArrayIter<T: ArrayAccessor> {
49    array: T,
50    logical_nulls: Option<NullBuffer>,
51    current: usize,
52    current_end: usize,
53}
54
55impl<T: ArrayAccessor> ArrayIter<T> {
56    /// create a new iterator
57    pub fn new(array: T) -> Self {
58        let len = array.len();
59        let logical_nulls = array.logical_nulls();
60        ArrayIter {
61            array,
62            logical_nulls,
63            current: 0,
64            current_end: len,
65        }
66    }
67
68    #[inline]
69    fn is_null(&self, idx: usize) -> bool {
70        self.logical_nulls
71            .as_ref()
72            .map(|x| x.is_null(idx))
73            .unwrap_or_default()
74    }
75}
76
77impl<T: ArrayAccessor> Iterator for ArrayIter<T> {
78    type Item = Option<T::Item>;
79
80    #[inline]
81    fn next(&mut self) -> Option<Self::Item> {
82        if self.current == self.current_end {
83            None
84        } else if self.is_null(self.current) {
85            self.current += 1;
86            Some(None)
87        } else {
88            let old = self.current;
89            self.current += 1;
90            // Safety:
91            // we just checked bounds in `self.current_end == self.current`
92            // this is safe on the premise that this struct is initialized with
93            // current = array.len()
94            // and that current_end is ever only decremented
95            unsafe { Some(Some(self.array.value_unchecked(old))) }
96        }
97    }
98
99    fn size_hint(&self) -> (usize, Option<usize>) {
100        (
101            self.array.len() - self.current,
102            Some(self.array.len() - self.current),
103        )
104    }
105}
106
107impl<T: ArrayAccessor> DoubleEndedIterator for ArrayIter<T> {
108    fn next_back(&mut self) -> Option<Self::Item> {
109        if self.current_end == self.current {
110            None
111        } else {
112            self.current_end -= 1;
113            Some(if self.is_null(self.current_end) {
114                None
115            } else {
116                // Safety:
117                // we just checked bounds in `self.current_end == self.current`
118                // this is safe on the premise that this struct is initialized with
119                // current = array.len()
120                // and that current_end is ever only decremented
121                unsafe { Some(self.array.value_unchecked(self.current_end)) }
122            })
123        }
124    }
125}
126
127/// all arrays have known size.
128impl<T: ArrayAccessor> ExactSizeIterator for ArrayIter<T> {}
129
130/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray
131pub type PrimitiveIter<'a, T> = ArrayIter<&'a PrimitiveArray<T>>;
132/// an iterator that returns Some(T) or None, that can be used on any BooleanArray
133pub type BooleanIter<'a> = ArrayIter<&'a BooleanArray>;
134/// an iterator that returns Some(T) or None, that can be used on any Utf8Array
135pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
136/// an iterator that returns Some(T) or None, that can be used on any BinaryArray
137pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
138/// an iterator that returns Some(T) or None, that can be used on any FixedSizeBinaryArray
139pub type FixedSizeBinaryIter<'a> = ArrayIter<&'a FixedSizeBinaryArray>;
140/// an iterator that returns Some(T) or None, that can be used on any FixedSizeListArray
141pub type FixedSizeListIter<'a> = ArrayIter<&'a FixedSizeListArray>;
142/// an iterator that returns Some(T) or None, that can be used on any ListArray
143pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
144/// an iterator that returns Some(T) or None, that can be used on any MapArray
145pub type MapArrayIter<'a> = ArrayIter<&'a MapArray>;
146/// an iterator that returns Some(T) or None, that can be used on any ListArray
147pub type GenericListViewArrayIter<'a, O> = ArrayIter<&'a GenericListViewArray<O>>;
148#[cfg(test)]
149mod tests {
150    use std::sync::Arc;
151
152    use crate::array::{ArrayRef, BinaryArray, BooleanArray, Int32Array, StringArray};
153
154    #[test]
155    fn test_primitive_array_iter_round_trip() {
156        let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
157        let array = Arc::new(array) as ArrayRef;
158
159        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
160
161        // to and from iter, with a +1
162        let result: Int32Array = array.iter().map(|e| e.map(|e| e + 1)).collect();
163
164        let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
165        assert_eq!(result, expected);
166
167        // check if DoubleEndedIterator is implemented
168        let result: Int32Array = array.iter().rev().collect();
169        let rev_array = Int32Array::from(vec![Some(4), None, Some(2), None, Some(0)]);
170        assert_eq!(result, rev_array);
171        // check if ExactSizeIterator is implemented
172        let _ = array.iter().rposition(|opt_b| opt_b == Some(1));
173    }
174
175    #[test]
176    fn test_double_ended() {
177        let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
178        let mut a = array.iter();
179        assert_eq!(a.next(), Some(Some(0)));
180        assert_eq!(a.next(), Some(None));
181        assert_eq!(a.next_back(), Some(Some(4)));
182        assert_eq!(a.next_back(), Some(None));
183        assert_eq!(a.next_back(), Some(Some(2)));
184        // the two sides have met: None is returned by both
185        assert_eq!(a.next_back(), None);
186        assert_eq!(a.next(), None);
187    }
188
189    #[test]
190    fn test_string_array_iter_round_trip() {
191        let array = StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
192        let array = Arc::new(array) as ArrayRef;
193
194        let array = array.as_any().downcast_ref::<StringArray>().unwrap();
195
196        // to and from iter, with a +1
197        let result: StringArray = array
198            .iter()
199            .map(|e| {
200                e.map(|e| {
201                    let mut a = e.to_string();
202                    a.push('b');
203                    a
204                })
205            })
206            .collect();
207
208        let expected =
209            StringArray::from(vec![Some("ab"), None, Some("aaab"), None, Some("aaaaab")]);
210        assert_eq!(result, expected);
211
212        // check if DoubleEndedIterator is implemented
213        let result: StringArray = array.iter().rev().collect();
214        let rev_array = StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
215        assert_eq!(result, rev_array);
216        // check if ExactSizeIterator is implemented
217        let _ = array.iter().rposition(|opt_b| opt_b == Some("a"));
218    }
219
220    #[test]
221    fn test_binary_array_iter_round_trip() {
222        let array = BinaryArray::from(vec![
223            Some(b"a" as &[u8]),
224            None,
225            Some(b"aaa"),
226            None,
227            Some(b"aaaaa"),
228        ]);
229
230        // to and from iter
231        let result: BinaryArray = array.iter().collect();
232
233        assert_eq!(result, array);
234
235        // check if DoubleEndedIterator is implemented
236        let result: BinaryArray = array.iter().rev().collect();
237        let rev_array = BinaryArray::from(vec![
238            Some(b"aaaaa" as &[u8]),
239            None,
240            Some(b"aaa"),
241            None,
242            Some(b"a"),
243        ]);
244        assert_eq!(result, rev_array);
245
246        // check if ExactSizeIterator is implemented
247        let _ = array.iter().rposition(|opt_b| opt_b == Some(&[9]));
248    }
249
250    #[test]
251    fn test_boolean_array_iter_round_trip() {
252        let array = BooleanArray::from(vec![Some(true), None, Some(false)]);
253
254        // to and from iter
255        let result: BooleanArray = array.iter().collect();
256
257        assert_eq!(result, array);
258
259        // check if DoubleEndedIterator is implemented
260        let result: BooleanArray = array.iter().rev().collect();
261        let rev_array = BooleanArray::from(vec![Some(false), None, Some(true)]);
262        assert_eq!(result, rev_array);
263
264        // check if ExactSizeIterator is implemented
265        let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
266    }
267}