arrow_arith/
aggregate.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines aggregations over Arrow arrays.
19
20use arrow_array::cast::*;
21use arrow_array::iterator::ArrayIter;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_data::bit_iterator::try_for_each_valid_idx;
25use arrow_schema::*;
26use std::borrow::BorrowMut;
27use std::cmp::{self, Ordering};
28use std::ops::{BitAnd, BitOr, BitXor};
29use types::ByteViewType;
30
31/// An accumulator for primitive numeric values.
32trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
33    /// Accumulate a non-null value.
34    fn accumulate(&mut self, value: T);
35    /// Accumulate a nullable values.
36    /// If `valid` is false the `value` should not affect the accumulator state.
37    fn accumulate_nullable(&mut self, value: T, valid: bool);
38    /// Merge another accumulator into this accumulator
39    fn merge(&mut self, other: Self);
40    /// Return the aggregated value.
41    fn finish(&mut self) -> T;
42}
43
44/// Helper for branchlessly selecting either `a` or `b` based on the boolean `m`.
45/// After verifying the generated assembly this can be a simple `if`.
46#[inline(always)]
47fn select<T: Copy>(m: bool, a: T, b: T) -> T {
48    if m {
49        a
50    } else {
51        b
52    }
53}
54
55#[derive(Clone, Copy)]
56struct SumAccumulator<T: ArrowNativeTypeOp> {
57    sum: T,
58}
59
60impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> {
61    fn default() -> Self {
62        Self { sum: T::ZERO }
63    }
64}
65
66impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> {
67    fn accumulate(&mut self, value: T) {
68        self.sum = self.sum.add_wrapping(value);
69    }
70
71    fn accumulate_nullable(&mut self, value: T, valid: bool) {
72        let sum = self.sum;
73        self.sum = select(valid, sum.add_wrapping(value), sum)
74    }
75
76    fn merge(&mut self, other: Self) {
77        self.sum = self.sum.add_wrapping(other.sum);
78    }
79
80    fn finish(&mut self) -> T {
81        self.sum
82    }
83}
84
85#[derive(Clone, Copy)]
86struct MinAccumulator<T: ArrowNativeTypeOp> {
87    min: T,
88}
89
90impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> {
91    fn default() -> Self {
92        Self {
93            min: T::MAX_TOTAL_ORDER,
94        }
95    }
96}
97
98impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> {
99    fn accumulate(&mut self, value: T) {
100        let min = self.min;
101        self.min = select(value.is_lt(min), value, min);
102    }
103
104    fn accumulate_nullable(&mut self, value: T, valid: bool) {
105        let min = self.min;
106        let is_lt = valid & value.is_lt(min);
107        self.min = select(is_lt, value, min);
108    }
109
110    fn merge(&mut self, other: Self) {
111        self.accumulate(other.min)
112    }
113
114    fn finish(&mut self) -> T {
115        self.min
116    }
117}
118
119#[derive(Clone, Copy)]
120struct MaxAccumulator<T: ArrowNativeTypeOp> {
121    max: T,
122}
123
124impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> {
125    fn default() -> Self {
126        Self {
127            max: T::MIN_TOTAL_ORDER,
128        }
129    }
130}
131
132impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> {
133    fn accumulate(&mut self, value: T) {
134        let max = self.max;
135        self.max = select(value.is_gt(max), value, max);
136    }
137
138    fn accumulate_nullable(&mut self, value: T, valid: bool) {
139        let max = self.max;
140        let is_gt = value.is_gt(max) & valid;
141        self.max = select(is_gt, value, max);
142    }
143
144    fn merge(&mut self, other: Self) {
145        self.accumulate(other.max)
146    }
147
148    fn finish(&mut self) -> T {
149        self.max
150    }
151}
152
153fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
154    mut acc: [A; LANES],
155) -> A {
156    assert!(LANES > 0 && LANES.is_power_of_two());
157    let mut len = LANES;
158
159    // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern,
160    // but the generated code is still a little faster than purely sequential reduction for floats.
161    while len >= 2 {
162        let mid = len / 2;
163        let (h, t) = acc[..len].split_at_mut(mid);
164
165        for i in 0..mid {
166            h[i].merge(t[i]);
167        }
168        len /= 2;
169    }
170    acc[0]
171}
172
173#[inline(always)]
174fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
175    acc: &mut [A; LANES],
176    values: &[T; LANES],
177) {
178    for i in 0..LANES {
179        acc[i].accumulate(values[i]);
180    }
181}
182
183#[inline(always)]
184fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
185    acc: &mut [A; LANES],
186    values: &[T; LANES],
187    validity: u64,
188) {
189    let mut bit = 1;
190    for i in 0..LANES {
191        acc[i].accumulate_nullable(values[i], (validity & bit) != 0);
192        bit <<= 1;
193    }
194}
195
196fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T {
197    values
198        .iter()
199        .copied()
200        .fold(A::default(), |mut a, b| {
201            a.accumulate(b);
202            a
203        })
204        .finish()
205}
206
207#[inline(never)]
208fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
209    values: &[T],
210) -> T {
211    // aggregating into multiple independent accumulators allows the compiler to use vector registers
212    // with a single accumulator the compiler would not be allowed to reorder floating point addition
213    let mut acc = [A::default(); LANES];
214    let mut chunks = values.chunks_exact(LANES);
215    chunks.borrow_mut().for_each(|chunk| {
216        aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap());
217    });
218
219    let remainder = chunks.remainder();
220    for i in 0..remainder.len() {
221        acc[i].accumulate(remainder[i]);
222    }
223
224    reduce_accumulators(acc).finish()
225}
226
227#[inline(never)]
228fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
229    values: &[T],
230    validity: &NullBuffer,
231) -> T {
232    assert!(LANES > 0 && 64 % LANES == 0);
233    assert_eq!(values.len(), validity.len());
234
235    // aggregating into multiple independent accumulators allows the compiler to use vector registers
236    let mut acc = [A::default(); LANES];
237    // we process 64 bits of validity at a time
238    let mut values_chunks = values.chunks_exact(64);
239    let validity_chunks = validity.inner().bit_chunks();
240    let mut validity_chunks_iter = validity_chunks.iter();
241
242    values_chunks.borrow_mut().for_each(|chunk| {
243        // Safety: we asserted that values and validity have the same length and trust the iterator impl
244        let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() };
245        // chunk further based on the number of vector lanes
246        chunk.chunks_exact(LANES).for_each(|chunk| {
247            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
248            validity >>= LANES;
249        });
250    });
251
252    let remainder = values_chunks.remainder();
253    if !remainder.is_empty() {
254        let mut validity = validity_chunks.remainder_bits();
255
256        let mut remainder_chunks = remainder.chunks_exact(LANES);
257        remainder_chunks.borrow_mut().for_each(|chunk| {
258            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
259            validity >>= LANES;
260        });
261
262        let remainder = remainder_chunks.remainder();
263        if !remainder.is_empty() {
264            let mut bit = 1;
265            for i in 0..remainder.len() {
266                acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0);
267                bit <<= 1;
268            }
269        }
270    }
271
272    reduce_accumulators(acc).finish()
273}
274
275/// The preferred vector size in bytes for the target platform.
276/// Note that the avx512 target feature is still unstable and this also means it is not detected on stable rust.
277const PREFERRED_VECTOR_SIZE: usize =
278    if cfg!(all(target_arch = "x86_64", target_feature = "avx512f")) {
279        64
280    } else if cfg!(all(target_arch = "x86_64", target_feature = "avx")) {
281        32
282    } else {
283        16
284    };
285
286/// non-nullable aggregation requires fewer temporary registers so we can use more of them for accumulators
287const PREFERRED_VECTOR_SIZE_NON_NULL: usize = PREFERRED_VECTOR_SIZE * 2;
288
289/// Generic aggregation for any primitive type.
290/// Returns None if there are no non-null values in `array`.
291fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>(
292    array: &PrimitiveArray<P>,
293) -> Option<T> {
294    let null_count = array.null_count();
295    if null_count == array.len() {
296        return None;
297    }
298    let values = array.values().as_ref();
299    match array.nulls() {
300        Some(nulls) if null_count > 0 => {
301            // const generics depending on a generic type parameter are not supported
302            // so we have to match and call aggregate with the corresponding constant
303            match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() {
304                64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)),
305                32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)),
306                16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)),
307                8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)),
308                4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)),
309                2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)),
310                _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)),
311            }
312        }
313        _ => {
314            let is_float = matches!(
315                array.data_type(),
316                DataType::Float16 | DataType::Float32 | DataType::Float64
317            );
318            if is_float {
319                match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() {
320                    64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)),
321                    32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)),
322                    16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)),
323                    8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)),
324                    4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)),
325                    2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)),
326                    _ => Some(aggregate_nonnull_simple::<T, A>(values)),
327                }
328            } else {
329                // for non-null integers its better to not chunk ourselves and instead
330                // let llvm fully handle loop unrolling and vectorization
331                Some(aggregate_nonnull_simple::<T, A>(values))
332            }
333        }
334    }
335}
336
337/// Returns the minimum value in the boolean array.
338///
339/// ```
340/// # use arrow_array::BooleanArray;
341/// # use arrow_arith::aggregate::min_boolean;
342///
343/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
344/// assert_eq!(min_boolean(&a), Some(false))
345/// ```
346pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
347    // short circuit if all nulls / zero length array
348    if array.null_count() == array.len() {
349        return None;
350    }
351
352    // Note the min bool is false (0), so short circuit as soon as we see it
353    match array.nulls() {
354        None => {
355            let bit_chunks = array.values().bit_chunks();
356            if bit_chunks.iter().any(|x| {
357                // u64::MAX has all bits set, so if the value is not that, then there is a false
358                x != u64::MAX
359            }) {
360                return Some(false);
361            }
362            // If the remainder bits are not all set, then there is a false
363            if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() {
364                Some(false)
365            } else {
366                Some(true)
367            }
368        }
369        Some(nulls) => {
370            let validity_chunks = nulls.inner().bit_chunks();
371            let value_chunks = array.values().bit_chunks();
372
373            if value_chunks
374                .iter()
375                .zip(validity_chunks.iter())
376                .any(|(value, validity)| {
377                    // We are looking for a false value, but because applying the validity mask
378                    // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true.
379                    (!value & validity) != 0
380                })
381            {
382                return Some(false);
383            }
384
385            // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true
386            if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 {
387                Some(false)
388            } else {
389                Some(true)
390            }
391        }
392    }
393}
394
395/// Returns the maximum value in the boolean array
396///
397/// ```
398/// # use arrow_array::BooleanArray;
399/// # use arrow_arith::aggregate::max_boolean;
400///
401/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
402/// assert_eq!(max_boolean(&a), Some(true))
403/// ```
404pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
405    // short circuit if all nulls / zero length array
406    if array.null_count() == array.len() {
407        return None;
408    }
409
410    // Note the max bool is true (1), so short circuit as soon as we see it
411    match array.nulls() {
412        None => array
413            .values()
414            .bit_chunks()
415            .iter_padded()
416            // We found a true if any bit is set
417            .map(|x| x != 0)
418            .find(|b| *b)
419            .or(Some(false)),
420        Some(nulls) => {
421            let validity_chunks = nulls.inner().bit_chunks().iter_padded();
422            let value_chunks = array.values().bit_chunks().iter_padded();
423            value_chunks
424                .zip(validity_chunks)
425                // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk
426                .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0)
427                .find(|b| *b)
428                .or(Some(false))
429        }
430    }
431}
432
433/// Helper to compute min/max of [`ArrayAccessor`].
434fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T>
435where
436    F: Fn(&T, &T) -> bool,
437{
438    let null_count = array.null_count();
439    if null_count == array.len() {
440        None
441    } else if null_count == 0 {
442        // JUSTIFICATION
443        //  Benefit:  ~8% speedup
444        //  Soundness: `i` is always within the array bounds
445        (0..array.len())
446            .map(|i| unsafe { array.value_unchecked(i) })
447            .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
448    } else {
449        let nulls = array.nulls().unwrap();
450        unsafe {
451            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
452                let acc = array.value_unchecked(acc_idx);
453                let item = array.value_unchecked(idx);
454                if cmp(&acc, &item) {
455                    idx
456                } else {
457                    acc_idx
458                }
459            });
460            idx.map(|idx| array.value_unchecked(idx))
461        }
462    }
463}
464
465/// Helper to compute min/max of [`GenericByteViewArray<T>`].
466/// The specialized min/max leverages the inlined values to compare the byte views.
467/// `swap_cond` is the condition to swap current min/max with the new value.
468/// For example, `Ordering::Greater` for max and `Ordering::Less` for min.
469fn min_max_view_helper<T: ByteViewType>(
470    array: &GenericByteViewArray<T>,
471    swap_cond: cmp::Ordering,
472) -> Option<&T::Native> {
473    let null_count = array.null_count();
474    if null_count == array.len() {
475        None
476    } else if null_count == 0 {
477        let target_idx = (0..array.len()).reduce(|acc, item| {
478            // SAFETY:  array's length is correct so item is within bounds
479            let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
480            if cmp == swap_cond {
481                item
482            } else {
483                acc
484            }
485        });
486        // SAFETY: idx came from valid range `0..array.len()`
487        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
488    } else {
489        let nulls = array.nulls().unwrap();
490
491        let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
492            let cmp =
493                unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
494            if cmp == swap_cond {
495                idx
496            } else {
497                acc_idx
498            }
499        });
500
501        // SAFETY: idx came from valid range `0..array.len()`
502        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
503    }
504}
505
506/// Returns the maximum value in the binary array, according to the natural order.
507pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
508    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
509}
510
511/// Returns the maximum value in the binary view array, according to the natural order.
512pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
513    min_max_view_helper(array, Ordering::Greater)
514}
515
516/// Returns the minimum value in the binary array, according to the natural order.
517pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
518    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
519}
520
521/// Returns the minimum value in the binary view array, according to the natural order.
522pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
523    min_max_view_helper(array, Ordering::Less)
524}
525
526/// Returns the maximum value in the string array, according to the natural order.
527pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
528    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
529}
530
531/// Returns the maximum value in the string view array, according to the natural order.
532pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
533    min_max_view_helper(array, Ordering::Greater)
534}
535
536/// Returns the minimum value in the string array, according to the natural order.
537pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
538    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
539}
540
541/// Returns the minimum value in the string view array, according to the natural order.
542pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
543    min_max_view_helper(array, Ordering::Less)
544}
545
546/// Returns the sum of values in the array.
547///
548/// This doesn't detect overflow. Once overflowing, the result will wrap around.
549/// For an overflow-checking variant, use `sum_array_checked` instead.
550pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
551where
552    T: ArrowNumericType,
553    T::Native: ArrowNativeTypeOp,
554{
555    match array.data_type() {
556        DataType::Dictionary(_, _) => {
557            let null_count = array.null_count();
558
559            if null_count == array.len() {
560                return None;
561            }
562
563            let iter = ArrayIter::new(array);
564            let sum = iter
565                .into_iter()
566                .fold(T::default_value(), |accumulator, value| {
567                    if let Some(value) = value {
568                        accumulator.add_wrapping(value)
569                    } else {
570                        accumulator
571                    }
572                });
573
574            Some(sum)
575        }
576        _ => sum::<T>(as_primitive_array(&array)),
577    }
578}
579
580/// Returns the sum of values in the array.
581///
582/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
583/// use `sum_array` instead.
584pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
585    array: A,
586) -> Result<Option<T::Native>, ArrowError>
587where
588    T: ArrowNumericType,
589    T::Native: ArrowNativeTypeOp,
590{
591    match array.data_type() {
592        DataType::Dictionary(_, _) => {
593            let null_count = array.null_count();
594
595            if null_count == array.len() {
596                return Ok(None);
597            }
598
599            let iter = ArrayIter::new(array);
600            let sum = iter
601                .into_iter()
602                .try_fold(T::default_value(), |accumulator, value| {
603                    if let Some(value) = value {
604                        accumulator.add_checked(value)
605                    } else {
606                        Ok(accumulator)
607                    }
608                })?;
609
610            Ok(Some(sum))
611        }
612        _ => sum_checked::<T>(as_primitive_array(&array)),
613    }
614}
615
616/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
617/// array with value of `ArrowNumericType` type.
618pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
619where
620    T: ArrowNumericType,
621    T::Native: ArrowNativeType,
622{
623    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
624}
625
626/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
627/// array with value of `ArrowNumericType` type.
628pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
629where
630    T: ArrowNumericType,
631    T::Native: ArrowNativeTypeOp,
632{
633    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
634}
635
636fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
637    array: A,
638    cmp: F,
639    m: M,
640) -> Option<T::Native>
641where
642    T: ArrowNumericType,
643    F: Fn(&T::Native, &T::Native) -> bool,
644    M: Fn(&PrimitiveArray<T>) -> Option<T::Native>,
645{
646    match array.data_type() {
647        DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp),
648        _ => m(as_primitive_array(&array)),
649    }
650}
651
652macro_rules! bit_operation {
653    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
654        #[doc = $DOC]
655        ///
656        /// Returns `None` if the array is empty or only contains null values.
657        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
658        where
659            T: ArrowNumericType,
660            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
661        {
662            let default;
663            if $DEFAULT == -1 {
664                default = T::Native::ONE.neg_wrapping();
665            } else {
666                default = T::default_value();
667            }
668
669            let null_count = array.null_count();
670
671            if null_count == array.len() {
672                return None;
673            }
674
675            let data: &[T::Native] = array.values();
676
677            match array.nulls() {
678                None => {
679                    let result = data
680                        .iter()
681                        .fold(default, |accumulator, value| accumulator.$OP(*value));
682
683                    Some(result)
684                }
685                Some(nulls) => {
686                    let mut result = default;
687                    let data_chunks = data.chunks_exact(64);
688                    let remainder = data_chunks.remainder();
689
690                    let bit_chunks = nulls.inner().bit_chunks();
691                    data_chunks
692                        .zip(bit_chunks.iter())
693                        .for_each(|(chunk, mask)| {
694                            // index_mask has value 1 << i in the loop
695                            let mut index_mask = 1;
696                            chunk.iter().for_each(|value| {
697                                if (mask & index_mask) != 0 {
698                                    result = result.$OP(*value);
699                                }
700                                index_mask <<= 1;
701                            });
702                        });
703
704                    let remainder_bits = bit_chunks.remainder_bits();
705
706                    remainder.iter().enumerate().for_each(|(i, value)| {
707                        if remainder_bits & (1 << i) != 0 {
708                            result = result.$OP(*value);
709                        }
710                    });
711
712                    Some(result)
713                }
714            }
715        }
716    };
717}
718
719bit_operation!(
720    bit_and,
721    bitand,
722    BitAnd,
723    -1,
724    "Returns the bitwise and of all non-null input values."
725);
726bit_operation!(
727    bit_or,
728    bitor,
729    BitOr,
730    0,
731    "Returns the bitwise or of all non-null input values."
732);
733bit_operation!(
734    bit_xor,
735    bitxor,
736    BitXor,
737    0,
738    "Returns the bitwise xor of all non-null input values."
739);
740
741/// Returns true if all non-null input values are true, otherwise false.
742///
743/// Returns `None` if the array is empty or only contains null values.
744pub fn bool_and(array: &BooleanArray) -> Option<bool> {
745    min_boolean(array)
746}
747
748/// Returns true if any non-null input value is true, otherwise false.
749///
750/// Returns `None` if the array is empty or only contains null values.
751pub fn bool_or(array: &BooleanArray) -> Option<bool> {
752    max_boolean(array)
753}
754
755/// Returns the sum of values in the primitive array.
756///
757/// Returns `Ok(None)` if the array is empty or only contains null values.
758///
759/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
760/// use `sum` instead.
761pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
762where
763    T: ArrowNumericType,
764    T::Native: ArrowNativeTypeOp,
765{
766    let null_count = array.null_count();
767
768    if null_count == array.len() {
769        return Ok(None);
770    }
771
772    let data: &[T::Native] = array.values();
773
774    match array.nulls() {
775        None => {
776            let sum = data
777                .iter()
778                .try_fold(T::default_value(), |accumulator, value| {
779                    accumulator.add_checked(*value)
780                })?;
781
782            Ok(Some(sum))
783        }
784        Some(nulls) => {
785            let mut sum = T::default_value();
786
787            try_for_each_valid_idx(
788                nulls.len(),
789                nulls.offset(),
790                nulls.null_count(),
791                Some(nulls.validity()),
792                |idx| {
793                    unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
794                    Ok::<_, ArrowError>(())
795                },
796            )?;
797
798            Ok(Some(sum))
799        }
800    }
801}
802
803/// Returns the sum of values in the primitive array.
804///
805/// Returns `None` if the array is empty or only contains null values.
806///
807/// This doesn't detect overflow in release mode by default. Once overflowing, the result will
808/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
809pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
810where
811    T::Native: ArrowNativeTypeOp,
812{
813    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
814}
815
816/// Returns the minimum value in the array, according to the natural order.
817/// For floating point arrays any NaN values are considered to be greater than any other non-null value
818pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
819where
820    T::Native: PartialOrd,
821{
822    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
823}
824
825/// Returns the maximum value in the array, according to the natural order.
826/// For floating point arrays any NaN values are considered to be greater than any other non-null value
827pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
828where
829    T::Native: PartialOrd,
830{
831    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
832}
833
834#[cfg(test)]
835mod tests {
836    use super::*;
837    use arrow_array::types::*;
838    use builder::BooleanBuilder;
839    use std::sync::Arc;
840
841    #[test]
842    fn test_primitive_array_sum() {
843        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
844        assert_eq!(15, sum(&a).unwrap());
845    }
846
847    #[test]
848    fn test_primitive_array_float_sum() {
849        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
850        assert_eq!(16.5, sum(&a).unwrap());
851    }
852
853    #[test]
854    fn test_primitive_array_sum_with_nulls() {
855        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
856        assert_eq!(10, sum(&a).unwrap());
857    }
858
859    #[test]
860    fn test_primitive_array_sum_all_nulls() {
861        let a = Int32Array::from(vec![None, None, None]);
862        assert_eq!(None, sum(&a));
863    }
864
865    #[test]
866    fn test_primitive_array_sum_large_float_64() {
867        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), None);
868        assert_eq!(Some((1..=100).sum::<i64>() as f64), sum(&c));
869
870        // create an array that actually has non-zero values at the invalid indices
871        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
872        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), Some(validity));
873
874        assert_eq!(
875            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f64),
876            sum(&c)
877        );
878    }
879
880    #[test]
881    fn test_primitive_array_sum_large_float_32() {
882        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), None);
883        assert_eq!(Some((1..=100).sum::<i64>() as f32), sum(&c));
884
885        // create an array that actually has non-zero values at the invalid indices
886        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
887        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), Some(validity));
888
889        assert_eq!(
890            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f32),
891            sum(&c)
892        );
893    }
894
895    #[test]
896    fn test_primitive_array_sum_large_64() {
897        let c = Int64Array::new((1..=100).collect(), None);
898        assert_eq!(Some((1..=100).sum()), sum(&c));
899
900        // create an array that actually has non-zero values at the invalid indices
901        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
902        let c = Int64Array::new((1..=100).collect(), Some(validity));
903
904        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
905    }
906
907    #[test]
908    fn test_primitive_array_sum_large_32() {
909        let c = Int32Array::new((1..=100).collect(), None);
910        assert_eq!(Some((1..=100).sum()), sum(&c));
911
912        // create an array that actually has non-zero values at the invalid indices
913        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
914        let c = Int32Array::new((1..=100).collect(), Some(validity));
915        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
916    }
917
918    #[test]
919    fn test_primitive_array_sum_large_16() {
920        let c = Int16Array::new((1..=100).collect(), None);
921        assert_eq!(Some((1..=100).sum()), sum(&c));
922
923        // create an array that actually has non-zero values at the invalid indices
924        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
925        let c = Int16Array::new((1..=100).collect(), Some(validity));
926        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
927    }
928
929    #[test]
930    fn test_primitive_array_sum_large_8() {
931        let c = UInt8Array::new((1..=100).collect(), None);
932        assert_eq!(
933            Some((1..=100).fold(0_u8, |a, x| a.wrapping_add(x))),
934            sum(&c)
935        );
936
937        // create an array that actually has non-zero values at the invalid indices
938        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
939        let c = UInt8Array::new((1..=100).collect(), Some(validity));
940        assert_eq!(
941            Some(
942                (1..=100)
943                    .filter(|i| i % 3 == 0)
944                    .fold(0_u8, |a, x| a.wrapping_add(x))
945            ),
946            sum(&c)
947        );
948    }
949
950    #[test]
951    fn test_primitive_array_bit_and() {
952        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
953        assert_eq!(0, bit_and(&a).unwrap());
954    }
955
956    #[test]
957    fn test_primitive_array_bit_and_with_nulls() {
958        let a = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
959        assert_eq!(2, bit_and(&a).unwrap());
960    }
961
962    #[test]
963    fn test_primitive_array_bit_and_all_nulls() {
964        let a = Int32Array::from(vec![None, None, None]);
965        assert_eq!(None, bit_and(&a));
966    }
967
968    #[test]
969    fn test_primitive_array_bit_or() {
970        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
971        assert_eq!(7, bit_or(&a).unwrap());
972    }
973
974    #[test]
975    fn test_primitive_array_bit_or_with_nulls() {
976        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
977        assert_eq!(7, bit_or(&a).unwrap());
978    }
979
980    #[test]
981    fn test_primitive_array_bit_or_all_nulls() {
982        let a = Int32Array::from(vec![None, None, None]);
983        assert_eq!(None, bit_or(&a));
984    }
985
986    #[test]
987    fn test_primitive_array_bit_xor() {
988        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
989        assert_eq!(1, bit_xor(&a).unwrap());
990    }
991
992    #[test]
993    fn test_primitive_array_bit_xor_with_nulls() {
994        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
995        assert_eq!(4, bit_xor(&a).unwrap());
996    }
997
998    #[test]
999    fn test_primitive_array_bit_xor_all_nulls() {
1000        let a = Int32Array::from(vec![None, None, None]);
1001        assert_eq!(None, bit_xor(&a));
1002    }
1003
1004    #[test]
1005    fn test_primitive_array_bool_and() {
1006        let a = BooleanArray::from(vec![true, false, true, false, true]);
1007        assert!(!bool_and(&a).unwrap());
1008    }
1009
1010    #[test]
1011    fn test_primitive_array_bool_and_with_nulls() {
1012        let a = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
1013        assert!(bool_and(&a).unwrap());
1014    }
1015
1016    #[test]
1017    fn test_primitive_array_bool_and_all_nulls() {
1018        let a = BooleanArray::from(vec![None, None, None]);
1019        assert_eq!(None, bool_and(&a));
1020    }
1021
1022    #[test]
1023    fn test_primitive_array_bool_or() {
1024        let a = BooleanArray::from(vec![true, false, true, false, true]);
1025        assert!(bool_or(&a).unwrap());
1026    }
1027
1028    #[test]
1029    fn test_primitive_array_bool_or_with_nulls() {
1030        let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
1031        assert!(!bool_or(&a).unwrap());
1032    }
1033
1034    #[test]
1035    fn test_primitive_array_bool_or_all_nulls() {
1036        let a = BooleanArray::from(vec![None, None, None]);
1037        assert_eq!(None, bool_or(&a));
1038    }
1039
1040    #[test]
1041    fn test_primitive_array_min_max() {
1042        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
1043        assert_eq!(5, min(&a).unwrap());
1044        assert_eq!(9, max(&a).unwrap());
1045    }
1046
1047    #[test]
1048    fn test_primitive_array_min_max_with_nulls() {
1049        let a = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
1050        assert_eq!(5, min(&a).unwrap());
1051        assert_eq!(9, max(&a).unwrap());
1052    }
1053
1054    #[test]
1055    fn test_primitive_min_max_1() {
1056        let a = Int32Array::from(vec![None, None, Some(5), Some(2)]);
1057        assert_eq!(Some(2), min(&a));
1058        assert_eq!(Some(5), max(&a));
1059    }
1060
1061    #[test]
1062    fn test_primitive_min_max_float_large_nonnull_array() {
1063        let a: Float64Array = (0..256).map(|i| Some((i + 1) as f64)).collect();
1064        // min/max are on boundaries of chunked data
1065        assert_eq!(Some(1.0), min(&a));
1066        assert_eq!(Some(256.0), max(&a));
1067
1068        // max is last value in remainder after chunking
1069        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1070        assert_eq!(Some(255.0), max(&a));
1071
1072        // max is first value in remainder after chunking
1073        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1074        assert_eq!(Some(257.0), max(&a));
1075    }
1076
1077    #[test]
1078    fn test_primitive_min_max_float_large_nullable_array() {
1079        let a: Float64Array = (0..256)
1080            .map(|i| {
1081                if (i + 1) % 3 == 0 {
1082                    None
1083                } else {
1084                    Some((i + 1) as f64)
1085                }
1086            })
1087            .collect();
1088        // min/max are on boundaries of chunked data
1089        assert_eq!(Some(1.0), min(&a));
1090        assert_eq!(Some(256.0), max(&a));
1091
1092        let a: Float64Array = (0..256)
1093            .map(|i| {
1094                if i == 0 || i == 255 {
1095                    None
1096                } else {
1097                    Some((i + 1) as f64)
1098                }
1099            })
1100            .collect();
1101        // boundaries of chunked data are null
1102        assert_eq!(Some(2.0), min(&a));
1103        assert_eq!(Some(255.0), max(&a));
1104
1105        let a: Float64Array = (0..256)
1106            .map(|i| if i != 100 { None } else { Some((i) as f64) })
1107            .collect();
1108        // a single non-null value somewhere in the middle
1109        assert_eq!(Some(100.0), min(&a));
1110        assert_eq!(Some(100.0), max(&a));
1111
1112        // max is last value in remainder after chunking
1113        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1114        assert_eq!(Some(255.0), max(&a));
1115
1116        // max is first value in remainder after chunking
1117        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1118        assert_eq!(Some(257.0), max(&a));
1119    }
1120
1121    #[test]
1122    fn test_primitive_min_max_float_edge_cases() {
1123        let a: Float64Array = (0..100).map(|_| Some(f64::NEG_INFINITY)).collect();
1124        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1125        assert_eq!(Some(f64::NEG_INFINITY), max(&a));
1126
1127        let a: Float64Array = (0..100).map(|_| Some(f64::MIN)).collect();
1128        assert_eq!(Some(f64::MIN), min(&a));
1129        assert_eq!(Some(f64::MIN), max(&a));
1130
1131        let a: Float64Array = (0..100).map(|_| Some(f64::MAX)).collect();
1132        assert_eq!(Some(f64::MAX), min(&a));
1133        assert_eq!(Some(f64::MAX), max(&a));
1134
1135        let a: Float64Array = (0..100).map(|_| Some(f64::INFINITY)).collect();
1136        assert_eq!(Some(f64::INFINITY), min(&a));
1137        assert_eq!(Some(f64::INFINITY), max(&a));
1138    }
1139
1140    #[test]
1141    fn test_primitive_min_max_float_all_nans_non_null() {
1142        let a: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
1143        assert!(max(&a).unwrap().is_nan());
1144        assert!(min(&a).unwrap().is_nan());
1145    }
1146
1147    #[test]
1148    fn test_primitive_min_max_float_negative_nan() {
1149        let a: Float64Array =
1150            Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
1151        let max = max(&a).unwrap();
1152        let min = min(&a).unwrap();
1153        assert!(max.is_nan());
1154        assert!(max.is_sign_positive());
1155
1156        assert!(min.is_nan());
1157        assert!(min.is_sign_negative());
1158    }
1159
1160    #[test]
1161    fn test_primitive_min_max_float_first_nan_nonnull() {
1162        let a: Float64Array = (0..100)
1163            .map(|i| {
1164                if i == 0 {
1165                    Some(f64::NAN)
1166                } else {
1167                    Some(i as f64)
1168                }
1169            })
1170            .collect();
1171        assert_eq!(Some(1.0), min(&a));
1172        assert!(max(&a).unwrap().is_nan());
1173    }
1174
1175    #[test]
1176    fn test_primitive_min_max_float_last_nan_nonnull() {
1177        let a: Float64Array = (0..100)
1178            .map(|i| {
1179                if i == 99 {
1180                    Some(f64::NAN)
1181                } else {
1182                    Some((i + 1) as f64)
1183                }
1184            })
1185            .collect();
1186        assert_eq!(Some(1.0), min(&a));
1187        assert!(max(&a).unwrap().is_nan());
1188    }
1189
1190    #[test]
1191    fn test_primitive_min_max_float_first_nan_nullable() {
1192        let a: Float64Array = (0..100)
1193            .map(|i| {
1194                if i == 0 {
1195                    Some(f64::NAN)
1196                } else if i % 2 == 0 {
1197                    None
1198                } else {
1199                    Some(i as f64)
1200                }
1201            })
1202            .collect();
1203        assert_eq!(Some(1.0), min(&a));
1204        assert!(max(&a).unwrap().is_nan());
1205    }
1206
1207    #[test]
1208    fn test_primitive_min_max_float_last_nan_nullable() {
1209        let a: Float64Array = (0..100)
1210            .map(|i| {
1211                if i == 99 {
1212                    Some(f64::NAN)
1213                } else if i % 2 == 0 {
1214                    None
1215                } else {
1216                    Some(i as f64)
1217                }
1218            })
1219            .collect();
1220        assert_eq!(Some(1.0), min(&a));
1221        assert!(max(&a).unwrap().is_nan());
1222    }
1223
1224    #[test]
1225    fn test_primitive_min_max_float_inf_and_nans() {
1226        let a: Float64Array = (0..100)
1227            .map(|i| {
1228                let x = match i % 10 {
1229                    0 => f64::NEG_INFINITY,
1230                    1 => f64::MIN,
1231                    2 => f64::MAX,
1232                    4 => f64::INFINITY,
1233                    5 => f64::NAN,
1234                    _ => i as f64,
1235                };
1236                Some(x)
1237            })
1238            .collect();
1239        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1240        assert!(max(&a).unwrap().is_nan());
1241    }
1242
1243    macro_rules! test_binary {
1244        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1245            #[test]
1246            fn $NAME() {
1247                let binary = BinaryArray::from($ARRAY);
1248                assert_eq!($EXPECTED_MIN, min_binary(&binary));
1249                assert_eq!($EXPECTED_MAX, max_binary(&binary));
1250
1251                let large_binary = LargeBinaryArray::from($ARRAY);
1252                assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
1253                assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
1254
1255                let binary_view = BinaryViewArray::from($ARRAY);
1256                assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
1257                assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
1258            }
1259        };
1260    }
1261
1262    test_binary!(
1263        test_binary_min_max_with_nulls,
1264        vec![
1265            Some("b01234567890123".as_bytes()), // long bytes
1266            None,
1267            None,
1268            Some(b"a"),
1269            Some(b"c"),
1270            Some(b"abcdedfg0123456"),
1271        ],
1272        Some("a".as_bytes()),
1273        Some("c".as_bytes())
1274    );
1275
1276    test_binary!(
1277        test_binary_min_max_no_null,
1278        vec![
1279            Some("b".as_bytes()),
1280            Some(b"abcdefghijklmnopqrst"), // long bytes
1281            Some(b"c"),
1282            Some(b"b01234567890123"), // long bytes for view types
1283        ],
1284        Some("abcdefghijklmnopqrst".as_bytes()),
1285        Some("c".as_bytes())
1286    );
1287
1288    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
1289
1290    test_binary!(
1291        test_binary_min_max_1,
1292        vec![
1293            None,
1294            Some("b01234567890123435".as_bytes()), // long bytes for view types
1295            None,
1296            Some(b"b0123xxxxxxxxxxx"),
1297            Some(b"a")
1298        ],
1299        Some("a".as_bytes()),
1300        Some("b0123xxxxxxxxxxx".as_bytes())
1301    );
1302
1303    macro_rules! test_string {
1304        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1305            #[test]
1306            fn $NAME() {
1307                let string = StringArray::from($ARRAY);
1308                assert_eq!($EXPECTED_MIN, min_string(&string));
1309                assert_eq!($EXPECTED_MAX, max_string(&string));
1310
1311                let large_string = LargeStringArray::from($ARRAY);
1312                assert_eq!($EXPECTED_MIN, min_string(&large_string));
1313                assert_eq!($EXPECTED_MAX, max_string(&large_string));
1314
1315                let string_view = StringViewArray::from($ARRAY);
1316                assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
1317                assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
1318            }
1319        };
1320    }
1321
1322    test_string!(
1323        test_string_min_max_with_nulls,
1324        vec![
1325            Some("b012345678901234"), // long bytes for view types
1326            None,
1327            None,
1328            Some("a"),
1329            Some("c"),
1330            Some("b0123xxxxxxxxxxx")
1331        ],
1332        Some("a"),
1333        Some("c")
1334    );
1335
1336    test_string!(
1337        test_string_min_max_no_null,
1338        vec![
1339            Some("b"),
1340            Some("b012345678901234"), // long bytes for view types
1341            Some("a"),
1342            Some("b012xxxxxxxxxxxx")
1343        ],
1344        Some("a"),
1345        Some("b012xxxxxxxxxxxx")
1346    );
1347
1348    test_string!(
1349        test_string_min_max_all_nulls,
1350        Vec::<Option<&str>>::from_iter([None, None]),
1351        None,
1352        None
1353    );
1354
1355    test_string!(
1356        test_string_min_max_1,
1357        vec![
1358            None,
1359            Some("c12345678901234"), // long bytes for view types
1360            None,
1361            Some("b"),
1362            Some("c1234xxxxxxxxxx")
1363        ],
1364        Some("b"),
1365        Some("c1234xxxxxxxxxx")
1366    );
1367
1368    test_string!(
1369        test_string_min_max_empty,
1370        Vec::<Option<&str>>::new(),
1371        None,
1372        None
1373    );
1374
1375    #[test]
1376    fn test_boolean_min_max_empty() {
1377        let a = BooleanArray::from(vec![] as Vec<Option<bool>>);
1378        assert_eq!(None, min_boolean(&a));
1379        assert_eq!(None, max_boolean(&a));
1380    }
1381
1382    #[test]
1383    fn test_boolean_min_max_all_null() {
1384        let a = BooleanArray::from(vec![None, None]);
1385        assert_eq!(None, min_boolean(&a));
1386        assert_eq!(None, max_boolean(&a));
1387    }
1388
1389    #[test]
1390    fn test_boolean_min_max_no_null() {
1391        let a = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
1392        assert_eq!(Some(false), min_boolean(&a));
1393        assert_eq!(Some(true), max_boolean(&a));
1394    }
1395
1396    #[test]
1397    fn test_boolean_min_max() {
1398        let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), None]);
1399        assert_eq!(Some(false), min_boolean(&a));
1400        assert_eq!(Some(true), max_boolean(&a));
1401
1402        let a = BooleanArray::from(vec![None, Some(true), None, Some(false), None]);
1403        assert_eq!(Some(false), min_boolean(&a));
1404        assert_eq!(Some(true), max_boolean(&a));
1405
1406        let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
1407        assert_eq!(Some(false), min_boolean(&a));
1408        assert_eq!(Some(true), max_boolean(&a));
1409
1410        let a = BooleanArray::from(vec![Some(true), None]);
1411        assert_eq!(Some(true), min_boolean(&a));
1412        assert_eq!(Some(true), max_boolean(&a));
1413
1414        let a = BooleanArray::from(vec![Some(false), None]);
1415        assert_eq!(Some(false), min_boolean(&a));
1416        assert_eq!(Some(false), max_boolean(&a));
1417
1418        let a = BooleanArray::from(vec![Some(true)]);
1419        assert_eq!(Some(true), min_boolean(&a));
1420        assert_eq!(Some(true), max_boolean(&a));
1421
1422        let a = BooleanArray::from(vec![Some(false)]);
1423        assert_eq!(Some(false), min_boolean(&a));
1424        assert_eq!(Some(false), max_boolean(&a));
1425    }
1426
1427    #[test]
1428    fn test_boolean_min_max_smaller() {
1429        let a = BooleanArray::from(vec![Some(false)]);
1430        assert_eq!(Some(false), min_boolean(&a));
1431        assert_eq!(Some(false), max_boolean(&a));
1432
1433        let a = BooleanArray::from(vec![None, Some(false)]);
1434        assert_eq!(Some(false), min_boolean(&a));
1435        assert_eq!(Some(false), max_boolean(&a));
1436
1437        let a = BooleanArray::from(vec![None, Some(true)]);
1438        assert_eq!(Some(true), min_boolean(&a));
1439        assert_eq!(Some(true), max_boolean(&a));
1440
1441        let a = BooleanArray::from(vec![Some(true)]);
1442        assert_eq!(Some(true), min_boolean(&a));
1443        assert_eq!(Some(true), max_boolean(&a));
1444    }
1445
1446    #[test]
1447    fn test_boolean_min_max_64_true_64_false() {
1448        let mut no_nulls = BooleanBuilder::new();
1449        no_nulls.append_slice(&[true; 64]);
1450        no_nulls.append_slice(&[false; 64]);
1451        let no_nulls = no_nulls.finish();
1452
1453        assert_eq!(Some(false), min_boolean(&no_nulls));
1454        assert_eq!(Some(true), max_boolean(&no_nulls));
1455
1456        let mut with_nulls = BooleanBuilder::new();
1457        with_nulls.append_slice(&[true; 31]);
1458        with_nulls.append_null();
1459        with_nulls.append_slice(&[true; 32]);
1460        with_nulls.append_slice(&[false; 1]);
1461        with_nulls.append_nulls(63);
1462        let with_nulls = with_nulls.finish();
1463
1464        assert_eq!(Some(false), min_boolean(&with_nulls));
1465        assert_eq!(Some(true), max_boolean(&with_nulls));
1466    }
1467
1468    #[test]
1469    fn test_boolean_min_max_64_false_64_true() {
1470        let mut no_nulls = BooleanBuilder::new();
1471        no_nulls.append_slice(&[false; 64]);
1472        no_nulls.append_slice(&[true; 64]);
1473        let no_nulls = no_nulls.finish();
1474
1475        assert_eq!(Some(false), min_boolean(&no_nulls));
1476        assert_eq!(Some(true), max_boolean(&no_nulls));
1477
1478        let mut with_nulls = BooleanBuilder::new();
1479        with_nulls.append_slice(&[false; 31]);
1480        with_nulls.append_null();
1481        with_nulls.append_slice(&[false; 32]);
1482        with_nulls.append_slice(&[true; 1]);
1483        with_nulls.append_nulls(63);
1484        let with_nulls = with_nulls.finish();
1485
1486        assert_eq!(Some(false), min_boolean(&with_nulls));
1487        assert_eq!(Some(true), max_boolean(&with_nulls));
1488    }
1489
1490    #[test]
1491    fn test_boolean_min_max_96_true() {
1492        let mut no_nulls = BooleanBuilder::new();
1493        no_nulls.append_slice(&[true; 96]);
1494        let no_nulls = no_nulls.finish();
1495
1496        assert_eq!(Some(true), min_boolean(&no_nulls));
1497        assert_eq!(Some(true), max_boolean(&no_nulls));
1498
1499        let mut with_nulls = BooleanBuilder::new();
1500        with_nulls.append_slice(&[true; 31]);
1501        with_nulls.append_null();
1502        with_nulls.append_slice(&[true; 32]);
1503        with_nulls.append_slice(&[true; 31]);
1504        with_nulls.append_null();
1505        let with_nulls = with_nulls.finish();
1506
1507        assert_eq!(Some(true), min_boolean(&with_nulls));
1508        assert_eq!(Some(true), max_boolean(&with_nulls));
1509    }
1510
1511    #[test]
1512    fn test_boolean_min_max_96_false() {
1513        let mut no_nulls = BooleanBuilder::new();
1514        no_nulls.append_slice(&[false; 96]);
1515        let no_nulls = no_nulls.finish();
1516
1517        assert_eq!(Some(false), min_boolean(&no_nulls));
1518        assert_eq!(Some(false), max_boolean(&no_nulls));
1519
1520        let mut with_nulls = BooleanBuilder::new();
1521        with_nulls.append_slice(&[false; 31]);
1522        with_nulls.append_null();
1523        with_nulls.append_slice(&[false; 32]);
1524        with_nulls.append_slice(&[false; 31]);
1525        with_nulls.append_null();
1526        let with_nulls = with_nulls.finish();
1527
1528        assert_eq!(Some(false), min_boolean(&with_nulls));
1529        assert_eq!(Some(false), max_boolean(&with_nulls));
1530    }
1531
1532    #[test]
1533    fn test_sum_dyn() {
1534        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1535        let values = Arc::new(values) as ArrayRef;
1536        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1537
1538        let dict_array = DictionaryArray::new(keys, values.clone());
1539        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1540        assert_eq!(39, sum_array::<Int8Type, _>(array).unwrap());
1541
1542        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1543        assert_eq!(15, sum_array::<Int32Type, _>(&a).unwrap());
1544
1545        let keys = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
1546        let dict_array = DictionaryArray::new(keys, values.clone());
1547        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1548        assert_eq!(26, sum_array::<Int8Type, _>(array).unwrap());
1549
1550        let keys = Int8Array::from(vec![None, None, None]);
1551        let dict_array = DictionaryArray::new(keys, values.clone());
1552        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1553        assert!(sum_array::<Int8Type, _>(array).is_none());
1554    }
1555
1556    #[test]
1557    fn test_max_min_dyn() {
1558        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1559        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1560        let values = Arc::new(values) as ArrayRef;
1561
1562        let dict_array = DictionaryArray::new(keys, values.clone());
1563        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1564        assert_eq!(14, max_array::<Int8Type, _>(array).unwrap());
1565
1566        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1567        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1568
1569        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1570        assert_eq!(5, max_array::<Int32Type, _>(&a).unwrap());
1571        assert_eq!(1, min_array::<Int32Type, _>(&a).unwrap());
1572
1573        let keys = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
1574        let dict_array = DictionaryArray::new(keys, values.clone());
1575        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1576        assert_eq!(17, max_array::<Int8Type, _>(array).unwrap());
1577        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1578        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1579
1580        let keys = Int8Array::from(vec![None, None, None]);
1581        let dict_array = DictionaryArray::new(keys, values.clone());
1582        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1583        assert!(max_array::<Int8Type, _>(array).is_none());
1584        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1585        assert!(min_array::<Int8Type, _>(array).is_none());
1586    }
1587
1588    #[test]
1589    fn test_max_min_dyn_nan() {
1590        let values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
1591        let keys = Int8Array::from_iter_values([0_i8, 1, 2]);
1592
1593        let dict_array = DictionaryArray::new(keys, Arc::new(values));
1594        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1595        assert!(max_array::<Float32Type, _>(array).unwrap().is_nan());
1596
1597        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1598        assert_eq!(2.0_f32, min_array::<Float32Type, _>(array).unwrap());
1599    }
1600
1601    #[test]
1602    fn test_min_max_sliced_primitive() {
1603        let expected = Some(4.0);
1604        let input: Float64Array = vec![None, Some(4.0)].into_iter().collect();
1605        let actual = min(&input);
1606        assert_eq!(actual, expected);
1607        let actual = max(&input);
1608        assert_eq!(actual, expected);
1609
1610        let sliced_input: Float64Array = vec![None, None, None, None, None, Some(4.0)]
1611            .into_iter()
1612            .collect();
1613        let sliced_input = sliced_input.slice(4, 2);
1614
1615        assert_eq!(&sliced_input, &input);
1616
1617        let actual = min(&sliced_input);
1618        assert_eq!(actual, expected);
1619        let actual = max(&sliced_input);
1620        assert_eq!(actual, expected);
1621    }
1622
1623    #[test]
1624    fn test_min_max_sliced_boolean() {
1625        let expected = Some(true);
1626        let input: BooleanArray = vec![None, Some(true)].into_iter().collect();
1627        let actual = min_boolean(&input);
1628        assert_eq!(actual, expected);
1629        let actual = max_boolean(&input);
1630        assert_eq!(actual, expected);
1631
1632        let sliced_input: BooleanArray = vec![None, None, None, None, None, Some(true)]
1633            .into_iter()
1634            .collect();
1635        let sliced_input = sliced_input.slice(4, 2);
1636
1637        assert_eq!(sliced_input, input);
1638
1639        let actual = min_boolean(&sliced_input);
1640        assert_eq!(actual, expected);
1641        let actual = max_boolean(&sliced_input);
1642        assert_eq!(actual, expected);
1643    }
1644
1645    #[test]
1646    fn test_min_max_sliced_string() {
1647        let expected = Some("foo");
1648        let input: StringArray = vec![None, Some("foo")].into_iter().collect();
1649        let actual = min_string(&input);
1650        assert_eq!(actual, expected);
1651        let actual = max_string(&input);
1652        assert_eq!(actual, expected);
1653
1654        let sliced_input: StringArray = vec![None, None, None, None, None, Some("foo")]
1655            .into_iter()
1656            .collect();
1657        let sliced_input = sliced_input.slice(4, 2);
1658
1659        assert_eq!(&sliced_input, &input);
1660
1661        let actual = min_string(&sliced_input);
1662        assert_eq!(actual, expected);
1663        let actual = max_string(&sliced_input);
1664        assert_eq!(actual, expected);
1665    }
1666
1667    #[test]
1668    fn test_min_max_sliced_binary() {
1669        let expected: Option<&[u8]> = Some(&[5]);
1670        let input: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
1671        let actual = min_binary(&input);
1672        assert_eq!(actual, expected);
1673        let actual = max_binary(&input);
1674        assert_eq!(actual, expected);
1675
1676        let sliced_input: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
1677            .into_iter()
1678            .collect();
1679        let sliced_input = sliced_input.slice(4, 2);
1680
1681        assert_eq!(&sliced_input, &input);
1682
1683        let actual = min_binary(&sliced_input);
1684        assert_eq!(actual, expected);
1685        let actual = max_binary(&sliced_input);
1686        assert_eq!(actual, expected);
1687    }
1688
1689    #[test]
1690    fn test_sum_overflow() {
1691        let a = Int32Array::from(vec![i32::MAX, 1]);
1692
1693        assert_eq!(sum(&a).unwrap(), -2147483648);
1694        assert_eq!(sum_array::<Int32Type, _>(&a).unwrap(), -2147483648);
1695    }
1696
1697    #[test]
1698    fn test_sum_checked_overflow() {
1699        let a = Int32Array::from(vec![i32::MAX, 1]);
1700
1701        sum_checked(&a).expect_err("overflow should be detected");
1702        sum_array_checked::<Int32Type, _>(&a).expect_err("overflow should be detected");
1703    }
1704}
arrow_arith/aggregate.rs

arrow_arith/
aggregate.rs