arrow_select/
nullif.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Implements the `nullif` function for Arrow arrays.
19
20use arrow_array::{make_array, Array, ArrayRef, BooleanArray};
21use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper};
22use arrow_buffer::{BooleanBuffer, NullBuffer};
23use arrow_schema::{ArrowError, DataType};
24
25/// Returns a new array with the same values and the validity bit to false where
26/// the corresponding element of`right` is true.
27///
28/// This can be used to implement SQL `NULLIF`
29///
30/// # Example
31/// ```
32/// # use arrow_array::{Int32Array, BooleanArray};
33/// # use arrow_array::cast::AsArray;
34/// # use arrow_array::types::Int32Type;
35/// # use arrow_select::nullif::nullif;
36/// // input is [null, 8, 1, 9]
37/// let a = Int32Array::from(vec![None, Some(8), Some(1), Some(9)]);
38/// // use nullif to set index 1 to null
39/// let bool_array = BooleanArray::from(vec![Some(false), Some(true), Some(false), None]);
40/// let nulled = nullif(&a, &bool_array).unwrap();
41/// // The resulting array is [null, null, 1, 9]
42/// assert_eq!(nulled.as_primitive(), &Int32Array::from(vec![None, None, Some(1), Some(9)]));
43/// ```
44pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowError> {
45    let left_data = left.to_data();
46
47    if left_data.len() != right.len() {
48        return Err(ArrowError::ComputeError(
49            "Cannot perform comparison operation on arrays of different length".to_string(),
50        ));
51    }
52    let len = left_data.len();
53
54    if len == 0 || left_data.data_type() == &DataType::Null {
55        return Ok(make_array(left_data));
56    }
57
58    // left=0 (null)   right=null       output bitmap=null
59    // left=0          right=1          output bitmap=null
60    // left=1 (set)    right=null       output bitmap=set   (passthrough)
61    // left=1          right=1 & comp=true    output bitmap=null
62    // left=1          right=1 & comp=false   output bitmap=set
63    //
64    // Thus: result = left null bitmap & (!right_values | !right_bitmap)
65    //              OR left null bitmap & !(right_values & right_bitmap)
66
67    // Compute right_values & right_bitmap
68    let right = match right.nulls() {
69        Some(nulls) => right.values() & nulls.inner(),
70        None => right.values().clone(),
71    };
72
73    // Compute left null bitmap & !right
74
75    let (combined, null_count) = match left_data.nulls() {
76        Some(left) => {
77            let mut valid_count = 0;
78            let b = bitwise_bin_op_helper(
79                left.buffer(),
80                left.offset(),
81                right.inner(),
82                right.offset(),
83                len,
84                |l, r| {
85                    let t = l & !r;
86                    valid_count += t.count_ones() as usize;
87                    t
88                },
89            );
90            (b, len - valid_count)
91        }
92        None => {
93            let mut null_count = 0;
94            let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| {
95                let t = !b;
96                null_count += t.count_zeros() as usize;
97                t
98            });
99            (buffer, null_count)
100        }
101    };
102
103    let combined = BooleanBuffer::new(combined, 0, len);
104    // Safety:
105    // Counted nulls whilst computing
106    let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };
107    let data = left_data.into_builder().nulls(Some(nulls));
108
109    // SAFETY:
110    // Only altered null mask
111    Ok(make_array(unsafe { data.build_unchecked() }))
112}
113
114#[cfg(test)]
115mod tests {
116    use super::*;
117    use arrow_array::builder::{BooleanBuilder, Int32Builder, StructBuilder};
118    use arrow_array::cast::AsArray;
119    use arrow_array::types::Int32Type;
120    use arrow_array::{Int32Array, NullArray, StringArray, StructArray};
121    use arrow_data::ArrayData;
122    use arrow_schema::{Field, Fields};
123    use rand::{thread_rng, Rng};
124
125    #[test]
126    fn test_nullif_int_array() {
127        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
128        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
129        let res = nullif(&a, &comp).unwrap();
130
131        let expected = Int32Array::from(vec![
132            Some(15),
133            None,
134            None, // comp true, slot 2 turned into null
135            Some(1),
136            // Even though comp array / right is null, should still pass through original value
137            // comp true, slot 2 turned into null
138            Some(9),
139        ]);
140
141        let res = res.as_primitive::<Int32Type>();
142        assert_eq!(&expected, res);
143    }
144
145    #[test]
146    fn test_nullif_null_array() {
147        assert_eq!(
148            nullif(&NullArray::new(0), &BooleanArray::new_null(0))
149                .unwrap()
150                .as_ref(),
151            &NullArray::new(0)
152        );
153
154        assert_eq!(
155            nullif(
156                &NullArray::new(3),
157                &BooleanArray::from(vec![Some(false), Some(true), None]),
158            )
159            .unwrap()
160            .as_ref(),
161            &NullArray::new(3)
162        );
163    }
164
165    #[test]
166    fn test_nullif_int_array_offset() {
167        let a = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]);
168        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
169        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
170        let comp = BooleanArray::from(vec![
171            Some(false),
172            Some(false),
173            Some(false),
174            None,
175            Some(true),
176            Some(false),
177            None,
178        ]);
179        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
180        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
181        let res = nullif(a, comp).unwrap();
182
183        let expected = Int32Array::from(vec![
184            Some(15), // False => keep it
185            Some(8),  // None => keep it
186            None,     // true => None
187        ]);
188        let res = res.as_primitive::<Int32Type>();
189        assert_eq!(&expected, res)
190    }
191
192    #[test]
193    fn test_nullif_string() {
194        let s = StringArray::from_iter([
195            Some("hello"),
196            None,
197            Some("world"),
198            Some("a"),
199            Some("b"),
200            None,
201            None,
202        ]);
203        let select = BooleanArray::from_iter([
204            Some(true),
205            Some(true),
206            Some(false),
207            Some(true),
208            Some(false),
209            Some(false),
210            None,
211        ]);
212
213        let a = nullif(&s, &select).unwrap();
214        let r: Vec<_> = a.as_string::<i32>().iter().collect();
215        assert_eq!(
216            r,
217            vec![None, None, Some("world"), None, Some("b"), None, None]
218        );
219
220        let s = s.slice(2, 3);
221        let select = select.slice(1, 3);
222        let a = nullif(&s, &select).unwrap();
223        let r: Vec<_> = a.as_string::<i32>().iter().collect();
224        assert_eq!(r, vec![None, Some("a"), None]);
225    }
226
227    #[test]
228    fn test_nullif_int_large_left_offset() {
229        let a = Int32Array::from(vec![
230            Some(-1), // 0
231            Some(-1),
232            Some(-1),
233            Some(-1),
234            Some(-1),
235            Some(-1),
236            Some(-1),
237            Some(-1),
238            Some(-1), // 8
239            Some(-1),
240            Some(-1),
241            Some(-1),
242            Some(-1),
243            Some(-1),
244            Some(-1),
245            Some(-1),
246            None,     // 16
247            Some(15), // 17
248            Some(8),
249            Some(1),
250            Some(9),
251        ]);
252        let a = a.slice(17, 3); // Some(15), Some(8), Some(1)
253
254        let comp = BooleanArray::from(vec![
255            Some(false),
256            Some(false),
257            Some(false),
258            None,
259            Some(true),
260            Some(false),
261            None,
262        ]);
263        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
264        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
265        let res = nullif(&a, comp).unwrap();
266        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
267
268        let expected = Int32Array::from(vec![
269            Some(15), // False => keep it
270            Some(8),  // None => keep it
271            None,     // true => None
272        ]);
273        assert_eq!(&expected, res)
274    }
275
276    #[test]
277    fn test_nullif_int_large_right_offset() {
278        let a = Int32Array::from(vec![
279            None,     // 0
280            Some(15), // 1
281            Some(8),
282            Some(1),
283            Some(9),
284        ]);
285        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
286
287        let comp = BooleanArray::from(vec![
288            Some(false), // 0
289            Some(false),
290            Some(false),
291            Some(false),
292            Some(false),
293            Some(false),
294            Some(false),
295            Some(false),
296            Some(false), // 8
297            Some(false),
298            Some(false),
299            Some(false),
300            Some(false),
301            Some(false),
302            Some(false),
303            Some(false),
304            Some(false), // 16
305            Some(false), // 17
306            Some(false), // 18
307            None,
308            Some(true),
309            Some(false),
310            None,
311        ]);
312        let comp = comp.slice(18, 3); // Some(false), None, Some(true)
313        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
314        let res = nullif(&a, comp).unwrap();
315        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
316
317        let expected = Int32Array::from(vec![
318            Some(15), // False => keep it
319            Some(8),  // None => keep it
320            None,     // true => None
321        ]);
322        assert_eq!(&expected, res)
323    }
324
325    #[test]
326    fn test_nullif_boolean_offset() {
327        let a = BooleanArray::from(vec![
328            None,       // 0
329            Some(true), // 1
330            Some(false),
331            Some(true),
332            Some(true),
333        ]);
334        let a = a.slice(1, 3); // Some(true), Some(false), Some(true)
335
336        let comp = BooleanArray::from(vec![
337            Some(false), // 0
338            Some(false), // 1
339            Some(false), // 2
340            None,
341            Some(true),
342            Some(false),
343            None,
344        ]);
345        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
346        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
347        let res = nullif(&a, comp).unwrap();
348        let res = res.as_any().downcast_ref::<BooleanArray>().unwrap();
349
350        let expected = BooleanArray::from(vec![
351            Some(true),  // False => keep it
352            Some(false), // None => keep it
353            None,        // true => None
354        ]);
355        assert_eq!(&expected, res)
356    }
357
358    struct Foo {
359        a: Option<i32>,
360        b: Option<bool>,
361        /// Whether the entry should be valid.
362        is_valid: bool,
363    }
364
365    impl Foo {
366        fn new_valid(a: i32, b: bool) -> Foo {
367            Self {
368                a: Some(a),
369                b: Some(b),
370                is_valid: true,
371            }
372        }
373
374        fn new_null() -> Foo {
375            Self {
376                a: None,
377                b: None,
378                is_valid: false,
379            }
380        }
381    }
382
383    /// Struct Array equality is a bit weird -- we need to have the *child values*
384    /// correct even if the enclosing struct indicates it is null. But we
385    /// also need the top level is_valid bits to be correct.
386    fn create_foo_struct(values: Vec<Foo>) -> StructArray {
387        let mut struct_array = StructBuilder::new(
388            Fields::from(vec![
389                Field::new("a", DataType::Int32, true),
390                Field::new("b", DataType::Boolean, true),
391            ]),
392            vec![
393                Box::new(Int32Builder::with_capacity(values.len())),
394                Box::new(BooleanBuilder::with_capacity(values.len())),
395            ],
396        );
397
398        for value in values {
399            struct_array
400                .field_builder::<Int32Builder>(0)
401                .unwrap()
402                .append_option(value.a);
403            struct_array
404                .field_builder::<BooleanBuilder>(1)
405                .unwrap()
406                .append_option(value.b);
407            struct_array.append(value.is_valid);
408        }
409
410        struct_array.finish()
411    }
412
413    #[test]
414    fn test_nullif_struct_slices() {
415        let struct_array = create_foo_struct(vec![
416            Foo::new_valid(7, true),
417            Foo::new_valid(15, false),
418            Foo::new_valid(8, true),
419            Foo::new_valid(12, false),
420            Foo::new_null(),
421            Foo::new_null(),
422            Foo::new_valid(42, true),
423        ]);
424
425        // Some({a: 15, b: false}), Some({a: 8, b: true}), Some({a: 12, b: false}),
426        // None, None
427        let struct_array = struct_array.slice(1, 5);
428        let comp = BooleanArray::from(vec![
429            Some(false), // 0
430            Some(false), // 1
431            Some(false), // 2
432            None,
433            Some(true),
434            Some(false),
435            None,
436        ]);
437        let comp = comp.slice(2, 5); // Some(false), None, Some(true), Some(false), None
438        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
439        let res = nullif(&struct_array, comp).unwrap();
440        let res = res.as_any().downcast_ref::<StructArray>().unwrap();
441
442        let expected = create_foo_struct(vec![
443            // Some(false) -> keep
444            Foo::new_valid(15, false),
445            // None -> keep
446            Foo::new_valid(8, true),
447            // Some(true) -> null out. But child values are still there.
448            Foo {
449                a: Some(12),
450                b: Some(false),
451                is_valid: false,
452            },
453            // Some(false) -> keep, but was null
454            Foo::new_null(),
455            // None -> keep, but was null
456            Foo::new_null(),
457        ]);
458
459        assert_eq!(&expected, res);
460    }
461
462    #[test]
463    fn test_nullif_no_nulls() {
464        let a = Int32Array::from(vec![Some(15), Some(7), Some(8), Some(1), Some(9)]);
465        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
466        let res = nullif(&a, &comp).unwrap();
467        let res = res.as_primitive::<Int32Type>();
468
469        let expected = Int32Array::from(vec![Some(15), Some(7), None, Some(1), Some(9)]);
470        assert_eq!(res, &expected);
471    }
472
473    #[test]
474    fn nullif_empty() {
475        let a = Int32Array::from(ArrayData::new_empty(&DataType::Int32));
476        let mask = BooleanArray::from(ArrayData::new_empty(&DataType::Boolean));
477        let res = nullif(&a, &mask).unwrap();
478        assert_eq!(res.as_ref(), &a);
479    }
480
481    fn test_nullif(values: &Int32Array, filter: &BooleanArray) {
482        let expected: Int32Array = values
483            .iter()
484            .zip(filter.iter())
485            .map(|(a, b)| match b {
486                Some(true) => None,
487                Some(false) | None => a,
488            })
489            .collect();
490
491        let r = nullif(values, filter).unwrap();
492        let r_data = r.to_data();
493        r_data.validate().unwrap();
494
495        assert_eq!(r.as_ref(), &expected);
496    }
497
498    #[test]
499    fn nullif_fuzz() {
500        let mut rng = thread_rng();
501
502        let arrays = [
503            Int32Array::from(vec![0; 128]),
504            (0..128).map(|_| rng.gen_bool(0.5).then_some(0)).collect(),
505        ];
506
507        for a in arrays {
508            let a_slices = [(0, 128), (64, 64), (0, 64), (32, 32), (0, 0), (32, 0)];
509
510            for (a_offset, a_length) in a_slices {
511                let a = a.slice(a_offset, a_length);
512
513                for i in 1..65 {
514                    let b_start_offset = rng.gen_range(0..i);
515                    let b_end_offset = rng.gen_range(0..i);
516
517                    let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset)
518                        .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
519                        .collect();
520                    let b = b.slice(b_start_offset, a_length);
521
522                    test_nullif(&a, &b);
523                }
524            }
525        }
526    }
527}