datafusion_functions/core/
nullif.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::DataType;
19use datafusion_expr::{ColumnarValue, Documentation, ScalarFunctionArgs};
20
21use arrow::compute::kernels::cmp::eq;
22use arrow::compute::kernels::nullif::nullif;
23use datafusion_common::{utils::take_function_args, Result, ScalarValue};
24use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
25use datafusion_macros::user_doc;
26use std::any::Any;
27
28#[user_doc(
29    doc_section(label = "Conditional Functions"),
30    description = "Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.
31This can be used to perform the inverse operation of [`coalesce`](#coalesce).",
32    syntax_example = "nullif(expression1, expression2)",
33    sql_example = r#"```sql
34> select nullif('datafusion', 'data');
35+-----------------------------------------+
36| nullif(Utf8("datafusion"),Utf8("data")) |
37+-----------------------------------------+
38| datafusion                              |
39+-----------------------------------------+
40> select nullif('datafusion', 'datafusion');
41+-----------------------------------------------+
42| nullif(Utf8("datafusion"),Utf8("datafusion")) |
43+-----------------------------------------------+
44|                                               |
45+-----------------------------------------------+
46```"#,
47    argument(
48        name = "expression1",
49        description = "Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators."
50    ),
51    argument(
52        name = "expression2",
53        description = "Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators."
54    )
55)]
56#[derive(Debug)]
57pub struct NullIfFunc {
58    signature: Signature,
59}
60
61impl Default for NullIfFunc {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl NullIfFunc {
68    pub fn new() -> Self {
69        Self {
70            // Documentation mentioned in Postgres,
71            // The result has the same type as the first argument — but there is a subtlety.
72            // What is actually returned is the first argument of the implied = operator,
73            // and in some cases that will have been promoted to match the second argument's type.
74            // For example, NULLIF(1, 2.2) yields numeric, because there is no integer = numeric operator, only numeric = numeric
75            //
76            // We don't strictly follow Postgres or DuckDB for **simplicity**.
77            // In this function, we will coerce arguments to the same data type for comparison need. Unlike DuckDB
78            // we don't return the **original** first argument type but return the final coerced type.
79            //
80            // In Postgres, nullif('2', 2) returns Null but nullif('2::varchar', 2) returns error.
81            // While in DuckDB both query returns Null. We follow DuckDB in this case since I think they are equivalent thing and should
82            // have the same result as well.
83            signature: Signature::comparable(2, Volatility::Immutable),
84        }
85    }
86}
87
88impl ScalarUDFImpl for NullIfFunc {
89    fn as_any(&self) -> &dyn Any {
90        self
91    }
92    fn name(&self) -> &str {
93        "nullif"
94    }
95
96    fn signature(&self) -> &Signature {
97        &self.signature
98    }
99
100    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
101        Ok(arg_types[0].to_owned())
102    }
103
104    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
105        nullif_func(&args.args)
106    }
107
108    fn documentation(&self) -> Option<&Documentation> {
109        self.doc()
110    }
111}
112
113/// Implements NULLIF(expr1, expr2)
114/// Args: 0 - left expr is any array
115///       1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed.
116///
117fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
118    let [lhs, rhs] = take_function_args("nullif", args)?;
119
120    match (lhs, rhs) {
121        (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
122            let rhs = rhs.to_scalar()?;
123            let array = nullif(lhs, &eq(&lhs, &rhs)?)?;
124
125            Ok(ColumnarValue::Array(array))
126        }
127        (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => {
128            let array = nullif(lhs, &eq(&lhs, &rhs)?)?;
129            Ok(ColumnarValue::Array(array))
130        }
131        (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
132            let lhs_s = lhs.to_scalar()?;
133            let lhs_a = lhs.to_array_of_size(rhs.len())?;
134            let array = nullif(
135                // nullif in arrow-select does not support Datum, so we need to convert to array
136                lhs_a.as_ref(),
137                &eq(&lhs_s, &rhs)?,
138            )?;
139            Ok(ColumnarValue::Array(array))
140        }
141        (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
142            let val: ScalarValue = match lhs.eq(rhs) {
143                true => lhs.data_type().try_into()?,
144                false => lhs.clone(),
145            };
146
147            Ok(ColumnarValue::Scalar(val))
148        }
149    }
150}
151
152#[cfg(test)]
153mod tests {
154    use std::sync::Arc;
155
156    use arrow::array::*;
157
158    use super::*;
159
160    #[test]
161    fn nullif_int32() -> Result<()> {
162        let a = Int32Array::from(vec![
163            Some(1),
164            Some(2),
165            None,
166            None,
167            Some(3),
168            None,
169            None,
170            Some(4),
171            Some(5),
172        ]);
173        let a = ColumnarValue::Array(Arc::new(a));
174
175        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
176
177        let result = nullif_func(&[a, lit_array])?;
178        let result = result.into_array(0).expect("Failed to convert to array");
179
180        let expected = Arc::new(Int32Array::from(vec![
181            Some(1),
182            None,
183            None,
184            None,
185            Some(3),
186            None,
187            None,
188            Some(4),
189            Some(5),
190        ])) as ArrayRef;
191        assert_eq!(expected.as_ref(), result.as_ref());
192        Ok(())
193    }
194
195    #[test]
196    // Ensure that arrays with no nulls can also invoke NULLIF() correctly
197    fn nullif_int32_non_nulls() -> Result<()> {
198        let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]);
199        let a = ColumnarValue::Array(Arc::new(a));
200
201        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
202
203        let result = nullif_func(&[a, lit_array])?;
204        let result = result.into_array(0).expect("Failed to convert to array");
205
206        let expected = Arc::new(Int32Array::from(vec![
207            None,
208            Some(3),
209            Some(10),
210            Some(7),
211            Some(8),
212            None,
213            Some(2),
214            Some(4),
215            Some(5),
216        ])) as ArrayRef;
217        assert_eq!(expected.as_ref(), result.as_ref());
218        Ok(())
219    }
220
221    #[test]
222    fn nullif_boolean() -> Result<()> {
223        let a = BooleanArray::from(vec![Some(true), Some(false), None]);
224        let a = ColumnarValue::Array(Arc::new(a));
225
226        let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)));
227
228        let result = nullif_func(&[a, lit_array])?;
229        let result = result.into_array(0).expect("Failed to convert to array");
230
231        let expected =
232            Arc::new(BooleanArray::from(vec![Some(true), None, None])) as ArrayRef;
233
234        assert_eq!(expected.as_ref(), result.as_ref());
235        Ok(())
236    }
237
238    #[test]
239    fn nullif_string() -> Result<()> {
240        let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
241        let a = ColumnarValue::Array(Arc::new(a));
242
243        let lit_array = ColumnarValue::Scalar(ScalarValue::from("bar"));
244
245        let result = nullif_func(&[a, lit_array])?;
246        let result = result.into_array(0).expect("Failed to convert to array");
247
248        let expected = Arc::new(StringArray::from(vec![
249            Some("foo"),
250            None,
251            None,
252            Some("baz"),
253        ])) as ArrayRef;
254
255        assert_eq!(expected.as_ref(), result.as_ref());
256        Ok(())
257    }
258
259    #[test]
260    fn nullif_literal_first() -> Result<()> {
261        let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]);
262        let a = ColumnarValue::Array(Arc::new(a));
263
264        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
265
266        let result = nullif_func(&[lit_array, a])?;
267        let result = result.into_array(0).expect("Failed to convert to array");
268
269        let expected = Arc::new(Int32Array::from(vec![
270            Some(2),
271            None,
272            Some(2),
273            Some(2),
274            Some(2),
275            Some(2),
276        ])) as ArrayRef;
277        assert_eq!(expected.as_ref(), result.as_ref());
278        Ok(())
279    }
280
281    #[test]
282    fn nullif_scalar() -> Result<()> {
283        let a_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
284        let b_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
285
286        let result_eq = nullif_func(&[a_eq, b_eq])?;
287        let result_eq = result_eq.into_array(1).expect("Failed to convert to array");
288
289        let expected_eq = Arc::new(Int32Array::from(vec![None])) as ArrayRef;
290
291        assert_eq!(expected_eq.as_ref(), result_eq.as_ref());
292
293        let a_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
294        let b_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
295
296        let result_neq = nullif_func(&[a_neq, b_neq])?;
297        let result_neq = result_neq
298            .into_array(1)
299            .expect("Failed to convert to array");
300
301        let expected_neq = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef;
302        assert_eq!(expected_neq.as_ref(), result_neq.as_ref());
303
304        Ok(())
305    }
306}