datafusion_physical_expr/expressions/
is_not_null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! IS NOT NULL expression
19
20use std::hash::Hash;
21use std::{any::Any, sync::Arc};
22
23use crate::PhysicalExpr;
24use arrow::{
25    datatypes::{DataType, Schema},
26    record_batch::RecordBatch,
27};
28use datafusion_common::Result;
29use datafusion_common::ScalarValue;
30use datafusion_expr::ColumnarValue;
31
32/// IS NOT NULL expression
33#[derive(Debug, Eq)]
34pub struct IsNotNullExpr {
35    /// The input expression
36    arg: Arc<dyn PhysicalExpr>,
37}
38
39// Manually derive PartialEq and Hash to work around https://github.com/rust-lang/rust/issues/78808
40impl PartialEq for IsNotNullExpr {
41    fn eq(&self, other: &Self) -> bool {
42        self.arg.eq(&other.arg)
43    }
44}
45
46impl Hash for IsNotNullExpr {
47    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
48        self.arg.hash(state);
49    }
50}
51
52impl IsNotNullExpr {
53    /// Create new not expression
54    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
55        Self { arg }
56    }
57
58    /// Get the input expression
59    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
60        &self.arg
61    }
62}
63
64impl std::fmt::Display for IsNotNullExpr {
65    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
66        write!(f, "{} IS NOT NULL", self.arg)
67    }
68}
69
70impl PhysicalExpr for IsNotNullExpr {
71    /// Return a reference to Any that can be used for downcasting
72    fn as_any(&self) -> &dyn Any {
73        self
74    }
75
76    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
77        Ok(DataType::Boolean)
78    }
79
80    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
81        Ok(false)
82    }
83
84    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
85        let arg = self.arg.evaluate(batch)?;
86        match arg {
87            ColumnarValue::Array(array) => {
88                let is_not_null = arrow::compute::is_not_null(&array)?;
89                Ok(ColumnarValue::Array(Arc::new(is_not_null)))
90            }
91            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
92                ScalarValue::Boolean(Some(!scalar.is_null())),
93            )),
94        }
95    }
96
97    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
98        vec![&self.arg]
99    }
100
101    fn with_new_children(
102        self: Arc<Self>,
103        children: Vec<Arc<dyn PhysicalExpr>>,
104    ) -> Result<Arc<dyn PhysicalExpr>> {
105        Ok(Arc::new(IsNotNullExpr::new(Arc::clone(&children[0]))))
106    }
107
108    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        self.arg.fmt_sql(f)?;
110        write!(f, " IS NOT NULL")
111    }
112}
113
114/// Create an IS NOT NULL expression
115pub fn is_not_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
116    Ok(Arc::new(IsNotNullExpr::new(arg)))
117}
118
#[cfg(test)]
mod tests {
    use super::*;
    use crate::expressions::col;
    use arrow::array::{
        Array, BooleanArray, Float64Array, Int32Array, StringArray, UnionArray,
    };
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::*;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// Union members shared by the union tests: nullable Int32 "A" with
    /// type id 0 and nullable Float64 "B" with type id 1.
    fn sample_union_fields() -> UnionFields {
        [
            (0, Arc::new(Field::new("A", DataType::Int32, true))),
            (1, Arc::new(Field::new("B", DataType::Float64, true))),
        ]
        .into_iter()
        .collect()
    }

    /// Nullable sparse-union column named "my_union" over `fields`.
    fn sample_union_column(fields: UnionFields) -> Field {
        Field::new("my_union", DataType::Union(fields, UnionMode::Sparse), true)
    }

    #[test]
    fn is_not_null_op() -> Result<()> {
        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
        let values = StringArray::from(vec![Some("foo"), None]);
        let expr = is_not_null(col("a", &schema)?).unwrap();
        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values)])?;

        // expression: "a is not null"
        let evaluated = expr.evaluate(&batch)?;
        let array = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual =
            as_boolean_array(&array).expect("failed to downcast to BooleanArray");

        let expected = &BooleanArray::from(vec![true, false]);
        assert_eq!(expected, actual);

        Ok(())
    }

    #[test]
    fn union_is_not_null_op() {
        // union of [{A=1}, {A=}, {B=1.1}, {B=1.2}, {B=}]
        let ints = Int32Array::from(vec![Some(1), None, None, None, None]);
        let floats = Float64Array::from(vec![None, None, Some(1.1), Some(1.2), None]);
        let type_ids = [0, 0, 1, 1, 1].into_iter().collect::<ScalarBuffer<i8>>();
        let children = vec![Arc::new(ints) as Arc<dyn Array>, Arc::new(floats)];

        let union_fields = sample_union_fields();
        let array =
            UnionArray::try_new(union_fields.clone(), type_ids, None, children).unwrap();

        let schema = Schema::new(vec![sample_union_column(union_fields)]);
        let expr = is_not_null(col("my_union", &schema).unwrap()).unwrap();
        let batch =
            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap();

        // expression: "my_union is not null"
        let evaluated = expr.evaluate(&batch).unwrap();
        let result = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual = as_boolean_array(&result).unwrap();

        let expected = &BooleanArray::from(vec![true, false, true, true, false]);
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = Schema::new(vec![sample_union_column(sample_union_fields())]);
        let expr = is_not_null(col("my_union", &schema).unwrap()).unwrap();

        // `Display` includes the column index; the SQL form does not.
        let display_string = expr.to_string();
        assert_eq!(display_string, "my_union@0 IS NOT NULL");

        let sql_string = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(sql_string, "my_union IS NOT NULL");

        Ok(())
    }
}