// datafusion_functions/core/coalesce.rs
use arrow::array::{new_null_array, BooleanArray};
use arrow::compute::kernels::zip::zip;
use arrow::compute::{and, is_not_null, is_null};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, ExprSchema, Result};
use datafusion_expr::binary::try_type_union_resolution;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use itertools::Itertools;
use std::any::Any;
use std::sync::OnceLock;
/// Scalar function object for `coalesce`.
///
/// The only state it carries is the [`Signature`] that its `ScalarUDFImpl`
/// implementation hands out from `signature()`.
#[derive(Debug)]
pub struct CoalesceFunc {
    /// Signature reported for this function; set once in `CoalesceFunc::new`.
    signature: Signature,
}
impl Default for CoalesceFunc {
fn default() -> Self {
CoalesceFunc::new()
}
}
impl CoalesceFunc {
    /// Builds a `coalesce` function object whose signature is user-defined
    /// (argument coercion is done by `coerce_types`) and immutable in
    /// volatility.
    pub fn new() -> Self {
        let signature = Signature::user_defined(Volatility::Immutable);
        Self { signature }
    }
}
impl ScalarUDFImpl for CoalesceFunc {
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Name under which the function is exposed: `"coalesce"`.
    fn name(&self) -> &str {
        "coalesce"
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Picks the result type from the argument types.
    ///
    /// Returns the first argument type that is not `DataType::Null`; when
    /// every argument is `Null`, the first argument's type is returned.
    ///
    /// # Errors
    /// Returns an execution error when `arg_types` is empty instead of
    /// panicking.
    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
        let Some(data_type) = arg_types.iter().find_or_first(|d| !d.is_null()) else {
            return exec_err!("coalesce must have at least one argument");
        };
        Ok(data_type.clone())
    }

    /// The result is reported as nullable only when *every* argument is
    /// nullable: a single non-nullable argument guarantees a non-null result.
    ///
    /// An argument whose nullability cannot be determined (the lookup
    /// returned an error) is conservatively treated as nullable.
    fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
        args.iter().all(|e| e.nullable(schema).unwrap_or(true))
    }

    /// Evaluates `coalesce` over `args`.
    ///
    /// * If at least one argument is an array, the result is an array with the
    ///   length of the first array argument. Each row takes the value of the
    ///   first argument that is non-null at that row.
    /// * If all arguments are scalars, the first non-null scalar is returned,
    ///   or the first argument when all of them are null.
    ///
    /// # Errors
    /// Returns an execution error when `args` is empty, and propagates any
    /// error from the arrow kernels or the scalar conversion.
    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
        if args.is_empty() {
            return exec_err!(
                "coalesce was called with {} arguments. It requires at least 1.",
                args.len()
            );
        }

        // The output type is taken from the first argument.
        // NOTE(review): this presumably relies on `coerce_types` having unified
        // all argument types beforehand — confirm with the caller.
        let return_type = args[0].data_type();

        // Length of the first array argument, if there is one.
        let first_array_len = args.iter().find_map(|arg| match arg {
            ColumnarValue::Array(array) => Some(array.len()),
            ColumnarValue::Scalar(_) => None,
        });

        let Some(size) = first_array_len else {
            // Scalar-only call: pick the first non-null scalar, falling back to
            // the first argument when every scalar is null.
            let result = args
                .iter()
                .find(|arg| matches!(arg, ColumnarValue::Scalar(s) if !s.is_null()))
                .cloned()
                .unwrap_or_else(|| args[0].clone());
            return Ok(result);
        };

        // `current_value` accumulates the answer, starting out all-null.
        // `remainder` is true for the rows that have not been filled yet.
        let mut current_value = new_null_array(&return_type, size);
        let mut remainder = BooleanArray::from(vec![true; size]);

        for arg in args {
            match arg {
                ColumnarValue::Array(array) => {
                    // Rows that are still open and non-null in this argument.
                    let to_apply = and(&remainder, &is_not_null(array.as_ref())?)?;
                    current_value = zip(&to_apply, array, &current_value)?;
                    // Rows stay open only where this argument was null too.
                    remainder = and(&remainder, &is_null(array)?)?;
                }
                ColumnarValue::Scalar(value) => {
                    if value.is_null() {
                        continue;
                    }
                    // A non-null scalar fills every remaining row, so nothing
                    // after it can contribute.
                    let last_value = value.to_scalar()?;
                    current_value = zip(&remainder, &last_value, &current_value)?;
                    break;
                }
            }
            // Stop early once no row is left open.
            if remainder.iter().all(|x| x == Some(false)) {
                break;
            }
        }

        Ok(ColumnarValue::Array(current_value))
    }

    /// Always reports `true`.
    ///
    /// NOTE(review): presumably this tells the planner that later arguments
    /// may not need evaluating — confirm against the `ScalarUDFImpl` docs.
    fn short_circuits(&self) -> bool {
        true
    }

    /// Rejects an empty argument list and otherwise delegates the coercion to
    /// `try_type_union_resolution`.
    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
        if arg_types.is_empty() {
            return exec_err!("coalesce must have at least one argument");
        }
        try_type_union_resolution(arg_types)
    }

    fn documentation(&self) -> Option<&Documentation> {
        Some(get_coalesce_doc())
    }
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation entry for `coalesce`.
///
/// The entry is built the first time this is called and stored in
/// [`DOCUMENTATION`]; subsequent calls return the same reference.
///
/// # Panics
/// Panics if the documentation builder's `build()` returns an error.
fn get_coalesce_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.";
    const SYNTAX_EXAMPLE: &str = "coalesce(expression1[, ..., expression_n])";
    const SQL_EXAMPLE: &str = r#"```sql
> select coalesce(null, null, 'datafusion');
+----------------------------------------+
| coalesce(NULL,NULL,Utf8("datafusion")) |
+----------------------------------------+
| datafusion |
+----------------------------------------+
```"#;
    const ARGUMENT_NAME: &str = "expression1, expression_n";
    const ARGUMENT_DESCRIPTION: &str = "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary.";

    DOCUMENTATION.get_or_init(|| {
        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_CONDITIONAL)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX_EXAMPLE)
            .with_sql_example(SQL_EXAMPLE)
            .with_argument(ARGUMENT_NAME, ARGUMENT_DESCRIPTION);
        builder.build().unwrap()
    })
}
#[cfg(test)]
mod test {
    use arrow::datatypes::DataType;
    use datafusion_expr::ScalarUDFImpl;

    use crate::core;

    /// Asks a fresh `CoalesceFunc` for its return type given `inputs`,
    /// panicking if the lookup fails.
    fn resolved_type(inputs: &[DataType]) -> DataType {
        let coalesce = core::coalesce::CoalesceFunc::new();
        coalesce.return_type(inputs).unwrap()
    }

    // Identical argument types resolve to that type.
    #[test]
    fn test_coalesce_return_types() {
        let resolved = resolved_type(&[DataType::Date32, DataType::Date32]);
        assert_eq!(resolved, DataType::Date32);
    }

    // A leading Null type is skipped in favour of the first concrete type.
    #[test]
    fn test_coalesce_return_types_with_nulls_first() {
        let resolved = resolved_type(&[DataType::Null, DataType::Date32]);
        assert_eq!(resolved, DataType::Date32);
    }

    // A trailing Null type does not affect the result.
    #[test]
    fn test_coalesce_return_types_with_nulls_last() {
        let resolved = resolved_type(&[DataType::Int64, DataType::Null]);
        assert_eq!(resolved, DataType::Int64);
    }
}