// datafusion_functions_nested/cardinality.rs
use crate::utils::make_scalar_function;
use arrow_array::{
Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
};
use arrow_schema::DataType;
use arrow_schema::DataType::{FixedSizeList, LargeList, List, Map, UInt64};
use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
TypeSignature, Volatility,
};
use std::any::Any;
use std::sync::{Arc, OnceLock};
// Registers the `cardinality` function through the `make_udf_expr_and_func!`
// macro. The arguments are, in order: the UDF struct (`Cardinality`), the
// name `cardinality`, the argument name `array`, a human-readable
// description, and the identifier `cardinality_udf`.
// NOTE(review): the macro is defined outside this file; presumably it
// generates an expression-builder function named `cardinality` and an
// accessor named `cardinality_udf` -- confirm against the macro definition.
make_udf_expr_and_func!(
Cardinality,
cardinality,
array,
"returns the total number of elements in the array or map.",
cardinality_udf
);
impl Cardinality {
    /// Creates the `cardinality` UDF.
    ///
    /// The resulting signature accepts either of two argument shapes: an
    /// array (`ArrayFunctionSignature::Array`) or a map
    /// (`ArrayFunctionSignature::MapArray`). The function is declared
    /// `Volatility::Immutable` and has no aliases.
    pub fn new() -> Self {
        let accepted_shapes = vec![
            TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
            TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
        ];
        let signature = Signature::one_of(accepted_shapes, Volatility::Immutable);

        Self {
            signature,
            aliases: vec![],
        }
    }
}
/// Scalar UDF implementation behind the `cardinality` function.
#[derive(Debug)]
pub(super) struct Cardinality {
/// Argument signature handed out by `signature()`; `new` builds it as a
/// choice between an array argument and a map argument.
signature: Signature,
/// Alternative names handed out by `aliases()`; `new` leaves this empty.
aliases: Vec<String>,
}
/// [`ScalarUDFImpl`] wiring for the `cardinality` scalar function.
impl ScalarUDFImpl for Cardinality {
    /// Exposes `self` as [`Any`] so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Name under which the function is registered.
    fn name(&self) -> &str {
        "cardinality"
    }

    /// Signature built in [`Cardinality::new`].
    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Computes the return type from the type of the first argument.
    ///
    /// Returns `UInt64` when the first argument is a `List`, `LargeList`,
    /// `FixedSizeList` or `Map`.
    ///
    /// # Errors
    /// Returns a planning error when the first argument has any other type,
    /// or when `arg_types` is empty. The empty case previously panicked on
    /// `arg_types[0]`; it now reports the same planning error instead.
    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
        match arg_types.first() {
            Some(List(_) | LargeList(_) | FixedSizeList(_, _) | Map(_, _)) => Ok(UInt64),
            _ => {
                plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList/Map.")
            }
        }
    }

    /// Evaluates the function by running [`cardinality_inner`] through
    /// `make_scalar_function`.
    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
        make_scalar_function(cardinality_inner)(args)
    }

    /// Alternative names for the function (empty as built by `new`).
    fn aliases(&self) -> &[String] {
        &self.aliases
    }

    /// User-facing documentation, built lazily by `get_cardinality_doc`.
    fn documentation(&self) -> Option<&Documentation> {
        Some(get_cardinality_doc())
    }
}
/// Storage for the user-facing documentation of `cardinality`, filled on
/// first access by `get_cardinality_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `cardinality`, building it on the first call
/// and handing out the cached value afterwards.
///
/// # Panics
/// Panics if the documentation builder returns an error from `build()`.
fn get_cardinality_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description = "Returns the total number of elements in the array.";
        let sql_example = r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8 |
+--------------------------------------+
```"#;
        let array_argument = "Array expression. Can be a constant, column, or function, and any combination of array operators.";

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_ARRAY)
            .with_description(description)
            .with_syntax_example("cardinality(array)")
            .with_sql_example(sql_example)
            .with_argument("array", array_argument);

        builder.build().unwrap()
    })
}
/// Array-level kernel for `cardinality`.
///
/// Expects exactly one argument and dispatches on its data type:
/// `List` and `LargeList` go to `generic_list_cardinality` (with `i32` and
/// `i64` offsets respectively), and `Map` goes to `generic_map_cardinality`.
///
/// # Errors
/// Returns an execution error when the number of arguments is not one, when
/// the argument has an unsupported data type, or when a downcast or a helper
/// fails.
pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
    // A one-element slice pattern is equivalent to checking `args.len() == 1`.
    let [input] = args else {
        return exec_err!("cardinality expects one argument");
    };

    match input.data_type() {
        Map(_, _) => generic_map_cardinality(as_map_array(input)?),
        LargeList(_) => generic_list_cardinality::<i64>(as_large_list_array(input)?),
        List(_) => generic_list_cardinality::<i32>(as_list_array(input)?),
        unsupported => {
            exec_err!("cardinality does not support type '{:?}'", unsupported)
        }
    }
}
fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
let result: UInt64Array = array
.iter()
.map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
.collect();
Ok(Arc::new(result))
}
/// Computes, for every row of a list array, the product of the dimensions
/// reported by `crate::utils::compute_array_dims` for that row.
///
/// A row for which the helper reports no dimensions becomes a `None` entry in
/// the output.
///
/// # Errors
/// Propagates any error returned by `compute_array_dims`.
///
/// # Panics
/// Panics if one of the reported dimensions is `None`.
/// NOTE(review): presumably the helper never yields a `None` dimension --
/// confirm against its definition.
fn generic_list_cardinality<O: OffsetSizeTrait>(
    array: &GenericListArray<O>,
) -> Result<ArrayRef> {
    let counts = array
        .iter()
        .map(|row| {
            let dims = crate::utils::compute_array_dims(row)?;
            Ok(dims.map(|sizes| sizes.iter().map(|size| size.unwrap()).product::<u64>()))
        })
        .collect::<Result<UInt64Array>>()?;
    Ok(Arc::new(counts) as ArrayRef)
}