datafusion_functions/
macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates the `Expr`-building helper functions for a package of scalar
/// functions.
///
/// Every entry has the form `(name, "doc string", args)`. For each entry the
/// macro emits one public function called `name` that forwards its arguments
/// to `super::name().call(..)`. The macro is therefore meant to be invoked
/// inside a module (conventionally `expr_fn`) whose parent module defines
/// `name()`.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(arg)", input encoding),
///         (decode, "Return decode(arg)", args,),
///     );
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(input: Expr, encoding: Expr) -> Expr {
///         super::encode().call(vec![input, encoding])
///     }
///     /// Return decode(arg)
///     pub fn decode(args: Vec<Expr>) -> Expr {
///         super::decode().call(args)
///     }
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
///
/// A trailing comma after the last entry is allowed.
macro_rules! export_functions {
    // entry point: comma separated `(name, doc, args)` entries, with an
    // optional trailing comma after the last one
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
67
/// Defines a function named `$NAME` that hands out a shared, lazily created
/// `ScalarUDF` wrapping `<$UDF>::new()`.
///
/// The `ScalarUDF` is built on first use and stored in a function-local
/// static, so construction happens only once; every call returns a clone of
/// the same `Arc`.
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            type SharedUdf = std::sync::Arc<datafusion_expr::ScalarUDF>;

            // Builds the UDF; run at most once, by the `LazyLock` below
            fn new_instance() -> SharedUdf {
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    <$UDF>::new(),
                ))
            }

            // Singleton instance of the function
            static INSTANCE: std::sync::LazyLock<SharedUdf> =
                std::sync::LazyLock::new(new_instance);

            std::sync::Arc::clone(&INSTANCE)
        }
    };
}
88
/// Creates a stub module named `$name` that is compiled only when the cargo
/// feature `$feature` is NOT enabled.
///
/// The stub exposes a `functions()` that logs a debug message and returns an
/// empty list. The rationale for providing stub functions is to help users
/// configure datafusion properly (so they get an error telling them why a
/// function is not available) instead of getting a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
111
/// Downcast a named argument to a concrete array type.
///
/// Evaluates to a reference to the downcast array. When the downcast fails,
/// an internal error naming the argument and the requested type is
/// propagated with `?`, so the macro can only be used inside a function
/// whose error type can be built from that internal error. The invocation
/// site must also have `internal_datafusion_err!` in scope.
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        // kept as one expression so temporaries created by `$ARG` live as
        // long as the reference handed back to the caller
        match $ARG.as_any().downcast_ref::<$ARRAY_TYPE>() {
            Some(typed) => typed,
            None => Err(internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            ))?,
        }
    }};
}
130
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This is `downcast_named_arg!` with an empty argument name. The inner macro
/// is invoked through `$crate::` so that this exported macro also expands
/// correctly where `downcast_named_arg!` has not been brought into scope
/// separately (for example in another crate or in a module that only imports
/// `downcast_arg!`).
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
142
/// Macro to create a unary math UDF.
///
/// The generated function accepts one argument of type Float32 or Float64,
/// applies the floating point method `$UNARY_FUNC` to every value, and
/// returns an array of the same floating point type.
///
/// The macro emits a private module `$NAME` containing the struct `$UDF`
/// together with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// function `$NAME()` returning the shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the function computing the output interval from the input intervals
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::{any::Any, sync::Arc};

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::{
                interval_arithmetic::Interval,
                sort_properties::{ExprProperties, SortProperties},
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a signature of exactly one
                /// Float64 or Float32 argument.
                pub fn new() -> Self {
                    let signature = Signature::uniform(
                        1,
                        vec![DataType::Float64, DataType::Float32],
                        Volatility::Immutable,
                    );
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; any other type
                    // (possible values float64/null/int) maps to Float64
                    Ok(match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    })
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    // The result is always returned as an array
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let operand = &arrays[0];

                    let output: ArrayRef = match operand.data_type() {
                        DataType::Float32 => {
                            let values = operand.as_primitive::<Float32Type>();
                            Arc::new(values.unary::<_, Float32Type>(|value: f32| {
                                f32::$UNARY_FUNC(value)
                            })) as ArrayRef
                        }
                        DataType::Float64 => {
                            let values = operand.as_primitive::<Float64Type>();
                            Arc::new(values.unary::<_, Float64Type>(|value: f64| {
                                f64::$UNARY_FUNC(value)
                            })) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
256
/// Macro to create a binary math UDF.
///
/// The generated function accepts two arguments that are both Float32 or
/// both Float64, applies the floating point method `$BINARY_FUNC` to each
/// pair of values, and returns an array of the same floating point type.
///
/// The macro emits a private module `$NAME` containing the struct `$UDF`
/// together with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// function `$NAME()` returning the shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the arguments
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::{any::Any, sync::Arc};

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::{
                sort_properties::{ExprProperties, SortProperties},
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, TypeSignature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a signature of exactly
                /// (Float32, Float32) or (Float64, Float64).
                pub fn new() -> Self {
                    let accepted = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    Self {
                        signature: Signature::one_of(accepted, Volatility::Immutable),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; any other type
                    // (possible values float64/null/int) maps to Float64
                    Ok(match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    })
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    // The result is always returned as an array
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;

                    // The type of the first argument selects the kernel; the
                    // second argument is read as the same primitive type.
                    let output: ArrayRef = match arrays[0].data_type() {
                        DataType::Float32 => {
                            let first = arrays[0].as_primitive::<Float32Type>();
                            let second = arrays[1].as_primitive::<Float32Type>();
                            let computed = arrow::compute::binary::<_, _, _, Float32Type>(
                                first,
                                second,
                                |a, b| f32::$BINARY_FUNC(a, b),
                            )?;
                            Arc::new(computed) as _
                        }
                        DataType::Float64 => {
                            let first = arrays[0].as_primitive::<Float64Type>();
                            let second = arrays[1].as_primitive::<Float64Type>();
                            let computed = arrow::compute::binary::<_, _, _, Float64Type>(
                                first,
                                second,
                                |a, b| f64::$BINARY_FUNC(a, b),
                            )?;
                            Arc::new(computed) as _
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}