datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates one `Expr`-building wrapper function per listed UDF.
///
/// Every entry has the form `(name, doc, args...)` and expands to a
/// `pub fn name(..)` that forwards its arguments to `super::name().call(..)`.
/// Because of the `super::` path, the macro has to be invoked inside a child
/// module (such as `expr_fn`) of the module that defines the `name()`
/// accessor functions. The macro only generates the wrapper functions; the
/// surrounding module is written by the caller.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(args)", args,),
///         (decode, "Return decode(a, b)", a b),
///     );
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(args)
///     pub fn encode(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
///         super::encode().call(args)
///     }
///     /// Return decode(a, b)
///     pub fn decode(
///         a: datafusion_expr::Expr,
///         b: datafusion_expr::Expr,
///     ) -> datafusion_expr::Expr {
///         super::decode().call(vec![a, b])
///     }
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
///
/// A trailing comma after the last entry is allowed.
macro_rules! export_functions {
    // entry point: comma separated `(name, doc, args...)` tuples, with an
    // optional trailing comma after the last tuple
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
67
/// Defines a function named `$NAME` that hands out a shared `ScalarUDF`
/// wrapping `$UDF`.
///
/// The `ScalarUDF` is built from `<$UDF>::new()` the first time `$NAME()` is
/// called and stored in a function-local static, so the construction only
/// happens once. Every call returns a clone of the same `Arc`.
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Lazily initialized, process-wide instance of the function
            static SINGLETON: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                let implementation = <$UDF>::new();
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    implementation,
                ))
            });

            // Initialize on first use, then share the same allocation
            std::sync::Arc::clone(std::sync::LazyLock::force(&SINGLETON))
        }
    };
}
88
/// Creates a stub module named `$name` when the cargo feature `$feature` is
/// not enabled.
///
/// The stub exposes a `functions()` that logs a debug message and returns an
/// empty list. The rationale for providing stub functions is to help users
/// configure datafusion properly (so they get an error telling them why a
/// function is not available) instead of getting a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
111
/// Downcast a named argument to a concrete array type.
///
/// Evaluates to a reference to the `$ARRAY_TYPE` on success. If the downcast
/// fails, an internal error mentioning `$NAME` and the target type is
/// propagated with `?`, so the macro can only be used inside a function that
/// returns a compatible `Result`.
///
/// The expansion invokes `internal_datafusion_err!` unqualified, so that macro
/// must be in scope wherever this macro is used.
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        // Kept as a single tail expression so temporaries created by `$ARG`
        // stay alive for as long as the returned reference is used.
        match $ARG.as_any().downcast_ref::<$ARRAY_TYPE>() {
            Some(array) => array,
            None => Err(internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            ))?,
        }
    }};
}
130
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This is a shorthand for `downcast_named_arg!` with an empty argument name,
/// so the resulting error message does not identify which argument failed.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        // `$crate::` makes the helper macro resolvable from any module or
        // crate this exported macro is expanded in, without extra imports
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
142
/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// The expansion consists of a module named `$NAME` that holds the `$UDF`
/// struct and its `ScalarUDFImpl` implementation, plus a `$NAME()` accessor
/// function generated through `make_udf_function!`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the generated module name)
/// $UNARY_FUNC: the unary function to apply to the argument (called as `f32::$UNARY_FUNC` / `f64::$UNARY_FUNC`)
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the interval bounds calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a one-argument Float64 / Float32 signature
                pub fn new() -> Self {
                    let accepted_types = vec![DataType::Float64, DataType::Float32];
                    Self {
                        signature: Signature::uniform(
                            1,
                            accepted_types,
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    let output_type = match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        // For other types (possible values float64/null/int), use Float64
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    // Dispatch on the runtime type of the single argument
                    let output: ArrayRef = match input.data_type() {
                        DataType::Float64 => {
                            let values = input.as_primitive::<Float64Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        DataType::Float32 => {
                            let values = input.as_primitive::<Float32Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
256
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments of types Float32 or Float64,
/// applies a binary floating function to the argument, and returns a value of the same type.
///
/// The expansion consists of a module named `$NAME` that holds the `$UDF`
/// struct and its `ScalarUDFImpl` implementation, plus a `$NAME()` accessor
/// function generated through `make_udf_function!`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the generated module name)
/// $BINARY_FUNC: the binary function to apply to the argument (called as `f32::$BINARY_FUNC` / `f64::$BINARY_FUNC`)
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF accepting either two Float32 or two Float64 arguments
                pub fn new() -> Self {
                    let float32_pair =
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]);
                    let float64_pair =
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]);
                    Self {
                        signature: Signature::one_of(
                            vec![float32_pair, float64_pair],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    let output_type = match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        // For other types (possible values float64/null/int), use Float64
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;

                    // Dispatch on the type of the first argument; the second
                    // argument is only looked at inside the float branches
                    let output: ArrayRef = match arrays[0].data_type() {
                        DataType::Float64 => {
                            let lhs = arrays[0].as_primitive::<Float64Type>();
                            let rhs = arrays[1].as_primitive::<Float64Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        DataType::Float32 => {
                            let lhs = arrays[0].as_primitive::<Float32Type>();
                            let rhs = arrays[1].as_primitive::<Float32Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}