datafusion_expr/
udf.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDF`]: Scalar User Defined Functions
19
20use crate::expr::schema_name_from_exprs_comma_separated_without_space;
21use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
22use crate::sort_properties::{ExprProperties, SortProperties};
23use crate::{
24    ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature,
25};
26use arrow::datatypes::DataType;
27use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
28use datafusion_expr_common::interval_arithmetic::Interval;
29use std::any::Any;
30use std::cmp::Ordering;
31use std::fmt::Debug;
32use std::hash::{DefaultHasher, Hash, Hasher};
33use std::sync::Arc;
34
/// Logical representation of a Scalar User Defined Function.
///
/// A scalar function produces a single row output for each row of input. This
/// struct contains the information DataFusion needs to plan and invoke
/// functions you supply, such as name, type signature, return type, and actual
/// implementation.
///
/// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]).
///
/// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API
///    access (examples in  [`advanced_udf.rs`]).
///
/// See [`Self::call`] to invoke a `ScalarUDF` with arguments.
///
/// # API Note
///
/// This is a separate struct from `ScalarUDFImpl` to maintain backwards
/// compatibility with the older API.
///
/// [`create_udf`]: crate::expr_fn::create_udf
/// [`simple_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
#[derive(Debug, Clone)]
pub struct ScalarUDF {
    /// The shared [`ScalarUDFImpl`] trait object wrapped by this struct
    inner: Arc<dyn ScalarUDFImpl>,
}
61
62impl PartialEq for ScalarUDF {
63    fn eq(&self, other: &Self) -> bool {
64        self.inner.equals(other.inner.as_ref())
65    }
66}
67
68// Manual implementation based on `ScalarUDFImpl::equals`
69impl PartialOrd for ScalarUDF {
70    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
71        match self.name().partial_cmp(other.name()) {
72            Some(Ordering::Equal) => self.signature().partial_cmp(other.signature()),
73            cmp => cmp,
74        }
75    }
76}
77
// Marker impl: `PartialEq` delegates to `ScalarUDFImpl::equals`, which is
// documented to follow the same rules as `Eq`.
impl Eq for ScalarUDF {}
79
80impl Hash for ScalarUDF {
81    fn hash<H: Hasher>(&self, state: &mut H) {
82        self.inner.hash_value().hash(state)
83    }
84}
85
impl ScalarUDF {
    /// Create a new `ScalarUDF` from a [`ScalarUDFImpl`] trait object
    ///
    /// Note this is the same as using the `From` impl (`ScalarUDF::from`)
    pub fn new_from_impl<F>(fun: F) -> ScalarUDF
    where
        F: ScalarUDFImpl + 'static,
    {
        Self::new_from_shared_impl(Arc::new(fun))
    }

    /// Create a new `ScalarUDF` from an already shared (`Arc`-wrapped)
    /// [`ScalarUDFImpl`] trait object
    pub fn new_from_shared_impl(fun: Arc<dyn ScalarUDFImpl>) -> ScalarUDF {
        Self { inner: fun }
    }

    /// Return the underlying [`ScalarUDFImpl`] trait object for this function
    pub fn inner(&self) -> &Arc<dyn ScalarUDFImpl> {
        &self.inner
    }

    /// Adds additional names that can be used to invoke this function, in
    /// addition to `name`
    ///
    /// The returned `ScalarUDF` wraps the current implementation in an
    /// `AliasedScalarUDFImpl` that carries the extra aliases.
    ///
    /// If you implement [`ScalarUDFImpl`] directly you should return aliases directly.
    pub fn with_aliases(self, aliases: impl IntoIterator<Item = &'static str>) -> Self {
        Self::new_from_impl(AliasedScalarUDFImpl::new(Arc::clone(&self.inner), aliases))
    }

    /// Returns a [`Expr`] logical expression to call this UDF with specified
    /// arguments.
    ///
    /// This utility allows easily calling UDFs
    ///
    /// # Example
    /// ```no_run
    /// use datafusion_expr::{col, lit, ScalarUDF};
    /// # fn my_udf() -> ScalarUDF { unimplemented!() }
    /// let my_func: ScalarUDF = my_udf();
    /// // Create an expr for `my_func(a, 12.3)`
    /// let expr = my_func.call(vec![col("a"), lit(12.3)]);
    /// ```
    pub fn call(&self, args: Vec<Expr>) -> Expr {
        Expr::ScalarFunction(crate::expr::ScalarFunction::new_udf(
            Arc::new(self.clone()),
            args,
        ))
    }

    /// Returns this function's name.
    ///
    /// See [`ScalarUDFImpl::name`] for more details.
    pub fn name(&self) -> &str {
        self.inner.name()
    }

    /// Returns this function's display_name.
    ///
    /// See [`ScalarUDFImpl::display_name`] for more details
    pub fn display_name(&self, args: &[Expr]) -> Result<String> {
        self.inner.display_name(args)
    }

    /// Returns this function's schema_name.
    ///
    /// See [`ScalarUDFImpl::schema_name`] for more details
    pub fn schema_name(&self, args: &[Expr]) -> Result<String> {
        self.inner.schema_name(args)
    }

    /// Returns the aliases for this function.
    ///
    /// See [`ScalarUDF::with_aliases`] for more details
    pub fn aliases(&self) -> &[String] {
        self.inner.aliases()
    }

    /// Returns this function's [`Signature`] (what input types are accepted).
    ///
    /// See [`ScalarUDFImpl::signature`] for more details.
    pub fn signature(&self) -> &Signature {
        self.inner.signature()
    }

    /// The datatype this function returns given the input argument types.
    /// This function is used when the input arguments are [`DataType`]s.
    ///
    ///  # Notes
    ///
    /// If a function implements [`ScalarUDFImpl::return_type_from_exprs`],
    /// its [`ScalarUDFImpl::return_type`] should raise an error.
    ///
    /// See [`ScalarUDFImpl::return_type`] for more details.
    pub fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
        self.inner.return_type(arg_types)
    }

    /// The datatype this function returns given the input argument types.
    /// This function is used when the input arguments are [`Expr`]s.
    ///
    /// Delegates to the (deprecated) trait method of the same name.
    ///
    /// See [`ScalarUDFImpl::return_type_from_exprs`] for more details.
    #[allow(deprecated)]
    pub fn return_type_from_exprs(
        &self,
        args: &[Expr],
        schema: &dyn ExprSchema,
        arg_types: &[DataType],
    ) -> Result<DataType> {
        // If the implementation provides a return_type_from_exprs, use it
        self.inner.return_type_from_exprs(args, schema, arg_types)
    }

    /// Return the datatype this function returns given the input argument types.
    ///
    /// See [`ScalarUDFImpl::return_type_from_args`] for more details.
    pub fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
        self.inner.return_type_from_args(args)
    }

    /// Do the function rewrite
    ///
    /// See [`ScalarUDFImpl::simplify`] for more details.
    pub fn simplify(
        &self,
        args: Vec<Expr>,
        info: &dyn SimplifyInfo,
    ) -> Result<ExprSimplifyResult> {
        self.inner.simplify(args, info)
    }

    /// Invoke the function on `args`, returning the appropriate result.
    ///
    /// Deprecated: forwards to the deprecated [`ScalarUDFImpl::invoke`].
    #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
    pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
        #[allow(deprecated)]
        self.inner.invoke(args)
    }

    /// Returns the nullability reported by the inner implementation.
    ///
    /// Forwards to the deprecated [`ScalarUDFImpl::is_nullable`].
    #[allow(deprecated)]
    pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
        self.inner.is_nullable(args, schema)
    }

    /// Invoke the function with `args` and the number of rows, returning the
    /// appropriate result.
    ///
    /// See [`ScalarUDFImpl::invoke_batch`] for more details.
    pub fn invoke_batch(
        &self,
        args: &[ColumnarValue],
        number_rows: usize,
    ) -> Result<ColumnarValue> {
        self.inner.invoke_batch(args, number_rows)
    }

    /// Invoke the function on `args`, returning the appropriate result.
    ///
    /// See [`ScalarUDFImpl::invoke_with_args`] for details.
    pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
        self.inner.invoke_with_args(args)
    }

    /// Invoke the function without `args` but number of rows, returning the appropriate result.
    ///
    /// Note: This method is deprecated and will be removed in future releases.
    /// User defined functions should implement [`Self::invoke_with_args`] instead.
    #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
    pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
        #[allow(deprecated)]
        self.inner.invoke_no_args(number_rows)
    }

    /// Returns a `ScalarFunctionImplementation` that can invoke the function
    /// during execution
    #[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")]
    pub fn fun(&self) -> ScalarFunctionImplementation {
        // The closure owns its own handle to the implementation
        let captured = Arc::clone(&self.inner);
        #[allow(deprecated)]
        Arc::new(move |args| captured.invoke(args))
    }

    /// Returns whether the inner implementation short-circuits, i.e. whether
    /// some of its argument subexpressions may not be evaluated.
    ///
    /// See [`ScalarUDFImpl::short_circuits`] for more details.
    pub fn short_circuits(&self) -> bool {
        self.inner.short_circuits()
    }

    /// Computes the output interval for a [`ScalarUDF`], given the input
    /// intervals.
    ///
    /// # Parameters
    ///
    /// * `inputs` are the intervals for the inputs (children) of this function.
    ///
    /// # Example
    ///
    /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
    /// then the output interval would be `[0, 3]`.
    pub fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
        self.inner.evaluate_bounds(inputs)
    }

    /// Updates bounds for child expressions, given a known interval for this
    /// function. This is used to propagate constraints down through an expression
    /// tree.
    ///
    /// # Parameters
    ///
    /// * `interval` is the currently known interval for this function.
    /// * `inputs` are the current intervals for the inputs (children) of this function.
    ///
    /// # Returns
    ///
    /// A `Vec` of new intervals for the children, in order.
    ///
    /// If constraint propagation reveals an infeasibility for any child, returns
    /// [`None`]. If none of the children intervals change as a result of
    /// propagation, may return an empty vector instead of cloning `inputs`.
    /// This is the default (and conservative) return value.
    ///
    /// # Example
    ///
    /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
    /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
    pub fn propagate_constraints(
        &self,
        interval: &Interval,
        inputs: &[&Interval],
    ) -> Result<Option<Vec<Interval>>> {
        self.inner.propagate_constraints(interval, inputs)
    }

    /// Calculates the [`SortProperties`] of this function based on its
    /// children's properties.
    pub fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
        self.inner.output_ordering(inputs)
    }

    /// Whether the function preserves lexicographical ordering based on the
    /// input ordering.
    ///
    /// See [`ScalarUDFImpl::preserves_lex_ordering`] for more details.
    pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
        self.inner.preserves_lex_ordering(inputs)
    }

    /// See [`ScalarUDFImpl::coerce_types`] for more details.
    pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
        self.inner.coerce_types(arg_types)
    }

    /// Returns the documentation for this Scalar UDF.
    ///
    /// Documentation can be accessed programmatically as well as
    /// generating publicly facing documentation.
    pub fn documentation(&self) -> Option<&Documentation> {
        self.inner.documentation()
    }
}
335
336impl<F> From<F> for ScalarUDF
337where
338    F: ScalarUDFImpl + 'static,
339{
340    fn from(fun: F) -> Self {
341        Self::new_from_impl(fun)
342    }
343}
344
/// Arguments passed to [`ScalarUDFImpl::invoke_with_args`] when invoking a
/// scalar function.
pub struct ScalarFunctionArgs<'a> {
    /// The evaluated arguments to the function
    pub args: Vec<ColumnarValue>,
    /// The number of rows in the record batch being evaluated
    pub number_rows: usize,
    /// The return type of the scalar function (as returned from `return_type`
    /// or `return_type_from_exprs`), determined when creating the physical
    /// expression from the logical expression
    pub return_type: &'a DataType,
}
356
/// Information about arguments passed to the function
///
/// This structure contains metadata about how the function was called
/// such as the type of the arguments, any scalar arguments and if the
/// arguments can (ever) be null
///
/// See [`ScalarUDFImpl::return_type_from_args`] for more information
#[derive(Debug)]
pub struct ReturnTypeArgs<'a> {
    /// The data types of the arguments to the function
    pub arg_types: &'a [DataType],
    /// Is argument `i` to the function a scalar (constant)
    ///
    /// If argument `i` is not a scalar, it will be None
    ///
    /// For example, if a function is called like `my_function(column_a, 5)`
    /// this field will be `[None, Some(ScalarValue::Int32(Some(5)))]`
    pub scalar_arguments: &'a [Option<&'a ScalarValue>],
    /// Can argument `i` (ever) be null?
    pub nullables: &'a [bool],
}
378
/// Return metadata for this function.
///
/// Pairs the returned [`DataType`] with a nullability flag.
///
/// See [`ScalarUDFImpl::return_type_from_args`] for more information
#[derive(Debug)]
pub struct ReturnInfo {
    /// The data type the function returns
    return_type: DataType,
    /// Nullability flag for the returned value
    nullable: bool,
}
387
388impl ReturnInfo {
389    pub fn new(return_type: DataType, nullable: bool) -> Self {
390        Self {
391            return_type,
392            nullable,
393        }
394    }
395
396    pub fn new_nullable(return_type: DataType) -> Self {
397        Self {
398            return_type,
399            nullable: true,
400        }
401    }
402
403    pub fn new_non_nullable(return_type: DataType) -> Self {
404        Self {
405            return_type,
406            nullable: false,
407        }
408    }
409
410    pub fn return_type(&self) -> &DataType {
411        &self.return_type
412    }
413
414    pub fn nullable(&self) -> bool {
415        self.nullable
416    }
417
418    pub fn into_parts(self) -> (DataType, bool) {
419        (self.return_type, self.nullable)
420    }
421}
422
423/// Trait for implementing user defined scalar functions.
424///
425/// This trait exposes the full API for implementing user defined functions and
426/// can be used to implement any function.
427///
428/// See [`advanced_udf.rs`] for a full example with complete implementation and
429/// [`ScalarUDF`] for other available options.
430///
431/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
432///
433/// # Basic Example
434/// ```
435/// # use std::any::Any;
436/// # use std::sync::LazyLock;
437/// # use arrow::datatypes::DataType;
438/// # use datafusion_common::{DataFusionError, plan_err, Result};
439/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility};
440/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
441/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
/// /// This struct is for a simple UDF that adds one to an int32
443/// #[derive(Debug)]
444/// struct AddOne {
445///   signature: Signature,
446/// }
447///
448/// impl AddOne {
449///   fn new() -> Self {
450///     Self {
451///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable),
452///      }
453///   }
454/// }
455///
456/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
457///         Documentation::builder(DOC_SECTION_MATH, "Add one to an int32", "add_one(2)")
458///             .with_argument("arg1", "The int32 number to add one to")
459///             .build()
460///     });
461///
462/// fn get_doc() -> &'static Documentation {
463///     &DOCUMENTATION
464/// }
465///
466/// /// Implement the ScalarUDFImpl trait for AddOne
467/// impl ScalarUDFImpl for AddOne {
468///    fn as_any(&self) -> &dyn Any { self }
469///    fn name(&self) -> &str { "add_one" }
470///    fn signature(&self) -> &Signature { &self.signature }
471///    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
472///      if !matches!(args.get(0), Some(&DataType::Int32)) {
473///        return plan_err!("add_one only accepts Int32 arguments");
474///      }
475///      Ok(DataType::Int32)
476///    }
477///    // The actual implementation would add one to the argument
478///    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
479///         unimplemented!()
480///    }
481///    fn documentation(&self) -> Option<&Documentation> {
482///         Some(get_doc())
483///     }
484/// }
485///
486/// // Create a new ScalarUDF from the implementation
487/// let add_one = ScalarUDF::from(AddOne::new());
488///
489/// // Call the function `add_one(col)`
490/// let expr = add_one.call(vec![col("a")]);
491/// ```
492pub trait ScalarUDFImpl: Debug + Send + Sync {
493    // Note: When adding any methods (with default implementations), remember to add them also
494    // into the AliasedScalarUDFImpl below!
495
496    /// Returns this object as an [`Any`] trait object
497    fn as_any(&self) -> &dyn Any;
498
499    /// Returns this function's name
500    fn name(&self) -> &str;
501
502    /// Returns the user-defined display name of function, given the arguments
503    ///
504    /// This can be used to customize the output column name generated by this
505    /// function.
506    ///
507    /// Defaults to `name(args[0], args[1], ...)`
508    fn display_name(&self, args: &[Expr]) -> Result<String> {
509        let names: Vec<String> = args.iter().map(ToString::to_string).collect();
510        // TODO: join with ", " to standardize the formatting of Vec<Expr>, <https://github.com/apache/datafusion/issues/10364>
511        Ok(format!("{}({})", self.name(), names.join(",")))
512    }
513
514    /// Returns the name of the column this expression would create
515    ///
516    /// See [`Expr::schema_name`] for details
517    fn schema_name(&self, args: &[Expr]) -> Result<String> {
518        Ok(format!(
519            "{}({})",
520            self.name(),
521            schema_name_from_exprs_comma_separated_without_space(args)?
522        ))
523    }
524
525    /// Returns the function's [`Signature`] for information about what input
526    /// types are accepted and the function's Volatility.
527    fn signature(&self) -> &Signature;
528
529    /// What [`DataType`] will be returned by this function, given the types of
530    /// the arguments.
531    ///
532    /// # Notes
533    ///
534    /// If you provide an implementation for [`Self::return_type_from_args`],
535    /// DataFusion will not call `return_type` (this function). In such cases
536    /// is recommended to return [`DataFusionError::Internal`].
537    ///
538    /// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal
539    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType>;
540
541    #[deprecated(since = "45.0.0", note = "Use `return_type_from_args` instead")]
542    fn return_type_from_exprs(
543        &self,
544        _args: &[Expr],
545        _schema: &dyn ExprSchema,
546        arg_types: &[DataType],
547    ) -> Result<DataType> {
548        self.return_type(arg_types)
549    }
550
551    /// What type will be returned by this function, given the arguments?
552    ///
553    /// By default, this function calls [`Self::return_type`] with the
554    /// types of each argument.
555    ///
556    /// # Notes
557    ///
558    /// Most UDFs should implement [`Self::return_type`] and not this
559    /// function as the output type for most functions only depends on the types
560    /// of their inputs (e.g. `sqrt(f32)` is always `f32`).
561    ///
562    /// This function can be used for more advanced cases such as:
563    ///
564    /// 1. specifying nullability
565    /// 2. return types based on the **values** of the arguments (rather than
566    ///    their **types**.
567    ///
568    /// # Output Type based on Values
569    ///
570    /// For example, the following two function calls get the same argument
571    /// types (something and a `Utf8` string) but return different types based
572    /// on the value of the second argument:
573    ///
574    /// * `arrow_cast(x, 'Int16')` --> `Int16`
575    /// * `arrow_cast(x, 'Float32')` --> `Float32`
576    ///
577    /// # Requirements
578    ///
579    /// This function **must** consistently return the same type for the same
580    /// logical input even if the input is simplified (e.g. it must return the same
581    /// value for `('foo' | 'bar')` as it does for ('foobar').
582    fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
583        let return_type = self.return_type(args.arg_types)?;
584        Ok(ReturnInfo::new_nullable(return_type))
585    }
586
587    #[deprecated(
588        since = "45.0.0",
589        note = "Use `return_type_from_args` instead. if you use `is_nullable` that returns non-nullable with `return_type`, you would need to switch to `return_type_from_args`, you might have error"
590    )]
591    fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
592        true
593    }
594
595    /// Invoke the function on `args`, returning the appropriate result
596    ///
597    /// Note: This method is deprecated and will be removed in future releases.
598    /// User defined functions should implement [`Self::invoke_with_args`] instead.
599    #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
600    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
601        not_impl_err!(
602            "Function {} does not implement invoke but called",
603            self.name()
604        )
605    }
606
607    /// Invoke the function with `args` and the number of rows,
608    /// returning the appropriate result.
609    ///
610    /// Note: See notes on  [`Self::invoke_with_args`]
611    ///
612    /// Note: This method is deprecated and will be removed in future releases.
613    /// User defined functions should implement [`Self::invoke_with_args`] instead.
614    ///
615    /// See <https://github.com/apache/datafusion/issues/13515> for more details.
616    fn invoke_batch(
617        &self,
618        args: &[ColumnarValue],
619        number_rows: usize,
620    ) -> Result<ColumnarValue> {
621        match args.is_empty() {
622            true =>
623            {
624                #[allow(deprecated)]
625                self.invoke_no_args(number_rows)
626            }
627            false =>
628            {
629                #[allow(deprecated)]
630                self.invoke(args)
631            }
632        }
633    }
634
635    /// Invoke the function returning the appropriate result.
636    ///
637    /// # Performance
638    ///
639    /// For the best performance, the implementations should handle the common case
640    /// when one or more of their arguments are constant values (aka
641    /// [`ColumnarValue::Scalar`]).
642    ///
643    /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
644    /// to arrays, which will likely be simpler code, but be slower.
645    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
646        self.invoke_batch(&args.args, args.number_rows)
647    }
648
649    /// Invoke the function without `args`, instead the number of rows are provided,
650    /// returning the appropriate result.
651    ///
652    /// Note: This method is deprecated and will be removed in future releases.
653    /// User defined functions should implement [`Self::invoke_with_args`] instead.
654    #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
655    fn invoke_no_args(&self, _number_rows: usize) -> Result<ColumnarValue> {
656        not_impl_err!(
657            "Function {} does not implement invoke_no_args but called",
658            self.name()
659        )
660    }
661
662    /// Returns any aliases (alternate names) for this function.
663    ///
664    /// Aliases can be used to invoke the same function using different names.
665    /// For example in some databases `now()` and `current_timestamp()` are
666    /// aliases for the same function. This behavior can be obtained by
667    /// returning `current_timestamp` as an alias for the `now` function.
668    ///
669    /// Note: `aliases` should only include names other than [`Self::name`].
670    /// Defaults to `[]` (no aliases)
671    fn aliases(&self) -> &[String] {
672        &[]
673    }
674
675    /// Optionally apply per-UDF simplification / rewrite rules.
676    ///
677    /// This can be used to apply function specific simplification rules during
678    /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
679    /// implementation does nothing.
680    ///
681    /// Note that DataFusion handles simplifying arguments and  "constant
682    /// folding" (replacing a function call with constant arguments such as
683    /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
684    /// optimizations manually for specific UDFs.
685    ///
686    /// # Arguments
687    /// * `args`: The arguments of the function
688    /// * `info`: The necessary information for simplification
689    ///
690    /// # Returns
691    /// [`ExprSimplifyResult`] indicating the result of the simplification NOTE
692    /// if the function cannot be simplified, the arguments *MUST* be returned
693    /// unmodified
694    fn simplify(
695        &self,
696        args: Vec<Expr>,
697        _info: &dyn SimplifyInfo,
698    ) -> Result<ExprSimplifyResult> {
699        Ok(ExprSimplifyResult::Original(args))
700    }
701
702    /// Returns true if some of this `exprs` subexpressions may not be evaluated
703    /// and thus any side effects (like divide by zero) may not be encountered
704    /// Setting this to true prevents certain optimizations such as common subexpression elimination
705    fn short_circuits(&self) -> bool {
706        false
707    }
708
709    /// Computes the output interval for a [`ScalarUDFImpl`], given the input
710    /// intervals.
711    ///
712    /// # Parameters
713    ///
714    /// * `children` are the intervals for the children (inputs) of this function.
715    ///
716    /// # Example
717    ///
718    /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
719    /// then the output interval would be `[0, 3]`.
720    fn evaluate_bounds(&self, _input: &[&Interval]) -> Result<Interval> {
721        // We cannot assume the input datatype is the same of output type.
722        Interval::make_unbounded(&DataType::Null)
723    }
724
725    /// Updates bounds for child expressions, given a known interval for this
726    /// function. This is used to propagate constraints down through an expression
727    /// tree.
728    ///
729    /// # Parameters
730    ///
731    /// * `interval` is the currently known interval for this function.
732    /// * `inputs` are the current intervals for the inputs (children) of this function.
733    ///
734    /// # Returns
735    ///
736    /// A `Vec` of new intervals for the children, in order.
737    ///
738    /// If constraint propagation reveals an infeasibility for any child, returns
739    /// [`None`]. If none of the children intervals change as a result of
740    /// propagation, may return an empty vector instead of cloning `children`.
741    /// This is the default (and conservative) return value.
742    ///
743    /// # Example
744    ///
745    /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
746    /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
747    fn propagate_constraints(
748        &self,
749        _interval: &Interval,
750        _inputs: &[&Interval],
751    ) -> Result<Option<Vec<Interval>>> {
752        Ok(Some(vec![]))
753    }
754
755    /// Calculates the [`SortProperties`] of this function based on its children's properties.
756    fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
757        if !self.preserves_lex_ordering(inputs)? {
758            return Ok(SortProperties::Unordered);
759        }
760
761        let Some(first_order) = inputs.first().map(|p| &p.sort_properties) else {
762            return Ok(SortProperties::Singleton);
763        };
764
765        if inputs
766            .iter()
767            .skip(1)
768            .all(|input| &input.sort_properties == first_order)
769        {
770            Ok(*first_order)
771        } else {
772            Ok(SortProperties::Unordered)
773        }
774    }
775
776    /// Whether the function preserves lexicographical ordering based on the input ordering
777    fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
778        Ok(false)
779    }
780
781    /// Coerce arguments of a function call to types that the function can evaluate.
782    ///
783    /// This function is only called if [`ScalarUDFImpl::signature`] returns [`crate::TypeSignature::UserDefined`]. Most
784    /// UDFs should return one of the other variants of `TypeSignature` which handle common
785    /// cases
786    ///
787    /// See the [type coercion module](crate::type_coercion)
788    /// documentation for more details on type coercion
789    ///
790    /// For example, if your function requires a floating point arguments, but the user calls
791    /// it like `my_func(1::int)` (i.e. with `1` as an integer), coerce_types can return `[DataType::Float64]`
792    /// to ensure the argument is converted to `1::double`
793    ///
794    /// # Parameters
795    /// * `arg_types`: The argument types of the arguments  this function with
796    ///
797    /// # Return value
798    /// A Vec the same length as `arg_types`. DataFusion will `CAST` the function call
799    /// arguments to these specific types.
800    fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
801        not_impl_err!("Function {} does not implement coerce_types", self.name())
802    }
803
804    /// Return true if this scalar UDF is equal to the other.
805    ///
806    /// Allows customizing the equality of scalar UDFs.
807    /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]:
808    ///
809    /// - reflexive: `a.equals(a)`;
810    /// - symmetric: `a.equals(b)` implies `b.equals(a)`;
811    /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`.
812    ///
813    /// By default, compares [`Self::name`] and [`Self::signature`].
814    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
815        self.name() == other.name() && self.signature() == other.signature()
816    }
817
818    /// Returns a hash value for this scalar UDF.
819    ///
820    /// Allows customizing the hash code of scalar UDFs. Similarly to [`Hash`] and [`Eq`],
821    /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same.
822    ///
823    /// By default, hashes [`Self::name`] and [`Self::signature`].
824    fn hash_value(&self) -> u64 {
825        let hasher = &mut DefaultHasher::new();
826        self.name().hash(hasher);
827        self.signature().hash(hasher);
828        hasher.finish()
829    }
830
    /// Returns the documentation for this Scalar UDF.
    ///
    /// Documentation can be accessed programmatically as well as used to
    /// generate publicly facing documentation.
    ///
    /// The default implementation returns `None`; override this method to
    /// supply a [`Documentation`] for the function.
    fn documentation(&self) -> Option<&Documentation> {
        None
    }
838}
839
/// ScalarUDF that adds an alias to the underlying function. It is better to
/// implement [`ScalarUDFImpl`], which supports aliases, directly if possible.
#[derive(Debug)]
struct AliasedScalarUDFImpl {
    /// The wrapped function; the [`ScalarUDFImpl`] implementation of this
    /// type forwards to it.
    inner: Arc<dyn ScalarUDFImpl>,
    /// The aliases reported by `inner`, followed by the additional aliases
    /// supplied when the wrapper was created.
    aliases: Vec<String>,
}
847
848impl AliasedScalarUDFImpl {
849    pub fn new(
850        inner: Arc<dyn ScalarUDFImpl>,
851        new_aliases: impl IntoIterator<Item = &'static str>,
852    ) -> Self {
853        let mut aliases = inner.aliases().to_vec();
854        aliases.extend(new_aliases.into_iter().map(|s| s.to_string()));
855        Self { inner, aliases }
856    }
857}
858
859impl ScalarUDFImpl for AliasedScalarUDFImpl {
860    fn as_any(&self) -> &dyn Any {
861        self
862    }
863
864    fn name(&self) -> &str {
865        self.inner.name()
866    }
867
868    fn display_name(&self, args: &[Expr]) -> Result<String> {
869        self.inner.display_name(args)
870    }
871
872    fn schema_name(&self, args: &[Expr]) -> Result<String> {
873        self.inner.schema_name(args)
874    }
875
876    fn signature(&self) -> &Signature {
877        self.inner.signature()
878    }
879
880    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
881        self.inner.return_type(arg_types)
882    }
883
884    fn aliases(&self) -> &[String] {
885        &self.aliases
886    }
887
888    #[allow(deprecated)]
889    fn return_type_from_exprs(
890        &self,
891        args: &[Expr],
892        schema: &dyn ExprSchema,
893        arg_types: &[DataType],
894    ) -> Result<DataType> {
895        self.inner.return_type_from_exprs(args, schema, arg_types)
896    }
897
898    fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
899        self.inner.return_type_from_args(args)
900    }
901
902    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
903        self.inner.invoke_with_args(args)
904    }
905
906    fn simplify(
907        &self,
908        args: Vec<Expr>,
909        info: &dyn SimplifyInfo,
910    ) -> Result<ExprSimplifyResult> {
911        self.inner.simplify(args, info)
912    }
913
914    fn short_circuits(&self) -> bool {
915        self.inner.short_circuits()
916    }
917
918    fn evaluate_bounds(&self, input: &[&Interval]) -> Result<Interval> {
919        self.inner.evaluate_bounds(input)
920    }
921
922    fn propagate_constraints(
923        &self,
924        interval: &Interval,
925        inputs: &[&Interval],
926    ) -> Result<Option<Vec<Interval>>> {
927        self.inner.propagate_constraints(interval, inputs)
928    }
929
930    fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
931        self.inner.output_ordering(inputs)
932    }
933
934    fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
935        self.inner.preserves_lex_ordering(inputs)
936    }
937
938    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
939        self.inner.coerce_types(arg_types)
940    }
941
942    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
943        if let Some(other) = other.as_any().downcast_ref::<AliasedScalarUDFImpl>() {
944            self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases
945        } else {
946            false
947        }
948    }
949
950    fn hash_value(&self) -> u64 {
951        let hasher = &mut DefaultHasher::new();
952        self.inner.hash_value().hash(hasher);
953        self.aliases.hash(hasher);
954        hasher.finish()
955    }
956
957    fn documentation(&self) -> Option<&Documentation> {
958        self.inner.documentation()
959    }
960}
961
962// Scalar UDF doc sections for use in public documentation
963pub mod scalar_doc_sections {
964    use crate::DocSection;
965
966    pub fn doc_sections() -> Vec<DocSection> {
967        vec![
968            DOC_SECTION_MATH,
969            DOC_SECTION_CONDITIONAL,
970            DOC_SECTION_STRING,
971            DOC_SECTION_BINARY_STRING,
972            DOC_SECTION_REGEX,
973            DOC_SECTION_DATETIME,
974            DOC_SECTION_ARRAY,
975            DOC_SECTION_STRUCT,
976            DOC_SECTION_MAP,
977            DOC_SECTION_HASHING,
978            DOC_SECTION_UNION,
979            DOC_SECTION_OTHER,
980        ]
981    }
982
983    pub const fn doc_sections_const() -> &'static [DocSection] {
984        &[
985            DOC_SECTION_MATH,
986            DOC_SECTION_CONDITIONAL,
987            DOC_SECTION_STRING,
988            DOC_SECTION_BINARY_STRING,
989            DOC_SECTION_REGEX,
990            DOC_SECTION_DATETIME,
991            DOC_SECTION_ARRAY,
992            DOC_SECTION_STRUCT,
993            DOC_SECTION_MAP,
994            DOC_SECTION_HASHING,
995            DOC_SECTION_UNION,
996            DOC_SECTION_OTHER,
997        ]
998    }
999
1000    pub const DOC_SECTION_MATH: DocSection = DocSection {
1001        include: true,
1002        label: "Math Functions",
1003        description: None,
1004    };
1005
1006    pub const DOC_SECTION_CONDITIONAL: DocSection = DocSection {
1007        include: true,
1008        label: "Conditional Functions",
1009        description: None,
1010    };
1011
1012    pub const DOC_SECTION_STRING: DocSection = DocSection {
1013        include: true,
1014        label: "String Functions",
1015        description: None,
1016    };
1017
1018    pub const DOC_SECTION_BINARY_STRING: DocSection = DocSection {
1019        include: true,
1020        label: "Binary String Functions",
1021        description: None,
1022    };
1023
1024    pub const DOC_SECTION_REGEX: DocSection = DocSection {
1025        include: true,
1026        label: "Regular Expression Functions",
1027        description: Some(
1028            r#"Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions)
1029regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax)
1030(minus support for several features including look-around and backreferences).
1031The following regular expression functions are supported:"#,
1032        ),
1033    };
1034
1035    pub const DOC_SECTION_DATETIME: DocSection = DocSection {
1036        include: true,
1037        label: "Time and Date Functions",
1038        description: None,
1039    };
1040
1041    pub const DOC_SECTION_ARRAY: DocSection = DocSection {
1042        include: true,
1043        label: "Array Functions",
1044        description: None,
1045    };
1046
1047    pub const DOC_SECTION_STRUCT: DocSection = DocSection {
1048        include: true,
1049        label: "Struct Functions",
1050        description: None,
1051    };
1052
1053    pub const DOC_SECTION_MAP: DocSection = DocSection {
1054        include: true,
1055        label: "Map Functions",
1056        description: None,
1057    };
1058
1059    pub const DOC_SECTION_HASHING: DocSection = DocSection {
1060        include: true,
1061        label: "Hashing Functions",
1062        description: None,
1063    };
1064
1065    pub const DOC_SECTION_OTHER: DocSection = DocSection {
1066        include: true,
1067        label: "Other Functions",
1068        description: None,
1069    };
1070
1071    pub const DOC_SECTION_UNION: DocSection = DocSection {
1072        include: true,
1073        label: "Union Functions",
1074        description: Some("Functions to work with the union data type, also know as tagged unions, variant types, enums or sum types. Note: Not related to the SQL UNION operator"),
1075    };
1076}