Trait datafusion::logical_expr::ScalarUDFImpl

source ·
pub trait ScalarUDFImpl:
    Debug
    + Send
    + Sync {
    // 17 methods

    // Required methods
    fn as_any(&self) -> &(dyn Any + 'static);
    fn name(&self) -> &str;
    fn signature(&self) -> &Signature;
    fn return_type(
        &self,
        arg_types: &[DataType],
    ) -> Result<DataType, DataFusionError>;
    fn invoke(
        &self,
        _args: &[ColumnarValue],
    ) -> Result<ColumnarValue, DataFusionError>;

    // Provided methods
    fn display_name(&self, args: &[Expr]) -> Result<String, DataFusionError> { ... }
    fn return_type_from_exprs(
        &self,
        _args: &[Expr],
        _schema: &dyn ExprSchema,
        arg_types: &[DataType],
    ) -> Result<DataType, DataFusionError> { ... }
    fn invoke_no_args(
        &self,
        _number_rows: usize,
    ) -> Result<ColumnarValue, DataFusionError> { ... }
    fn aliases(&self) -> &[String] { ... }
    fn simplify(
        &self,
        args: Vec<Expr>,
        _info: &dyn SimplifyInfo,
    ) -> Result<ExprSimplifyResult, DataFusionError> { ... }
    fn short_circuits(&self) -> bool { ... }
    fn evaluate_bounds(
        &self,
        _input: &[&Interval],
    ) -> Result<Interval, DataFusionError> { ... }
    fn propagate_constraints(
        &self,
        _interval: &Interval,
        _inputs: &[&Interval],
    ) -> Result<Option<Vec<Interval>>, DataFusionError> { ... }
    fn output_ordering(
        &self,
        _inputs: &[ExprProperties],
    ) -> Result<SortProperties, DataFusionError> { ... }
    fn coerce_types(
        &self,
        _arg_types: &[DataType],
    ) -> Result<Vec<DataType>, DataFusionError> { ... }
    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { ... }
    fn hash_value(&self) -> u64 { ... }
}
Expand description

Trait for implementing ScalarUDF.

This trait exposes the full API for implementing user defined functions and can be used to implement any function.

See advanced_udf.rs for a full example with complete implementation and ScalarUDF for other available options.

§Basic Example

/// Example scalar UDF implementation registered under the name `add_one`.
#[derive(Debug)]
struct AddOne {
    /// Signature handed out by `ScalarUDFImpl::signature`; built once in `AddOne::new`.
    signature: Signature,
}

impl AddOne {
    /// Builds an `AddOne` with a uniform signature of one `DataType::Int32`
    /// argument, marked `Volatility::Immutable`.
    fn new() -> Self {
        let signature =
            Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable);
        Self { signature }
    }
}

/// Implement the ScalarUDFImpl trait for AddOne
impl ScalarUDFImpl for AddOne {
    /// Returns this object as an `Any` trait object.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns the function's name, `"add_one"`.
    fn name(&self) -> &str {
        "add_one"
    }

    /// Returns the signature built in `AddOne::new`.
    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Returns `DataType::Int32` when the first argument type is `Int32`.
    ///
    /// # Errors
    ///
    /// Returns a planning error when the first argument type is missing or
    /// is anything other than `Int32`.
    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
        // `first()` is the idiomatic spelling of `get(0)`; an empty slice
        // yields `None` and is rejected like any non-Int32 type.
        if !matches!(args.first(), Some(&DataType::Int32)) {
            return plan_err!("add_one only accepts Int32 arguments");
        }
        Ok(DataType::Int32)
    }

    // The actual implementation would add one to the argument.
    // `_args` is underscore-prefixed because this stub never reads it.
    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
        unimplemented!()
    }
}

// Wrap the `AddOne` implementation in a `ScalarUDF` (via `From`)
let add_one = ScalarUDF::from(AddOne::new());

// Build the call `add_one(a)`, passing `col("a")` as the single argument
let expr = add_one.call(vec![col("a")]);

Required Methods§

source

fn as_any(&self) -> &(dyn Any + 'static)

Returns this object as an Any trait object

source

fn name(&self) -> &str

Returns this function’s name

source

fn signature(&self) -> &Signature

Returns the function’s Signature for information about what input types are accepted and the function’s Volatility.

source

fn return_type( &self, arg_types: &[DataType], ) -> Result<DataType, DataFusionError>

What DataType will be returned by this function, given the types of the arguments.

§Notes

If you provide an implementation for Self::return_type_from_exprs, DataFusion will not call return_type (this function). In this case it is recommended to return DataFusionError::Internal.

source

fn invoke( &self, _args: &[ColumnarValue], ) -> Result<ColumnarValue, DataFusionError>

Invoke the function on args, returning the appropriate result

The function will be invoked with a slice of ColumnarValue (each either a scalar or an array).

If the function does not take any arguments, please use invoke_no_args instead and return not_impl_err for this function.

§Performance

For the best performance, the implementations of invoke should handle the common case when one or more of their arguments are constant values (aka ColumnarValue::Scalar).

ColumnarValue::values_to_arrays can be used to convert the arguments to arrays, which will likely result in simpler code, but will be slower.

Provided Methods§

source

fn display_name(&self, args: &[Expr]) -> Result<String, DataFusionError>

Returns the user-defined display name of the UDF given the arguments

source

fn return_type_from_exprs( &self, _args: &[Expr], _schema: &dyn ExprSchema, arg_types: &[DataType], ) -> Result<DataType, DataFusionError>

What DataType will be returned by this function, given the arguments?

Note most UDFs should implement Self::return_type and not this function. The output type for most functions only depends on the types of their inputs (e.g. sqrt(f32) is always f32).

By default, this function calls Self::return_type with the types of each argument.

This method can be overridden for functions that return different types based on the values of their arguments.

For example, the following two function calls get the same argument types (something and a Utf8 string) but return different types based on the value of the second argument:

  • arrow_cast(x, 'Int16') –> Int16
  • arrow_cast(x, 'Float32') –> Float32
§Notes:

This function must consistently return the same type for the same logical input even if the input is simplified (e.g. it must return the same type for ('foo' || 'bar') as it does for ('foobar')).

source

fn invoke_no_args( &self, _number_rows: usize, ) -> Result<ColumnarValue, DataFusionError>

Invoke the function without args; the number of rows is provided instead. Returns the appropriate result.

source

fn aliases(&self) -> &[String]

Returns any aliases (alternate names) for this function.

Aliases can be used to invoke the same function using different names. For example in some databases now() and current_timestamp() are aliases for the same function. This behavior can be obtained by returning current_timestamp as an alias for the now function.

Note: aliases should only include names other than Self::name. Defaults to [] (no aliases)

source

fn simplify( &self, args: Vec<Expr>, _info: &dyn SimplifyInfo, ) -> Result<ExprSimplifyResult, DataFusionError>

Optionally apply per-UDF simplification / rewrite rules.

This can be used to apply function specific simplification rules during optimization (e.g. arrow_cast –> Expr::Cast). The default implementation does nothing.

Note that DataFusion handles simplifying arguments and “constant folding” (replacing a function call with constant arguments such as my_add(1,2) --> 3 ). Thus, there is no need to implement such optimizations manually for specific UDFs.

§Arguments
  • args: The arguments of the function
  • info: The necessary information for simplification
§Returns

ExprSimplifyResult indicating the result of the simplification. NOTE: if the function cannot be simplified, the arguments MUST be returned unmodified.

source

fn short_circuits(&self) -> bool

Returns true if some of this expr’s subexpressions may not be evaluated, and thus any side effects (like divide by zero) may not be encountered. Setting this to true prevents certain optimizations such as common subexpression elimination.

source

fn evaluate_bounds( &self, _input: &[&Interval], ) -> Result<Interval, DataFusionError>

Computes the output interval for a ScalarUDFImpl, given the input intervals.

§Parameters
  • input are the intervals for the children (inputs) of this function.
§Example

If the function is ABS(a), and the input interval is a: [-3, 2], then the output interval would be [0, 3].

source

fn propagate_constraints( &self, _interval: &Interval, _inputs: &[&Interval], ) -> Result<Option<Vec<Interval>>, DataFusionError>

Updates bounds for child expressions, given a known interval for this function. This is used to propagate constraints down through an expression tree.

§Parameters
  • interval is the currently known interval for this function.
  • inputs are the current intervals for the inputs (children) of this function.
§Returns

A Vec of new intervals for the children, in order.

If constraint propagation reveals an infeasibility for any child, returns None. If none of the children intervals change as a result of propagation, may return an empty vector instead of cloning children. This is the default (and conservative) return value.

§Example

If the function is ABS(a), the current interval is [4, 5] and the input a is given as [-7, 3], then propagation would return [-5, 3].

source

fn output_ordering( &self, _inputs: &[ExprProperties], ) -> Result<SortProperties, DataFusionError>

Calculates the SortProperties of this function based on its children’s properties.

source

fn coerce_types( &self, _arg_types: &[DataType], ) -> Result<Vec<DataType>, DataFusionError>

Coerce arguments of a function call to types that the function can evaluate.

This function is only called if ScalarUDFImpl::signature returns crate::TypeSignature::UserDefined. Most UDFs should return one of the other variants of TypeSignature, which handle common cases.

See the type coercion module documentation for more details on type coercion

For example, if your function requires floating point arguments, but the user calls it like my_func(1::int) (aka with 1 as an integer), coerce_types could return [DataType::Float64] to ensure the argument is cast to 1::double.

§Parameters
  • arg_types: The types of the arguments this function was called with
§Return value

A Vec the same length as arg_types. DataFusion will CAST the function call arguments to these specific types.

source

fn equals(&self, other: &dyn ScalarUDFImpl) -> bool

Return true if this scalar UDF is equal to the other.

Allows customizing the equality of scalar UDFs. Must be consistent with Self::hash_value and follow the same rules as Eq:

  • reflexive: a.equals(a);
  • symmetric: a.equals(b) implies b.equals(a);
  • transitive: a.equals(b) and b.equals(c) implies a.equals(c).

By default, compares Self::name and Self::signature.

source

fn hash_value(&self) -> u64

Returns a hash value for this scalar UDF.

Allows customizing the hash code of scalar UDFs. Similarly to Hash and Eq, if Self::equals returns true for two UDFs, their hash_values must be the same.

By default, hashes Self::name and Self::signature.

Implementors§

source§

impl ScalarUDFImpl for ArrowCastFunc

source§

impl ScalarUDFImpl for ArrowTypeOfFunc

source§

impl ScalarUDFImpl for CoalesceFunc

source§

impl ScalarUDFImpl for GetFieldFunc

source§

impl ScalarUDFImpl for NamedStructFunc

source§

impl ScalarUDFImpl for NullIfFunc

source§

impl ScalarUDFImpl for NVL2Func

source§

impl ScalarUDFImpl for NVLFunc

source§

impl ScalarUDFImpl for StructFunc

source§

impl ScalarUDFImpl for DigestFunc

source§

impl ScalarUDFImpl for Md5Func

source§

impl ScalarUDFImpl for SHA224Func

source§

impl ScalarUDFImpl for SHA256Func

source§

impl ScalarUDFImpl for SHA384Func

source§

impl ScalarUDFImpl for SHA512Func

source§

impl ScalarUDFImpl for CurrentDateFunc

Create an implementation of current_date() that always returns the specified current date.

The semantics of current_date() require it to return the same value wherever it appears within a single statement. This value is chosen during planning time.

source§

impl ScalarUDFImpl for CurrentTimeFunc

Create an implementation of current_time() that always returns the specified current time.

The semantics of current_time() require it to return the same value wherever it appears within a single statement. This value is chosen during planning time.

source§

impl ScalarUDFImpl for DateBinFunc

source§

impl ScalarUDFImpl for DatePartFunc

source§

impl ScalarUDFImpl for DateTruncFunc

source§

impl ScalarUDFImpl for FromUnixtimeFunc

source§

impl ScalarUDFImpl for MakeDateFunc

source§

impl ScalarUDFImpl for NowFunc

Create an implementation of now() that always returns the specified timestamp.

The semantics of now() require it to return the same value wherever it appears within a single statement. This value is chosen during planning time.

source§

impl ScalarUDFImpl for ToCharFunc

source§

impl ScalarUDFImpl for ToDateFunc

source§

impl ScalarUDFImpl for ToLocalTimeFunc

source§

impl ScalarUDFImpl for ToTimestampFunc

to_timestamp SQL function

Note: to_timestamp returns Timestamp(Nanosecond) though its arguments are interpreted as seconds. The supported range for integer input is between -9223372037 and 9223372036. The supported range for string input is between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.0. Please use to_timestamp_seconds for input outside the supported bounds.

source§

impl ScalarUDFImpl for ToTimestampMicrosFunc

source§

impl ScalarUDFImpl for ToTimestampMillisFunc

source§

impl ScalarUDFImpl for ToTimestampNanosFunc

source§

impl ScalarUDFImpl for ToTimestampSecondsFunc

source§

impl ScalarUDFImpl for ToUnixtimeFunc

source§

impl ScalarUDFImpl for DecodeFunc

source§

impl ScalarUDFImpl for EncodeFunc

source§

impl ScalarUDFImpl for AbsFunc

source§

impl ScalarUDFImpl for CotFunc

source§

impl ScalarUDFImpl for FactorialFunc

source§

impl ScalarUDFImpl for GcdFunc

source§

impl ScalarUDFImpl for IsZeroFunc

source§

impl ScalarUDFImpl for LcmFunc

source§

impl ScalarUDFImpl for LogFunc

source§

impl ScalarUDFImpl for IsNanFunc

source§

impl ScalarUDFImpl for NanvlFunc

source§

impl ScalarUDFImpl for PiFunc

source§

impl ScalarUDFImpl for PowerFunc

source§

impl ScalarUDFImpl for RandomFunc

source§

impl ScalarUDFImpl for RoundFunc

source§

impl ScalarUDFImpl for SignumFunc

source§

impl ScalarUDFImpl for TruncFunc

source§

impl ScalarUDFImpl for RegexpLikeFunc

source§

impl ScalarUDFImpl for RegexpMatchFunc

source§

impl ScalarUDFImpl for RegexpReplaceFunc

source§

impl ScalarUDFImpl for AsciiFunc

source§

impl ScalarUDFImpl for BitLengthFunc

source§

impl ScalarUDFImpl for BTrimFunc

source§

impl ScalarUDFImpl for ChrFunc

source§

impl ScalarUDFImpl for ConcatFunc

source§

impl ScalarUDFImpl for ConcatWsFunc

source§

impl ScalarUDFImpl for ContainsFunc

source§

impl ScalarUDFImpl for EndsWithFunc

source§

impl ScalarUDFImpl for InitcapFunc

source§

impl ScalarUDFImpl for LevenshteinFunc

source§

impl ScalarUDFImpl for LowerFunc

source§

impl ScalarUDFImpl for LtrimFunc

source§

impl ScalarUDFImpl for OctetLengthFunc

source§

impl ScalarUDFImpl for OverlayFunc

source§

impl ScalarUDFImpl for RepeatFunc

source§

impl ScalarUDFImpl for ReplaceFunc

source§

impl ScalarUDFImpl for RtrimFunc

source§

impl ScalarUDFImpl for SplitPartFunc

source§

impl ScalarUDFImpl for StartsWithFunc

source§

impl ScalarUDFImpl for ToHexFunc

source§

impl ScalarUDFImpl for UpperFunc

source§

impl ScalarUDFImpl for UuidFunc

source§

impl ScalarUDFImpl for CharacterLengthFunc

source§

impl ScalarUDFImpl for FindInSetFunc

source§

impl ScalarUDFImpl for LeftFunc

source§

impl ScalarUDFImpl for LPadFunc

source§

impl ScalarUDFImpl for ReverseFunc

source§

impl ScalarUDFImpl for RightFunc

source§

impl ScalarUDFImpl for RPadFunc

source§

impl ScalarUDFImpl for StrposFunc

source§

impl ScalarUDFImpl for SubstrFunc

source§

impl ScalarUDFImpl for SubstrIndexFunc

source§

impl ScalarUDFImpl for TranslateFunc

source§

impl ScalarUDFImpl for ArrayHas

source§

impl ScalarUDFImpl for ArrayHasAll

source§

impl ScalarUDFImpl for ArrayHasAny

source§

impl ScalarUDFImpl for ArrayAppend

source§

impl ScalarUDFImpl for ArrayConcat

source§

impl ScalarUDFImpl for ArrayPrepend

source§

impl ScalarUDFImpl for MakeArray

source§

impl ScalarUDFImpl for MapFunc

source§

impl ScalarUDFImpl for SimpleScalarUDF