1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Function module contains typing and signature for built-in and user defined functions.
use crate::ColumnarValue;
use crate::{Accumulator, Expr, PartitionEvaluator};
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::{DFSchema, Result};
use std::sync::Arc;
#[derive(Debug, Clone, Copy)]
pub enum Hint {
/// Indicates the argument needs to be padded if it is scalar
Pad,
/// Indicates the argument can be converted to an array of length 1
AcceptsSingular,
}
/// Scalar function
///
/// The Fn param is the wrapped function but be aware that the function will
/// be passed with the slice / vec of columnar values (either scalar or array)
/// with the exception of zero param function, where a singular element vec
/// will be passed. In that case the single element is a null array to indicate
/// the batch's row count (so that the generative zero-argument function can know
/// the result array size).
pub type ScalarFunctionImplementation =
Arc<dyn Fn(&[ColumnarValue]) -> Result<ColumnarValue> + Send + Sync>;
/// Factory that returns the functions's return type given the input argument types
pub type ReturnTypeFunction =
Arc<dyn Fn(&[DataType]) -> Result<Arc<DataType>> + Send + Sync>;
/// [`AccumulatorArgs`] contains information about how an aggregate
/// function was called, including the types of its arguments and any optional
/// ordering expressions.
#[derive(Debug)]
pub struct AccumulatorArgs<'a> {
/// The return type of the aggregate function.
pub data_type: &'a DataType,
/// The schema of the input arguments
pub schema: &'a Schema,
/// The schema of the input arguments
pub dfschema: &'a DFSchema,
/// Whether to ignore nulls.
///
/// SQL allows the user to specify `IGNORE NULLS`, for example:
///
/// ```sql
/// SELECT FIRST_VALUE(column1) IGNORE NULLS FROM t;
/// ```
pub ignore_nulls: bool,
/// The expressions in the `ORDER BY` clause passed to this aggregator.
///
/// SQL allows the user to specify the ordering of arguments to the
/// aggregate using an `ORDER BY`. For example:
///
/// ```sql
/// SELECT FIRST_VALUE(column1 ORDER BY column2) FROM t;
/// ```
///
/// If no `ORDER BY` is specified, `sort_exprs`` will be empty.
pub sort_exprs: &'a [Expr],
/// Whether the aggregation is running in reverse order
pub is_reversed: bool,
/// The name of the aggregate expression
pub name: &'a str,
/// Whether the aggregate function is distinct.
///
/// ```sql
/// SELECT COUNT(DISTINCT column1) FROM t;
/// ```
pub is_distinct: bool,
/// The input types of the aggregate function.
pub input_types: &'a [DataType],
/// The logical expression of arguments the aggregate function takes.
pub input_exprs: &'a [Expr],
}
/// [`StateFieldsArgs`] contains information about the fields that an
/// aggregate function's accumulator should have. Used for [`AggregateUDFImpl::state_fields`].
///
/// [`AggregateUDFImpl::state_fields`]: crate::udaf::AggregateUDFImpl::state_fields
pub struct StateFieldsArgs<'a> {
/// The name of the aggregate function.
pub name: &'a str,
/// The input types of the aggregate function.
pub input_types: &'a [DataType],
/// The return type of the aggregate function.
pub return_type: &'a DataType,
/// The ordering fields of the aggregate function.
pub ordering_fields: &'a [Field],
/// Whether the aggregate function is distinct.
pub is_distinct: bool,
}
/// Factory that returns an accumulator for the given aggregate function.
pub type AccumulatorFactoryFunction =
Arc<dyn Fn(AccumulatorArgs) -> Result<Box<dyn Accumulator>> + Send + Sync>;
/// Factory that creates a PartitionEvaluator for the given window
/// function
pub type PartitionEvaluatorFactory =
Arc<dyn Fn() -> Result<Box<dyn PartitionEvaluator>> + Send + Sync>;
/// Factory that returns the types used by an aggregator to serialize
/// its state, given its return datatype.
pub type StateTypeFunction =
Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>;
/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure
/// A closure with two arguments:
/// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked
/// * 'info': [crate::simplify::SimplifyInfo]
///
/// closure returns simplified [Expr] or an error.
pub type AggregateFunctionSimplification = Box<
dyn Fn(
crate::expr::AggregateFunction,
&dyn crate::simplify::SimplifyInfo,
) -> Result<Expr>,
>;
/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure
/// A closure with two arguments:
/// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked
/// * 'info': [crate::simplify::SimplifyInfo]
///
/// closure returns simplified [Expr] or an error.
pub type WindowFunctionSimplification = Box<
dyn Fn(
crate::expr::WindowFunction,
&dyn crate::simplify::SimplifyInfo,
) -> Result<Expr>,
>;