polars_plan/dsl/
udf.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
use arrow::legacy::error::{polars_bail, PolarsResult};
use polars_core::prelude::Field;
use polars_core::schema::Schema;
use polars_utils::pl_str::PlSmallStr;

use super::{ColumnsUdf, Expr, GetOutput, OpaqueColumnUdf};
use crate::prelude::{new_column_udf, Context, FunctionOptions};

/// Represents a user-defined function (UDF) that can be turned into an
/// anonymous-function expression via [`UserDefinedFunction::call`] or
/// [`UserDefinedFunction::call_unchecked`].
#[derive(Clone)]
pub struct UserDefinedFunction {
    /// Name of the UDF. It is shown in the `Debug` output; it is not part of
    /// the expression built by `call` / `call_unchecked`.
    pub name: PlSmallStr,
    /// The function signature: one field per expected argument. `call` checks
    /// the argument count against this list and resolves the arguments against
    /// a schema built from it.
    pub input_fields: Vec<Field>,
    /// The function output type; becomes the `output_type` of the generated
    /// expression.
    pub return_type: GetOutput,
    /// The function implementation; becomes the `function` of the generated
    /// expression.
    pub fun: OpaqueColumnUdf,
    /// Options for the function. `new` initializes this to
    /// `FunctionOptions::default()`; set the field directly to override.
    pub options: FunctionOptions,
}

impl std::fmt::Debug for UserDefinedFunction {
    /// Renders the UDF as a struct-like debug view.
    ///
    /// The implementation itself is opaque, so it is shown as the placeholder
    /// `"<FUNC>"`; the declared input fields are printed under `signature`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self {
            name,
            input_fields,
            options,
            ..
        } = self;

        let mut view = f.debug_struct("UserDefinedFunction");
        view.field("name", name);
        view.field("signature", input_fields);
        view.field("fun", &"<FUNC>");
        view.field("options", options);
        view.finish()
    }
}

impl UserDefinedFunction {
    /// Create a new `UserDefinedFunction`.
    ///
    /// * `name` - name of the UDF.
    /// * `input_fields` - the expected arguments, one field per argument.
    /// * `return_type` - how the output type of the UDF is determined.
    /// * `fun` - the implementation; it is wrapped with `new_column_udf`.
    ///
    /// `options` is initialized to `FunctionOptions::default()`.
    pub fn new(
        name: PlSmallStr,
        input_fields: Vec<Field>,
        return_type: GetOutput,
        fun: impl ColumnsUdf + 'static,
    ) -> Self {
        Self {
            name,
            input_fields,
            return_type,
            fun: new_column_udf(fun),
            options: FunctionOptions::default(),
        }
    }

    /// Creates a logical expression with a call of the UDF.
    ///
    /// This utility allows using the UDF without requiring access to the registry.
    /// The arguments are validated against the declared input fields before the
    /// expression is built.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidOperation` error when
    /// * the number of `args` differs from the number of declared input fields, or
    /// * any argument cannot be resolved to a field against the schema built
    ///   from the declared input fields.
    pub fn call(self, args: Vec<Expr>) -> PolarsResult<Expr> {
        if args.len() != self.input_fields.len() {
            polars_bail!(InvalidOperation: "expected {} arguments, got {}", self.input_fields.len(), args.len())
        }
        let schema = Schema::from_iter(self.input_fields);

        // Only the success/failure of the resolution matters, so stop at the
        // first failing argument instead of collecting the resolved fields.
        if args
            .iter()
            .any(|e| e.to_field(&schema, Context::Default).is_err())
        {
            polars_bail!(InvalidOperation: "unexpected field in UDF \nexpected: {:?}\n received {:?}", schema, args)
        }

        Ok(Expr::AnonymousFunction {
            input: args,
            function: self.fun,
            output_type: self.return_type,
            options: self.options,
        })
    }

    /// Creates a logical expression with a call of the UDF.
    ///
    /// This does not do any schema validation and is therefore faster.
    ///
    /// Only use this if you are certain that the schema is correct.
    /// If the schema is invalid, the query will fail at runtime.
    pub fn call_unchecked(self, args: Vec<Expr>) -> Expr {
        // `self` is consumed, so every field can be moved out directly.
        Expr::AnonymousFunction {
            input: args,
            function: self.fun,
            output_type: self.return_type,
            options: self.options,
        }
    }
}