datafusion_expr/
planner.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning
19
20use std::fmt::Debug;
21use std::sync::Arc;
22
23use arrow::datatypes::{DataType, Field, SchemaRef};
24use datafusion_common::{
25    config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema,
26    Result, TableReference,
27};
28use sqlparser::ast::{self, NullTreatment};
29
30use crate::{
31    AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame,
32    WindowFunctionDefinition, WindowUDF,
33};
34
35/// Provides the `SQL` query planner meta-data about tables and
36/// functions referenced in SQL statements, without a direct dependency on the
37/// `datafusion` Catalog structures such as [`TableProvider`]
38///
39/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/catalog/trait.TableProvider.html
40pub trait ContextProvider {
41    /// Returns a table by reference, if it exists
42    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>;
43
44    /// Return the type of a file based on its extension (e.g. `.parquet`)
45    ///
46    /// This is used to plan `COPY` statements
47    fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> {
48        not_impl_err!("Registered file types are not supported")
49    }
50
51    /// Getter for a table function
52    fn get_table_function_source(
53        &self,
54        _name: &str,
55        _args: Vec<Expr>,
56    ) -> Result<Arc<dyn TableSource>> {
57        not_impl_err!("Table Functions are not supported")
58    }
59
60    /// Provides an intermediate table that is used to store the results of a CTE during execution
61    ///
62    /// CTE stands for "Common Table Expression"
63    ///
64    /// # Notes
65    /// We don't directly implement this in [`SqlToRel`] as implementing this function
66    /// often requires access to a table that contains
67    /// execution-related types that can't be a direct dependency
68    /// of the sql crate (for example [`CteWorkTable`]).
69    ///
70    /// The [`ContextProvider`] provides a way to "hide" this dependency.
71    ///
72    /// [`SqlToRel`]: https://docs.rs/datafusion/latest/datafusion/sql/planner/struct.SqlToRel.html
73    /// [`CteWorkTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/cte_worktable/struct.CteWorkTable.html
74    fn create_cte_work_table(
75        &self,
76        _name: &str,
77        _schema: SchemaRef,
78    ) -> Result<Arc<dyn TableSource>> {
79        not_impl_err!("Recursive CTE is not implemented")
80    }
81
82    /// Return [`ExprPlanner`] extensions for planning expressions
83    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
84        &[]
85    }
86
87    /// Return [`TypePlanner`] extensions for planning data types
88    fn get_type_planner(&self) -> Option<Arc<dyn TypePlanner>> {
89        None
90    }
91
92    /// Return the scalar function with a given name, if any
93    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
94
95    /// Return the aggregate function with a given name, if any
96    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
97
98    /// Return the window function with a given name, if any
99    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>;
100
101    /// Return the system/user-defined variable type, if any
102    ///
103    /// A user defined variable is typically accessed via `@var_name`
104    fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
105
106    /// Return overall configuration options
107    fn options(&self) -> &ConfigOptions;
108
109    /// Return all scalar function names
110    fn udf_names(&self) -> Vec<String>;
111
112    /// Return all aggregate function names
113    fn udaf_names(&self) -> Vec<String>;
114
115    /// Return all window function names
116    fn udwf_names(&self) -> Vec<String>;
117}
118
119/// Customize planning of SQL AST expressions to [`Expr`]s
120pub trait ExprPlanner: Debug + Send + Sync {
121    /// Plan the binary operation between two expressions, returns original
122    /// BinaryExpr if not possible
123    fn plan_binary_op(
124        &self,
125        expr: RawBinaryExpr,
126        _schema: &DFSchema,
127    ) -> Result<PlannerResult<RawBinaryExpr>> {
128        Ok(PlannerResult::Original(expr))
129    }
130
131    /// Plan the field access expression, such as `foo.bar`
132    ///
133    /// returns original [`RawFieldAccessExpr`] if not possible
134    fn plan_field_access(
135        &self,
136        expr: RawFieldAccessExpr,
137        _schema: &DFSchema,
138    ) -> Result<PlannerResult<RawFieldAccessExpr>> {
139        Ok(PlannerResult::Original(expr))
140    }
141
142    /// Plan an array literal, such as `[1, 2, 3]`
143    ///
144    /// Returns original expression arguments if not possible
145    fn plan_array_literal(
146        &self,
147        exprs: Vec<Expr>,
148        _schema: &DFSchema,
149    ) -> Result<PlannerResult<Vec<Expr>>> {
150        Ok(PlannerResult::Original(exprs))
151    }
152
153    /// Plan a `POSITION` expression, such as `POSITION(<expr> in <expr>)`
154    ///
155    /// Returns original expression arguments if not possible
156    fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
157        Ok(PlannerResult::Original(args))
158    }
159
160    /// Plan a dictionary literal, such as `{ key: value, ...}`
161    ///
162    /// Returns original expression arguments if not possible
163    fn plan_dictionary_literal(
164        &self,
165        expr: RawDictionaryExpr,
166        _schema: &DFSchema,
167    ) -> Result<PlannerResult<RawDictionaryExpr>> {
168        Ok(PlannerResult::Original(expr))
169    }
170
171    /// Plan an extract expression, such as`EXTRACT(month FROM foo)`
172    ///
173    /// Returns original expression arguments if not possible
174    fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
175        Ok(PlannerResult::Original(args))
176    }
177
178    /// Plan an substring expression, such as `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])`
179    ///
180    /// Returns original expression arguments if not possible
181    fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
182        Ok(PlannerResult::Original(args))
183    }
184
185    /// Plans a struct literal, such as  `{'field1' : expr1, 'field2' : expr2, ...}`
186    ///
187    /// This function takes a vector of expressions and a boolean flag
188    /// indicating whether the struct uses the optional name
189    ///
190    /// Returns the original input expressions if planning is not possible.
191    fn plan_struct_literal(
192        &self,
193        args: Vec<Expr>,
194        _is_named_struct: bool,
195    ) -> Result<PlannerResult<Vec<Expr>>> {
196        Ok(PlannerResult::Original(args))
197    }
198
199    /// Plans an overlay expression, such as `overlay(str PLACING substr FROM pos [FOR count])`
200    ///
201    /// Returns original expression arguments if not possible
202    fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
203        Ok(PlannerResult::Original(args))
204    }
205
206    /// Plans a `make_map` expression, such as `make_map(key1, value1, key2, value2, ...)`
207    ///
208    /// Returns original expression arguments if not possible
209    fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
210        Ok(PlannerResult::Original(args))
211    }
212
213    /// Plans compound identifier such as `db.schema.table` for non-empty nested names
214    ///
215    /// # Note:
216    /// Currently compound identifier for outer query schema is not supported.
217    ///
218    /// Returns original expression if not possible
219    fn plan_compound_identifier(
220        &self,
221        _field: &Field,
222        _qualifier: Option<&TableReference>,
223        _nested_names: &[String],
224    ) -> Result<PlannerResult<Vec<Expr>>> {
225        not_impl_err!(
226            "Default planner compound identifier hasn't been implemented for ExprPlanner"
227        )
228    }
229
230    /// Plans `ANY` expression, such as `expr = ANY(array_expr)`
231    ///
232    /// Returns origin binary expression if not possible
233    fn plan_any(&self, expr: RawBinaryExpr) -> Result<PlannerResult<RawBinaryExpr>> {
234        Ok(PlannerResult::Original(expr))
235    }
236
237    /// Plans aggregate functions, such as `COUNT(<expr>)`
238    ///
239    /// Returns original expression arguments if not possible
240    fn plan_aggregate(
241        &self,
242        expr: RawAggregateExpr,
243    ) -> Result<PlannerResult<RawAggregateExpr>> {
244        Ok(PlannerResult::Original(expr))
245    }
246
247    /// Plans window functions, such as `COUNT(<expr>)`
248    ///
249    /// Returns original expression arguments if not possible
250    fn plan_window(&self, expr: RawWindowExpr) -> Result<PlannerResult<RawWindowExpr>> {
251        Ok(PlannerResult::Original(expr))
252    }
253}
254
255/// An operator with two arguments to plan
256///
257/// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST
258/// operator.
259///
260/// This structure is used by [`ExprPlanner`] to plan operators with
261/// custom expressions.
262#[derive(Debug, Clone)]
263pub struct RawBinaryExpr {
264    pub op: ast::BinaryOperator,
265    pub left: Expr,
266    pub right: Expr,
267}
268
269/// An expression with GetFieldAccess to plan
270///
271/// This structure is used by [`ExprPlanner`] to plan operators with
272/// custom expressions.
273#[derive(Debug, Clone)]
274pub struct RawFieldAccessExpr {
275    pub field_access: GetFieldAccess,
276    pub expr: Expr,
277}
278
279/// A Dictionary literal expression `{ key: value, ...}`
280///
281/// This structure is used by [`ExprPlanner`] to plan operators with
282/// custom expressions.
283#[derive(Debug, Clone)]
284pub struct RawDictionaryExpr {
285    pub keys: Vec<Expr>,
286    pub values: Vec<Expr>,
287}
288
289/// This structure is used by `AggregateFunctionPlanner` to plan operators with
290/// custom expressions.
291#[derive(Debug, Clone)]
292pub struct RawAggregateExpr {
293    pub func: Arc<AggregateUDF>,
294    pub args: Vec<Expr>,
295    pub distinct: bool,
296    pub filter: Option<Box<Expr>>,
297    pub order_by: Option<Vec<SortExpr>>,
298    pub null_treatment: Option<NullTreatment>,
299}
300
301/// This structure is used by `WindowFunctionPlanner` to plan operators with
302/// custom expressions.
303#[derive(Debug, Clone)]
304pub struct RawWindowExpr {
305    pub func_def: WindowFunctionDefinition,
306    pub args: Vec<Expr>,
307    pub partition_by: Vec<Expr>,
308    pub order_by: Vec<SortExpr>,
309    pub window_frame: WindowFrame,
310    pub null_treatment: Option<NullTreatment>,
311}
312
313/// Result of planning a raw expr with [`ExprPlanner`]
314#[derive(Debug, Clone)]
315pub enum PlannerResult<T> {
316    /// The raw expression was successfully planned as a new [`Expr`]
317    Planned(Expr),
318    /// The raw expression could not be planned, and is returned unmodified
319    Original(T),
320}
321
322/// Customize planning SQL types to DataFusion (Arrow) types.
323pub trait TypePlanner: Debug + Send + Sync {
324    /// Plan SQL [`ast::DataType`] to DataFusion [`DataType`]
325    ///
326    /// Returns None if not possible
327    fn plan_type(&self, _sql_type: &ast::DataType) -> Result<Option<DataType>> {
328        Ok(None)
329    }
330}