Struct datafusion_expr::logical_plan::builder::LogicalPlanBuilder
source · pub struct LogicalPlanBuilder { /* private fields */ }
Expand description
Builder for logical plans
// Create a plan similar to
// SELECT last_name
// FROM employees
// WHERE salary < 1000
let plan = table_scan(Some("employee"), &employee_schema(), None)?
// Keep only rows where salary < 1000
.filter(col("salary").lt(lit(1000)))?
// only show "last_name" in the final results
.project(vec![col("last_name")])?
.build()?;
Implementations§
source§impl LogicalPlanBuilder
impl LogicalPlanBuilder
sourcepub fn from(plan: LogicalPlan) -> Self
pub fn from(plan: LogicalPlan) -> Self
Create a builder from an existing plan
sourcepub fn schema(&self) -> &DFSchemaRef
pub fn schema(&self) -> &DFSchemaRef
Return the output schema of the plan built so far
sourcepub fn empty(produce_one_row: bool) -> Self
pub fn empty(produce_one_row: bool) -> Self
Create an empty relation.
produce_one_row
set to true means this empty node needs to produce a placeholder row.
sourcepub fn to_recursive_query(
&self,
name: String,
recursive_term: LogicalPlan,
is_distinct: bool,
) -> Result<Self>
pub fn to_recursive_query( &self, name: String, recursive_term: LogicalPlan, is_distinct: bool, ) -> Result<Self>
Convert a regular plan into a recursive query.
is_distinct
indicates whether the recursive term should be de-duplicated (UNION
) after each iteration or not (UNION ALL
).
sourcepub fn values(values: Vec<Vec<Expr>>) -> Result<Self>
pub fn values(values: Vec<Vec<Expr>>) -> Result<Self>
Create a values list based relation, and the schema is inferred from data, consuming
values
. See the Postgres VALUES
documentation for more details.
By default, it assigns the names column1, column2, etc. to the columns of a VALUES table. The column names are not specified by the SQL standard and different database systems do it differently, so it’s usually better to override the default names with a table alias list.
If the values include params/binders such as $1, $2, $3, etc, then the param_data_types
should be provided.
sourcepub fn scan(
table_name: impl Into<TableReference>,
table_source: Arc<dyn TableSource>,
projection: Option<Vec<usize>>,
) -> Result<Self>
pub fn scan( table_name: impl Into<TableReference>, table_source: Arc<dyn TableSource>, projection: Option<Vec<usize>>, ) -> Result<Self>
Convert a table provider into a builder with a TableScan
Note that if you pass a string as table_name
, it is treated
as a SQL identifier, as described on TableReference
and
thus is normalized
§Example:
// Scan table_source with the name "mytable" (after normalization)
let scan = LogicalPlanBuilder::scan("MyTable", table, None);
// Scan table_source with the name "MyTable" by enclosing in quotes
let scan = LogicalPlanBuilder::scan(r#""MyTable""#, table, None);
// Scan table_source with the name "MyTable" by forming the table reference
let table_reference = TableReference::bare("MyTable");
let scan = LogicalPlanBuilder::scan(table_reference, table, None);
sourcepub fn copy_to(
input: LogicalPlan,
output_url: String,
file_type: Arc<dyn FileType>,
options: HashMap<String, String>,
partition_by: Vec<String>,
) -> Result<Self>
pub fn copy_to( input: LogicalPlan, output_url: String, file_type: Arc<dyn FileType>, options: HashMap<String, String>, partition_by: Vec<String>, ) -> Result<Self>
Create a CopyTo for copying the contents of this builder to the specified file(s)
sourcepub fn insert_into(
input: LogicalPlan,
table_name: impl Into<TableReference>,
table_schema: &Schema,
overwrite: bool,
) -> Result<Self>
pub fn insert_into( input: LogicalPlan, table_name: impl Into<TableReference>, table_schema: &Schema, overwrite: bool, ) -> Result<Self>
Create a DmlStatement for inserting the contents of this builder into the named table
sourcepub fn scan_with_filters(
table_name: impl Into<TableReference>,
table_source: Arc<dyn TableSource>,
projection: Option<Vec<usize>>,
filters: Vec<Expr>,
) -> Result<Self>
pub fn scan_with_filters( table_name: impl Into<TableReference>, table_source: Arc<dyn TableSource>, projection: Option<Vec<usize>>, filters: Vec<Expr>, ) -> Result<Self>
Convert a table provider into a builder with a TableScan
sourcepub fn window_plan(
input: LogicalPlan,
window_exprs: Vec<Expr>,
) -> Result<LogicalPlan>
pub fn window_plan( input: LogicalPlan, window_exprs: Vec<Expr>, ) -> Result<LogicalPlan>
Wrap a plan in a window
sourcepub fn project(
self,
expr: impl IntoIterator<Item = impl Into<Expr>>,
) -> Result<Self>
pub fn project( self, expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>
Apply a projection without alias.
sourcepub fn select(self, indices: impl IntoIterator<Item = usize>) -> Result<Self>
pub fn select(self, indices: impl IntoIterator<Item = usize>) -> Result<Self>
Select the given column indices
sourcepub fn prepare(self, name: String, data_types: Vec<DataType>) -> Result<Self>
pub fn prepare(self, name: String, data_types: Vec<DataType>) -> Result<Self>
Make a builder for a prepare logical plan from the builder’s plan
sourcepub fn limit(self, skip: usize, fetch: Option<usize>) -> Result<Self>
pub fn limit(self, skip: usize, fetch: Option<usize>) -> Result<Self>
Limit the number of rows returned
skip
- Number of rows to skip before fetching any rows.
fetch
- Maximum number of rows to fetch, after skipping skip
rows,
if specified.
sourcepub fn alias(self, alias: impl Into<TableReference>) -> Result<Self>
pub fn alias(self, alias: impl Into<TableReference>) -> Result<Self>
Apply an alias
sourcepub fn sort(
self,
exprs: impl IntoIterator<Item = impl Into<Expr>> + Clone,
) -> Result<Self>
pub fn sort( self, exprs: impl IntoIterator<Item = impl Into<Expr>> + Clone, ) -> Result<Self>
Apply a sort
sourcepub fn union(self, plan: LogicalPlan) -> Result<Self>
pub fn union(self, plan: LogicalPlan) -> Result<Self>
Apply a union, preserving duplicate rows
sourcepub fn union_distinct(self, plan: LogicalPlan) -> Result<Self>
pub fn union_distinct(self, plan: LogicalPlan) -> Result<Self>
Apply a union, removing duplicate rows
sourcepub fn distinct(self) -> Result<Self>
pub fn distinct(self) -> Result<Self>
Apply deduplication: only distinct (different) values are returned.
sourcepub fn distinct_on(
self,
on_expr: Vec<Expr>,
select_expr: Vec<Expr>,
sort_expr: Option<Vec<Expr>>,
) -> Result<Self>
pub fn distinct_on( self, on_expr: Vec<Expr>, select_expr: Vec<Expr>, sort_expr: Option<Vec<Expr>>, ) -> Result<Self>
Project first values of the specified expression list according to the provided
sorting expressions grouped by the DISTINCT ON
clause expressions.
sourcepub fn join(
self,
right: LogicalPlan,
join_type: JoinType,
join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>),
filter: Option<Expr>,
) -> Result<Self>
pub fn join( self, right: LogicalPlan, join_type: JoinType, join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>), filter: Option<Expr>, ) -> Result<Self>
Apply a join to right
using explicitly specified columns and an
optional filter expression.
See join_on
for a more concise way to specify the
join condition. Since DataFusion will automatically identify and
optimize equality predicates there is no performance difference between
this function and join_on.
left_cols
and right_cols
are used to form “equijoin” predicates (see
example below), which are then combined with the optional filter
expression.
Note that in case of outer join, the filter
is applied to only matched rows.
sourcepub fn join_on(
self,
right: LogicalPlan,
join_type: JoinType,
on_exprs: impl IntoIterator<Item = Expr>,
) -> Result<Self>
pub fn join_on( self, right: LogicalPlan, join_type: JoinType, on_exprs: impl IntoIterator<Item = Expr>, ) -> Result<Self>
Apply a join using the specified expressions.
Note that DataFusion automatically optimizes joins, including identifying and optimizing equality predicates.
§Example
let example_schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
Field::new("c", DataType::Int32, false),
]));
let table_source = Arc::new(LogicalTableSource::new(example_schema));
let left_table = table_source.clone();
let right_table = table_source.clone();
let right_plan = LogicalPlanBuilder::scan("right", right_table, None)?.build()?;
// Form the expression `(left.a = right.a)` AND `(left.b != right.b)`
let exprs = vec![
col("left.a").eq(col("right.a")),
col("left.b").not_eq(col("right.b"))
];
// Perform the equivalent of `left INNER JOIN right ON (left.a = right.a AND left.b != right.b)`,
// finding all pairs of rows from `left` and `right`
// where `left.a = right.a` and `left.b != right.b`.
let plan = LogicalPlanBuilder::scan("left", left_table, None)?
.join_on(right_plan, JoinType::Inner, exprs)?
.build()?;
sourcepub fn join_detailed(
self,
right: LogicalPlan,
join_type: JoinType,
join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>),
filter: Option<Expr>,
null_equals_null: bool,
) -> Result<Self>
pub fn join_detailed( self, right: LogicalPlan, join_type: JoinType, join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>), filter: Option<Expr>, null_equals_null: bool, ) -> Result<Self>
Apply a join with an ON constraint and the specified null equality.
The behavior is the same as join
except that it allows
specifying the null equality behavior.
If null_equals_null=true
, rows where both join keys are null
will be
emitted. Otherwise rows where either or both join keys are null
will be
omitted.
sourcepub fn join_using(
self,
right: LogicalPlan,
join_type: JoinType,
using_keys: Vec<impl Into<Column> + Clone>,
) -> Result<Self>
pub fn join_using( self, right: LogicalPlan, join_type: JoinType, using_keys: Vec<impl Into<Column> + Clone>, ) -> Result<Self>
Apply a join with a USING constraint, which duplicates all join columns in the output schema.
sourcepub fn cross_join(self, right: LogicalPlan) -> Result<Self>
pub fn cross_join(self, right: LogicalPlan) -> Result<Self>
Apply a cross join
sourcepub fn repartition(self, partitioning_scheme: Partitioning) -> Result<Self>
pub fn repartition(self, partitioning_scheme: Partitioning) -> Result<Self>
Repartition
sourcepub fn window(
self,
window_expr: impl IntoIterator<Item = impl Into<Expr>>,
) -> Result<Self>
pub fn window( self, window_expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>
Apply window functions to extend the schema
sourcepub fn aggregate(
self,
group_expr: impl IntoIterator<Item = impl Into<Expr>>,
aggr_expr: impl IntoIterator<Item = impl Into<Expr>>,
) -> Result<Self>
pub fn aggregate( self, group_expr: impl IntoIterator<Item = impl Into<Expr>>, aggr_expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>
Apply an aggregate: grouping on the group_expr
expressions
and calculating aggr_expr
aggregates for each distinct
value of the group_expr
.
sourcepub fn explain(self, verbose: bool, analyze: bool) -> Result<Self>
pub fn explain(self, verbose: bool, analyze: bool) -> Result<Self>
Create an expression to represent the explanation of the plan.
If analyze
is true, runs the actual plan and produces
information about metrics during run.
If verbose
is true, prints out additional details.
sourcepub fn intersect(
left_plan: LogicalPlan,
right_plan: LogicalPlan,
is_all: bool,
) -> Result<LogicalPlan>
pub fn intersect( left_plan: LogicalPlan, right_plan: LogicalPlan, is_all: bool, ) -> Result<LogicalPlan>
Process intersect set operator
sourcepub fn except(
left_plan: LogicalPlan,
right_plan: LogicalPlan,
is_all: bool,
) -> Result<LogicalPlan>
pub fn except( left_plan: LogicalPlan, right_plan: LogicalPlan, is_all: bool, ) -> Result<LogicalPlan>
Process except set operator
sourcepub fn build(self) -> Result<LogicalPlan>
pub fn build(self) -> Result<LogicalPlan>
Build the plan
sourcepub fn join_with_expr_keys(
self,
right: LogicalPlan,
join_type: JoinType,
equi_exprs: (Vec<impl Into<Expr>>, Vec<impl Into<Expr>>),
filter: Option<Expr>,
) -> Result<Self>
pub fn join_with_expr_keys( self, right: LogicalPlan, join_type: JoinType, equi_exprs: (Vec<impl Into<Expr>>, Vec<impl Into<Expr>>), filter: Option<Expr>, ) -> Result<Self>
Apply a join with the expression on constraint.
equi_exprs are “equijoin” predicates expressions on the existing and right inputs, respectively.
filter: any other filter expression to apply during the join. equi_exprs predicates are likely to be evaluated more quickly than the filter expressions.
sourcepub fn unnest_column(self, column: impl Into<Column>) -> Result<Self>
pub fn unnest_column(self, column: impl Into<Column>) -> Result<Self>
Unnest the given column.
sourcepub fn unnest_column_with_options(
self,
column: impl Into<Column>,
options: UnnestOptions,
) -> Result<Self>
pub fn unnest_column_with_options( self, column: impl Into<Column>, options: UnnestOptions, ) -> Result<Self>
Unnest the given column given UnnestOptions
sourcepub fn unnest_columns_with_options(
self,
columns: Vec<Column>,
options: UnnestOptions,
) -> Result<Self>
pub fn unnest_columns_with_options( self, columns: Vec<Column>, options: UnnestOptions, ) -> Result<Self>
Unnest the given columns with the given UnnestOptions
Trait Implementations§
source§impl Clone for LogicalPlanBuilder
impl Clone for LogicalPlanBuilder
source§fn clone(&self) -> LogicalPlanBuilder
fn clone(&self) -> LogicalPlanBuilder
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source
. Read more
source§impl Debug for LogicalPlanBuilder
impl Debug for LogicalPlanBuilder
source§impl From<Arc<LogicalPlan>> for LogicalPlanBuilder
impl From<Arc<LogicalPlan>> for LogicalPlanBuilder
Converts an Arc<LogicalPlan>
into LogicalPlanBuilder
// Create the plan
let plan = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3, 4]))?
.sort(vec![
Expr::Sort(expr::Sort::new(Box::new(col("state")), true, true)),
Expr::Sort(expr::Sort::new(Box::new(col("salary")), false, false)),
])?
.build()?;
// Convert LogicalPlan into LogicalPlanBuilder
let plan_builder: LogicalPlanBuilder = std::sync::Arc::new(plan).into();
source§fn from(plan: Arc<LogicalPlan>) -> Self
fn from(plan: Arc<LogicalPlan>) -> Self
Auto Trait Implementations§
impl Freeze for LogicalPlanBuilder
impl !RefUnwindSafe for LogicalPlanBuilder
impl Send for LogicalPlanBuilder
impl Sync for LogicalPlanBuilder
impl Unpin for LogicalPlanBuilder
impl !UnwindSafe for LogicalPlanBuilder
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
source§default unsafe fn clone_to_uninit(&self, dst: *mut T)
default unsafe fn clone_to_uninit(&self, dst: *mut T)
clone_to_uninit
)