pub struct LogicalPlanBuilder { /* private fields */ }
Expand description

Builder for logical plans

// Create a plan similar to
// SELECT last_name
// FROM employee
// WHERE salary < 1000
let plan = table_scan(Some("employee"), &employee_schema(), None)?
 // Keep only rows where salary < 1000
 .filter(col("salary").lt(lit(1000)))?
 // only show "last_name" in the final results
 .project(vec![col("last_name")])?
 .build()?;

Implementations§

source§

impl LogicalPlanBuilder

source

pub fn from(plan: LogicalPlan) -> Self

Create a builder from an existing plan

source

pub fn schema(&self) -> &DFSchemaRef

Return the output schema of the plan built so far

source

pub fn empty(produce_one_row: bool) -> Self

Create an empty relation.

produce_one_row set to true means this empty node needs to produce a placeholder row.

source

pub fn to_recursive_query( &self, name: String, recursive_term: LogicalPlan, is_distinct: bool, ) -> Result<Self>

Convert a regular plan into a recursive query. is_distinct indicates whether the recursive term should be de-duplicated (UNION) after each iteration or not (UNION ALL).

source

pub fn values(values: Vec<Vec<Expr>>) -> Result<Self>

Create a relation from a list of values, consuming values; the schema is inferred from the data. See the Postgres VALUES documentation for more details.

By default, it assigns the names column1, column2, etc. to the columns of a VALUES table. The column names are not specified by the SQL standard and different database systems do it differently, so it’s usually better to override the default names with a table alias list.

If the values include params/binders such as $1, $2, $3, etc, then the param_data_types should be provided.

source

pub fn scan( table_name: impl Into<TableReference>, table_source: Arc<dyn TableSource>, projection: Option<Vec<usize>>, ) -> Result<Self>

Convert a table provider into a builder with a TableScan

Note that if you pass a string as table_name, it is treated as a SQL identifier, as described on TableReference, and thus is normalized.

§Example:
// Scan table_source with the name "mytable" (after normalization)
let scan = LogicalPlanBuilder::scan("MyTable", table, None);

// Scan table_source with the name "MyTable" by enclosing in quotes
let scan = LogicalPlanBuilder::scan(r#""MyTable""#, table, None);

// Scan table_source with the name "MyTable" by forming the table reference
let table_reference = TableReference::bare("MyTable");
let scan = LogicalPlanBuilder::scan(table_reference, table, None);
source

pub fn copy_to( input: LogicalPlan, output_url: String, file_type: Arc<dyn FileType>, options: HashMap<String, String>, partition_by: Vec<String>, ) -> Result<Self>

Create a CopyTo for copying the contents of this builder to the specified file(s)

source

pub fn insert_into( input: LogicalPlan, table_name: impl Into<TableReference>, table_schema: &Schema, overwrite: bool, ) -> Result<Self>

Create a DmlStatement for inserting the contents of this builder into the named table

source

pub fn scan_with_filters( table_name: impl Into<TableReference>, table_source: Arc<dyn TableSource>, projection: Option<Vec<usize>>, filters: Vec<Expr>, ) -> Result<Self>

Convert a table provider into a builder with a TableScan that carries the given filters

source

pub fn window_plan( input: LogicalPlan, window_exprs: Vec<Expr>, ) -> Result<LogicalPlan>

Wrap a plan in a window

source

pub fn project( self, expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>

Apply a projection without alias.

source

pub fn select(self, indices: impl IntoIterator<Item = usize>) -> Result<Self>

Select the given column indices

source

pub fn filter(self, expr: impl Into<Expr>) -> Result<Self>

Apply a filter

source

pub fn prepare(self, name: String, data_types: Vec<DataType>) -> Result<Self>

Make a builder for a prepare logical plan from the builder’s plan

source

pub fn limit(self, skip: usize, fetch: Option<usize>) -> Result<Self>

Limit the number of rows returned

skip - Number of rows to skip before fetching any rows.

fetch - Maximum number of rows to fetch, after skipping skip rows, if specified.

source

pub fn alias(self, alias: impl Into<TableReference>) -> Result<Self>

Apply an alias

source

pub fn sort( self, exprs: impl IntoIterator<Item = impl Into<Expr>> + Clone, ) -> Result<Self>

Apply a sort

source

pub fn union(self, plan: LogicalPlan) -> Result<Self>

Apply a union, preserving duplicate rows

source

pub fn union_distinct(self, plan: LogicalPlan) -> Result<Self>

Apply a union, removing duplicate rows

source

pub fn distinct(self) -> Result<Self>

Apply deduplication: only distinct (different) values are returned

source

pub fn distinct_on( self, on_expr: Vec<Expr>, select_expr: Vec<Expr>, sort_expr: Option<Vec<Expr>>, ) -> Result<Self>

Project first values of the specified expression list according to the provided sorting expressions grouped by the DISTINCT ON clause expressions.

source

pub fn join( self, right: LogicalPlan, join_type: JoinType, join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>), filter: Option<Expr>, ) -> Result<Self>

Apply a join to right using explicitly specified columns and an optional filter expression.

See join_on for a more concise way to specify the join condition. Since DataFusion will automatically identify and optimize equality predicates there is no performance difference between this function and join_on

The left and right column lists in join_keys (left_cols and right_cols) are used to form “equijoin” predicates (see the example on join_on), which are then combined with the optional filter expression.

Note that in case of outer join, the filter is applied to only matched rows.

source

pub fn join_on( self, right: LogicalPlan, join_type: JoinType, on_exprs: impl IntoIterator<Item = Expr>, ) -> Result<Self>

Apply a join using the specified expressions.

Note that DataFusion automatically optimizes joins, including identifying and optimizing equality predicates.

§Example
let example_schema = Arc::new(Schema::new(vec![
    Field::new("a", DataType::Int32, false),
    Field::new("b", DataType::Int32, false),
    Field::new("c", DataType::Int32, false),
]));
let table_source = Arc::new(LogicalTableSource::new(example_schema));
let left_table = table_source.clone();
let right_table = table_source.clone();

let right_plan = LogicalPlanBuilder::scan("right", right_table, None)?.build()?;

// Form the expression `(left.a = right.a)` AND `(left.b != right.b)`
let exprs = vec![
    col("left.a").eq(col("right.a")),
    col("left.b").not_eq(col("right.b"))
 ];

// Perform the equivalent of `left INNER JOIN right ON (a = a2 AND b != b2)`
// finding all pairs of rows from `left` and `right`
// where `a = a2` and `b != b2`.
let plan = LogicalPlanBuilder::scan("left", left_table, None)?
    .join_on(right_plan, JoinType::Inner, exprs)?
    .build()?;
source

pub fn join_detailed( self, right: LogicalPlan, join_type: JoinType, join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>), filter: Option<Expr>, null_equals_null: bool, ) -> Result<Self>

Apply a join with an on constraint and the specified null equality.

The behavior is the same as join except that it allows specifying the null equality behavior.

If null_equals_null=true, rows where both join keys are null will be emitted. Otherwise rows where either or both join keys are null will be omitted.

source

pub fn join_using( self, right: LogicalPlan, join_type: JoinType, using_keys: Vec<impl Into<Column> + Clone>, ) -> Result<Self>

Apply a join with using constraint, which duplicates all join columns in output schema.

source

pub fn cross_join(self, right: LogicalPlan) -> Result<Self>

Apply a cross join

source

pub fn repartition(self, partitioning_scheme: Partitioning) -> Result<Self>

Repartition

source

pub fn window( self, window_expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>

Apply window functions to extend the schema

source

pub fn aggregate( self, group_expr: impl IntoIterator<Item = impl Into<Expr>>, aggr_expr: impl IntoIterator<Item = impl Into<Expr>>, ) -> Result<Self>

Apply an aggregate: grouping on the group_expr expressions and calculating aggr_expr aggregates for each distinct value of the group_expr.

source

pub fn explain(self, verbose: bool, analyze: bool) -> Result<Self>

Create an expression to represent the explanation of the plan

If analyze is true, runs the actual plan and produces information about metrics during the run.

If verbose is true, prints out additional details.

source

pub fn intersect( left_plan: LogicalPlan, right_plan: LogicalPlan, is_all: bool, ) -> Result<LogicalPlan>

Process intersect set operator

source

pub fn except( left_plan: LogicalPlan, right_plan: LogicalPlan, is_all: bool, ) -> Result<LogicalPlan>

Process except set operator

source

pub fn build(self) -> Result<LogicalPlan>

Build the plan

source

pub fn join_with_expr_keys( self, right: LogicalPlan, join_type: JoinType, equi_exprs: (Vec<impl Into<Expr>>, Vec<impl Into<Expr>>), filter: Option<Expr>, ) -> Result<Self>

Apply a join whose on constraint is given as expressions.

equi_exprs are “equijoin” predicates expressions on the existing and right inputs, respectively.

filter: any other filter expression to apply during the join. equi_exprs predicates are likely to be evaluated more quickly than the filter expressions

source

pub fn unnest_column(self, column: impl Into<Column>) -> Result<Self>

Unnest the given column.

source

pub fn unnest_column_with_options( self, column: impl Into<Column>, options: UnnestOptions, ) -> Result<Self>

Unnest the given column given UnnestOptions

source

pub fn unnest_columns_with_options( self, columns: Vec<Column>, options: UnnestOptions, ) -> Result<Self>

Unnest the given columns with the given UnnestOptions

Trait Implementations§

source§

impl Clone for LogicalPlanBuilder

source§

fn clone(&self) -> LogicalPlanBuilder

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for LogicalPlanBuilder

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl From<Arc<LogicalPlan>> for LogicalPlanBuilder

Converts an Arc<LogicalPlan> into a LogicalPlanBuilder

// Create the plan
let plan = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3, 4]))?
    .sort(vec![
        Expr::Sort(expr::Sort::new(Box::new(col("state")), true, true)),
        Expr::Sort(expr::Sort::new(Box::new(col("salary")), false, false)),
     ])?
    .build()?;
// Convert Arc<LogicalPlan> into LogicalPlanBuilder
let plan_builder: LogicalPlanBuilder = std::sync::Arc::new(plan).into();
source§

fn from(plan: Arc<LogicalPlan>) -> Self

Converts to this type from the input type.

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

default unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.