pub struct ExprSimplifier<S> { /* private fields */ }
Expand description
This structure provides the API for expression simplification.
Provides simplification information based on DFSchema and ExecutionProps. This is the default implementation used by DataFusion.
For example:
use arrow::datatypes::{Schema, Field, DataType};
use datafusion_expr::{col, lit};
use datafusion_common::{DataFusionError, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
// Create the schema
let schema = Schema::new(vec![
Field::new("i", DataType::Int64, false),
])
.to_dfschema_ref().unwrap();
// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
.with_schema(schema);
let simplifier = ExprSimplifier::new(context);
// Use the simplifier
// b < 2 or (1 > 3)
let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
// b < 2
let simplified = simplifier.simplify(expr).unwrap();
assert_eq!(simplified, col("b").lt(lit(2)));
Implementations§
source§impl<S: SimplifyInfo> ExprSimplifier<S>
impl<S: SimplifyInfo> ExprSimplifier<S>
sourcepub fn new(info: S) -> Self
pub fn new(info: S) -> Self
Create a new ExprSimplifier with the given info, such as an instance of SimplifyContext. See simplify for an example.
sourcepub fn simplify(&self, expr: Expr) -> Result<Expr>
pub fn simplify(&self, expr: Expr) -> Result<Expr>
Simplifies this Expr as much as possible, evaluating constants and applying algebraic simplifications.
The types of the expression must match what the operators expect, or else an error may occur while trying to evaluate it. See coerce for a function to help.
§Example:
`b > 2 AND b > 2` can be rewritten to `b > 2`.
use arrow::datatypes::DataType;
use datafusion_expr::{col, lit, Expr};
use datafusion_common::Result;
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::simplify::SimplifyInfo;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_common::DFSchema;
use std::sync::Arc;
/// Simple implementation that provides `Simplifier` the information it needs
/// See SimplifyContext for a structure that does this.
#[derive(Default)]
struct Info {
execution_props: ExecutionProps,
};
impl SimplifyInfo for Info {
fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
Ok(false)
}
fn nullable(&self, expr: &Expr) -> Result<bool> {
Ok(true)
}
fn execution_props(&self) -> &ExecutionProps {
&self.execution_props
}
fn get_data_type(&self, expr: &Expr) -> Result<DataType> {
Ok(DataType::Int32)
}
}
// Create the simplifier
let simplifier = ExprSimplifier::new(Info::default());
// b > 2 (note: the code uses `gt`, so despite the variable name `b_lt_2` this is "greater than")
let b_lt_2 = col("b").gt(lit(2));
// (b > 2) OR (b > 2)
let expr = b_lt_2.clone().or(b_lt_2.clone());
// (b > 2) OR (b > 2) --> (b > 2)
let expr = simplifier.simplify(expr).unwrap();
assert_eq!(expr, b_lt_2);
sourcepub fn simplify_with_cycle_count(&self, expr: Expr) -> Result<(Expr, u32)>
pub fn simplify_with_cycle_count(&self, expr: Expr) -> Result<(Expr, u32)>
Like Self::simplify, simplifies this Expr as much as possible, evaluating constants and applying algebraic simplifications. Additionally returns a u32 representing the number of simplification cycles performed, which can be useful for testing optimizations.
See Self::simplify for details and usage examples.
sourcepub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr>
pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr>
Apply type coercion to an Expr so that it can be evaluated as a PhysicalExpr.
See the type coercion module documentation for more details on type coercion.
sourcepub fn with_guarantees(self, guarantees: Vec<(Expr, NullableInterval)>) -> Self
pub fn with_guarantees(self, guarantees: Vec<(Expr, NullableInterval)>) -> Self
Input guarantees about the values of columns.
The guarantees can simplify expressions. For example, if a column x is guaranteed to be 3, then the expression x > 1 can be replaced by the literal true.
The guarantees are provided as a Vec<(Expr, NullableInterval)>, where the Expr is a column reference and the NullableInterval is an interval representing the known possible values of that column.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{col, lit, Expr};
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
use datafusion_common::{Result, ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
let schema = Schema::new(vec![
Field::new("x", DataType::Int64, false),
Field::new("y", DataType::UInt32, false),
Field::new("z", DataType::Int64, false),
])
.to_dfschema_ref().unwrap();
// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
.with_schema(schema);
// Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
let expr_x = col("x").gt_eq(lit(3_i64));
let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
let expr_z = col("z").gt(lit(5_i64));
let expr = expr_x.and(expr_y).and(expr_z.clone());
let guarantees = vec![
// x ∈ [3, 5]
(
col("x"),
NullableInterval::NotNull {
values: Interval::make(Some(3_i64), Some(5_i64)).unwrap()
}
),
// y = 3
(col("y"), NullableInterval::from(ScalarValue::UInt32(Some(3)))),
];
let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees);
let output = simplifier.simplify(expr).unwrap();
// Expression becomes: true AND true AND (z > 5), which simplifies to
// z > 5.
assert_eq!(output, expr_z);
sourcepub fn with_canonicalize(self, canonicalize: bool) -> Self
pub fn with_canonicalize(self, canonicalize: bool) -> Self
Should Canonicalizer be applied before simplification?
If true (the default), the expression will be rewritten to canonical form before simplification. This is useful to ensure that the simplifier can apply all possible simplifications.
Some expressions, such as those in some Joins, cannot be canonicalized without changing their meaning. In these cases, canonicalization should be disabled.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{col, lit, Expr};
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
use datafusion_common::{Result, ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
let schema = Schema::new(vec![
Field::new("a", DataType::Int64, false),
Field::new("b", DataType::Int64, false),
Field::new("c", DataType::Int64, false),
])
.to_dfschema_ref().unwrap();
// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
.with_schema(schema);
let simplifier = ExprSimplifier::new(context);
// Expression: a = c AND 1 = b
let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));
// With canonicalization, the expression is rewritten to canonical form
// (though it is no simpler in this case):
let canonical = simplifier.simplify(expr.clone()).unwrap();
// Expression has been rewritten to: (c = a AND b = 1)
assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));
// If canonicalization is disabled, the expression is not changed
let non_canonicalized = simplifier
.with_canonicalize(false)
.simplify(expr.clone())
.unwrap();
assert_eq!(non_canonicalized, expr);
sourcepub fn with_max_cycles(self, max_simplifier_cycles: u32) -> Self
pub fn with_max_cycles(self, max_simplifier_cycles: u32) -> Self
Specifies the maximum number of simplification cycles to run.
The simplifier can perform multiple passes of simplification. This is because the output of one simplification step can allow more optimizations in another simplification step. For example, constant evaluation can allow more expression simplifications, and expression simplifications can allow more constant evaluations.
This method specifies the maximum number of allowed iteration cycles before the simplifier returns an Expr output. However, it does not always perform the maximum number of cycles. The simplifier will attempt to detect when an Expr is unchanged by all the simplification passes, and return early. This avoids wasting time on unnecessary Expr tree traversals.
If no maximum is specified, the value of DEFAULT_MAX_SIMPLIFIER_CYCLES is used instead.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{col, lit, Expr};
use datafusion_common::{Result, ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
let schema = Schema::new(vec![
Field::new("a", DataType::Int64, false),
])
.to_dfschema_ref().unwrap();
// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
.with_schema(schema);
let simplifier = ExprSimplifier::new(context);
// Expression: a IS NOT NULL
let expr = col("a").is_not_null();
// When using default maximum cycles, 2 cycles will be performed.
let (simplified_expr, count) = simplifier.simplify_with_cycle_count(expr.clone()).unwrap();
assert_eq!(simplified_expr, lit(true));
// 2 cycles were executed, but only 1 was needed
assert_eq!(count, 2);
// Only 1 simplification pass is necessary here, so we can set the maximum cycles to 1.
let (simplified_expr, count) = simplifier.with_max_cycles(1).simplify_with_cycle_count(expr.clone()).unwrap();
// Expression has been simplified to: true
assert_eq!(simplified_expr, lit(true));
// Only 1 cycle was executed
assert_eq!(count, 1);
Auto Trait Implementations§
impl<S> Freeze for ExprSimplifier<S>where
S: Freeze,
impl<S> !RefUnwindSafe for ExprSimplifier<S>
impl<S> Send for ExprSimplifier<S>where
S: Send,
impl<S> Sync for ExprSimplifier<S>where
S: Sync,
impl<S> Unpin for ExprSimplifier<S>where
S: Unpin,
impl<S> !UnwindSafe for ExprSimplifier<S>
Blanket Implementations§
source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> IntoEither for T
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more