polars_plan/plans/aexpr/utils.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
use polars_utils::idx_vec::UnitVec;
use polars_utils::unitvec;
use super::*;
/// Checks if the top-level expression node is elementwise. If this is the case, then `stack` will
/// be extended further with any nested expression nodes.
pub fn is_elementwise(stack: &mut UnitVec<Node>, ae: &AExpr, expr_arena: &Arena<AExpr>) -> bool {
use AExpr::*;
if !ae.is_elementwise_top_level() {
return false;
}
match ae {
// Literals that aren't being projected are allowed to be non-scalar, so we don't add them
// for inspection. (e.g. `is_in(<literal>)`).
#[cfg(feature = "is_in")]
Function {
function: FunctionExpr::Boolean(BooleanFunction::IsIn),
input,
..
} => (|| {
if let Some(rhs) = input.get(1) {
assert_eq!(input.len(), 2); // A.is_in(B)
let rhs = rhs.node();
if matches!(expr_arena.get(rhs), AExpr::Literal { .. }) {
stack.extend([input[0].node()]);
return;
}
};
ae.nodes(stack);
})(),
_ => ae.nodes(stack),
}
true
}
/// Recursive variant of `is_elementwise`
pub fn is_elementwise_rec<'a>(mut ae: &'a AExpr, expr_arena: &'a Arena<AExpr>) -> bool {
let mut stack = unitvec![];
loop {
if !is_elementwise(&mut stack, ae, expr_arena) {
return false;
}
let Some(node) = stack.pop() else {
break;
};
ae = expr_arena.get(node);
}
true
}
/// Recursive variant of `is_elementwise` that also forbids casting to categoricals. This function
/// is used to determine if an expression evaluation can be vertically parallelized.
pub fn is_elementwise_rec_no_cat_cast<'a>(mut ae: &'a AExpr, expr_arena: &'a Arena<AExpr>) -> bool {
let mut stack = unitvec![];
loop {
if !is_elementwise(&mut stack, ae, expr_arena) {
return false;
}
#[cfg(feature = "dtype-categorical")]
{
if let AExpr::Cast {
dtype: DataType::Categorical(..),
..
} = ae
{
return false;
}
}
let Some(node) = stack.pop() else {
break;
};
ae = expr_arena.get(node);
}
true
}
/// Check whether filters can be pushed past this expression.
///
/// A query, `with_columns(C).filter(P)` can be re-ordered as `filter(P).with_columns(C)`, iff
/// both P and C permit filter pushdown.
///
/// If filter pushdown is permitted, `stack` is extended with any input expression nodes that this
/// expression may have.
///
/// Note that this function is not recursive - the caller should repeatedly
/// call this function with the `stack` to perform a recursive check.
pub(crate) fn permits_filter_pushdown(
stack: &mut UnitVec<Node>,
ae: &AExpr,
expr_arena: &Arena<AExpr>,
) -> bool {
// This is a subset of an `is_elementwise` check that also blocks exprs that raise errors
// depending on the data. The idea is that, although the success value of these functions
// are elementwise, their error behavior is non-elementwise. Their error behavior is essentially
// performing an aggregation `ANY(evaluation_result_was_error)`, and if this is the case then
// the query result should be an error.
match ae {
// Rows that go OOB on get/gather may be filtered out in earlier operations,
// so we don't push these down.
AExpr::Function {
function: FunctionExpr::ListExpr(ListFunction::Get(false)),
..
} => false,
#[cfg(feature = "list_gather")]
AExpr::Function {
function: FunctionExpr::ListExpr(ListFunction::Gather(false)),
..
} => false,
#[cfg(feature = "dtype-array")]
AExpr::Function {
function: FunctionExpr::ArrayExpr(ArrayFunction::Get(false)),
..
} => false,
// TODO: There are a lot more functions that should be caught here.
ae => is_elementwise(stack, ae, expr_arena),
}
}
pub fn permits_filter_pushdown_rec<'a>(mut ae: &'a AExpr, expr_arena: &'a Arena<AExpr>) -> bool {
let mut stack = unitvec![];
loop {
if !permits_filter_pushdown(&mut stack, ae, expr_arena) {
return false;
}
let Some(node) = stack.pop() else {
break;
};
ae = expr_arena.get(node);
}
true
}