polars_plan/dsl/functions/
horizontal.rs

1use super::*;
2
/// Build the output-type resolver shared by the cumulative horizontal folds.
///
/// The resolved field is named after the first input field. Its dtype is a
/// [`DataType::Struct`] holding one inner field per input field (each keeping
/// its input's name), all typed with the dtype obtained by folding
/// `get_supertype` over every input dtype from left to right.
///
/// # Panics
///
/// The resolver indexes `fields[0]` and unwraps the result of `get_supertype`,
/// so it panics when it receives no fields or when `get_supertype` yields no
/// value for a pair of dtypes.
#[cfg(feature = "dtype-struct")]
fn cum_fold_dtype() -> GetOutput {
    GetOutput::map_fields(|fields| {
        let first = &fields[0];

        // Widen the first dtype with every following input dtype.
        let common = fields[1..].iter().fold(first.dtype.clone(), |st, fld| {
            get_supertype(&st, &fld.dtype).unwrap()
        });

        // One struct member per input, all sharing the widened dtype.
        let inner = fields
            .iter()
            .map(|fld| Field::new(fld.name().clone(), common.clone()))
            .collect();

        Ok(Field::new(first.name.clone(), DataType::Struct(inner)))
    })
}
21
22/// Accumulate over multiple columns horizontally / row wise.
23pub fn fold_exprs<F, E>(acc: Expr, f: F, exprs: E) -> Expr
24where
25    F: 'static + Fn(Column, Column) -> PolarsResult<Option<Column>> + Send + Sync,
26    E: AsRef<[Expr]>,
27{
28    let mut exprs_v = Vec::with_capacity(exprs.as_ref().len() + 1);
29    exprs_v.push(acc);
30    exprs_v.extend(exprs.as_ref().iter().cloned());
31    let exprs = exprs_v;
32
33    let function = new_column_udf(move |columns: &mut [Column]| {
34        let mut acc = columns.first().unwrap().clone();
35        for c in &columns[1..] {
36            if let Some(a) = f(acc.clone(), c.clone())? {
37                acc = a
38            }
39        }
40        Ok(Some(acc))
41    });
42
43    Expr::AnonymousFunction {
44        input: exprs,
45        function,
46        // Take the type of the accumulator.
47        output_type: GetOutput::first(),
48        options: FunctionOptions {
49            collect_groups: ApplyOptions::GroupWise,
50            flags: FunctionFlags::default()
51                | FunctionFlags::INPUT_WILDCARD_EXPANSION
52                | FunctionFlags::RETURNS_SCALAR,
53            fmt_str: "fold",
54            ..Default::default()
55        },
56    }
57}
58
59/// Analogous to [`Iterator::reduce`](std::iter::Iterator::reduce).
60///
61/// An accumulator is initialized to the series given by the first expression in `exprs`, and then each subsequent value
62/// of the accumulator is computed from `f(acc, next_expr_series)`. If `exprs` is empty, an error is returned when
63/// `collect` is called.
64pub fn reduce_exprs<F, E>(f: F, exprs: E) -> Expr
65where
66    F: 'static + Fn(Column, Column) -> PolarsResult<Option<Column>> + Send + Sync,
67    E: AsRef<[Expr]>,
68{
69    let exprs = exprs.as_ref().to_vec();
70
71    let function = new_column_udf(move |columns: &mut [Column]| {
72        let mut c_iter = columns.iter();
73
74        match c_iter.next() {
75            Some(acc) => {
76                let mut acc = acc.clone();
77
78                for c in c_iter {
79                    if let Some(a) = f(acc.clone(), c.clone())? {
80                        acc = a
81                    }
82                }
83                Ok(Some(acc))
84            },
85            None => Err(polars_err!(ComputeError: "`reduce` did not have any expressions to fold")),
86        }
87    });
88
89    Expr::AnonymousFunction {
90        input: exprs,
91        function,
92        output_type: GetOutput::super_type(),
93        options: FunctionOptions {
94            collect_groups: ApplyOptions::GroupWise,
95            flags: FunctionFlags::default()
96                | FunctionFlags::INPUT_WILDCARD_EXPANSION
97                | FunctionFlags::RETURNS_SCALAR,
98            fmt_str: "reduce",
99            ..Default::default()
100        },
101    }
102}
103
/// Accumulate over multiple columns horizontally / row wise, keeping every intermediate result.
///
/// The accumulator starts as the first input column, which is also the first member of the
/// output. For each following column `c`, `f(acc, c)` is called; `Ok(Some(col))` replaces the
/// accumulator while `Ok(None)` keeps it. After every step the accumulator is renamed to `c`'s
/// name and a copy is appended to the output, so the result has one member per input column.
///
/// The members are packed into a struct column named after the final accumulator. Evaluating the
/// function without any input columns yields a `ComputeError`.
#[cfg(feature = "dtype-struct")]
pub fn cum_reduce_exprs<F, E>(f: F, exprs: E) -> Expr
where
    F: 'static + Fn(Column, Column) -> PolarsResult<Option<Column>> + Send + Sync,
    E: AsRef<[Expr]>,
{
    let input = exprs.as_ref().to_vec();

    let function = new_column_udf(move |columns: &mut [Column]| match columns.split_first() {
        None => Err(polars_err!(ComputeError: "`reduce` did not have any expressions to fold")),
        Some((head, tail)) => {
            let mut state = head.clone();
            let mut steps = Vec::with_capacity(tail.len() + 1);
            steps.push(state.clone());

            for col in tail {
                if let Some(next) = f(state.clone(), col.clone())? {
                    state = next;
                }
                // Each snapshot carries the name of the column that was just folded in,
                // whether or not `f` produced a new accumulator.
                state.rename(col.name().clone());
                steps.push(state.clone());
            }

            let height = steps[0].len();
            let packed = StructChunked::from_columns(state.name().clone(), height, &steps)?;
            Ok(Some(packed.into_column()))
        },
    });

    let options = FunctionOptions {
        collect_groups: ApplyOptions::GroupWise,
        flags: FunctionFlags::default()
            | FunctionFlags::INPUT_WILDCARD_EXPANSION
            | FunctionFlags::RETURNS_SCALAR,
        fmt_str: "cum_reduce",
        ..Default::default()
    };

    Expr::AnonymousFunction {
        input,
        function,
        output_type: cum_fold_dtype(),
        options,
    }
}
151
/// Accumulate over multiple columns horizontally / row wise, keeping the intermediate results.
///
/// `acc` provides the initial accumulator; it is appended *after* `exprs` in the inputs of the
/// returned expression. At execution time `f(accumulator, column)` is called for every column
/// of `exprs`, left to right:
///
/// * `Ok(Some(col))` replaces the accumulator with `col`, renames it to the name of the column
///   that was just folded in, and appends a copy to the output,
/// * `Ok(None)` keeps the accumulator and appends nothing for that step,
/// * `Err(_)` aborts the fold and is propagated.
///
/// When `include_init` is `true` the initial accumulator is emitted as the first member of the
/// output. All collected members are packed into a struct column named after the final
/// accumulator.
///
/// If no member was collected at all (`include_init` is `false` and either `exprs` is empty or
/// `f` never returned a column), the struct is built with the height of the accumulator and no
/// members instead of panicking on an out-of-bounds index.
#[cfg(feature = "dtype-struct")]
pub fn cum_fold_exprs<F, E>(acc: Expr, f: F, exprs: E, include_init: bool) -> Expr
where
    F: 'static + Fn(Column, Column) -> PolarsResult<Option<Column>> + Send + Sync,
    E: AsRef<[Expr]>,
{
    // The accumulator goes last so the UDF can peel it off the back.
    let mut exprs = exprs.as_ref().to_vec();
    exprs.push(acc);

    let function = new_column_udf(move |columns: &mut [Column]| {
        let (init, rest) = columns
            .split_last()
            .expect("the accumulator expression is always appended to the inputs");
        let mut acc = init.clone();

        let mut result = Vec::with_capacity(rest.len() + usize::from(include_init));
        if include_init {
            result.push(acc.clone());
        }

        for c in rest {
            if let Some(a) = f(acc.clone(), c.clone())? {
                acc = a;
                acc.rename(c.name().clone());
                result.push(acc.clone());
            }
        }

        // `result` may be empty here; use the accumulator's height in that case rather than
        // indexing `result[0]`, which would panic.
        let height = result.first().unwrap_or(&acc).len();
        StructChunked::from_columns(acc.name().clone(), height, &result)
            .map(|ca| Some(ca.into_column()))
    });

    Expr::AnonymousFunction {
        input: exprs,
        function,
        output_type: cum_fold_dtype(),
        options: FunctionOptions {
            collect_groups: ApplyOptions::GroupWise,
            flags: FunctionFlags::default()
                | FunctionFlags::INPUT_WILDCARD_EXPANSION
                | FunctionFlags::RETURNS_SCALAR,
            fmt_str: "cum_fold",
            ..Default::default()
        },
    }
}
198
199/// Create a new column with the bitwise-and of the elements in each row.
200///
201/// The name of the resulting column will be "all"; use [`alias`](Expr::alias) to choose a different name.
202pub fn all_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
203    let exprs = exprs.as_ref().to_vec();
204    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
205    // This will be reduced to `expr & expr` during conversion to IR.
206    Ok(Expr::Function {
207        input: exprs,
208        function: FunctionExpr::Boolean(BooleanFunction::AllHorizontal),
209        options: FunctionOptions {
210            flags: FunctionFlags::default()
211                | FunctionFlags::INPUT_WILDCARD_EXPANSION
212                | FunctionFlags::ALLOW_EMPTY_INPUTS,
213            ..Default::default()
214        },
215    })
216}
217
218/// Create a new column with the bitwise-or of the elements in each row.
219///
220/// The name of the resulting column will be "any"; use [`alias`](Expr::alias) to choose a different name.
221pub fn any_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
222    let exprs = exprs.as_ref().to_vec();
223    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
224    // This will be reduced to `expr | expr` during conversion to IR.
225    Ok(Expr::Function {
226        input: exprs,
227        function: FunctionExpr::Boolean(BooleanFunction::AnyHorizontal),
228        options: FunctionOptions {
229            flags: FunctionFlags::default()
230                | FunctionFlags::INPUT_WILDCARD_EXPANSION
231                | FunctionFlags::ALLOW_EMPTY_INPUTS,
232            ..Default::default()
233        },
234    })
235}
236
237/// Create a new column with the maximum value per row.
238///
239/// The name of the resulting column will be `"max"`; use [`alias`](Expr::alias) to choose a different name.
240pub fn max_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
241    let exprs = exprs.as_ref().to_vec();
242    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
243
244    Ok(Expr::Function {
245        input: exprs,
246        function: FunctionExpr::MaxHorizontal,
247        options: FunctionOptions {
248            collect_groups: ApplyOptions::ElementWise,
249            flags: FunctionFlags::default()
250                | FunctionFlags::INPUT_WILDCARD_EXPANSION & !FunctionFlags::RETURNS_SCALAR
251                | FunctionFlags::ALLOW_RENAME,
252            ..Default::default()
253        },
254    })
255}
256
257/// Create a new column with the minimum value per row.
258///
259/// The name of the resulting column will be `"min"`; use [`alias`](Expr::alias) to choose a different name.
260pub fn min_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
261    let exprs = exprs.as_ref().to_vec();
262    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
263
264    Ok(Expr::Function {
265        input: exprs,
266        function: FunctionExpr::MinHorizontal,
267        options: FunctionOptions {
268            collect_groups: ApplyOptions::ElementWise,
269            flags: FunctionFlags::default()
270                | FunctionFlags::INPUT_WILDCARD_EXPANSION & !FunctionFlags::RETURNS_SCALAR
271                | FunctionFlags::ALLOW_RENAME,
272            ..Default::default()
273        },
274    })
275}
276
277/// Sum all values horizontally across columns.
278pub fn sum_horizontal<E: AsRef<[Expr]>>(exprs: E, ignore_nulls: bool) -> PolarsResult<Expr> {
279    let exprs = exprs.as_ref().to_vec();
280    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
281
282    Ok(Expr::Function {
283        input: exprs,
284        function: FunctionExpr::SumHorizontal { ignore_nulls },
285        options: FunctionOptions {
286            collect_groups: ApplyOptions::ElementWise,
287            flags: FunctionFlags::default()
288                | FunctionFlags::INPUT_WILDCARD_EXPANSION & !FunctionFlags::RETURNS_SCALAR,
289            ..Default::default()
290        },
291    })
292}
293
294/// Compute the mean of all values horizontally across columns.
295pub fn mean_horizontal<E: AsRef<[Expr]>>(exprs: E, ignore_nulls: bool) -> PolarsResult<Expr> {
296    let exprs = exprs.as_ref().to_vec();
297    polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");
298
299    Ok(Expr::Function {
300        input: exprs,
301        function: FunctionExpr::MeanHorizontal { ignore_nulls },
302        options: FunctionOptions {
303            collect_groups: ApplyOptions::ElementWise,
304            flags: FunctionFlags::default()
305                | FunctionFlags::INPUT_WILDCARD_EXPANSION & !FunctionFlags::RETURNS_SCALAR,
306            ..Default::default()
307        },
308    })
309}
310
311/// Folds the expressions from left to right keeping the first non-null values.
312///
313/// It is an error to provide an empty `exprs`.
314pub fn coalesce(exprs: &[Expr]) -> Expr {
315    let input = exprs.to_vec();
316    Expr::Function {
317        input,
318        function: FunctionExpr::Coalesce,
319        options: FunctionOptions {
320            collect_groups: ApplyOptions::ElementWise,
321            flags: FunctionFlags::default() | FunctionFlags::INPUT_WILDCARD_EXPANSION,
322            cast_options: Some(CastingRules::cast_to_supertypes()),
323            ..Default::default()
324        },
325    }
326}