polars_plan/dsl/
mod.rs

1#![allow(ambiguous_glob_reexports)]
2//! Domain specific language for the Lazy API.
3#[cfg(feature = "dtype-categorical")]
4pub mod cat;
5
6#[cfg(feature = "dtype-categorical")]
7pub use cat::*;
8#[cfg(feature = "rolling_window_by")]
9pub(crate) use polars_time::prelude::*;
10
11mod arithmetic;
12mod arity;
13#[cfg(feature = "dtype-array")]
14mod array;
15pub mod binary;
16#[cfg(feature = "bitwise")]
17mod bitwise;
18#[cfg(feature = "temporal")]
19pub mod dt;
20mod expr;
21mod expr_dyn_fn;
22mod from;
23pub mod function_expr;
24pub mod functions;
25mod list;
26#[cfg(feature = "meta")]
27mod meta;
28mod name;
29mod options;
30#[cfg(feature = "python")]
31pub mod python_udf;
32#[cfg(feature = "random")]
33mod random;
34mod selector;
35mod statistics;
36#[cfg(feature = "strings")]
37pub mod string;
38#[cfg(feature = "dtype-struct")]
39mod struct_;
40pub mod udf;
41
42use std::fmt::Debug;
43use std::sync::Arc;
44
45pub use arity::*;
46#[cfg(feature = "dtype-array")]
47pub use array::*;
48use arrow::legacy::prelude::QuantileMethod;
49pub use expr::*;
50pub use function_expr::schema::FieldsMapper;
51pub use function_expr::*;
52pub use functions::*;
53pub use list::*;
54#[cfg(feature = "meta")]
55pub use meta::*;
56pub use name::*;
57pub use options::*;
58use polars_core::chunked_array::cast::CastOptions;
59use polars_core::error::feature_gated;
60use polars_core::prelude::*;
61#[cfg(feature = "diff")]
62use polars_core::series::ops::NullBehavior;
63use polars_core::series::IsSorted;
64#[cfg(any(feature = "search_sorted", feature = "is_between"))]
65use polars_core::utils::SuperTypeFlags;
66use polars_core::utils::{try_get_supertype, SuperTypeOptions};
67pub use selector::Selector;
68#[cfg(feature = "dtype-struct")]
69pub use struct_::*;
70pub use udf::UserDefinedFunction;
71
72use crate::constants::MAP_LIST_NAME;
73pub use crate::plans::lit;
74use crate::prelude::*;
75
76impl Expr {
77    /// Modify the Options passed to the `Function` node.
78    pub(crate) fn with_function_options<F>(self, func: F) -> Expr
79    where
80        F: Fn(FunctionOptions) -> FunctionOptions,
81    {
82        match self {
83            Self::AnonymousFunction {
84                input,
85                function,
86                output_type,
87                mut options,
88            } => {
89                options = func(options);
90                Self::AnonymousFunction {
91                    input,
92                    function,
93                    output_type,
94                    options,
95                }
96            },
97            Self::Function {
98                input,
99                function,
100                mut options,
101            } => {
102                options = func(options);
103                Self::Function {
104                    input,
105                    function,
106                    options,
107                }
108            },
109            _ => {
110                panic!("implementation error")
111            },
112        }
113    }
114
115    /// Overwrite the function name used for formatting.
116    /// (this is not intended to be used).
117    #[doc(hidden)]
118    pub fn with_fmt(self, name: &'static str) -> Expr {
119        self.with_function_options(|mut options| {
120            options.fmt_str = name;
121            options
122        })
123    }
124
125    /// Compare `Expr` with other `Expr` on equality.
126    pub fn eq<E: Into<Expr>>(self, other: E) -> Expr {
127        binary_expr(self, Operator::Eq, other.into())
128    }
129
130    /// Compare `Expr` with other `Expr` on equality where `None == None`.
131    pub fn eq_missing<E: Into<Expr>>(self, other: E) -> Expr {
132        binary_expr(self, Operator::EqValidity, other.into())
133    }
134
135    /// Compare `Expr` with other `Expr` on non-equality.
136    pub fn neq<E: Into<Expr>>(self, other: E) -> Expr {
137        binary_expr(self, Operator::NotEq, other.into())
138    }
139
140    /// Compare `Expr` with other `Expr` on non-equality where `None == None`.
141    pub fn neq_missing<E: Into<Expr>>(self, other: E) -> Expr {
142        binary_expr(self, Operator::NotEqValidity, other.into())
143    }
144
145    /// Check if `Expr` < `Expr`.
146    pub fn lt<E: Into<Expr>>(self, other: E) -> Expr {
147        binary_expr(self, Operator::Lt, other.into())
148    }
149
150    /// Check if `Expr` > `Expr`.
151    pub fn gt<E: Into<Expr>>(self, other: E) -> Expr {
152        binary_expr(self, Operator::Gt, other.into())
153    }
154
155    /// Check if `Expr` >= `Expr`.
156    pub fn gt_eq<E: Into<Expr>>(self, other: E) -> Expr {
157        binary_expr(self, Operator::GtEq, other.into())
158    }
159
160    /// Check if `Expr` <= `Expr`.
161    pub fn lt_eq<E: Into<Expr>>(self, other: E) -> Expr {
162        binary_expr(self, Operator::LtEq, other.into())
163    }
164
165    /// Negate `Expr`.
166    #[allow(clippy::should_implement_trait)]
167    pub fn not(self) -> Expr {
168        self.map_private(BooleanFunction::Not.into())
169    }
170
171    /// Rename Column.
172    pub fn alias<S>(self, name: S) -> Expr
173    where
174        S: Into<PlSmallStr>,
175    {
176        Expr::Alias(Arc::new(self), name.into())
177    }
178
179    /// Run is_null operation on `Expr`.
180    #[allow(clippy::wrong_self_convention)]
181    pub fn is_null(self) -> Self {
182        self.map_private(BooleanFunction::IsNull.into())
183    }
184
185    /// Run is_not_null operation on `Expr`.
186    #[allow(clippy::wrong_self_convention)]
187    pub fn is_not_null(self) -> Self {
188        self.map_private(BooleanFunction::IsNotNull.into())
189    }
190
191    /// Drop null values.
192    pub fn drop_nulls(self) -> Self {
193        Expr::Function {
194            input: vec![self],
195            function: FunctionExpr::DropNulls,
196            options: FunctionOptions {
197                collect_groups: ApplyOptions::GroupWise,
198                flags: FunctionFlags::default() | FunctionFlags::ALLOW_EMPTY_INPUTS,
199                ..Default::default()
200            },
201        }
202    }
203
204    /// Drop NaN values.
205    pub fn drop_nans(self) -> Self {
206        self.apply_private(FunctionExpr::DropNans)
207    }
208
209    /// Get the number of unique values in the groups.
210    pub fn n_unique(self) -> Self {
211        AggExpr::NUnique(Arc::new(self)).into()
212    }
213
214    /// Get the first value in the group.
215    pub fn first(self) -> Self {
216        AggExpr::First(Arc::new(self)).into()
217    }
218
219    /// Get the last value in the group.
220    pub fn last(self) -> Self {
221        AggExpr::Last(Arc::new(self)).into()
222    }
223
224    /// GroupBy the group to a Series.
225    pub fn implode(self) -> Self {
226        AggExpr::Implode(Arc::new(self)).into()
227    }
228
229    /// Compute the quantile per group.
230    pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self {
231        AggExpr::Quantile {
232            expr: Arc::new(self),
233            quantile: Arc::new(quantile),
234            method,
235        }
236        .into()
237    }
238
239    /// Get the group indexes of the group by operation.
240    pub fn agg_groups(self) -> Self {
241        AggExpr::AggGroups(Arc::new(self)).into()
242    }
243
244    /// Alias for `explode`.
245    pub fn flatten(self) -> Self {
246        self.explode()
247    }
248
249    /// Explode the String/List column.
250    pub fn explode(self) -> Self {
251        Expr::Explode(Arc::new(self))
252    }
253
254    /// Slice the Series.
255    /// `offset` may be negative.
256    pub fn slice<E: Into<Expr>, F: Into<Expr>>(self, offset: E, length: F) -> Self {
257        Expr::Slice {
258            input: Arc::new(self),
259            offset: Arc::new(offset.into()),
260            length: Arc::new(length.into()),
261        }
262    }
263
264    /// Append expressions. This is done by adding the chunks of `other` to this [`Series`].
265    pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
266        let output_type = if upcast {
267            GetOutput::super_type()
268        } else {
269            GetOutput::same_type()
270        };
271
272        apply_binary(
273            self,
274            other.into(),
275            move |mut a, mut b| {
276                if upcast {
277                    let dtype = try_get_supertype(a.dtype(), b.dtype())?;
278                    a = a.cast(&dtype)?;
279                    b = b.cast(&dtype)?;
280                }
281                a.append(&b)?;
282                Ok(Some(a))
283            },
284            output_type,
285        )
286    }
287
288    /// Get the first `n` elements of the Expr result.
289    pub fn head(self, length: Option<usize>) -> Self {
290        self.slice(lit(0), lit(length.unwrap_or(10) as u64))
291    }
292
293    /// Get the last `n` elements of the Expr result.
294    pub fn tail(self, length: Option<usize>) -> Self {
295        let len = length.unwrap_or(10);
296        self.slice(lit(-(len as i64)), lit(len as u64))
297    }
298
299    /// Get unique values of this expression.
300    pub fn unique(self) -> Self {
301        self.apply_private(FunctionExpr::Unique(false))
302    }
303
304    /// Get unique values of this expression, while maintaining order.
305    /// This requires more work than [`Expr::unique`].
306    pub fn unique_stable(self) -> Self {
307        self.apply_private(FunctionExpr::Unique(true))
308    }
309
310    /// Get the first index of unique values of this expression.
311    pub fn arg_unique(self) -> Self {
312        self.apply_private(FunctionExpr::ArgUnique)
313    }
314
315    /// Get the index value that has the minimum value.
316    pub fn arg_min(self) -> Self {
317        let options = FunctionOptions {
318            collect_groups: ApplyOptions::GroupWise,
319            flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
320            fmt_str: "arg_min",
321            ..Default::default()
322        };
323
324        self.function_with_options(
325            move |c: Column| {
326                Ok(Some(Column::new(
327                    c.name().clone(),
328                    &[c.as_materialized_series().arg_min().map(|idx| idx as u32)],
329                )))
330            },
331            GetOutput::from_type(IDX_DTYPE),
332            options,
333        )
334    }
335
336    /// Get the index value that has the maximum value.
337    pub fn arg_max(self) -> Self {
338        let options = FunctionOptions {
339            collect_groups: ApplyOptions::GroupWise,
340            flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
341            fmt_str: "arg_max",
342            ..Default::default()
343        };
344
345        self.function_with_options(
346            move |c: Column| {
347                Ok(Some(Column::new(
348                    c.name().clone(),
349                    &[c.as_materialized_series()
350                        .arg_max()
351                        .map(|idx| idx as IdxSize)],
352                )))
353            },
354            GetOutput::from_type(IDX_DTYPE),
355            options,
356        )
357    }
358
359    /// Get the index values that would sort this expression.
360    pub fn arg_sort(self, sort_options: SortOptions) -> Self {
361        let options = FunctionOptions {
362            collect_groups: ApplyOptions::GroupWise,
363            fmt_str: "arg_sort",
364            ..Default::default()
365        };
366
367        self.function_with_options(
368            move |c: Column| {
369                Ok(Some(
370                    c.as_materialized_series()
371                        .arg_sort(sort_options)
372                        .into_column(),
373                ))
374            },
375            GetOutput::from_type(IDX_DTYPE),
376            options,
377        )
378    }
379
380    #[cfg(feature = "index_of")]
381    /// Find the index of a value.
382    pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
383        let element = element.into();
384        Expr::Function {
385            input: vec![self, element],
386            function: FunctionExpr::IndexOf,
387            options: FunctionOptions {
388                flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
389                fmt_str: "index_of",
390                cast_options: Some(CastingRules::FirstArgLossless),
391                ..Default::default()
392            },
393        }
394    }
395
396    #[cfg(feature = "search_sorted")]
397    /// Find indices where elements should be inserted to maintain order.
398    pub fn search_sorted<E: Into<Expr>>(self, element: E, side: SearchSortedSide) -> Expr {
399        let element = element.into();
400        Expr::Function {
401            input: vec![self, element],
402            function: FunctionExpr::SearchSorted(side),
403            options: FunctionOptions {
404                collect_groups: ApplyOptions::GroupWise,
405                flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
406                fmt_str: "search_sorted",
407                cast_options: Some(CastingRules::Supertype(
408                    (SuperTypeFlags::default() & !SuperTypeFlags::ALLOW_PRIMITIVE_TO_STRING).into(),
409                )),
410                ..Default::default()
411            },
412        }
413    }
414
415    /// Cast expression to another data type.
416    /// Throws an error if conversion had overflows.
417    /// Returns an Error if cast is invalid on rows after predicates are pushed down.
418    pub fn strict_cast(self, dtype: DataType) -> Self {
419        Expr::Cast {
420            expr: Arc::new(self),
421            dtype,
422            options: CastOptions::Strict,
423        }
424    }
425
426    /// Cast expression to another data type.
427    pub fn cast(self, dtype: DataType) -> Self {
428        Expr::Cast {
429            expr: Arc::new(self),
430            dtype,
431            options: CastOptions::NonStrict,
432        }
433    }
434
435    /// Cast expression to another data type.
436    pub fn cast_with_options(self, dtype: DataType, cast_options: CastOptions) -> Self {
437        Expr::Cast {
438            expr: Arc::new(self),
439            dtype,
440            options: cast_options,
441        }
442    }
443
444    /// Take the values by idx.
445    pub fn gather<E: Into<Expr>>(self, idx: E) -> Self {
446        Expr::Gather {
447            expr: Arc::new(self),
448            idx: Arc::new(idx.into()),
449            returns_scalar: false,
450        }
451    }
452
453    /// Take the values by a single index.
454    pub fn get<E: Into<Expr>>(self, idx: E) -> Self {
455        Expr::Gather {
456            expr: Arc::new(self),
457            idx: Arc::new(idx.into()),
458            returns_scalar: true,
459        }
460    }
461
462    /// Sort with given options.
463    ///
464    /// # Example
465    ///
466    /// ```rust
467    /// # use polars_core::prelude::*;
468    /// # use polars_lazy::prelude::*;
469    /// # fn main() -> PolarsResult<()> {
470    /// let lf = df! {
471    ///    "a" => [Some(5), Some(4), Some(3), Some(2), None]
472    /// }?
473    /// .lazy();
474    ///
475    /// let sorted = lf
476    ///     .select(
477    ///         vec![col("a").sort(SortOptions::default())],
478    ///     )
479    ///     .collect()?;
480    ///
481    /// assert_eq!(
482    ///     sorted,
483    ///     df! {
484    ///         "a" => [None, Some(2), Some(3), Some(4), Some(5)]
485    ///     }?
486    /// );
487    /// # Ok(())
488    /// # }
489    /// ```
490    /// See [`SortOptions`] for more options.
491    pub fn sort(self, options: SortOptions) -> Self {
492        Expr::Sort {
493            expr: Arc::new(self),
494            options,
495        }
496    }
497
498    /// Returns the `k` largest elements.
499    ///
500    /// This has time complexity `O(n + k log(n))`.
501    #[cfg(feature = "top_k")]
502    pub fn top_k(self, k: Expr) -> Self {
503        self.apply_many_private(FunctionExpr::TopK { descending: false }, &[k], false, false)
504    }
505
506    /// Returns the `k` largest rows by given column.
507    ///
508    /// For single column, use [`Expr::top_k`].
509    #[cfg(feature = "top_k")]
510    pub fn top_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
511        self,
512        k: K,
513        by: E,
514        descending: Vec<bool>,
515    ) -> Self {
516        let mut args = vec![k.into()];
517        args.extend(by.as_ref().iter().map(|e| -> Expr { e.clone().into() }));
518        self.apply_many_private(FunctionExpr::TopKBy { descending }, &args, false, false)
519    }
520
521    /// Returns the `k` smallest elements.
522    ///
523    /// This has time complexity `O(n + k log(n))`.
524    #[cfg(feature = "top_k")]
525    pub fn bottom_k(self, k: Expr) -> Self {
526        self.apply_many_private(FunctionExpr::TopK { descending: true }, &[k], false, false)
527    }
528
529    /// Returns the `k` smallest rows by given column.
530    ///
531    /// For single column, use [`Expr::bottom_k`].
532    // #[cfg(feature = "top_k")]
533    #[cfg(feature = "top_k")]
534    pub fn bottom_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
535        self,
536        k: K,
537        by: E,
538        descending: Vec<bool>,
539    ) -> Self {
540        let mut args = vec![k.into()];
541        args.extend(by.as_ref().iter().map(|e| -> Expr { e.clone().into() }));
542        let descending = descending.into_iter().map(|x| !x).collect();
543        self.apply_many_private(FunctionExpr::TopKBy { descending }, &args, false, false)
544    }
545
546    /// Reverse column
547    pub fn reverse(self) -> Self {
548        self.apply_private(FunctionExpr::Reverse)
549    }
550
551    /// Apply a function/closure once the logical plan get executed.
552    ///
553    /// This function is very similar to [`Expr::apply`], but differs in how it handles aggregations.
554    ///
555    ///  * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
556    ///  * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
557    ///
558    /// It is the responsibility of the caller that the schema is correct by giving
559    /// the correct output_type. If None given the output type of the input expr is used.
560    pub fn map<F>(self, function: F, output_type: GetOutput) -> Self
561    where
562        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
563    {
564        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
565
566        Expr::AnonymousFunction {
567            input: vec![self],
568            function: new_column_udf(f),
569            output_type,
570            options: FunctionOptions {
571                collect_groups: ApplyOptions::ElementWise,
572                fmt_str: "map",
573                flags: FunctionFlags::default() | FunctionFlags::OPTIONAL_RE_ENTRANT,
574                ..Default::default()
575            },
576        }
577    }
578
579    fn map_private(self, function_expr: FunctionExpr) -> Self {
580        Expr::Function {
581            input: vec![self],
582            function: function_expr,
583            options: FunctionOptions {
584                collect_groups: ApplyOptions::ElementWise,
585                ..Default::default()
586            },
587        }
588    }
589
590    /// Apply a function/closure once the logical plan get executed with many arguments.
591    ///
592    /// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
593    pub fn map_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
594    where
595        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
596    {
597        let mut input = vec![self];
598        input.extend_from_slice(arguments);
599
600        Expr::AnonymousFunction {
601            input,
602            function: new_column_udf(function),
603            output_type,
604            options: FunctionOptions {
605                collect_groups: ApplyOptions::ElementWise,
606                fmt_str: "",
607                ..Default::default()
608            },
609        }
610    }
611
612    /// Apply a function/closure once the logical plan get executed.
613    ///
614    /// This function is very similar to [apply](Expr::apply), but differs in how it handles aggregations.
615    ///
616    ///  * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
617    ///  * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
618    ///  * `map_list` should be used when the function expects a list aggregated series.
619    pub fn map_list<F>(self, function: F, output_type: GetOutput) -> Self
620    where
621        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
622    {
623        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
624
625        Expr::AnonymousFunction {
626            input: vec![self],
627            function: new_column_udf(f),
628            output_type,
629            options: FunctionOptions {
630                collect_groups: ApplyOptions::ApplyList,
631                fmt_str: MAP_LIST_NAME,
632                ..Default::default()
633            },
634        }
635    }
636
637    /// A function that cannot be expressed with `map` or `apply` and requires extra settings.
638    pub fn function_with_options<F>(
639        self,
640        function: F,
641        output_type: GetOutput,
642        options: FunctionOptions,
643    ) -> Self
644    where
645        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
646    {
647        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
648
649        Expr::AnonymousFunction {
650            input: vec![self],
651            function: new_column_udf(f),
652            output_type,
653            options,
654        }
655    }
656
657    /// Apply a function/closure over the groups. This should only be used in a group_by aggregation.
658    ///
659    /// It is the responsibility of the caller that the schema is correct by giving
660    /// the correct output_type. If None given the output type of the input expr is used.
661    ///
662    /// This difference with [map](Self::map) is that `apply` will create a separate `Series` per group.
663    ///
664    /// * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
665    /// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
666    pub fn apply<F>(self, function: F, output_type: GetOutput) -> Self
667    where
668        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
669    {
670        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
671
672        Expr::AnonymousFunction {
673            input: vec![self],
674            function: new_column_udf(f),
675            output_type,
676            options: FunctionOptions {
677                collect_groups: ApplyOptions::GroupWise,
678                fmt_str: "",
679                ..Default::default()
680            },
681        }
682    }
683
684    fn apply_private(self, function_expr: FunctionExpr) -> Self {
685        Expr::Function {
686            input: vec![self],
687            function: function_expr,
688            options: FunctionOptions {
689                collect_groups: ApplyOptions::GroupWise,
690                ..Default::default()
691            },
692        }
693    }
694
695    /// Apply a function/closure over the groups with many arguments. This should only be used in a group_by aggregation.
696    ///
697    /// See the [`Expr::apply`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
698    pub fn apply_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
699    where
700        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
701    {
702        let mut input = vec![self];
703        input.extend_from_slice(arguments);
704
705        Expr::AnonymousFunction {
706            input,
707            function: new_column_udf(function),
708            output_type,
709            options: FunctionOptions {
710                collect_groups: ApplyOptions::GroupWise,
711                fmt_str: "",
712                ..Default::default()
713            },
714        }
715    }
716
717    pub fn apply_many_private(
718        self,
719        function_expr: FunctionExpr,
720        arguments: &[Expr],
721        returns_scalar: bool,
722        cast_to_supertypes: bool,
723    ) -> Self {
724        let mut input = Vec::with_capacity(arguments.len() + 1);
725        input.push(self);
726        input.extend_from_slice(arguments);
727
728        let supertype = if cast_to_supertypes {
729            Some(CastingRules::cast_to_supertypes())
730        } else {
731            None
732        };
733
734        let mut flags = FunctionFlags::default();
735        if returns_scalar {
736            flags |= FunctionFlags::RETURNS_SCALAR;
737        }
738
739        Expr::Function {
740            input,
741            function: function_expr,
742            options: FunctionOptions {
743                collect_groups: ApplyOptions::GroupWise,
744                flags,
745                cast_options: supertype,
746                ..Default::default()
747            },
748        }
749    }
750
751    pub fn map_many_private(
752        self,
753        function_expr: FunctionExpr,
754        arguments: &[Expr],
755        returns_scalar: bool,
756        cast_to_supertypes: Option<SuperTypeOptions>,
757    ) -> Self {
758        let mut input = Vec::with_capacity(arguments.len() + 1);
759        input.push(self);
760        input.extend_from_slice(arguments);
761
762        let mut flags = FunctionFlags::default();
763        if returns_scalar {
764            flags |= FunctionFlags::RETURNS_SCALAR;
765        }
766
767        Expr::Function {
768            input,
769            function: function_expr,
770            options: FunctionOptions {
771                collect_groups: ApplyOptions::ElementWise,
772                flags,
773                cast_options: cast_to_supertypes.map(CastingRules::Supertype),
774                ..Default::default()
775            },
776        }
777    }
778
779    /// Get mask of finite values if dtype is Float.
780    #[allow(clippy::wrong_self_convention)]
781    pub fn is_finite(self) -> Self {
782        self.map_private(BooleanFunction::IsFinite.into())
783    }
784
785    /// Get mask of infinite values if dtype is Float.
786    #[allow(clippy::wrong_self_convention)]
787    pub fn is_infinite(self) -> Self {
788        self.map_private(BooleanFunction::IsInfinite.into())
789    }
790
791    /// Get mask of NaN values if dtype is Float.
792    pub fn is_nan(self) -> Self {
793        self.map_private(BooleanFunction::IsNan.into())
794    }
795
796    /// Get inverse mask of NaN values if dtype is Float.
797    pub fn is_not_nan(self) -> Self {
798        self.map_private(BooleanFunction::IsNotNan.into())
799    }
800
801    /// Shift the values in the array by some period. See [the eager implementation](polars_core::series::SeriesTrait::shift).
802    pub fn shift(self, n: Expr) -> Self {
803        self.apply_many_private(FunctionExpr::Shift, &[n], false, false)
804    }
805
806    /// Shift the values in the array by some period and fill the resulting empty values.
807    pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(self, n: E, fill_value: IE) -> Self {
808        self.apply_many_private(
809            FunctionExpr::ShiftAndFill,
810            &[n.into(), fill_value.into()],
811            false,
812            false,
813        )
814    }
815
816    /// Cumulatively count values from 0 to len.
817    #[cfg(feature = "cum_agg")]
818    pub fn cum_count(self, reverse: bool) -> Self {
819        self.apply_private(FunctionExpr::CumCount { reverse })
820    }
821
822    /// Get an array with the cumulative sum computed at every element.
823    #[cfg(feature = "cum_agg")]
824    pub fn cum_sum(self, reverse: bool) -> Self {
825        self.apply_private(FunctionExpr::CumSum { reverse })
826    }
827
828    /// Get an array with the cumulative product computed at every element.
829    #[cfg(feature = "cum_agg")]
830    pub fn cum_prod(self, reverse: bool) -> Self {
831        self.apply_private(FunctionExpr::CumProd { reverse })
832    }
833
834    /// Get an array with the cumulative min computed at every element.
835    #[cfg(feature = "cum_agg")]
836    pub fn cum_min(self, reverse: bool) -> Self {
837        self.apply_private(FunctionExpr::CumMin { reverse })
838    }
839
840    /// Get an array with the cumulative max computed at every element.
841    #[cfg(feature = "cum_agg")]
842    pub fn cum_max(self, reverse: bool) -> Self {
843        self.apply_private(FunctionExpr::CumMax { reverse })
844    }
845
846    /// Get the product aggregation of an expression.
847    pub fn product(self) -> Self {
848        let options = FunctionOptions {
849            collect_groups: ApplyOptions::GroupWise,
850            flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
851            fmt_str: "product",
852            ..Default::default()
853        };
854
855        self.function_with_options(
856            move |c: Column| {
857                Some(
858                    c.product()
859                        .map(|sc| sc.into_series(c.name().clone()).into_column()),
860                )
861                .transpose()
862            },
863            GetOutput::map_dtype(|dt| {
864                use DataType as T;
865                Ok(match dt {
866                    T::Float32 => T::Float32,
867                    T::Float64 => T::Float64,
868                    T::UInt64 => T::UInt64,
869                    #[cfg(feature = "dtype-i128")]
870                    T::Int128 => T::Int128,
871                    _ => T::Int64,
872                })
873            }),
874            options,
875        )
876    }
877
878    /// Fill missing value with next non-null.
879    pub fn backward_fill(self, limit: FillNullLimit) -> Self {
880        self.apply_private(FunctionExpr::BackwardFill { limit })
881    }
882
883    /// Fill missing value with previous non-null.
884    pub fn forward_fill(self, limit: FillNullLimit) -> Self {
885        self.apply_private(FunctionExpr::ForwardFill { limit })
886    }
887
888    /// Round underlying floating point array to given decimal numbers.
889    #[cfg(feature = "round_series")]
890    pub fn round(self, decimals: u32) -> Self {
891        self.map_private(FunctionExpr::Round { decimals })
892    }
893
894    /// Round to a number of significant figures.
895    #[cfg(feature = "round_series")]
896    pub fn round_sig_figs(self, digits: i32) -> Self {
897        self.map_private(FunctionExpr::RoundSF { digits })
898    }
899
900    /// Floor underlying floating point array to the lowest integers smaller or equal to the float value.
901    #[cfg(feature = "round_series")]
902    pub fn floor(self) -> Self {
903        self.map_private(FunctionExpr::Floor)
904    }
905
906    /// Constant Pi
907    #[cfg(feature = "round_series")]
908    pub fn pi() -> Self {
909        lit(std::f64::consts::PI)
910    }
911
912    /// Ceil underlying floating point array to the highest integers smaller or equal to the float value.
913    #[cfg(feature = "round_series")]
914    pub fn ceil(self) -> Self {
915        self.map_private(FunctionExpr::Ceil)
916    }
917
918    /// Clip underlying values to a set boundary.
919    #[cfg(feature = "round_series")]
920    pub fn clip(self, min: Expr, max: Expr) -> Self {
921        self.map_many_private(
922            FunctionExpr::Clip {
923                has_min: true,
924                has_max: true,
925            },
926            &[min, max],
927            false,
928            None,
929        )
930    }
931
932    /// Clip underlying values to a set boundary.
933    #[cfg(feature = "round_series")]
934    pub fn clip_max(self, max: Expr) -> Self {
935        self.map_many_private(
936            FunctionExpr::Clip {
937                has_min: false,
938                has_max: true,
939            },
940            &[max],
941            false,
942            None,
943        )
944    }
945
946    /// Clip underlying values to a set boundary.
947    #[cfg(feature = "round_series")]
948    pub fn clip_min(self, min: Expr) -> Self {
949        self.map_many_private(
950            FunctionExpr::Clip {
951                has_min: true,
952                has_max: false,
953            },
954            &[min],
955            false,
956            None,
957        )
958    }
959
960    /// Convert all values to their absolute/positive value.
961    #[cfg(feature = "abs")]
962    pub fn abs(self) -> Self {
963        self.map_private(FunctionExpr::Abs)
964    }
965
966    /// Apply window function over a subgroup.
967    /// This is similar to a group_by + aggregation + self join.
968    /// Or similar to [window functions in Postgres](https://www.postgresql.org/docs/9.1/tutorial-window.html).
969    ///
970    /// # Example
971    ///
972    /// ``` rust
973    /// #[macro_use] extern crate polars_core;
974    /// use polars_core::prelude::*;
975    /// use polars_lazy::prelude::*;
976    ///
977    /// fn example() -> PolarsResult<()> {
978    ///     let df = df! {
979    ///             "groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
980    ///             "values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
981    ///         }?;
982    ///
983    ///     let out = df
984    ///      .lazy()
985    ///      .select(&[
986    ///          col("groups"),
987    ///          sum("values").over([col("groups")]),
988    ///      ])
989    ///      .collect()?;
990    ///     println!("{}", &out);
991    ///     Ok(())
992    /// }
993    ///
994    /// ```
995    ///
996    /// Outputs:
997    ///
998    /// ``` text
999    /// ╭────────┬────────╮
1000    /// │ groups ┆ values │
1001    /// │ ---    ┆ ---    │
1002    /// │ i32    ┆ i32    │
1003    /// ╞════════╪════════╡
1004    /// │ 1      ┆ 16     │
1005    /// │ 1      ┆ 16     │
1006    /// │ 2      ┆ 13     │
1007    /// │ 2      ┆ 13     │
1008    /// │ …      ┆ …      │
1009    /// │ 1      ┆ 16     │
1010    /// │ 2      ┆ 13     │
1011    /// │ 3      ┆ 15     │
1012    /// │ 3      ┆ 15     │
1013    /// │ 1      ┆ 16     │
1014    /// ╰────────┴────────╯
1015    /// ```
1016    pub fn over<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(self, partition_by: E) -> Self {
1017        self.over_with_options(partition_by, None, Default::default())
1018    }
1019
1020    pub fn over_with_options<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
1021        self,
1022        partition_by: E,
1023        order_by: Option<(E, SortOptions)>,
1024        options: WindowMapping,
1025    ) -> Self {
1026        let partition_by = partition_by
1027            .as_ref()
1028            .iter()
1029            .map(|e| e.clone().into())
1030            .collect();
1031
1032        let order_by = order_by.map(|(e, options)| {
1033            let e = e.as_ref();
1034            let e = if e.len() == 1 {
1035                Arc::new(e[0].clone().into())
1036            } else {
1037                feature_gated!["dtype-struct", {
1038                    let e = e.iter().map(|e| e.clone().into()).collect::<Vec<_>>();
1039                    Arc::new(as_struct(e))
1040                }]
1041            };
1042            (e, options)
1043        });
1044
1045        Expr::Window {
1046            function: Arc::new(self),
1047            partition_by,
1048            order_by,
1049            options: options.into(),
1050        }
1051    }
1052
1053    #[cfg(feature = "dynamic_group_by")]
1054    pub fn rolling(self, options: RollingGroupOptions) -> Self {
1055        // We add the index column as `partition expr` so that the optimizer will
1056        // not ignore it.
1057        let index_col = col(options.index_column.clone());
1058        Expr::Window {
1059            function: Arc::new(self),
1060            partition_by: vec![index_col],
1061            order_by: None,
1062            options: WindowType::Rolling(options),
1063        }
1064    }
1065
1066    fn fill_null_impl(self, fill_value: Expr) -> Self {
1067        let input = vec![self, fill_value];
1068
1069        Expr::Function {
1070            input,
1071            function: FunctionExpr::FillNull,
1072            options: FunctionOptions {
1073                collect_groups: ApplyOptions::ElementWise,
1074                cast_options: Some(CastingRules::cast_to_supertypes()),
1075                ..Default::default()
1076            },
1077        }
1078    }
1079
1080    /// Replace the null values by a value.
1081    pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> Self {
1082        self.fill_null_impl(fill_value.into())
1083    }
1084
1085    pub fn fill_null_with_strategy(self, strategy: FillNullStrategy) -> Self {
1086        if strategy.is_elementwise() {
1087            self.map_private(FunctionExpr::FillNullWithStrategy(strategy))
1088        } else {
1089            self.apply_private(FunctionExpr::FillNullWithStrategy(strategy))
1090        }
1091    }
1092
1093    /// Replace the floating point `NaN` values by a value.
1094    pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> Self {
1095        // we take the not branch so that self is truthy value of `when -> then -> otherwise`
1096        // and that ensure we keep the name of `self`
1097
1098        when(self.clone().is_not_nan().or(self.clone().is_null()))
1099            .then(self)
1100            .otherwise(fill_value.into())
1101    }
1102    /// Count the values of the Series
1103    /// or
1104    /// Get counts of the group by operation.
1105    pub fn count(self) -> Self {
1106        AggExpr::Count(Arc::new(self), false).into()
1107    }
1108
1109    pub fn len(self) -> Self {
1110        AggExpr::Count(Arc::new(self), true).into()
1111    }
1112
1113    /// Get a mask of duplicated values.
1114    #[allow(clippy::wrong_self_convention)]
1115    #[cfg(feature = "is_unique")]
1116    pub fn is_duplicated(self) -> Self {
1117        self.apply_private(BooleanFunction::IsDuplicated.into())
1118    }
1119
1120    #[allow(clippy::wrong_self_convention)]
1121    #[cfg(feature = "is_between")]
1122    pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
1123        self.map_many_private(
1124            BooleanFunction::IsBetween { closed }.into(),
1125            &[lower.into(), upper.into()],
1126            false,
1127            Some((SuperTypeFlags::default() & !SuperTypeFlags::ALLOW_PRIMITIVE_TO_STRING).into()),
1128        )
1129    }
1130
1131    /// Get a mask of unique values.
1132    #[allow(clippy::wrong_self_convention)]
1133    #[cfg(feature = "is_unique")]
1134    pub fn is_unique(self) -> Self {
1135        self.apply_private(BooleanFunction::IsUnique.into())
1136    }
1137
1138    /// Get the approximate count of unique values.
1139    #[cfg(feature = "approx_unique")]
1140    pub fn approx_n_unique(self) -> Self {
1141        self.apply_private(FunctionExpr::ApproxNUnique)
1142            .with_function_options(|mut options| {
1143                options.flags |= FunctionFlags::RETURNS_SCALAR;
1144                options
1145            })
1146    }
1147
1148    /// Bitwise "and" operation.
1149    pub fn and<E: Into<Expr>>(self, expr: E) -> Self {
1150        binary_expr(self, Operator::And, expr.into())
1151    }
1152
1153    /// Bitwise "xor" operation.
1154    pub fn xor<E: Into<Expr>>(self, expr: E) -> Self {
1155        binary_expr(self, Operator::Xor, expr.into())
1156    }
1157
1158    /// Bitwise "or" operation.
1159    pub fn or<E: Into<Expr>>(self, expr: E) -> Self {
1160        binary_expr(self, Operator::Or, expr.into())
1161    }
1162
1163    /// Logical "or" operation.
1164    pub fn logical_or<E: Into<Expr>>(self, expr: E) -> Self {
1165        binary_expr(self, Operator::LogicalOr, expr.into())
1166    }
1167
1168    /// Logical "and" operation.
1169    pub fn logical_and<E: Into<Expr>>(self, expr: E) -> Self {
1170        binary_expr(self, Operator::LogicalAnd, expr.into())
1171    }
1172
1173    /// Filter a single column.
1174    ///
1175    /// Should be used in aggregation context. If you want to filter on a
1176    /// DataFrame level, use `LazyFrame::filter`.
1177    pub fn filter<E: Into<Expr>>(self, predicate: E) -> Self {
1178        if has_expr(&self, |e| matches!(e, Expr::Wildcard)) {
1179            panic!("filter '*' not allowed, use LazyFrame::filter")
1180        };
1181        Expr::Filter {
1182            input: Arc::new(self),
1183            by: Arc::new(predicate.into()),
1184        }
1185    }
1186
1187    /// Check if the values of the left expression are in the lists of the right expr.
1188    #[allow(clippy::wrong_self_convention)]
1189    #[cfg(feature = "is_in")]
1190    pub fn is_in<E: Into<Expr>>(self, other: E) -> Self {
1191        let other = other.into();
1192        let has_literal = has_leaf_literal(&other);
1193
1194        // lit(true).is_in() returns a scalar.
1195        let returns_scalar = all_return_scalar(&self);
1196
1197        let arguments = &[other];
1198        // we don't have to apply on groups, so this is faster
1199        if has_literal {
1200            self.map_many_private(
1201                BooleanFunction::IsIn.into(),
1202                arguments,
1203                returns_scalar,
1204                Some(Default::default()),
1205            )
1206        } else {
1207            self.apply_many_private(
1208                BooleanFunction::IsIn.into(),
1209                arguments,
1210                returns_scalar,
1211                true,
1212            )
1213        }
1214    }
1215
1216    /// Sort this column by the ordering of another column evaluated from given expr.
1217    /// Can also be used in a group_by context to sort the groups.
1218    ///
1219    /// # Example
1220    ///
1221    /// ```rust
1222    /// # use polars_core::prelude::*;
1223    /// # use polars_lazy::prelude::*;
1224    /// # fn main() -> PolarsResult<()> {
1225    /// let lf = df! {
1226    ///     "a" => [1, 2, 3, 4, 5],
1227    ///     "b" => [5, 4, 3, 2, 1]
1228    /// }?.lazy();
1229    ///
1230    /// let sorted = lf
1231    ///     .select(
1232    ///         vec![col("a").sort_by(col("b"), SortOptions::default())],
1233    ///     )
1234    ///     .collect()?;
1235    ///
1236    /// assert_eq!(
1237    ///     sorted,
1238    ///     df! { "a" => [5, 4, 3, 2, 1] }?
1239    /// );
1240    /// # Ok(())
1241    /// # }
1242    pub fn sort_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
1243        self,
1244        by: E,
1245        sort_options: SortMultipleOptions,
1246    ) -> Expr {
1247        let by = by.as_ref().iter().map(|e| e.clone().into()).collect();
1248        Expr::SortBy {
1249            expr: Arc::new(self),
1250            by,
1251            sort_options,
1252        }
1253    }
1254
1255    #[cfg(feature = "repeat_by")]
1256    fn repeat_by_impl(self, by: Expr) -> Expr {
1257        self.apply_many_private(FunctionExpr::RepeatBy, &[by], false, false)
1258    }
1259
1260    #[cfg(feature = "repeat_by")]
1261    /// Repeat the column `n` times, where `n` is determined by the values in `by`.
1262    /// This yields an `Expr` of dtype `List`.
1263    pub fn repeat_by<E: Into<Expr>>(self, by: E) -> Expr {
1264        self.repeat_by_impl(by.into())
1265    }
1266
1267    #[cfg(feature = "is_first_distinct")]
1268    #[allow(clippy::wrong_self_convention)]
1269    /// Get a mask of the first unique value.
1270    pub fn is_first_distinct(self) -> Expr {
1271        self.apply_private(BooleanFunction::IsFirstDistinct.into())
1272    }
1273
1274    #[cfg(feature = "is_last_distinct")]
1275    #[allow(clippy::wrong_self_convention)]
1276    /// Get a mask of the last unique value.
1277    pub fn is_last_distinct(self) -> Expr {
1278        self.apply_private(BooleanFunction::IsLastDistinct.into())
1279    }
1280
1281    fn dot_impl(self, other: Expr) -> Expr {
1282        (self * other).sum()
1283    }
1284
1285    /// Compute the dot/inner product between two expressions.
1286    pub fn dot<E: Into<Expr>>(self, other: E) -> Expr {
1287        self.dot_impl(other.into())
1288    }
1289
1290    #[cfg(feature = "mode")]
1291    /// Compute the mode(s) of this column. This is the most occurring value.
1292    pub fn mode(self) -> Expr {
1293        self.apply_private(FunctionExpr::Mode)
1294    }
1295
1296    /// Exclude a column from a wildcard/regex selection.
1297    ///
1298    /// You may also use regexes in the exclude as long as they start with `^` and end with `$`.
1299    pub fn exclude(self, columns: impl IntoVec<PlSmallStr>) -> Expr {
1300        let v = columns.into_vec().into_iter().map(Excluded::Name).collect();
1301        Expr::Exclude(Arc::new(self), v)
1302    }
1303
1304    pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Expr {
1305        let v = dtypes
1306            .as_ref()
1307            .iter()
1308            .map(|dt| Excluded::Dtype(dt.clone()))
1309            .collect();
1310        Expr::Exclude(Arc::new(self), v)
1311    }
1312
1313    #[cfg(feature = "interpolate")]
1314    /// Fill null values using interpolation.
1315    pub fn interpolate(self, method: InterpolationMethod) -> Expr {
1316        self.apply_private(FunctionExpr::Interpolate(method))
1317    }
1318
1319    #[cfg(feature = "rolling_window_by")]
1320    #[allow(clippy::type_complexity)]
1321    fn finish_rolling_by(
1322        self,
1323        by: Expr,
1324        options: RollingOptionsDynamicWindow,
1325        rolling_function_by: fn(RollingOptionsDynamicWindow) -> RollingFunctionBy,
1326    ) -> Expr {
1327        self.apply_many_private(
1328            FunctionExpr::RollingExprBy(rolling_function_by(options)),
1329            &[by],
1330            false,
1331            false,
1332        )
1333    }
1334
1335    #[cfg(feature = "interpolate_by")]
1336    /// Fill null values using interpolation.
1337    pub fn interpolate_by(self, by: Expr) -> Expr {
1338        self.apply_many_private(FunctionExpr::InterpolateBy, &[by], false, false)
1339    }
1340
1341    #[cfg(feature = "rolling_window")]
1342    #[allow(clippy::type_complexity)]
1343    fn finish_rolling(
1344        self,
1345        options: RollingOptionsFixedWindow,
1346        rolling_function: fn(RollingOptionsFixedWindow) -> RollingFunction,
1347    ) -> Expr {
1348        self.apply_private(FunctionExpr::RollingExpr(rolling_function(options)))
1349    }
1350
1351    /// Apply a rolling minimum based on another column.
1352    #[cfg(feature = "rolling_window_by")]
1353    pub fn rolling_min_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1354        self.finish_rolling_by(by, options, RollingFunctionBy::MinBy)
1355    }
1356
1357    /// Apply a rolling maximum based on another column.
1358    #[cfg(feature = "rolling_window_by")]
1359    pub fn rolling_max_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1360        self.finish_rolling_by(by, options, RollingFunctionBy::MaxBy)
1361    }
1362
1363    /// Apply a rolling mean based on another column.
1364    #[cfg(feature = "rolling_window_by")]
1365    pub fn rolling_mean_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1366        self.finish_rolling_by(by, options, RollingFunctionBy::MeanBy)
1367    }
1368
1369    /// Apply a rolling sum based on another column.
1370    #[cfg(feature = "rolling_window_by")]
1371    pub fn rolling_sum_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1372        self.finish_rolling_by(by, options, RollingFunctionBy::SumBy)
1373    }
1374
1375    /// Apply a rolling quantile based on another column.
1376    #[cfg(feature = "rolling_window_by")]
1377    pub fn rolling_quantile_by(
1378        self,
1379        by: Expr,
1380        method: QuantileMethod,
1381        quantile: f64,
1382        mut options: RollingOptionsDynamicWindow,
1383    ) -> Expr {
1384        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1385            prob: quantile,
1386            method,
1387        }));
1388
1389        self.finish_rolling_by(by, options, RollingFunctionBy::QuantileBy)
1390    }
1391
1392    /// Apply a rolling variance based on another column.
1393    #[cfg(feature = "rolling_window_by")]
1394    pub fn rolling_var_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1395        self.finish_rolling_by(by, options, RollingFunctionBy::VarBy)
1396    }
1397
1398    /// Apply a rolling std-dev based on another column.
1399    #[cfg(feature = "rolling_window_by")]
1400    pub fn rolling_std_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1401        self.finish_rolling_by(by, options, RollingFunctionBy::StdBy)
1402    }
1403
1404    /// Apply a rolling median based on another column.
1405    #[cfg(feature = "rolling_window_by")]
1406    pub fn rolling_median_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1407        self.rolling_quantile_by(by, QuantileMethod::Linear, 0.5, options)
1408    }
1409
1410    /// Apply a rolling minimum.
1411    ///
1412    /// See: [`RollingAgg::rolling_min`]
1413    #[cfg(feature = "rolling_window")]
1414    pub fn rolling_min(self, options: RollingOptionsFixedWindow) -> Expr {
1415        self.finish_rolling(options, RollingFunction::Min)
1416    }
1417
1418    /// Apply a rolling maximum.
1419    ///
1420    /// See: [`RollingAgg::rolling_max`]
1421    #[cfg(feature = "rolling_window")]
1422    pub fn rolling_max(self, options: RollingOptionsFixedWindow) -> Expr {
1423        self.finish_rolling(options, RollingFunction::Max)
1424    }
1425
1426    /// Apply a rolling mean.
1427    ///
1428    /// See: [`RollingAgg::rolling_mean`]
1429    #[cfg(feature = "rolling_window")]
1430    pub fn rolling_mean(self, options: RollingOptionsFixedWindow) -> Expr {
1431        self.finish_rolling(options, RollingFunction::Mean)
1432    }
1433
1434    /// Apply a rolling sum.
1435    ///
1436    /// See: [`RollingAgg::rolling_sum`]
1437    #[cfg(feature = "rolling_window")]
1438    pub fn rolling_sum(self, options: RollingOptionsFixedWindow) -> Expr {
1439        self.finish_rolling(options, RollingFunction::Sum)
1440    }
1441
1442    /// Apply a rolling median.
1443    ///
1444    /// See: [`RollingAgg::rolling_median`]
1445    #[cfg(feature = "rolling_window")]
1446    pub fn rolling_median(self, options: RollingOptionsFixedWindow) -> Expr {
1447        self.rolling_quantile(QuantileMethod::Linear, 0.5, options)
1448    }
1449
1450    /// Apply a rolling quantile.
1451    ///
1452    /// See: [`RollingAgg::rolling_quantile`]
1453    #[cfg(feature = "rolling_window")]
1454    pub fn rolling_quantile(
1455        self,
1456        method: QuantileMethod,
1457        quantile: f64,
1458        mut options: RollingOptionsFixedWindow,
1459    ) -> Expr {
1460        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1461            prob: quantile,
1462            method,
1463        }));
1464
1465        self.finish_rolling(options, RollingFunction::Quantile)
1466    }
1467
1468    /// Apply a rolling variance.
1469    #[cfg(feature = "rolling_window")]
1470    pub fn rolling_var(self, options: RollingOptionsFixedWindow) -> Expr {
1471        self.finish_rolling(options, RollingFunction::Var)
1472    }
1473
1474    /// Apply a rolling std-dev.
1475    #[cfg(feature = "rolling_window")]
1476    pub fn rolling_std(self, options: RollingOptionsFixedWindow) -> Expr {
1477        self.finish_rolling(options, RollingFunction::Std)
1478    }
1479
1480    /// Apply a rolling skew.
1481    #[cfg(feature = "rolling_window")]
1482    #[cfg(feature = "moment")]
1483    pub fn rolling_skew(self, window_size: usize, bias: bool) -> Expr {
1484        self.apply_private(FunctionExpr::RollingExpr(RollingFunction::Skew(
1485            window_size,
1486            bias,
1487        )))
1488    }
1489
1490    #[cfg(feature = "rolling_window")]
1491    /// Apply a custom function over a rolling/ moving window of the array.
1492    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1493    pub fn rolling_map(
1494        self,
1495        f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
1496        output_type: GetOutput,
1497        options: RollingOptionsFixedWindow,
1498    ) -> Expr {
1499        self.apply(
1500            move |c: Column| {
1501                c.as_materialized_series()
1502                    .rolling_map(f.as_ref(), options.clone())
1503                    .map(Column::from)
1504                    .map(Some)
1505            },
1506            output_type,
1507        )
1508        .with_fmt("rolling_map")
1509    }
1510
1511    #[cfg(feature = "rolling_window")]
1512    /// Apply a custom function over a rolling/ moving window of the array.
1513    /// Prefer this over rolling_apply in case of floating point numbers as this is faster.
1514    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1515    pub fn rolling_map_float<F>(self, window_size: usize, f: F) -> Expr
1516    where
1517        F: 'static + FnMut(&mut Float64Chunked) -> Option<f64> + Send + Sync + Copy,
1518    {
1519        self.apply(
1520            move |c: Column| {
1521                let out = match c.dtype() {
1522                    DataType::Float64 => c
1523                        .f64()
1524                        .unwrap()
1525                        .rolling_map_float(window_size, f)
1526                        .map(|ca| ca.into_column()),
1527                    _ => c
1528                        .cast(&DataType::Float64)?
1529                        .f64()
1530                        .unwrap()
1531                        .rolling_map_float(window_size, f)
1532                        .map(|ca| ca.into_column()),
1533                }?;
1534                if let DataType::Float32 = c.dtype() {
1535                    out.cast(&DataType::Float32).map(Some)
1536                } else {
1537                    Ok(Some(out))
1538                }
1539            },
1540            GetOutput::map_field(|field| {
1541                Ok(match field.dtype() {
1542                    DataType::Float64 => field.clone(),
1543                    DataType::Float32 => Field::new(field.name().clone(), DataType::Float32),
1544                    _ => Field::new(field.name().clone(), DataType::Float64),
1545                })
1546            }),
1547        )
1548        .with_fmt("rolling_map_float")
1549    }
1550
1551    #[cfg(feature = "peaks")]
1552    pub fn peak_min(self) -> Expr {
1553        self.apply_private(FunctionExpr::PeakMin)
1554    }
1555
1556    #[cfg(feature = "peaks")]
1557    pub fn peak_max(self) -> Expr {
1558        self.apply_private(FunctionExpr::PeakMax)
1559    }
1560
1561    #[cfg(feature = "rank")]
1562    /// Assign ranks to data, dealing with ties appropriately.
1563    pub fn rank(self, options: RankOptions, seed: Option<u64>) -> Expr {
1564        self.apply_private(FunctionExpr::Rank { options, seed })
1565    }
1566
1567    #[cfg(feature = "replace")]
1568    /// Replace the given values with other values.
1569    pub fn replace<E: Into<Expr>>(self, old: E, new: E) -> Expr {
1570        let old = old.into();
1571        let new = new.into();
1572
1573        // If we search and replace by literals, we can run on batches.
1574        let literal_searchers = matches!(&old, Expr::Literal(_)) & matches!(&new, Expr::Literal(_));
1575
1576        let args = [old, new];
1577
1578        if literal_searchers {
1579            self.map_many_private(FunctionExpr::Replace, &args, false, None)
1580        } else {
1581            self.apply_many_private(FunctionExpr::Replace, &args, false, false)
1582        }
1583    }
1584
1585    #[cfg(feature = "replace")]
1586    /// Replace the given values with other values.
1587    pub fn replace_strict<E: Into<Expr>>(
1588        self,
1589        old: E,
1590        new: E,
1591        default: Option<E>,
1592        return_dtype: Option<DataType>,
1593    ) -> Expr {
1594        let old = old.into();
1595        let new = new.into();
1596
1597        // If we replace by literals, we can run on batches.
1598        let literal_searchers = matches!(&old, Expr::Literal(_)) & matches!(&new, Expr::Literal(_));
1599
1600        let mut args = vec![old, new];
1601        if let Some(default) = default {
1602            args.push(default.into())
1603        }
1604
1605        if literal_searchers {
1606            self.map_many_private(
1607                FunctionExpr::ReplaceStrict { return_dtype },
1608                &args,
1609                false,
1610                None,
1611            )
1612        } else {
1613            self.apply_many_private(
1614                FunctionExpr::ReplaceStrict { return_dtype },
1615                &args,
1616                false,
1617                false,
1618            )
1619        }
1620    }
1621
1622    #[cfg(feature = "cutqcut")]
1623    /// Bin continuous values into discrete categories.
1624    pub fn cut(
1625        self,
1626        breaks: Vec<f64>,
1627        labels: Option<impl IntoVec<PlSmallStr>>,
1628        left_closed: bool,
1629        include_breaks: bool,
1630    ) -> Expr {
1631        self.apply_private(FunctionExpr::Cut {
1632            breaks,
1633            labels: labels.map(|x| x.into_vec()),
1634            left_closed,
1635            include_breaks,
1636        })
1637        .with_function_options(|mut opt| {
1638            opt.flags |= FunctionFlags::PASS_NAME_TO_APPLY;
1639            opt
1640        })
1641    }
1642
1643    #[cfg(feature = "cutqcut")]
1644    /// Bin continuous values into discrete categories based on their quantiles.
1645    pub fn qcut(
1646        self,
1647        probs: Vec<f64>,
1648        labels: Option<impl IntoVec<PlSmallStr>>,
1649        left_closed: bool,
1650        allow_duplicates: bool,
1651        include_breaks: bool,
1652    ) -> Expr {
1653        self.apply_private(FunctionExpr::QCut {
1654            probs,
1655            labels: labels.map(|x| x.into_vec()),
1656            left_closed,
1657            allow_duplicates,
1658            include_breaks,
1659        })
1660        .with_function_options(|mut opt| {
1661            opt.flags |= FunctionFlags::PASS_NAME_TO_APPLY;
1662            opt
1663        })
1664    }
1665
1666    #[cfg(feature = "cutqcut")]
1667    /// Bin continuous values into discrete categories using uniform quantile probabilities.
1668    pub fn qcut_uniform(
1669        self,
1670        n_bins: usize,
1671        labels: Option<impl IntoVec<PlSmallStr>>,
1672        left_closed: bool,
1673        allow_duplicates: bool,
1674        include_breaks: bool,
1675    ) -> Expr {
1676        let probs = (1..n_bins).map(|b| b as f64 / n_bins as f64).collect();
1677        self.apply_private(FunctionExpr::QCut {
1678            probs,
1679            labels: labels.map(|x| x.into_vec()),
1680            left_closed,
1681            allow_duplicates,
1682            include_breaks,
1683        })
1684        .with_function_options(|mut opt| {
1685            opt.flags |= FunctionFlags::PASS_NAME_TO_APPLY;
1686            opt
1687        })
1688    }
1689
1690    #[cfg(feature = "rle")]
1691    /// Get the lengths of runs of identical values.
1692    pub fn rle(self) -> Expr {
1693        self.apply_private(FunctionExpr::RLE)
1694    }
1695
1696    #[cfg(feature = "rle")]
1697    /// Similar to `rle`, but maps values to run IDs.
1698    pub fn rle_id(self) -> Expr {
1699        self.apply_private(FunctionExpr::RLEID)
1700    }
1701
1702    #[cfg(feature = "diff")]
1703    /// Calculate the n-th discrete difference between values.
1704    pub fn diff(self, n: i64, null_behavior: NullBehavior) -> Expr {
1705        self.apply_private(FunctionExpr::Diff(n, null_behavior))
1706    }
1707
1708    #[cfg(feature = "pct_change")]
1709    /// Computes percentage change between values.
1710    pub fn pct_change(self, n: Expr) -> Expr {
1711        self.apply_many_private(FunctionExpr::PctChange, &[n], false, false)
1712    }
1713
1714    #[cfg(feature = "moment")]
1715    /// Compute the sample skewness of a data set.
1716    ///
1717    /// For normally distributed data, the skewness should be about zero. For
1718    /// uni-modal continuous distributions, a skewness value greater than zero means
1719    /// that there is more weight in the right tail of the distribution. The
1720    /// function `skewtest` can be used to determine if the skewness value
1721    /// is close enough to zero, statistically speaking.
1722    ///
1723    /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024)
1724    pub fn skew(self, bias: bool) -> Expr {
1725        self.apply_private(FunctionExpr::Skew(bias))
1726            .with_function_options(|mut options| {
1727                options.flags |= FunctionFlags::RETURNS_SCALAR;
1728                options
1729            })
1730    }
1731
1732    #[cfg(feature = "moment")]
1733    /// Compute the kurtosis (Fisher or Pearson).
1734    ///
1735    /// Kurtosis is the fourth central moment divided by the square of the
1736    /// variance. If Fisher's definition is used, then 3.0 is subtracted from
1737    /// the result to give 0.0 for a normal distribution.
1738    /// If bias is False then the kurtosis is calculated using k statistics to
1739    /// eliminate bias coming from biased moment estimators.
1740    pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr {
1741        self.apply_private(FunctionExpr::Kurtosis(fisher, bias))
1742            .with_function_options(|mut options| {
1743                options.flags |= FunctionFlags::RETURNS_SCALAR;
1744                options
1745            })
1746    }
1747
1748    /// Get maximal value that could be hold by this dtype.
1749    pub fn upper_bound(self) -> Expr {
1750        self.apply_private(FunctionExpr::UpperBound)
1751            .with_function_options(|mut options| {
1752                options.flags |= FunctionFlags::RETURNS_SCALAR;
1753                options
1754            })
1755    }
1756
1757    /// Get minimal value that could be hold by this dtype.
1758    pub fn lower_bound(self) -> Expr {
1759        self.apply_private(FunctionExpr::LowerBound)
1760            .with_function_options(|mut options| {
1761                options.flags |= FunctionFlags::RETURNS_SCALAR;
1762                options
1763            })
1764    }
1765
1766    #[cfg(feature = "dtype-array")]
1767    pub fn reshape(self, dimensions: &[i64]) -> Self {
1768        let dimensions = dimensions
1769            .iter()
1770            .map(|&v| ReshapeDimension::new(v))
1771            .collect();
1772        self.apply_private(FunctionExpr::Reshape(dimensions))
1773    }
1774
1775    #[cfg(feature = "ewma")]
1776    /// Calculate the exponentially-weighted moving average.
1777    pub fn ewm_mean(self, options: EWMOptions) -> Self {
1778        self.apply_private(FunctionExpr::EwmMean { options })
1779    }
1780
1781    #[cfg(feature = "ewma_by")]
1782    /// Calculate the exponentially-weighted moving average by a time column.
1783    pub fn ewm_mean_by(self, times: Expr, half_life: Duration) -> Self {
1784        self.apply_many_private(
1785            FunctionExpr::EwmMeanBy { half_life },
1786            &[times],
1787            false,
1788            false,
1789        )
1790    }
1791
1792    #[cfg(feature = "ewma")]
1793    /// Calculate the exponentially-weighted moving standard deviation.
1794    pub fn ewm_std(self, options: EWMOptions) -> Self {
1795        self.apply_private(FunctionExpr::EwmStd { options })
1796    }
1797
1798    #[cfg(feature = "ewma")]
1799    /// Calculate the exponentially-weighted moving variance.
1800    pub fn ewm_var(self, options: EWMOptions) -> Self {
1801        self.apply_private(FunctionExpr::EwmVar { options })
1802    }
1803
1804    /// Returns whether any of the values in the column are `true`.
1805    ///
1806    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1807    /// if the column contains any null values and no `true` values, the output
1808    /// is null.
1809    ///
1810    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1811    pub fn any(self, ignore_nulls: bool) -> Self {
1812        self.apply_private(BooleanFunction::Any { ignore_nulls }.into())
1813            .with_function_options(|mut opt| {
1814                opt.flags |= FunctionFlags::RETURNS_SCALAR;
1815                opt
1816            })
1817    }
1818
1819    /// Returns whether all values in the column are `true`.
1820    ///
1821    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1822    /// if the column contains any null values and no `false` values, the output
1823    /// is null.
1824    ///
1825    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1826    pub fn all(self, ignore_nulls: bool) -> Self {
1827        self.apply_private(BooleanFunction::All { ignore_nulls }.into())
1828            .with_function_options(|mut opt| {
1829                opt.flags |= FunctionFlags::RETURNS_SCALAR;
1830                opt
1831            })
1832    }
1833
1834    /// Shrink numeric columns to the minimal required datatype
1835    /// needed to fit the extrema of this [`Series`].
1836    /// This can be used to reduce memory pressure.
1837    pub fn shrink_dtype(self) -> Self {
1838        self.apply_private(FunctionExpr::ShrinkType)
1839    }
1840
1841    #[cfg(feature = "dtype-struct")]
1842    /// Count all unique values and create a struct mapping value to count.
1843    /// (Note that it is better to turn parallel off in the aggregation context).
1844    /// The name of the struct field with the counts is given by the parameter `name`.
1845    pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self {
1846        self.apply_private(FunctionExpr::ValueCounts {
1847            sort,
1848            parallel,
1849            name: name.into(),
1850            normalize,
1851        })
1852        .with_function_options(|mut opts| {
1853            opts.flags |= FunctionFlags::PASS_NAME_TO_APPLY;
1854            opts
1855        })
1856    }
1857
1858    #[cfg(feature = "unique_counts")]
1859    /// Returns a count of the unique values in the order of appearance.
1860    /// This method differs from [`Expr::value_counts`] in that it does not return the
1861    /// values, only the counts and might be faster.
1862    pub fn unique_counts(self) -> Self {
1863        self.apply_private(FunctionExpr::UniqueCounts)
1864    }
1865
1866    #[cfg(feature = "log")]
1867    /// Compute the logarithm to a given base.
1868    pub fn log(self, base: f64) -> Self {
1869        self.map_private(FunctionExpr::Log { base })
1870    }
1871
1872    #[cfg(feature = "log")]
1873    /// Compute the natural logarithm of all elements plus one in the input array.
1874    pub fn log1p(self) -> Self {
1875        self.map_private(FunctionExpr::Log1p)
1876    }
1877
1878    #[cfg(feature = "log")]
1879    /// Calculate the exponential of all elements in the input array.
1880    pub fn exp(self) -> Self {
1881        self.map_private(FunctionExpr::Exp)
1882    }
1883
1884    #[cfg(feature = "log")]
1885    /// Compute the entropy as `-sum(pk * log(pk)`.
1886    /// where `pk` are discrete probabilities.
1887    pub fn entropy(self, base: f64, normalize: bool) -> Self {
1888        self.apply_private(FunctionExpr::Entropy { base, normalize })
1889            .with_function_options(|mut options| {
1890                options.flags |= FunctionFlags::RETURNS_SCALAR;
1891                options
1892            })
1893    }
1894    /// Get the null count of the column/group.
1895    pub fn null_count(self) -> Expr {
1896        self.apply_private(FunctionExpr::NullCount)
1897            .with_function_options(|mut options| {
1898                options.flags |= FunctionFlags::RETURNS_SCALAR;
1899                options
1900            })
1901    }
1902
1903    /// Set this `Series` as `sorted` so that downstream code can use
1904    /// fast paths for sorted arrays.
1905    /// # Warning
1906    /// This can lead to incorrect results if this `Series` is not sorted!!
1907    /// Use with care!
1908    pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
1909        // This is `map`. If a column is sorted. Chunks of that column are also sorted.
1910        self.map_private(FunctionExpr::SetSortedFlag(sorted))
1911    }
1912
1913    #[cfg(feature = "row_hash")]
1914    /// Compute the hash of every element.
1915    pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr {
1916        self.map_private(FunctionExpr::Hash(k0, k1, k2, k3))
1917    }
1918
1919    pub fn to_physical(self) -> Expr {
1920        self.map_private(FunctionExpr::ToPhysical)
1921    }
1922
1923    pub fn gather_every(self, n: usize, offset: usize) -> Expr {
1924        self.apply_private(FunctionExpr::GatherEvery { n, offset })
1925    }
1926
1927    #[cfg(feature = "reinterpret")]
1928    pub fn reinterpret(self, signed: bool) -> Expr {
1929        self.map_private(FunctionExpr::Reinterpret(signed))
1930    }
1931
1932    pub fn extend_constant(self, value: Expr, n: Expr) -> Expr {
1933        self.apply_many_private(FunctionExpr::ExtendConstant, &[value, n], false, false)
1934    }
1935
    #[cfg(feature = "strings")]
    /// Get the [`string::StringNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `strings` feature is enabled.
    pub fn str(self) -> string::StringNameSpace {
        string::StringNameSpace(self)
    }
1941
    /// Get the [`binary::BinaryNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    pub fn binary(self) -> binary::BinaryNameSpace {
        binary::BinaryNameSpace(self)
    }
1946
    #[cfg(feature = "temporal")]
    /// Get the [`dt::DateLikeNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `temporal` feature is enabled.
    pub fn dt(self) -> dt::DateLikeNameSpace {
        dt::DateLikeNameSpace(self)
    }
1952
    /// Get the [`list::ListNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    pub fn list(self) -> list::ListNameSpace {
        list::ListNameSpace(self)
    }
1957
    /// Get the [`name::ExprNameNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    pub fn name(self) -> name::ExprNameNameSpace {
        name::ExprNameNameSpace(self)
    }
1962
    /// Get the [`array::ArrayNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `dtype-array` feature is enabled.
    #[cfg(feature = "dtype-array")]
    pub fn arr(self) -> array::ArrayNameSpace {
        array::ArrayNameSpace(self)
    }
1968
    /// Get the [`CategoricalNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `dtype-categorical` feature is enabled.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat(self) -> cat::CategoricalNameSpace {
        cat::CategoricalNameSpace(self)
    }
1974
    /// Get the [`struct_::StructNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `dtype-struct` feature is enabled.
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(self) -> struct_::StructNameSpace {
        struct_::StructNameSpace(self)
    }
1980
    /// Get the [`meta::MetaNameSpace`].
    ///
    /// Consumes this expression and wraps it in the namespace type.
    /// Only compiled when the `meta` feature is enabled.
    #[cfg(feature = "meta")]
    pub fn meta(self) -> meta::MetaNameSpace {
        meta::MetaNameSpace(self)
    }
1986}
1987
1988/// Apply a function/closure over multiple columns once the logical plan get executed.
1989///
1990/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations.
1991///
1992///  * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1993///  * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1994///
1995/// It is the responsibility of the caller that the schema is correct by giving
1996/// the correct output_type. If None given the output type of the input expr is used.
1997pub fn map_multiple<F, E>(function: F, expr: E, output_type: GetOutput) -> Expr
1998where
1999    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
2000    E: AsRef<[Expr]>,
2001{
2002    let input = expr.as_ref().to_vec();
2003
2004    Expr::AnonymousFunction {
2005        input,
2006        function: new_column_udf(function),
2007        output_type,
2008        options: FunctionOptions {
2009            collect_groups: ApplyOptions::ElementWise,
2010            fmt_str: "",
2011            ..Default::default()
2012        },
2013    }
2014}
2015
2016/// Apply a function/closure over multiple columns once the logical plan get executed.
2017///
2018/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations.
2019///
2020///  * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
2021///  * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
2022///  * [`map_list_multiple`] should be used when the function expects a list aggregated series.
2023pub fn map_list_multiple<F, E>(function: F, expr: E, output_type: GetOutput) -> Expr
2024where
2025    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
2026    E: AsRef<[Expr]>,
2027{
2028    let input = expr.as_ref().to_vec();
2029
2030    Expr::AnonymousFunction {
2031        input,
2032        function: new_column_udf(function),
2033        output_type,
2034        options: FunctionOptions {
2035            collect_groups: ApplyOptions::ApplyList,
2036            fmt_str: "",
2037            flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
2038            ..Default::default()
2039        },
2040    }
2041}
2042
2043/// Apply a function/closure over the groups of multiple columns. This should only be used in a group_by aggregation.
2044///
2045/// It is the responsibility of the caller that the schema is correct by giving
2046/// the correct output_type. If None given the output type of the input expr is used.
2047///
2048/// This difference with [`map_multiple`] is that [`apply_multiple`] will create a separate [`Series`] per group.
2049///
2050/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
2051/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
2052pub fn apply_multiple<F, E>(
2053    function: F,
2054    expr: E,
2055    output_type: GetOutput,
2056    returns_scalar: bool,
2057) -> Expr
2058where
2059    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
2060    E: AsRef<[Expr]>,
2061{
2062    let input = expr.as_ref().to_vec();
2063    let mut flags = FunctionFlags::default();
2064    if returns_scalar {
2065        flags |= FunctionFlags::RETURNS_SCALAR;
2066    }
2067
2068    Expr::AnonymousFunction {
2069        input,
2070        function: new_column_udf(function),
2071        output_type,
2072        options: FunctionOptions {
2073            collect_groups: ApplyOptions::GroupWise,
2074            // don't set this to true
2075            // this is for the caller to decide
2076            fmt_str: "",
2077            flags,
2078            ..Default::default()
2079        },
2080    }
2081}
2082
/// Return the number of rows in the context.
///
/// Constructs the [`Expr::Len`] variant; nothing is evaluated here.
pub fn len() -> Expr {
    Expr::Len
}
2087
/// First column in a DataFrame.
///
/// Constructs `Expr::Nth(0)`, i.e. the same expression as `nth(0)`.
pub fn first() -> Expr {
    Expr::Nth(0)
}
2092
/// Last column in a DataFrame.
///
/// Constructs `Expr::Nth(-1)`, i.e. the same expression as `nth(-1)`.
pub fn last() -> Expr {
    Expr::Nth(-1)
}
2097
/// Nth column in a DataFrame.
///
/// Constructs `Expr::Nth(n)`. A negative index is accepted by the type;
/// [`last`] uses `-1` to select the last column.
pub fn nth(n: i64) -> Expr {
    Expr::Nth(n)
}
polars_plan/dsl/mod.rs

polars_plan/dsl/
mod.rs