pub struct GroupBy<'df> {
pub df: &'df DataFrame,
/* private fields */
}
Available on crate feature algorithm_group_by only.
Expand description
Returned by a group_by operation on a DataFrame. This struct supports several aggregations.
Unless described otherwise, the examples in this struct are performed on the following DataFrame:
use polars_core::prelude::*;
let dates = &[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
];
// date format
let fmt = "%Y-%m-%d";
// create date series
let s0 = DateChunked::parse_from_str_slice("date", dates, fmt)
.into_series();
// create temperature series
let s1 = Series::new("temp".into(), [20, 10, 7, 9, 1]);
// create rain series
let s2 = Series::new("rain".into(), [0.2, 0.1, 0.3, 0.1, 0.01]);
// create a new DataFrame
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);
Outputs:
+------------+------+------+
| date | temp | rain |
| --- | --- | --- |
| Date | i32 | f64 |
+============+======+======+
| 2020-08-21 | 20 | 0.2 |
+------------+------+------+
| 2020-08-21 | 10 | 0.1 |
+------------+------+------+
| 2020-08-22 | 7 | 0.3 |
+------------+------+------+
| 2020-08-23 | 9 | 0.1 |
+------------+------+------+
| 2020-08-22 | 1 | 0.01 |
+------------+------+------+
Fields§
§df: &'df DataFrame
Implementations§
Source§impl<'df> GroupBy<'df>
impl<'df> GroupBy<'df>
pub fn new( df: &'df DataFrame, by: Vec<Column>, groups: GroupsProxy, selected_agg: Option<Vec<PlSmallStr>>, ) -> Self
Sourcepub fn select<I: IntoIterator<Item = S>, S: Into<PlSmallStr>>(
self,
selection: I,
) -> Self
pub fn select<I: IntoIterator<Item = S>, S: Into<PlSmallStr>>( self, selection: I, ) -> Self
Select the column(s) that should be aggregated. You can select a single column or a slice of columns.
Note that making a selection with this method is not required. If you skip it, all columns (except for the keys) will be selected for aggregation.
Sourcepub fn get_groups(&self) -> &GroupsProxy
pub fn get_groups(&self) -> &GroupsProxy
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>)
where the second value in the tuple is a vector with all matching indexes.
Sourcepub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy
pub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>)
where the second value in the tuple is a vector with all matching indexes.
§Safety
Groups should always be in bounds of the DataFrame held by this GroupBy.
If you mutate it, you must uphold that invariant.
pub fn take_groups(self) -> GroupsProxy
pub fn take_groups_mut(&mut self) -> GroupsProxy
pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Column>
pub fn keys(&self) -> Vec<Column>
Sourcepub fn mean(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn mean(&self) -> PolarsResult<DataFrame>
Aggregate grouped series and compute the mean per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp", "rain"]).mean()
}
Returns:
+------------+-----------+-----------+
| date | temp_mean | rain_mean |
| --- | --- | --- |
| Date | f64 | f64 |
+============+===========+===========+
| 2020-08-23 | 9 | 0.1 |
+------------+-----------+-----------+
| 2020-08-22 | 4 | 0.155 |
+------------+-----------+-----------+
| 2020-08-21 | 15 | 0.15 |
+------------+-----------+-----------+
Sourcepub fn sum(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn sum(&self) -> PolarsResult<DataFrame>
Aggregate grouped series and compute the sum per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).sum()
}
Returns:
+------------+----------+
| date | temp_sum |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 8 |
+------------+----------+
| 2020-08-21 | 30 |
+------------+----------+
Sourcepub fn min(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn min(&self) -> PolarsResult<DataFrame>
Aggregate grouped series and compute the minimal value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).min()
}
Returns:
+------------+----------+
| date | temp_min |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 1 |
+------------+----------+
| 2020-08-21 | 10 |
+------------+----------+
Sourcepub fn max(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn max(&self) -> PolarsResult<DataFrame>
Aggregate grouped series and compute the maximum value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).max()
}
Returns:
+------------+----------+
| date | temp_max |
| --- | --- |
| Date | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 7 |
+------------+----------+
| 2020-08-21 | 20 |
+------------+----------+
Sourcepub fn first(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn first(&self) -> PolarsResult<DataFrame>
Aggregate grouped Series and find the first value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).first()
}
Returns:
+------------+------------+
| date | temp_first |
| --- | --- |
| Date | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 7 |
+------------+------------+
| 2020-08-21 | 20 |
+------------+------------+
Sourcepub fn last(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn last(&self) -> PolarsResult<DataFrame>
Aggregate grouped Series and return the last value per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).last()
}
Returns:
+------------+------------+
| date | temp_last |
| --- | --- |
| Date | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 1 |
+------------+------------+
| 2020-08-21 | 10 |
+------------+------------+
Sourcepub fn n_unique(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn n_unique(&self) -> PolarsResult<DataFrame>
Aggregate grouped Series by counting the number of unique values.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).n_unique()
}
Returns:
+------------+---------------+
| date | temp_n_unique |
| --- | --- |
| Date | u32 |
+============+===============+
| 2020-08-23 | 1 |
+------------+---------------+
| 2020-08-22 | 2 |
+------------+---------------+
| 2020-08-21 | 2 |
+------------+---------------+
Sourcepub fn quantile(
&self,
quantile: f64,
method: QuantileMethod,
) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn quantile( &self, quantile: f64, method: QuantileMethod, ) -> PolarsResult<DataFrame>
Sourcepub fn median(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn median(&self) -> PolarsResult<DataFrame>
Sourcepub fn var(&self, ddof: u8) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn var(&self, ddof: u8) -> PolarsResult<DataFrame>
Aggregate grouped Series and determine the variance per group.
Sourcepub fn std(&self, ddof: u8) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn std(&self, ddof: u8) -> PolarsResult<DataFrame>
Aggregate grouped Series and determine the standard deviation per group.
Sourcepub fn count(&self) -> PolarsResult<DataFrame>
pub fn count(&self) -> PolarsResult<DataFrame>
Aggregate grouped series and compute the number of values per group.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.select(["temp"]).count()
}
Returns:
+------------+------------+
| date | temp_count |
| --- | --- |
| Date | u32 |
+============+============+
| 2020-08-23 | 1 |
+------------+------------+
| 2020-08-22 | 2 |
+------------+------------+
| 2020-08-21 | 2 |
+------------+------------+
Sourcepub fn groups(&self) -> PolarsResult<DataFrame>
pub fn groups(&self) -> PolarsResult<DataFrame>
Get the group_by group indexes.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.group_by(["date"])?.groups()
}
Returns:
+--------------+------------+
| date | groups |
| --- | --- |
| Date(days) | list [u32] |
+==============+============+
| 2020-08-23 | "[3]" |
+--------------+------------+
| 2020-08-22 | "[2, 4]" |
+--------------+------------+
| 2020-08-21 | "[0, 1]" |
+--------------+------------+
Sourcepub fn agg_list(&self) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn agg_list(&self) -> PolarsResult<DataFrame>
Aggregate the groups of the group_by operation into lists.
§Example
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
// GroupBy and aggregate to Lists
df.group_by(["date"])?.select(["temp"]).agg_list()
}
Returns:
+------------+------------------------+
| date | temp_agg_list |
| --- | --- |
| Date | list [i32] |
+============+========================+
| 2020-08-23 | "[Some(9)]" |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]" |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+
Sourcepub fn par_apply<F>(&self, f: F) -> PolarsResult<DataFrame>
👎Deprecated since 0.24.1: use polars.lazy aggregations
pub fn par_apply<F>(&self, f: F) -> PolarsResult<DataFrame>
Apply a closure over the groups as a new DataFrame in parallel.
Sourcepub fn apply<F>(&self, f: F) -> PolarsResult<DataFrame>
pub fn apply<F>(&self, f: F) -> PolarsResult<DataFrame>
Apply a closure over the groups as a new DataFrame.
pub fn sliced(self, slice: Option<(i64, usize)>) -> Self
Trait Implementations§
Auto Trait Implementations§
impl<'df> Freeze for GroupBy<'df>
impl<'df> !RefUnwindSafe for GroupBy<'df>
impl<'df> Send for GroupBy<'df>
impl<'df> Sync for GroupBy<'df>
impl<'df> Unpin for GroupBy<'df>
impl<'df> !UnwindSafe for GroupBy<'df>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more