Struct polars_lazy::frame::LazyFrame[−][src]

pub struct LazyFrame { /* fields omitted */ }

This is supported on crate feature compile only.

Expand description

Lazy abstraction over an eager DataFrame. It really is an abstraction over a logical plan. The methods of this struct will incrementally modify a logical plan until output is requested (via collect).

Implementations

[src]

impl LazyFrame

[src]

pub fn schema(&self) -> SchemaRef

Get a handle to the schema of the current LazyFrame computation.

[src]

pub fn new_from_parquet(
 path: String,
 stop_after_n_rows: Option<usize>,
 cache: bool
) -> Self

This is supported on crate feature parquet only.

Create a LazyFrame directly from a parquet scan.

[src]

pub fn to_dot(&self, optimized: bool) -> Result<String>

Get a dot language representation of the LogicalPlan.

[src]

pub fn with_projection_pushdown(self, toggle: bool) -> Self

Toggle projection pushdown optimization.

[src]

pub fn with_predicate_pushdown(self, toggle: bool) -> Self

Toggle predicate pushdown optimization.

[src]

pub fn with_type_coercion(self, toggle: bool) -> Self

Toggle type coercion optimization.

[src]

pub fn with_simplify_expr(self, toggle: bool) -> Self

Toggle expression simplification optimization.

[src]

pub fn with_aggregate_pushdown(self, toggle: bool) -> Self

Toggle aggregate pushdown.

[src]

pub fn with_string_cache(self, toggle: bool) -> Self

Toggle global string cache.

[src]

pub fn with_join_pruning(self, toggle: bool) -> Self

Toggle join pruning optimization.

[src]

pub fn describe_plan(&self) -> String

Describe the logical plan.

[src]

pub fn describe_optimized_plan(&self) -> Result<String>

Describe the optimized logical plan.

[src]

pub fn sort(self, by_column: &str, reverse: bool) -> Self

Add a sort operation to the logical plan.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .sort("sepal.width", false)
}

[src]

pub fn sort_by_exprs(self, by_exprs: Vec<Expr>, reverse: Vec<bool>) -> Self

Add a sort operation to the logical plan.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .sort_by_exprs(vec![col("sepal.width")], vec![false])
}

[src]

pub fn reverse(self) -> Self

Reverse the DataFrame

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .reverse()
}

[src]

pub fn with_column_renamed(self, existing_name: &str, new_name: &str) -> Self

Rename a column in the DataFrame

[src]

pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self where
 I: IntoIterator<Item = T> + Clone,
 J: IntoIterator<Item = S>,
 T: AsRef<str>,
 S: AsRef<str>,

Rename columns in the DataFrame. This does not preserve ordering.

[src]

pub fn drop_columns<I, T>(self, columns: I) -> Self where
I: IntoIterator<Item = T>,
T: AsRef<str>,

Removes columns from the DataFrame. Note that it's better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.

[src]

pub fn shift(self, periods: i64) -> Self

Shift the values by a given period and fill the parts that will be empty due to this operation with Nones.

See the method on Series for more info on the shift operation.

[src]

pub fn shift_and_fill(self, periods: i64, fill_value: Expr) -> Self

Shift the values by a given period and fill the parts that will be empty due to this operation with the result of the fill_value expression.

See the method on Series for more info on the shift operation.

[src]

pub fn fill_null(self, fill_value: Expr) -> LazyFrame

Fill null values in the DataFrame.

[src]

pub fn fill_nan(self, fill_value: Expr) -> LazyFrame

Fill NaN values in the DataFrame

[src]

pub fn cache(self) -> Self

Caches the result into a new LazyFrame. This should be used to prevent computations from running multiple times.

[src]

pub fn fetch(self, n_rows: usize) -> Result<DataFrame>

Fetch is like a collect operation, but it overwrites the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.

Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.

[src]

pub fn optimize(
 self,
 lp_arena: &mut Arena<ALogicalPlan>,
 expr_arena: &mut Arena<AExpr>
) -> Result<Node>

[src]

pub fn collect(self) -> Result<DataFrame>

Execute all the lazy operations and collect them into a DataFrame. The query is optimized before execution.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> Result<DataFrame> {
    df.lazy()
      .groupby([col("foo")])
      .agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
      .collect()
}

[src]

pub fn filter(self, predicate: Expr) -> Self

Filter by some predicate expression.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .filter(col("sepal.width").is_not_null())
        .select(&[col("sepal.width"), col("sepal.length")])
}

[src]

pub fn select<E: AsRef<[Expr ]>>(self, exprs: E) -> Self

Select (and rename) columns from the query.

Columns can be selected with col; if you want to select all columns, use col("*").

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select(&[col("foo"),
                  col("bar").alias("ham")])
}

/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select(&[col("*").exclude("foo")])
}

[src]

pub fn groupby<E: AsRef<[Expr ]>>(self, by: E) -> LazyGroupBy

Group by and aggregate.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
       .groupby([col("date")])
       .agg([
           col("rain").min(),
           col("rain").sum(),
           col("rain").quantile(0.5).alias("median_rain"),
       ])
       .sort("date", false)
}

[src]

pub fn stable_groupby<E: AsRef<[Expr ]>>(self, by: E) -> LazyGroupBy

Similar to groupby, but order of the DataFrame is maintained.

[src]

pub fn left_join(
    self,
    other: LazyFrame,
    left_on: Expr,
    right_on: Expr
) -> LazyFrame

Left join this query with another lazy query.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .left_join(other, col("foo"), col("bar"))
}

[src]

pub fn outer_join(
    self,
    other: LazyFrame,
    left_on: Expr,
    right_on: Expr
) -> LazyFrame

Outer join this query with another lazy query.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .outer_join(other, col("foo"), col("bar"))
}

[src]

pub fn inner_join(
    self,
    other: LazyFrame,
    left_on: Expr,
    right_on: Expr
) -> LazyFrame

Inner join this query with another lazy query.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .inner_join(other, col("foo"), col("bar").cast(DataType::Utf8))
}

[src]

pub fn cross_join(self, other: LazyFrame) -> LazyFrame

This is supported on crate feature cross_join only.

Creates the Cartesian product of both frames, preserving the order of the left keys.

[src]

pub fn join(
 self,
 other: LazyFrame,
 left_on: Vec<Expr>,
 right_on: Vec<Expr>,
 how: JoinType
) -> LazyFrame

Generic join function that can join on multiple columns.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .join(other, vec![col("foo"), col("bar")], vec![col("foo"), col("bar")], JoinType::Inner)
}

[src]

pub fn join_builder(self) -> JoinBuilder

Control more join options with the join builder.

[src]

pub fn with_column(self, expr: Expr) -> LazyFrame

Add a column to a DataFrame

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_column(
            when(col("sepal.length").lt(lit(5.0)))
            .then(lit(10))
            .otherwise(lit(1))
            .alias("new_column_name"),
            )
}

[src]

pub fn with_columns(self, exprs: Vec<Expr>) -> LazyFrame

Add multiple columns to a DataFrame.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_columns(
            vec![lit(10).alias("foo"), lit(100).alias("bar")]
         )
}