polars_lazy::frame

Struct LazyFrame

Source
pub struct LazyFrame {
    pub logical_plan: DslPlan,
    /* private fields */
}
Expand description

Lazy abstraction over an eager DataFrame.

It really is an abstraction over a logical plan. The methods of this struct will incrementally modify a logical plan until output is requested (via collect).

Fields§

§logical_plan: DslPlan

Implementations§

Source§

impl LazyFrame

Source

pub fn to_dot(&self, optimized: bool) -> PolarsResult<String>

Available on crate feature dot_diagram only.

Get a dot language representation of the LogicalPlan.

Source§

impl LazyFrame

Source

pub fn set_cached_arena(&self, lp_arena: Arena<IR>, expr_arena: Arena<AExpr>)

Source

pub fn schema_with_arenas( &mut self, lp_arena: &mut Arena<IR>, expr_arena: &mut Arena<AExpr>, ) -> PolarsResult<SchemaRef>

Source

pub fn collect_schema(&mut self) -> PolarsResult<SchemaRef>

Get a handle to the schema — a map from column names to data types — of the current LazyFrame computation.

Returns an Err if the logical plan has already encountered an error (i.e., if self.collect() would fail), Ok otherwise.

Source§

impl LazyFrame

Source

pub fn collect_concurrently(self) -> PolarsResult<InProcessQuery>

Available on non-WebAssembly only.
Source§

impl LazyFrame

Source

pub fn get_current_optimizations(&self) -> OptFlags

Get current optimizations.

Source

pub fn with_optimizations(self, opt_state: OptFlags) -> Self

Set allowed optimizations.

Source

pub fn without_optimizations(self) -> Self

Turn off all optimizations.

Source

pub fn with_projection_pushdown(self, toggle: bool) -> Self

Toggle projection pushdown optimization.

Source

pub fn with_cluster_with_columns(self, toggle: bool) -> Self

Toggle cluster with columns optimization.

Source

pub fn with_collapse_joins(self, toggle: bool) -> Self

Toggle collapse joins optimization.

Source

pub fn with_predicate_pushdown(self, toggle: bool) -> Self

Toggle predicate pushdown optimization.

Source

pub fn with_type_coercion(self, toggle: bool) -> Self

Toggle type coercion optimization.

Source

pub fn with_simplify_expr(self, toggle: bool) -> Self

Toggle expression simplification optimization on or off.

Source

pub fn with_comm_subplan_elim(self, toggle: bool) -> Self

Available on crate feature cse only.

Toggle common subplan elimination optimization on or off.

Source

pub fn with_comm_subexpr_elim(self, toggle: bool) -> Self

Available on crate feature cse only.

Toggle common subexpression elimination optimization on or off.

Source

pub fn with_slice_pushdown(self, toggle: bool) -> Self

Toggle slice pushdown optimization.

Source

pub fn with_streaming(self, toggle: bool) -> Self

Available on crate feature streaming only.

Run nodes that are capable of doing so on the streaming engine.

Source

pub fn with_row_estimate(self, toggle: bool) -> Self

Try to estimate the number of rows so that joins can determine which side to keep in memory.

Source

pub fn _with_eager(self, toggle: bool) -> Self

Run every node eagerly. This turns off multi-node optimizations.

Source

pub fn describe_plan(&self) -> PolarsResult<String>

Return a String describing the naive (un-optimized) logical plan.

Source

pub fn describe_plan_tree(&self) -> PolarsResult<String>

Return a String describing the naive (un-optimized) logical plan in tree format.

Source

pub fn describe_optimized_plan(&self) -> PolarsResult<String>

Return a String describing the optimized logical plan.

Returns Err if optimizing the logical plan fails.

Source

pub fn describe_optimized_plan_tree(&self) -> PolarsResult<String>

Return a String describing the optimized logical plan in tree format.

Returns Err if optimizing the logical plan fails.

Source

pub fn explain(&self, optimized: bool) -> PolarsResult<String>

Return a String describing the logical plan.

If optimized is true, explains the optimized plan. If optimized is false, explains the naive, un-optimized plan.

Source

pub fn sort( self, by: impl IntoVec<PlSmallStr>, sort_options: SortMultipleOptions, ) -> Self

Add a sort operation to the logical plan.

Sorts the LazyFrame by the column name specified using the provided options.

§Example

Sort DataFrame by ‘sepal_width’ column:

fn sort_by_a(df: DataFrame) -> LazyFrame {
    df.lazy().sort(["sepal_width"], Default::default())
}

Sort by a single column with specific order:

fn sort_with_specific_order(df: DataFrame, descending: bool) -> LazyFrame {
    df.lazy().sort(
        ["sepal_width"],
        SortMultipleOptions::new()
            .with_order_descending(descending)
    )
}

Sort by multiple columns, specifying the order for each column:

fn sort_by_multiple_columns_with_specific_order(df: DataFrame) -> LazyFrame {
    df.lazy().sort(
        ["sepal_width", "sepal_length"],
        SortMultipleOptions::new()
            .with_order_descending_multi([false, true])
    )
}

See SortMultipleOptions for more options.

Source

pub fn sort_by_exprs<E: AsRef<[Expr]>>( self, by_exprs: E, sort_options: SortMultipleOptions, ) -> Self

Add a sort operation to the logical plan.

Sorts the LazyFrame by the provided list of expressions, which will be turned into concrete columns before sorting.

See SortMultipleOptions for more options.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// Sort DataFrame by 'sepal_width' column
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .sort_by_exprs(vec![col("sepal_width")], Default::default())
}
Source

pub fn top_k<E: AsRef<[Expr]>>( self, k: IdxSize, by_exprs: E, sort_options: SortMultipleOptions, ) -> Self

Source

pub fn bottom_k<E: AsRef<[Expr]>>( self, k: IdxSize, by_exprs: E, sort_options: SortMultipleOptions, ) -> Self

Source

pub fn reverse(self) -> Self

Reverse the DataFrame from top to bottom.

Row i becomes row number_of_rows - i - 1.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .reverse()
}
Source

pub fn rename<I, J, T, S>(self, existing: I, new: J, strict: bool) -> Self
where I: IntoIterator<Item = T>, J: IntoIterator<Item = S>, T: AsRef<str>, S: AsRef<str>,

Rename columns in the DataFrame.

existing and new are iterables of the same length containing the old and corresponding new column names. Renaming happens to all existing columns simultaneously, not iteratively. If strict is true, all columns in existing must be present in the LazyFrame when rename is called; otherwise, only those columns that are actually found will be renamed (others will be ignored).

Source

pub fn drop<I, T>(self, columns: I) -> Self
where I: IntoIterator<Item = T>, T: Into<Selector>,

Removes columns from the DataFrame. Note that it’s better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.

Any given columns that are not in the schema will give a PolarsError::ColumnNotFound error while materializing the LazyFrame.

Source

pub fn drop_no_validate<I, T>(self, columns: I) -> Self
where I: IntoIterator<Item = T>, T: Into<Selector>,

Removes columns from the DataFrame. Note that it’s better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.

If a column name does not exist in the schema, it will quietly be ignored.

Source

pub fn shift<E: Into<Expr>>(self, n: E) -> Self

Shift the values by a given period and fill the parts that will be empty due to this operation with Nones.

See the method on Series for more info on the shift operation.

Source

pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>( self, n: E, fill_value: IE, ) -> Self

Shift the values by a given period and fill the parts that will be empty due to this operation with the result of the fill_value expression.

See the method on Series for more info on the shift operation.

Source

pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame

Fill None values in the DataFrame with an expression.

Source

pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame

Fill NaN values in the DataFrame with an expression.

Source

pub fn cache(self) -> Self

Caches the result into a new LazyFrame.

This should be used to prevent computations running multiple times.

Source

pub fn cast(self, dtypes: PlHashMap<&str, DataType>, strict: bool) -> Self

Cast named frame columns, resulting in a new LazyFrame with updated dtypes

Source

pub fn cast_all(self, dtype: DataType, strict: bool) -> Self

Cast all frame columns to the given dtype, resulting in a new LazyFrame

Source

pub fn fetch(self, n_rows: usize) -> PolarsResult<DataFrame>

Fetch is like a collect operation, but it overwrites the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.

Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.

Source

pub fn optimize( self, lp_arena: &mut Arena<IR>, expr_arena: &mut Arena<AExpr>, ) -> PolarsResult<Node>

Source

pub fn to_alp_optimized(self) -> PolarsResult<IRPlan>

Source

pub fn to_alp(self) -> PolarsResult<IRPlan>

Source

pub fn _collect_post_opt<P>(self, post_opt: P) -> PolarsResult<DataFrame>
where P: Fn(Node, &mut Arena<IR>, &mut Arena<AExpr>) -> PolarsResult<()>,

Source

pub fn collect(self) -> PolarsResult<DataFrame>

Execute all the lazy operations and collect them into a DataFrame.

The query is optimized prior to execution.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.lazy()
      .group_by([col("foo")])
      .agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
      .collect()
}
Source

pub fn profile(self) -> PolarsResult<(DataFrame, DataFrame)>

Profile a LazyFrame.

This will run the query and return a tuple containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed.

The units of the timings are microseconds.

Source

pub fn sink_parquet( self, path: &dyn AsRef<Path>, options: ParquetWriteOptions, cloud_options: Option<CloudOptions>, ) -> PolarsResult<()>

Available on crate feature parquet only.

Stream a query result into a parquet file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.

Source

pub fn sink_ipc( self, path: impl AsRef<Path>, options: IpcWriterOptions, cloud_options: Option<CloudOptions>, ) -> PolarsResult<()>

Available on crate feature ipc only.

Stream a query result into an ipc/arrow file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.

Source

pub fn sink_csv( self, path: impl AsRef<Path>, options: CsvWriterOptions, cloud_options: Option<CloudOptions>, ) -> PolarsResult<()>

Available on crate feature csv only.

Stream a query result into a csv file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.

Source

pub fn sink_json( self, path: impl AsRef<Path>, options: JsonWriterOptions, cloud_options: Option<CloudOptions>, ) -> PolarsResult<()>

Available on crate feature json only.

Stream a query result into a json file. This is useful if the final result doesn’t fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.

Source

pub fn filter(self, predicate: Expr) -> Self

Filter by some predicate expression.

The expression must yield boolean values.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .filter(col("sepal_width").is_not_null())
        .select([col("sepal_width"), col("sepal_length")])
}
Source

pub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self

Select (and optionally rename, with alias) columns from the query.

Columns can be selected with col; if you want to select all columns, use col(PlSmallStr::from_static("*")).

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select([col("foo"),
                  col("bar").alias("ham")])
}

/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select([col(PlSmallStr::from_static("*")).exclude(["foo"])])
}
Source

pub fn select_seq<E: AsRef<[Expr]>>(self, exprs: E) -> Self

Source

pub fn group_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>( self, by: E, ) -> LazyGroupBy

Performs a “group-by” on a LazyFrame, producing a LazyGroupBy, which can subsequently be aggregated.

Takes a list of expressions to group on.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
use arrow::legacy::prelude::QuantileMethod;

fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
       .group_by([col("date")])
       .agg([
           col("rain").min().alias("min_rain"),
           col("rain").sum().alias("sum_rain"),
           col("rain").quantile(lit(0.5), QuantileMethod::Nearest).alias("median_rain"),
       ])
}
Source

pub fn rolling<E: AsRef<[Expr]>>( self, index_column: Expr, group_by: E, options: RollingGroupOptions, ) -> LazyGroupBy

Available on crate feature dynamic_group_by only.

Create rolling groups based on a time column.

Also works for index values of type UInt32, UInt64, Int32, or Int64.

Unlike group_by_dynamic, the windows are determined by the individual values and are not of constant intervals. For constant intervals, use group_by_dynamic.

Source

pub fn group_by_dynamic<E: AsRef<[Expr]>>( self, index_column: Expr, group_by: E, options: DynamicGroupOptions, ) -> LazyGroupBy

Available on crate feature dynamic_group_by only.

Group based on a time value (or index value of type Int32, Int64).

Time windows are calculated and rows are assigned to windows. Unlike a normal group_by, a row can be a member of multiple groups. The time/index window could be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.

A window is defined by:

  • every: interval of the window
  • period: length of the window
  • offset: offset of the window

The group_by argument should be empty [] if you don’t want to combine this with an ordinary group_by on these keys.

Source

pub fn group_by_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>( self, by: E, ) -> LazyGroupBy

Similar to group_by, but order of the DataFrame is maintained.

Source

pub fn anti_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E, ) -> LazyFrame

Available on crate feature semi_anti_join only.

Left anti join this query with another lazy query.

Matches on the values of the expressions left_on and right_on. For more flexible join logic, see join or join_builder.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn anti_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .anti_join(other, col("foo"), col("bar").cast(DataType::String))
}
Source

pub fn cross_join( self, other: LazyFrame, suffix: Option<PlSmallStr>, ) -> LazyFrame

Available on crate feature cross_join only.

Creates the Cartesian product from both frames, preserving the order of the left keys.

Source

pub fn left_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E, ) -> LazyFrame

Left outer join this query with another lazy query.

Matches on the values of the expressions left_on and right_on. For more flexible join logic, see join or join_builder.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn left_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .left_join(other, col("foo"), col("bar"))
}
Source

pub fn inner_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E, ) -> LazyFrame

Inner join this query with another lazy query.

Matches on the values of the expressions left_on and right_on. For more flexible join logic, see join or join_builder.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn inner_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .inner_join(other, col("foo"), col("bar").cast(DataType::String))
}
Source

pub fn full_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E, ) -> LazyFrame

Full outer join this query with another lazy query.

Matches on the values of the expressions left_on and right_on. For more flexible join logic, see join or join_builder.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn full_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .full_join(other, col("foo"), col("bar"))
}
Source

pub fn semi_join<E: Into<Expr>>( self, other: LazyFrame, left_on: E, right_on: E, ) -> LazyFrame

Available on crate feature semi_anti_join only.

Left semi join this query with another lazy query.

Matches on the values of the expressions left_on and right_on. For more flexible join logic, see join or join_builder.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn semi_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .semi_join(other, col("foo"), col("bar").cast(DataType::String))
}
Source

pub fn join<E: AsRef<[Expr]>>( self, other: LazyFrame, left_on: E, right_on: E, args: JoinArgs, ) -> LazyFrame

Generic function to join two LazyFrames.

join can join on multiple columns, given as two lists of expressions, and with a JoinType specified by how. Non-joined column names in the right DataFrame that already exist in this DataFrame are suffixed with "_right". For control over how columns are renamed and parallelization options, use join_builder.

Any provided args.slice parameter is not considered, but set by the internal optimizer.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
        ldf
        .join(other, [col("foo"), col("bar")], [col("foo"), col("bar")], JoinArgs::new(JoinType::Inner))
}
Source

pub fn join_builder(self) -> JoinBuilder

Consume self and return a JoinBuilder to customize a join on this LazyFrame.

After the JoinBuilder has been created and set up, calling finish() on it will give back the LazyFrame representing the join operation.

Source

pub fn with_column(self, expr: Expr) -> LazyFrame

Add or replace a column, given as an expression, to a DataFrame.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_column(
            when(col("sepal_length").lt(lit(5.0)))
            .then(lit(10))
            .otherwise(lit(1))
            .alias("new_column_name"),
        )
}
Source

pub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame

Add or replace multiple columns, given as expressions, to a DataFrame.

§Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
    df.lazy()
        .with_columns(
            vec![lit(10).alias("foo"), lit(100).alias("bar")]
         )
}
Source

pub fn with_columns_seq<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame

Add or replace multiple columns to a DataFrame, but evaluate them sequentially.

Source

pub fn with_context<C: AsRef<[LazyFrame]>>(self, contexts: C) -> LazyFrame

Source

pub fn max(self) -> Self

Aggregate all the columns as their maximum values.

Aggregated columns will have the same names as the original columns.

Source

pub fn min(self) -> Self

Aggregate all the columns as their minimum values.

Aggregated columns will have the same names as the original columns.

Source

pub fn sum(self) -> Self

Aggregate all the columns as their sum values.

Aggregated columns will have the same names as the original columns.

  • Boolean columns will sum to a u32 containing the number of trues.
  • For integer columns, the ordinary checks for overflow are performed: if running in debug mode, overflows will panic, whereas in release mode overflows will silently wrap.
  • String columns will sum to None.
Source

pub fn mean(self) -> Self

Aggregate all the columns as their mean values.

  • Boolean and integer columns are converted to f64 before computing the mean.
  • String columns will have a mean of None.
Source

pub fn median(self) -> Self

Aggregate all the columns as their median values.

  • Boolean and integer results are converted to f64. However, they are still susceptible to overflow before this conversion occurs.
  • String columns will have a median of None.
Source

pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self

Aggregate all the columns as their quantile values.

Source

pub fn std(self, ddof: u8) -> Self

Aggregate all the columns as their standard deviation values.

ddof is the “Delta Degrees of Freedom”; N - ddof will be the denominator when computing the variance, where N is the number of rows.

In standard statistical practice, ddof=1 provides an unbiased estimator of the variance of a hypothetical infinite population. ddof=0 provides a maximum likelihood estimate of the variance for normally distributed variables. The standard deviation computed in this function is the square root of the estimated variance, so even with ddof=1, it will not be an unbiased estimate of the standard deviation per se.

Source: Numpy

Source

pub fn var(self, ddof: u8) -> Self

Aggregate all the columns as their variance values.

ddof is the “Delta Degrees of Freedom”; N - ddof will be the denominator when computing the variance, where N is the number of rows.

In standard statistical practice, ddof=1 provides an unbiased estimator of the variance of a hypothetical infinite population. ddof=0 provides a maximum likelihood estimate of the variance for normally distributed variables.

Source: Numpy

Source

pub fn explode<E: AsRef<[IE]>, IE: Into<Selector> + Clone>( self, columns: E, ) -> LazyFrame

Apply explode operation. See eager explode.

Source

pub fn null_count(self) -> LazyFrame

Aggregate all the columns as the sum of their null value count.

Source

pub fn unique_stable( self, subset: Option<Vec<PlSmallStr>>, keep_strategy: UniqueKeepStrategy, ) -> LazyFrame

Drop non-unique rows and maintain the order of kept rows.

subset is an optional Vec of column names to consider for uniqueness; if None, all columns are considered.

Source

pub fn unique_stable_generic<E, IE>( self, subset: Option<E>, keep_strategy: UniqueKeepStrategy, ) -> LazyFrame
where E: AsRef<[IE]>, IE: Into<Selector> + Clone,

Source

pub fn unique( self, subset: Option<Vec<String>>, keep_strategy: UniqueKeepStrategy, ) -> LazyFrame

Drop non-unique rows without maintaining the order of kept rows.

The order of the kept rows may change; to maintain the original row order, use unique_stable.

subset is an optional Vec of column names to consider for uniqueness; if None, all columns are considered.

Source

pub fn unique_generic<E: AsRef<[IE]>, IE: Into<Selector> + Clone>( self, subset: Option<E>, keep_strategy: UniqueKeepStrategy, ) -> LazyFrame

Source

pub fn drop_nans(self, subset: Option<Vec<Expr>>) -> LazyFrame

Drop rows containing one or more NaN values.

subset is an optional Vec of column names to consider for NaNs; if None, all floating point columns are considered.

Source

pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame

Drop rows containing one or more None values.

subset is an optional Vec of column names to consider for nulls; if None, all columns are considered.

Source

pub fn slice(self, offset: i64, len: IdxSize) -> LazyFrame

Slice the DataFrame using an offset (starting row) and a length.

If offset is negative, it is counted from the end of the DataFrame. For instance, lf.slice(-5, 3) gets three rows, starting at the row fifth from the end.

If offset and len are such that the slice extends beyond the end of the DataFrame, the portion between offset and the end will be returned. In this case, the number of rows in the returned DataFrame will be less than len.

Source

pub fn first(self) -> LazyFrame

Get the first row.

Equivalent to self.slice(0, 1).

Source

pub fn last(self) -> LazyFrame

Get the last row.

Equivalent to self.slice(-1, 1).

Source

pub fn tail(self, n: IdxSize) -> LazyFrame

Get the last n rows.

Equivalent to self.slice(-(n as i64), n).

Source

pub fn unpivot(self, args: UnpivotArgsDSL) -> LazyFrame

Available on crate feature pivot only.

Unpivot the DataFrame from wide to long format.

See UnpivotArgsIR for information on how to unpivot a DataFrame.

Source

pub fn limit(self, n: IdxSize) -> LazyFrame

Limit the DataFrame to the first n rows.

Note if you don’t want the rows to be scanned, use fetch.

Source

pub fn map<F>( self, function: F, optimizations: AllowedOptimizations, schema: Option<Arc<dyn UdfSchema>>, name: Option<&'static str>, ) -> LazyFrame
where F: 'static + Fn(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,

Apply a function/closure once the logical plan gets executed.

The function has access to the whole materialized DataFrame at the time it is called.

To apply specific functions to specific columns, use Expr::map in conjunction with LazyFrame::with_column or with_columns.

§Warning

This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.

You can toggle certain optimizations off.

Source

pub fn with_row_index<S>(self, name: S, offset: Option<IdxSize>) -> LazyFrame
where S: Into<PlSmallStr>,

Add a new column at index 0 that counts the rows.

name is the name of the new column. offset is where to start counting from; if None, it is set to 0.

§Warning

This can have a negative effect on query performance. This may for instance block predicate pushdown optimization.

Source

pub fn count(self) -> LazyFrame

Return the number of non-null elements for each column.

Source

pub fn unnest<E, IE>(self, cols: E) -> Self
where E: AsRef<[IE]>, IE: Into<Selector> + Clone,

Available on crate feature dtype-struct only.

Unnest the given Struct columns: the fields of the Struct type will be inserted as columns.

Source

pub fn merge_sorted<S>( self, other: LazyFrame, key: S, ) -> PolarsResult<LazyFrame>
where S: Into<PlSmallStr>,

Available on crate feature merge_sorted only.
Source§

impl LazyFrame

Source

pub fn anonymous_scan( function: Arc<dyn AnonymousScan>, args: ScanArgsAnonymous, ) -> PolarsResult<Self>

Source§

impl LazyFrame

Source

pub fn scan_ipc(path: impl AsRef<Path>, args: ScanArgsIpc) -> PolarsResult<Self>

Available on crate feature ipc only.

Create a LazyFrame directly from an ipc scan.

Source

pub fn scan_ipc_files( paths: Arc<[PathBuf]>, args: ScanArgsIpc, ) -> PolarsResult<Self>

Available on crate feature ipc only.
Source

pub fn scan_ipc_sources( sources: ScanSources, args: ScanArgsIpc, ) -> PolarsResult<Self>

Available on crate feature ipc only.
Source§

impl LazyFrame

Source

pub fn scan_parquet( path: impl AsRef<Path>, args: ScanArgsParquet, ) -> PolarsResult<Self>

Available on crate feature parquet only.

Create a LazyFrame directly from a parquet scan.

Source

pub fn scan_parquet_sources( sources: ScanSources, args: ScanArgsParquet, ) -> PolarsResult<Self>

Available on crate feature parquet only.

Create a LazyFrame directly from a parquet scan.

Source

pub fn scan_parquet_files( paths: Arc<[PathBuf]>, args: ScanArgsParquet, ) -> PolarsResult<Self>

Available on crate feature parquet only.

Create a LazyFrame directly from a parquet scan.

Trait Implementations§

Source§

impl Clone for LazyFrame

Source§

fn clone(&self) -> LazyFrame

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Default for LazyFrame

Source§

fn default() -> LazyFrame

Returns the “default value” for a type. Read more
Source§

impl From<DslPlan> for LazyFrame

Source§

fn from(plan: DslPlan) -> Self

Converts to this type from the input type.
Source§

impl From<LazyGroupBy> for LazyFrame

Source§

fn from(lgb: LazyGroupBy) -> Self

Converts to this type from the input type.
Source§

impl IntoLazy for LazyFrame

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> DynClone for T
where T: Clone,

Source§

fn __clone_box(&self, _: Private) -> *mut ()

Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize = _

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T