pub struct ParquetReader<R: Read + Seek> { /* private fields */ }
Available on crate feature `parquet` only.
Expand description
Read Apache parquet format into a DataFrame.
Implementations§
Source§impl<R: MmapBytesReader> ParquetReader<R>
impl<R: MmapBytesReader> ParquetReader<R>
Sourcepub fn set_low_memory(self, low_memory: bool) -> Self
pub fn set_low_memory(self, low_memory: bool) -> Self
Try to reduce memory pressure at the expense of performance. If setting this does not reduce memory enough, turn off parallelization.
Sourcepub fn read_parallel(self, parallel: ParallelStrategy) -> Self
pub fn read_parallel(self, parallel: ParallelStrategy) -> Self
Read the parquet file in parallel (default). The single-threaded reader consumes less memory.
pub fn with_slice(self, slice: Option<(usize, usize)>) -> Self
Sourcepub fn with_columns(self, columns: Option<Vec<String>>) -> Self
pub fn with_columns(self, columns: Option<Vec<String>>) -> Self
Columns to select/project.
Sourcepub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
Set the reader’s column projection. This counts from 0, meaning that `vec![0, 4]` would select the 1st and 5th column.
Sourcepub fn with_row_index(self, row_index: Option<RowIndex>) -> Self
pub fn with_row_index(self, row_index: Option<RowIndex>) -> Self
Add a row index column.
Sourcepub fn with_arrow_schema_projection(
self,
first_schema: &Arc<ArrowSchema>,
projected_arrow_schema: Option<&ArrowSchema>,
allow_missing_columns: bool,
) -> PolarsResult<Self>
pub fn with_arrow_schema_projection( self, first_schema: &Arc<ArrowSchema>, projected_arrow_schema: Option<&ArrowSchema>, allow_missing_columns: bool, ) -> PolarsResult<Self>
Checks that the file contains all the columns in `projected_arrow_schema` with the same dtype, and sets the projection indices.
Sourcepub fn schema(&mut self) -> PolarsResult<ArrowSchemaRef>
pub fn schema(&mut self) -> PolarsResult<ArrowSchemaRef>
`Schema` of the file.
Sourcepub fn use_statistics(self, toggle: bool) -> Self
pub fn use_statistics(self, toggle: bool) -> Self
Use statistics in the parquet file to determine whether pages can be skipped when reading.
Sourcepub fn num_rows(&mut self) -> PolarsResult<usize>
pub fn num_rows(&mut self) -> PolarsResult<usize>
Number of rows in the parquet file.
pub fn with_hive_partition_columns(self, columns: Option<Vec<Series>>) -> Self
pub fn with_include_file_path( self, include_file_path: Option<(PlSmallStr, Arc<str>)>, ) -> Self
pub fn set_metadata(&mut self, metadata: FileMetadataRef)
pub fn get_metadata(&mut self) -> PolarsResult<&FileMetadataRef>
pub fn with_predicate(self, predicate: Option<Arc<dyn PhysicalIoExpr>>) -> Self
Source§impl<R: MmapBytesReader + 'static> ParquetReader<R>
impl<R: MmapBytesReader + 'static> ParquetReader<R>
pub fn batched(self, chunk_size: usize) -> PolarsResult<BatchedParquetReader>
Trait Implementations§
Source§impl<R: MmapBytesReader> SerReader<R> for ParquetReader<R>
impl<R: MmapBytesReader> SerReader<R> for ParquetReader<R>
Source§fn new(reader: R) -> Self
fn new(reader: R) -> Self
Create a new `ParquetReader` from an existing `Reader`.
Source§fn set_rechunk(self, rechunk: bool) -> Self
fn set_rechunk(self, rechunk: bool) -> Self
Make sure that all columns are contiguous in memory by
aggregating the chunks into a single array.
Source§fn finish(self) -> PolarsResult<DataFrame>
fn finish(self) -> PolarsResult<DataFrame>
Take the SerReader and return a parsed DataFrame.
Auto Trait Implementations§
impl<R> Freeze for ParquetReader<R> where
R: Freeze,
impl<R> !RefUnwindSafe for ParquetReader<R>
impl<R> Send for ParquetReader<R> where
R: Send,
impl<R> Sync for ParquetReader<R> where
R: Sync,
impl<R> Unpin for ParquetReader<R> where
R: Unpin,
impl<R> !UnwindSafe for ParquetReader<R>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more