pub struct ListingTable { /* private fields */ }
Reads data from one or more files as a single table.
Implements TableProvider, a DataFusion data source. The files are read using an ObjectStore instance, for example from local files or objects from AWS S3.
For example, given the table1 directory (or object store prefix)

table1
├── file1.parquet
└── file2.parquet

A ListingTable would read the files file1.parquet and file2.parquet as a single table, merging the schemas if the files have compatible but not identical schemas.
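For a flat layout like table1, you often do not need to build the ListingTable by hand: SessionContext::read_parquet creates an equivalent listing-backed provider over the path. A minimal, self-contained sketch, assuming a local table1 directory like the one above and the Tokio runtime:

use datafusion::error::Result;
use datafusion::prelude::{ParquetReadOptions, SessionContext};

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Read every Parquet file under the table1 prefix as a single table.
    let df = ctx.read_parquet("table1", ParquetReadOptions::default()).await?;
    df.show().await?;
    Ok(())
}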
Given the table2 directory (or object store prefix)

table2
├── date=2024-06-01
│   ├── file3.parquet
│   └── file4.parquet
└── date=2024-06-02
    └── file5.parquet

A ListingTable would read the files file3.parquet, file4.parquet, and file5.parquet as a single table, again merging schemas if necessary.
Given the Hive-style partitioning structure (e.g., directories named date=2024-06-01 and date=2024-06-02), ListingTable also adds a date column when reading the table:

- The files in table2/date=2024-06-01 will have the value 2024-06-01.
- The files in table2/date=2024-06-02 will have the value 2024-06-02.
If the query has a predicate like WHERE date = '2024-06-01', only the corresponding directory will be read.

ListingTable also supports filter and projection pushdown for formats that support it, such as Parquet.
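Here is a minimal sketch of declaring the date partition column when building the options for table2; the Utf8 type and the column name simply mirror the hypothetical layout above:

use std::sync::Arc;
use datafusion::arrow::datatypes::DataType;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;

let listing_options = ListingOptions::new(Arc::new(ParquetFormat::new()))
    .with_file_extension(".parquet")
    // Expose the Hive-style `date=...` directories as a `date` partition column.
    .with_table_partition_cols(vec![("date".to_string(), DataType::Utf8)]);

With these options, a predicate such as WHERE date = '2024-06-01' lets the scan skip the files under the other date directories.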
§Example

Here is an example of reading a directory of Parquet files using a ListingTable:
use std::sync::Arc;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{
    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    let session_state = ctx.state();
    let table_path = "/path/to/parquet";

    // Parse the path
    let table_path = ListingTableUrl::parse(table_path)?;

    // Create default parquet options
    let file_format = ParquetFormat::new();
    let listing_options = ListingOptions::new(Arc::new(file_format))
        .with_file_extension(".parquet");

    // Resolve the schema
    let resolved_schema = listing_options
        .infer_schema(&session_state, &table_path)
        .await?;

    let config = ListingTableConfig::new(table_path)
        .with_listing_options(listing_options)
        .with_schema(resolved_schema);

    // Create a new TableProvider
    let provider = Arc::new(ListingTable::try_new(config)?);

    // This provider can now be read as a dataframe:
    let df = ctx.read_table(provider.clone())?;

    // or registered as a named table:
    ctx.register_table("my_table", provider)?;
    Ok(())
}
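Once registered, the provider can also be queried through SQL. A short follow-up sketch (the projection and limit are arbitrary; the column set depends on whatever schema the files actually have):

let df = ctx.sql("SELECT * FROM my_table LIMIT 10").await?;
df.show().await?;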
Implementations§
impl ListingTable
pub fn try_new(config: ListingTableConfig) -> Result<Self>

Create a new ListingTable that lists the file system (or object store) to find the files to scan. See ListingTable for an example.

Takes a ListingTableConfig as input, which requires an ObjectStore and a table_path.

ListingOptions and SchemaRef are optional. If they are not provided, the file type is inferred based on the file suffix. If the schema is provided, it must be resolved before creating the table and should contain the fields of the file without the table partitioning columns.
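If you prefer to let the configuration derive everything itself, here is a sketch of that inference path, assuming your DataFusion version provides the ListingTableConfig::infer helper (which infers the listing options from the file extension and then the schema):

let table_path = ListingTableUrl::parse("/path/to/parquet")?;
// Infer both the listing options and the schema from the files at the path.
let config = ListingTableConfig::new(table_path)
    .infer(&ctx.state())
    .await?;
let table = ListingTable::try_new(config)?;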
pub fn with_constraints(self, constraints: Constraints) -> Self

Assign constraints.
pub fn with_column_defaults(self, column_defaults: HashMap<String, Expr>) -> Self

Assign column defaults.
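A minimal sketch of assigning a default expression for one column; the status column and its default value are made up for illustration:

use std::collections::HashMap;
use datafusion::prelude::lit;

let defaults = HashMap::from([
    // Hypothetical column: rows inserted without `status` receive this value.
    ("status".to_string(), lit("unknown")),
]);
let table = ListingTable::try_new(config)?.with_column_defaults(defaults);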
pub fn with_cache(self, cache: Option<FileStatisticsCache>) -> Self

Set the FileStatisticsCache used to cache Parquet file statistics.

Setting a statistics cache on the SessionContext can avoid refetching statistics multiple times in the same session.

If None, a new DefaultFileStatisticsCache scoped to this query is created.
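As a sketch of sharing the session-level cache (assuming the cache_manager accessor on the runtime environment keeps its current name), the cache configured on the session can be handed to the table so statistics are not re-read on every query:

// Reuse the file statistics cache registered on the session's runtime
// environment, if any; otherwise a per-query default cache is created.
let cache = ctx.runtime_env().cache_manager.get_file_statistic_cache();
let table = ListingTable::try_new(config)?.with_cache(cache);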
pub fn with_definition(self, definition: Option<String>) -> Self

Specify the SQL definition for this table, if any.
pub fn table_paths(&self) -> &Vec<ListingTableUrl>

Get a reference to the table paths.

pub fn options(&self) -> &ListingOptions

Get a reference to the listing options.
Trait Implementations§
impl Debug for ListingTable
impl TableProvider for ListingTable

fn as_any(&self) -> &dyn Any

Returns the table provider as Any so that it can be downcast to a specific implementation.

fn constraints(&self) -> Option<&Constraints>

fn table_type(&self) -> TableType

async fn scan(
    &self,
    state: &dyn Session,
    projection: Option<&Vec<usize>>,
    filters: &[Expr],
    limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>>

Create an ExecutionPlan for scanning the table with the optionally specified projection, filters and limit.

fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result<Vec<TableProviderFilterPushDown>>

fn get_table_definition(&self) -> Option<&str>

async fn insert_into(
    &self,
    state: &dyn Session,
    input: Arc<dyn ExecutionPlan>,
    insert_op: InsertOp,
) -> Result<Arc<dyn ExecutionPlan>>

Return an ExecutionPlan to insert data into this table, if supported.
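Because insert_into is implemented, a registered ListingTable whose file format supports writes can be appended to through SQL. A minimal sketch, assuming my_table was registered as in the example above and that staging_table is another, hypothetical registered table:

// Append the rows produced by the SELECT to the files backing `my_table`.
ctx.sql("INSERT INTO my_table SELECT * FROM staging_table")
    .await?
    .collect()
    .await?;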
fn get_column_default(&self, column: &str) -> Option<&Expr>

fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>

Get the LogicalPlan of this table, if available.

fn statistics(&self) -> Option<Statistics>
Auto Trait Implementations§
impl Freeze for ListingTable
impl !RefUnwindSafe for ListingTable
impl Send for ListingTable
impl Sync for ListingTable
impl Unpin for ListingTable
impl !UnwindSafe for ListingTable
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise.

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise.