#[cfg(feature = "csv")]
use std::num::NonZeroUsize;
use std::path::PathBuf;
use polars_core::prelude::*;
#[cfg(feature = "csv")]
use polars_io::csv::SerializeOptions;
#[cfg(feature = "csv")]
use polars_io::csv::{CommentPrefix, CsvEncoding, NullValues};
#[cfg(feature = "ipc")]
use polars_io::ipc::IpcCompression;
#[cfg(feature = "parquet")]
use polars_io::parquet::ParquetCompression;
use polars_io::RowIndex;
#[cfg(feature = "dynamic_group_by")]
use polars_time::{DynamicGroupOptions, RollingGroupOptions};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "python")]
use crate::prelude::python_udf::PythonFunction;
/// Integer type used for the `file_counter` field of `FileScanOptions`.
pub type FileCount = u32;
/// Settings bundle for parsing CSV input (per the type name).
///
/// Only compiled with the `csv` feature; additionally derives
/// `Serialize`/`Deserialize` when the `serde` feature is enabled.
///
/// This type only stores values. How each one is interpreted is decided by the
/// CSV reader that consumes it, which is not visible in this file, so the
/// field notes below are inferred from names and types.
/// NOTE(review): confirm them against the reader.
#[cfg(feature = "csv")]
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CsvParserOptions {
/// Single byte; presumably the field delimiter.
pub separator: u8,
/// Optional comment marker; presumably `None` means no comment handling.
pub comment_prefix: Option<CommentPrefix>,
/// Optional single-byte quote character; presumably `None` disables quoting.
pub quote_char: Option<u8>,
/// Single byte; presumably the end-of-line marker.
pub eol_char: u8,
/// Presumably whether the first row is treated as a header.
pub has_header: bool,
/// Presumably the number of leading rows to skip.
pub skip_rows: usize,
/// Presumably trades speed for a smaller memory footprint.
pub low_memory: bool,
/// Presumably continues past parse errors instead of failing.
pub ignore_errors: bool,
/// Optional description of which values are read as null.
pub null_values: Option<NullValues>,
/// Text encoding selector, as defined by `CsvEncoding`.
pub encoding: CsvEncoding,
/// Presumably enables automatic date/datetime detection.
pub try_parse_dates: bool,
/// Presumably makes empty input an error rather than an empty result.
pub raise_if_empty: bool,
/// Presumably truncates rows that have more fields than expected.
pub truncate_ragged_lines: bool,
/// Optional thread count; presumably `None` leaves the choice to the reader.
pub n_threads: Option<usize>,
}
/// Settings bundle for reading Parquet data (per the type name).
///
/// Only compiled with the `parquet` feature. The type is `Copy`.
#[cfg(feature = "parquet")]
#[derive(Clone, Debug, PartialEq, Eq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ParquetOptions {
/// Parallelisation strategy, as defined by `polars_io::parquet::ParallelStrategy`.
pub parallel: polars_io::parquet::ParallelStrategy,
/// Presumably trades speed for a smaller memory footprint — confirm against the reader.
pub low_memory: bool,
/// Presumably lets the reader use Parquet statistics to skip data — TODO confirm.
pub use_statistics: bool,
}
/// Settings bundle for writing Parquet output (per the type name).
///
/// Only compiled with the `parquet` feature. `Default` is derived, so the
/// booleans default to `false`, the `Option`s to `None`, and `compression` to
/// `ParquetCompression::default()`.
#[cfg(feature = "parquet")]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ParquetWriteOptions {
/// Compression selection, as defined by `ParquetCompression`.
pub compression: ParquetCompression,
/// Presumably whether statistics are written to the file — TODO confirm.
pub statistics: bool,
/// Optional row-group size; presumably `None` leaves the choice to the writer.
pub row_group_size: Option<usize>,
/// Optional data page size limit; presumably `None` leaves the choice to the writer.
pub data_pagesize_limit: Option<usize>,
/// Presumably whether output row order must match input order.
pub maintain_order: bool,
}
/// Settings bundle for writing IPC output (per the type name).
///
/// Only compiled with the `ipc` feature. `Default` is derived: no compression
/// (`None`) and `maintain_order == false`.
#[cfg(feature = "ipc")]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct IpcWriterOptions {
/// Optional compression; presumably `None` writes uncompressed data.
pub compression: Option<IpcCompression>,
/// Presumably whether output row order must match input order.
pub maintain_order: bool,
}
/// Settings bundle for writing CSV output (per the type name).
///
/// Only compiled with the `csv` feature. `Default` is not derived here; it is
/// provided by a hand-written `impl Default` in this file.
#[cfg(feature = "csv")]
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CsvWriterOptions {
/// Presumably whether a byte-order mark is written at the start of the output.
pub include_bom: bool,
/// Presumably whether a header row is written.
pub include_header: bool,
/// Batch size; the type guarantees it is never zero.
pub batch_size: NonZeroUsize,
/// Presumably whether output row order must match input order.
pub maintain_order: bool,
/// Value-formatting settings, as defined by `polars_io::csv::SerializeOptions`.
pub serialize_options: SerializeOptions,
}
#[cfg(feature = "csv")]
impl Default for CsvWriterOptions {
    /// Builds the default writer settings: no BOM, header included, a batch
    /// size of 1024, `maintain_order` off, and `SerializeOptions::default()`.
    fn default() -> Self {
        // 1024 is a non-zero literal, so this `unwrap` cannot fail.
        let batch_size = NonZeroUsize::new(1024).unwrap();
        let serialize_options = SerializeOptions::default();

        CsvWriterOptions {
            include_bom: false,
            include_header: true,
            batch_size,
            maintain_order: false,
            serialize_options,
        }
    }
}
/// Settings bundle for writing JSON output (per the type name).
///
/// Only compiled with the `json` feature. `Default` is derived, giving
/// `maintain_order == false`.
#[cfg(feature = "json")]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct JsonWriterOptions {
/// Presumably whether output row order must match input order.
pub maintain_order: bool,
}
/// Settings for scanning an IPC source (per the type name).
///
/// Derives `Eq` alongside `PartialEq`: the only field is a `bool`, so equality
/// is total, and this matches the other option types in this module.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct IpcScanOptions {
    /// Presumably whether the file is memory-mapped instead of being read
    /// into memory — confirm against the IPC reader.
    pub memmap: bool,
}
/// Settings shared by file scans (per the type name).
///
/// `Default` is derived: every `Option` is `None`, every `bool` is `false`,
/// and `file_counter` is `0`.
/// NOTE(review): field meanings below are inferred from names; confirm
/// against the scan implementations.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FileScanOptions {
/// Optional row limit; presumably `None` means no limit.
pub n_rows: Option<usize>,
/// Optional shared list of column names; presumably the columns to read.
pub with_columns: Option<Arc<Vec<String>>>,
/// Presumably whether the scan result may be cached.
pub cache: bool,
/// Optional row-index column description, as defined by `polars_io::RowIndex`.
pub row_index: Option<RowIndex>,
/// Presumably whether the result is rechunked into contiguous memory.
pub rechunk: bool,
/// Counter of type `FileCount` (`u32`); its exact use is not visible in this file.
pub file_counter: FileCount,
/// Presumably whether hive-style partition information is used.
pub hive_partitioning: bool,
}
/// Settings attached to a union (vertical concatenation) node, per the type name.
///
/// `Copy`; `Default` is derived, so all flags are `false`, `slice` is `None`
/// and `rows` is `(None, 0)`.
#[derive(Clone, Debug, Copy, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct UnionOptions {
/// Optional `(i64, usize)` pair; presumably `(offset, length)` of a slice to apply.
pub slice: Option<(i64, usize)>,
/// Presumably whether the inputs may be executed in parallel.
pub parallel: bool,
/// NOTE(review): the meaning of this pair is not evident from this file;
/// it looks like row-count information (optional value plus a second count).
pub rows: (Option<usize>, usize),
/// Presumably set when the union originates from a partitioned data source.
pub from_partitioned_ds: bool,
/// Presumably set when an optimizer pass flattened nested unions into this one.
pub flattened_by_opt: bool,
/// Presumably whether the result is rechunked into contiguous memory.
pub rechunk: bool,
}
/// Settings attached to a horizontal-concatenation node (per the type name).
///
/// `Copy`; the derived `Default` gives `parallel == false`.
#[derive(Clone, Debug, Copy, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct HConcatOptions {
/// Presumably whether the inputs may be executed in parallel.
pub parallel: bool,
}
/// Settings attached to a group-by node (per the type name).
///
/// The `dynamic` and `rolling` fields exist only when the `dynamic_group_by`
/// feature is enabled. The derived `Default` sets every field to `None`.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GroupbyOptions {
/// Optional dynamic-window settings, as defined by `polars_time::DynamicGroupOptions`.
#[cfg(feature = "dynamic_group_by")]
pub dynamic: Option<DynamicGroupOptions>,
/// Optional rolling-window settings, as defined by `polars_time::RollingGroupOptions`.
#[cfg(feature = "dynamic_group_by")]
pub rolling: Option<RollingGroupOptions>,
/// Optional `(i64, usize)` pair; presumably `(offset, length)` of a slice to apply.
pub slice: Option<(i64, usize)>,
}
/// Settings attached to a distinct/unique node (per the type name).
///
/// `Default` is derived: `subset` and `slice` are `None`, `maintain_order` is
/// `false`, and `keep_strategy` is `UniqueKeepStrategy::default()`.
#[derive(Clone, Debug, Eq, PartialEq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DistinctOptions {
/// Optional shared list of column names; presumably the columns that define
/// uniqueness, with `None` meaning all columns — TODO confirm.
pub subset: Option<Arc<Vec<String>>>,
/// Presumably whether output row order must match input order.
pub maintain_order: bool,
/// Which duplicate to keep, as defined by `UniqueKeepStrategy`.
pub keep_strategy: UniqueKeepStrategy,
/// Optional `(i64, usize)` pair; presumably `(offset, length)` of a slice to apply.
pub slice: Option<(i64, usize)>,
}
/// Selects how a function is applied with respect to groups; stored in
/// `FunctionOptions::collect_groups`.
///
/// NOTE(review): the exact execution semantics of each variant are defined by
/// the consumers of this enum, which are not visible in this file.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ApplyOptions {
/// The value used by `FunctionOptions::default()`, and the only variant for
/// which `FunctionOptions::is_groups_sensitive` returns `true`.
GroupWise,
/// Presumably: the groups are collected into a list before the function is
/// applied — TODO confirm.
ApplyList,
/// Presumably: the function is applied per element, independent of grouping
/// — TODO confirm.
ElementWise,
}
/// Newtype around `bool` whose inner flag is private to this module, so it
/// cannot be set directly from outside.
///
/// The [`Default`] value wraps `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct UnsafeBool(bool);

impl Default for UnsafeBool {
    /// Returns the wrapper holding `true`.
    fn default() -> Self {
        Self(true)
    }
}
/// Per-function flags stored alongside a function expression.
///
/// `Copy`. `Default` is implemented by hand in this file: `collect_groups` is
/// `ApplyOptions::GroupWise`, `fmt_str` is `""`, `check_lengths` and
/// `allow_group_aware` are enabled, and every other flag is `false`.
/// NOTE(review): the field notes marked "presumably" are inferred from names;
/// confirm against the code that consumes these options.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FunctionOptions {
/// How the function is applied with respect to groups; read by
/// `is_groups_sensitive`.
pub collect_groups: ApplyOptions,
/// Static label, presumably used when formatting the function. With `serde`
/// enabled it is serialized but skipped on deserialization, so a
/// deserialized value gets the type's default (`""`).
#[cfg_attr(feature = "serde", serde(skip_deserializing))]
pub fmt_str: &'static str,
/// Presumably whether a wildcard in the inputs is expanded into separate inputs.
pub input_wildcard_expansion: bool,
/// Presumably whether the function produces a single (scalar) value.
pub returns_scalar: bool,
/// Presumably whether inputs are cast to a common supertype before the call.
pub cast_to_supertypes: bool,
/// Presumably whether the function may change the output column name.
pub allow_rename: bool,
/// Presumably whether the column name is passed through to the applied function.
pub pass_name_to_apply: bool,
/// Presumably whether the output length can differ from the input length.
pub changes_length: bool,
/// Length-check flag. The wrapper's inner value is private; read it with
/// `check_lengths()`. Within this file it is only turned off by the `unsafe`,
/// crate-private `no_check_lengths` (behind the `fused` feature).
pub check_lengths: UnsafeBool,
/// Presumably whether the function may be run in a group-aware manner.
pub allow_group_aware: bool,
}
impl FunctionOptions {
pub fn is_groups_sensitive(&self) -> bool {
matches!(self.collect_groups, ApplyOptions::GroupWise)
}
#[cfg(feature = "fused")]
pub(crate) unsafe fn no_check_lengths(&mut self) {
self.check_lengths = UnsafeBool(false);
}
pub fn check_lengths(&self) -> bool {
self.check_lengths.0
}
}
impl Default for FunctionOptions {
    /// Builds the baseline options: group-wise application, an empty
    /// `fmt_str`, length checking and group-aware execution enabled, and every
    /// other flag disabled.
    fn default() -> Self {
        // Fields are listed in declaration order.
        Self {
            collect_groups: ApplyOptions::GroupWise,
            fmt_str: "",
            input_wildcard_expansion: false,
            returns_scalar: false,
            cast_to_supertypes: false,
            allow_rename: false,
            pass_name_to_apply: false,
            changes_length: false,
            check_lengths: UnsafeBool(true),
            allow_group_aware: true,
        }
    }
}
/// Flags for a user-defined function applied at the logical-plan level (per
/// the type name).
///
/// `Copy`. Unlike most option types in this file it derives neither `Default`
/// nor the serde traits.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct LogicalPlanUdfOptions {
/// Presumably whether predicate pushdown may pass through the UDF — TODO confirm.
pub predicate_pd: bool,
/// Presumably whether projection pushdown may pass through the UDF — TODO confirm.
pub projection_pd: bool,
/// Static label, presumably used when formatting the plan.
pub fmt_str: &'static str,
}
/// Arguments attached to a sort node (per the type name).
///
/// `Default` is derived: `descending` is empty, both flags are `false`, and
/// `slice` is `None`.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SortArguments {
/// Presumably one flag per sort column, `true` meaning descending order.
pub descending: Vec<bool>,
/// Presumably whether nulls are placed after non-null values.
pub nulls_last: bool,
/// Optional `(i64, usize)` pair; presumably `(offset, length)` of a slice to apply.
pub slice: Option<(i64, usize)>,
/// Presumably whether the relative order of equal rows is preserved.
pub maintain_order: bool,
}
/// Settings for a scan backed by a Python function (per the type and field names).
///
/// Only compiled with the `python` feature. `Default` is derived, so every
/// `Option` is `None`, `pyarrow` is `false`, and `schema` is
/// `SchemaRef::default()`.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg(feature = "python")]
pub struct PythonOptions {
/// Optional Python callable; presumably the function that produces the data.
pub scan_fn: Option<PythonFunction>,
/// Schema reference; presumably the schema of the scanned source.
pub schema: SchemaRef,
/// Optional schema reference; presumably the schema after projection — TODO confirm.
pub output_schema: Option<SchemaRef>,
/// Optional shared list of column names; presumably the columns to read.
pub with_columns: Option<Arc<Vec<String>>>,
/// Presumably whether the scan goes through pyarrow — TODO confirm.
pub pyarrow: bool,
/// Optional predicate carried as a string; its format is not visible in this file.
pub predicate: Option<String>,
/// Optional row limit; presumably `None` means no limit.
pub n_rows: Option<usize>,
}
/// Settings for an anonymous (user-provided) scan, per the type name.
///
/// `Default` is derived: `skip_rows` is `None` and `fmt_str` is `""`.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct AnonymousScanOptions {
/// Optional number of rows; presumably rows to skip at the start of the scan.
pub skip_rows: Option<usize>,
/// Static label, presumably used when formatting the plan.
pub fmt_str: &'static str,
}
/// Destination of a sink.
///
/// The `Cloud` variant exists only when the `cloud` feature is enabled.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
pub enum SinkType {
/// No path or format attached; presumably the result is kept in memory.
Memory,
/// Sink described by a path and an output format.
File {
/// Shared path; presumably the local file to write to.
path: Arc<PathBuf>,
/// Output format together with its writer options.
file_type: FileType,
},
/// Sink described by a URI, an output format, and optional cloud settings.
#[cfg(feature = "cloud")]
Cloud {
/// Shared URI string; presumably the cloud object to write to.
uri: Arc<String>,
/// Output format together with its writer options.
file_type: FileType,
/// Optional settings, as defined by `polars_io::cloud::CloudOptions`.
cloud_options: Option<polars_io::cloud::CloudOptions>,
},
}
/// Path plus output format for a file sink; it carries the same two fields as
/// `SinkType::File`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
pub struct FileSinkOptions {
/// Shared path; presumably the file to write to.
pub path: Arc<PathBuf>,
/// Output format together with its writer options.
pub file_type: FileType,
}
/// Output file format, each variant carrying that format's writer options.
///
/// Every variant is gated on its own cargo feature, so with none of
/// `parquet`, `ipc`, `csv`, or `json` enabled this enum has no variants.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
pub enum FileType {
/// Parquet output; requires the `parquet` feature.
#[cfg(feature = "parquet")]
Parquet(ParquetWriteOptions),
/// IPC output; requires the `ipc` feature.
#[cfg(feature = "ipc")]
Ipc(IpcWriterOptions),
/// CSV output; requires the `csv` feature.
#[cfg(feature = "csv")]
Csv(CsvWriterOptions),
/// JSON output; requires the `json` feature.
#[cfg(feature = "json")]
Json(JsonWriterOptions),
}
/// Flags attached to a projection node (per the type name).
///
/// `Copy`, and comparable with `==`: `PartialEq`/`Eq` are derived so values
/// can be compared like the other all-`bool` option structs in this module.
/// [`Default`] enables both flags.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ProjectionOptions {
    /// Presumably whether the projected expressions may be evaluated in
    /// parallel — confirm against the executor.
    pub run_parallel: bool,
    /// Presumably whether duplicate output column names are checked for —
    /// confirm against the executor.
    pub duplicate_check: bool,
}

impl Default for ProjectionOptions {
    /// Returns options with `run_parallel` and `duplicate_check` both `true`.
    fn default() -> Self {
        Self {
            run_parallel: true,
            duplicate_check: true,
        }
    }
}