#[cfg(feature = "json")]
use std::num::NonZeroUsize;
use std::path::PathBuf;
use bitflags::bitflags;
use polars_core::prelude::*;
use polars_core::utils::SuperTypeOptions;
#[cfg(feature = "csv")]
use polars_io::csv::write::CsvWriterOptions;
#[cfg(feature = "ipc")]
use polars_io::ipc::IpcWriterOptions;
#[cfg(feature = "json")]
use polars_io::json::JsonWriterOptions;
#[cfg(feature = "parquet")]
use polars_io::parquet::write::ParquetWriteOptions;
use polars_io::{is_cloud_url, HiveOptions, RowIndex};
#[cfg(feature = "dynamic_group_by")]
use polars_time::{DynamicGroupOptions, RollingGroupOptions};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::dsl::Selector;
use crate::plans::{ExprIR, PlSmallStr};
#[cfg(feature = "python")]
use crate::prelude::python_udf::PythonFunction;
/// Integer type used for file counts (see `FileScanOptions::file_counter`).
pub type FileCount = u32;
/// Options attached to a file-based scan.
///
/// NOTE(review): the per-field notes below are inferred from names and types only;
/// the code that consumes these options is not visible here, so confirm before
/// relying on them.
#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FileScanOptions {
/// Optional slice, presumably `(offset, length)`; the signed first element
/// suggests a negative offset is allowed — TODO confirm.
pub slice: Option<(i64, usize)>,
/// Optional list of column names, presumably the columns to read.
pub with_columns: Option<Arc<[PlSmallStr]>>,
/// Presumably whether the scan result may be cached.
pub cache: bool,
/// Optional row-index configuration (type comes from `polars_io`).
pub row_index: Option<RowIndex>,
/// Presumably whether the data is rechunked after reading.
pub rechunk: bool,
/// A count of type [`FileCount`]; what exactly is counted is not visible here.
pub file_counter: FileCount,
/// Hive-related options (type comes from `polars_io`).
pub hive_options: HiveOptions,
/// Presumably whether glob patterns in the scanned paths are expanded.
pub glob: bool,
/// Optional column name, presumably for a column holding each row's source path.
pub include_file_paths: Option<PlSmallStr>,
/// Presumably whether files lacking some expected columns are tolerated.
pub allow_missing_columns: bool,
}
/// Options stored for a union of plans.
///
/// An instance can be built from [`UnionArgs`] through `From`, which copies
/// `parallel`, `rechunk` and `from_partitioned_ds` and initialises the remaining
/// fields to `None` / `(None, 0)` / `false`.
#[derive(Clone, Debug, Copy, Default, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct UnionOptions {
/// Optional slice, presumably `(offset, length)` — TODO confirm.
pub slice: Option<(i64, usize)>,
/// Presumably whether the inputs may be evaluated in parallel.
pub parallel: bool,
/// NOTE(review): the meaning of this pair is not visible in this file; it starts
/// as `(None, 0)` when converted from `UnionArgs`.
pub rows: (Option<usize>, usize),
/// Copied from `UnionArgs::from_partitioned_ds`; presumably marks a union that
/// originates from a partitioned dataset — confirm.
pub from_partitioned_ds: bool,
/// Presumably set once an optimizer has flattened nested unions; it is `false`
/// when converted from `UnionArgs`.
pub flattened_by_opt: bool,
/// Presumably whether the combined result is rechunked.
pub rechunk: bool,
}
/// Options for an `HConcat` operation (presumably horizontal concatenation).
#[derive(Clone, Debug, Copy, Default, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct HConcatOptions {
/// Presumably whether the inputs may be evaluated in parallel.
pub parallel: bool,
}
/// Options for a group-by operation.
///
/// The `dynamic` and `rolling` fields only exist when the `dynamic_group_by`
/// feature is enabled.
#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GroupbyOptions {
/// Optional dynamic-group options (type comes from `polars_time`).
#[cfg(feature = "dynamic_group_by")]
pub dynamic: Option<DynamicGroupOptions>,
/// Optional rolling-group options (type comes from `polars_time`).
#[cfg(feature = "dynamic_group_by")]
pub rolling: Option<RollingGroupOptions>,
/// Optional slice, presumably `(offset, length)` — TODO confirm.
pub slice: Option<(i64, usize)>,
}
/// Distinct/unique options in their DSL form, where the subset is expressed
/// with [`Selector`]s rather than concrete column names.
#[derive(Clone, Debug, Eq, PartialEq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DistinctOptionsDSL {
/// Optional selectors, presumably choosing the columns that determine uniqueness.
pub subset: Option<Vec<Selector>>,
/// Presumably whether the original row order must be preserved.
pub maintain_order: bool,
/// Strategy deciding which of several duplicate rows is kept.
pub keep_strategy: UniqueKeepStrategy,
}
/// Distinct/unique options in their IR form, where the subset is a list of
/// column names and an optional slice is carried along.
///
/// Unlike the other option types in this file, the serde derives here are gated
/// on the `ir_serde` feature rather than `serde`.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))]
pub struct DistinctOptionsIR {
/// Optional column names, presumably the columns that determine uniqueness.
pub subset: Option<Arc<[PlSmallStr]>>,
/// Presumably whether the original row order must be preserved.
pub maintain_order: bool,
/// Strategy deciding which of several duplicate rows is kept.
pub keep_strategy: UniqueKeepStrategy,
/// Optional slice, presumably `(offset, length)` — TODO confirm.
pub slice: Option<(i64, usize)>,
}
/// Mode stored in `FunctionOptions::collect_groups`.
///
/// `FunctionOptions::is_elementwise` only accepts `ElementWise` and `ApplyList`;
/// `FunctionOptions::default` uses `GroupWise`.
///
/// NOTE(review): the variant descriptions below are inferred from the names —
/// confirm against the code that evaluates them.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ApplyOptions {
/// Presumably: the function is applied once per group.
GroupWise,
/// Presumably: groups are collected into a list before the function is applied.
ApplyList,
/// Presumably: the function maps each element independently.
ElementWise,
}
/// Boolean wrapper with a private field whose default value is `true`.
///
/// Because the field is private, code outside this module cannot construct an
/// arbitrary value; within this file the only place that stores `false` is the
/// `unsafe` method `FunctionOptions::no_check_lengths`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct UnsafeBool(bool);

impl Default for UnsafeBool {
    /// Returns the wrapper holding `true`.
    fn default() -> Self {
        Self(true)
    }
}
bitflags!(
/// Bit set of boolean properties of a function, stored in a single `u8`
/// (all eight bits are assigned).
///
/// The `Default` implementation in this file enables only `ALLOW_GROUP_AWARE`.
///
/// NOTE(review): the per-flag notes are inferred from the flag names; confirm
/// against the code that reads them.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FunctionFlags: u8 {
/// Bit 0. Presumably: the function may run in a group-aware context.
const ALLOW_GROUP_AWARE = 1 << 0;
/// Bit 1. Presumably: the output length may differ from the input length.
/// Checked by `FunctionOptions::is_elementwise`.
const CHANGES_LENGTH = 1 << 1;
/// Bit 2. Presumably: the output may be renamed.
const ALLOW_RENAME = 1 << 2;
/// Bit 3. Presumably: the column name is passed to the applied function.
const PASS_NAME_TO_APPLY = 1 << 3;
/// Bit 4. Presumably: wildcard inputs are expanded into multiple inputs.
const INPUT_WILDCARD_EXPANSION = 1 << 4;
/// Bit 5. Presumably: the function returns a single value.
/// Checked by `FunctionOptions::is_elementwise`.
const RETURNS_SCALAR = 1 << 5;
/// Bit 6. NOTE(review): meaning not visible in this file.
const OPTIONAL_RE_ENTRANT = 1 << 6;
/// Bit 7. Presumably: the function may be called with no inputs.
const ALLOW_EMPTY_INPUTS = 1 << 7;
}
);
impl Default for FunctionFlags {
    /// Returns the flag set in which `ALLOW_GROUP_AWARE` is the only bit set.
    fn default() -> Self {
        // OR-ing a flag into the empty set yields that flag itself.
        Self::ALLOW_GROUP_AWARE
    }
}
/// Options describing how a function expression is applied.
///
/// The `Default` implementation in this file uses `ApplyOptions::GroupWise`, an
/// empty `fmt_str`, no supertype cast, an enabled length check and the default
/// `FunctionFlags`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FunctionOptions {
/// Application mode; read by `is_elementwise`.
pub collect_groups: ApplyOptions,
/// Static string, presumably used when formatting the function for display.
/// It is serialized but skipped on deserialization.
#[cfg_attr(feature = "serde", serde(skip_deserializing))]
pub fmt_str: &'static str,
/// Optional supertype-cast options, presumably applied to the inputs before
/// the call — confirm. Skipped entirely by serde.
#[cfg_attr(feature = "serde", serde(skip))]
pub cast_to_supertypes: Option<SuperTypeOptions>,
/// Length-check switch; read through the `check_lengths()` method.
pub check_lengths: UnsafeBool,
/// Additional boolean properties of the function.
pub flags: FunctionFlags,
}
impl FunctionOptions {
    /// Turns the length check off by storing `false` in `check_lengths`.
    ///
    /// Only compiled when the `fused` feature is enabled.
    ///
    /// # Safety
    ///
    /// NOTE(review): the exact invariant is not documented in this file.
    /// Presumably the caller must guarantee that the function's inputs have
    /// compatible lengths, since the check is skipped afterwards — confirm.
    #[cfg(feature = "fused")]
    pub(crate) unsafe fn no_check_lengths(&mut self) {
        self.check_lengths = UnsafeBool(false);
    }

    /// Returns whether the length check is enabled.
    pub fn check_lengths(&self) -> bool {
        self.check_lengths.0
    }

    /// Returns `true` when the function can be treated as element-wise.
    ///
    /// This requires `collect_groups` to be `ElementWise` or `ApplyList`, and
    /// that neither `CHANGES_LENGTH` nor `RETURNS_SCALAR` is set in `flags`.
    pub fn is_elementwise(&self) -> bool {
        let mode_qualifies = matches!(
            self.collect_groups,
            ApplyOptions::ElementWise | ApplyOptions::ApplyList
        );
        // `intersects` is true if ANY of the given bits is set. `contains` would
        // only be true when BOTH are set, which let a function carrying just one
        // of these flags be misreported as element-wise.
        mode_qualifies
            && !self
                .flags
                .intersects(FunctionFlags::CHANGES_LENGTH | FunctionFlags::RETURNS_SCALAR)
    }
}
impl Default for FunctionOptions {
fn default() -> Self {
FunctionOptions {
collect_groups: ApplyOptions::GroupWise,
fmt_str: "",
cast_to_supertypes: None,
check_lengths: UnsafeBool(true),
flags: Default::default(),
}
}
}
/// Options for a user-defined function applied to a logical plan.
///
/// This type has no serde derives.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct LogicalPlanUdfOptions {
/// Presumably whether predicate pushdown may pass through the UDF — confirm.
pub predicate_pd: bool,
/// Presumably whether projection pushdown may pass through the UDF — confirm.
pub projection_pd: bool,
/// Static string, presumably used when formatting the UDF for display.
pub fmt_str: &'static str,
}
/// Options for a Python-backed scan. Only compiled with the `python` feature.
///
/// NOTE(review): the per-field notes are inferred from names and types; confirm
/// against the code that executes the scan.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg(feature = "python")]
pub struct PythonOptions {
/// Optional Python callable, presumably the function that produces the data.
pub scan_fn: Option<PythonFunction>,
/// Schema of the scan, presumably before any projection is applied.
pub schema: SchemaRef,
/// Optional schema, presumably describing the output after projection.
pub output_schema: Option<SchemaRef>,
/// Optional list of column names, presumably the columns to read.
pub with_columns: Option<Arc<[PlSmallStr]>>,
/// Which kind of Python source is scanned.
pub python_source: PythonScanSource,
/// Predicate associated with the scan. Skipped by serde.
#[cfg_attr(feature = "serde", serde(skip))]
pub predicate: PythonPredicate,
/// Optional row count, presumably an upper bound on the rows to read.
pub n_rows: Option<usize>,
}
/// Kind of source behind a Python scan; the default is `IOPlugin`.
///
/// NOTE(review): what each variant means at runtime is not visible in this file.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum PythonScanSource {
/// Presumably a PyArrow-backed source.
Pyarrow,
/// Presumably a CUDA/GPU-backed source.
Cuda,
/// Presumably an IO-plugin source. This is the default variant.
#[default]
IOPlugin,
}
/// Predicate attached to a Python scan; the default is `None`.
///
/// This type has no serde derives; the field holding it in `PythonOptions` is
/// marked `serde(skip)`.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub enum PythonPredicate {
/// A predicate carried as a string, presumably in a form PyArrow understands.
PyArrow(String),
/// A predicate carried as an `ExprIR` expression.
Polars(ExprIR),
/// No predicate. This is the default variant.
#[default]
None,
}
/// Options for an anonymous scan.
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct AnonymousScanOptions {
/// Optional row count, presumably the number of leading rows to skip.
pub skip_rows: Option<usize>,
/// Static string, presumably used when formatting the scan for display.
pub fmt_str: &'static str,
}
/// Destination of a sink: either memory or a file.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum SinkType {
/// The sink has no file destination (presumably results stay in memory).
Memory,
/// The sink writes to a file.
File {
/// Destination path; `is_cloud_destination` tests it with `is_cloud_url`.
path: Arc<PathBuf>,
/// Output format together with its writer options.
file_type: FileType,
/// Optional cloud options, presumably used when `path` is a cloud URL.
cloud_options: Option<polars_io::cloud::CloudOptions>,
},
}
impl SinkType {
    /// Returns `true` when this is a `File` sink whose path is recognised by
    /// `is_cloud_url`; a `Memory` sink always yields `false`.
    pub(crate) fn is_cloud_destination(&self) -> bool {
        match self {
            Self::File { path, .. } => is_cloud_url(path.as_ref()),
            Self::Memory => false,
        }
    }
}
/// Path and format of a file sink.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
pub struct FileSinkOptions {
/// Destination path.
pub path: Arc<PathBuf>,
/// Output format together with its writer options.
pub file_type: FileType,
}
/// Output file format, each variant carrying that format's writer options.
///
/// Every variant is gated on its own Cargo feature, so the set of available
/// variants depends on which features are enabled.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum FileType {
/// Parquet output (requires the `parquet` feature).
#[cfg(feature = "parquet")]
Parquet(ParquetWriteOptions),
/// IPC output (requires the `ipc` feature).
#[cfg(feature = "ipc")]
Ipc(IpcWriterOptions),
/// CSV output (requires the `csv` feature).
#[cfg(feature = "csv")]
Csv(CsvWriterOptions),
/// JSON output (requires the `json` feature).
#[cfg(feature = "json")]
Json(JsonWriterOptions),
}
/// Switches controlling how a projection is executed. All default to `true`.
///
/// NOTE(review): the per-field notes are inferred from the field names; confirm
/// against the executor.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ProjectionOptions {
    /// Presumably whether the projected expressions may run in parallel.
    pub run_parallel: bool,
    /// Presumably whether duplicate output names are checked for.
    pub duplicate_check: bool,
    /// Presumably whether results are broadcast to a common length.
    pub should_broadcast: bool,
}

impl Default for ProjectionOptions {
    /// Returns options with every switch enabled.
    fn default() -> Self {
        let enabled = true;
        Self {
            run_parallel: enabled,
            duplicate_check: enabled,
            should_broadcast: enabled,
        }
    }
}

impl ProjectionOptions {
    /// Combines two option sets into one.
    ///
    /// `run_parallel` and `duplicate_check` stay enabled only if both sides
    /// enable them; `should_broadcast` is enabled if either side enables it.
    pub fn merge_options(&self, other: &Self) -> Self {
        let run_parallel = self.run_parallel && other.run_parallel;
        let duplicate_check = self.duplicate_check && other.duplicate_check;
        let should_broadcast = self.should_broadcast || other.should_broadcast;
        Self {
            run_parallel,
            duplicate_check,
            should_broadcast,
        }
    }
}
/// Arguments for building a union; convertible into `UnionOptions` via `From`.
///
/// By default only `parallel` is enabled.
///
/// NOTE(review): the per-field notes are inferred from the field names; confirm
/// against the code that builds the union.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct UnionArgs {
    /// Presumably whether the inputs may be evaluated in parallel.
    pub parallel: bool,
    /// Presumably whether the combined result is rechunked.
    pub rechunk: bool,
    /// Presumably whether mismatching column types are cast to a common supertype.
    pub to_supertypes: bool,
    /// Presumably whether inputs with differing columns are combined diagonally.
    pub diagonal: bool,
    /// Presumably marks a union that originates from a partitioned dataset.
    pub from_partitioned_ds: bool,
}

impl Default for UnionArgs {
    /// Returns arguments with `parallel` enabled and every other switch disabled.
    fn default() -> Self {
        let (on, off) = (true, false);
        UnionArgs {
            parallel: on,
            rechunk: off,
            to_supertypes: off,
            diagonal: off,
            from_partitioned_ds: off,
        }
    }
}
impl From<UnionArgs> for UnionOptions {
fn from(args: UnionArgs) -> Self {
UnionOptions {
slice: None,
parallel: args.parallel,
rows: (None, 0),
from_partitioned_ds: args.from_partitioned_ds,
flattened_by_opt: false,
rechunk: args.rechunk,
}
}
}
/// Options for reading NDJSON. Only compiled with the `json` feature.
///
/// NOTE(review): the per-field notes are inferred from names and types; confirm
/// against the reader implementation.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg(feature = "json")]
pub struct NDJsonReadOptions {
/// Optional thread count for reading.
pub n_threads: Option<usize>,
/// Optional non-zero count, presumably the number of rows inspected for schema
/// inference; what `None` means is not visible here.
pub infer_schema_length: Option<NonZeroUsize>,
/// Non-zero chunk size; the unit is not visible here.
pub chunk_size: NonZeroUsize,
/// Presumably trades speed for lower memory use.
pub low_memory: bool,
/// Presumably whether parse errors are tolerated rather than raised.
pub ignore_errors: bool,
/// Optional schema, presumably replacing inference entirely.
pub schema: Option<SchemaRef>,
/// Optional schema, presumably overriding selected columns of the inferred one.
pub schema_overwrite: Option<SchemaRef>,
}