polars_plan/plans/
file_scan.rs

1use std::hash::{Hash, Hasher};
2
3#[cfg(feature = "csv")]
4use polars_io::csv::read::CsvReadOptions;
5#[cfg(feature = "ipc")]
6use polars_io::ipc::IpcScanOptions;
7#[cfg(feature = "parquet")]
8use polars_io::parquet::metadata::FileMetadataRef;
9#[cfg(feature = "parquet")]
10use polars_io::parquet::read::ParquetOptions;
11
12use super::*;
13
/// The kind of source a scan reads from, together with its reader-specific options.
///
/// Every variant except `Anonymous` is only compiled in when the matching cargo feature
/// (`csv`, `json`, `parquet`, `ipc`) is enabled.
#[derive(Clone, Debug, IntoStaticStr)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum FileScan {
    /// CSV scan (requires the `csv` feature).
    #[cfg(feature = "csv")]
    Csv {
        /// CSV reader options.
        options: CsvReadOptions,
        /// Optional cloud options attached to this scan.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
    },
    /// Newline-delimited JSON scan (requires the `json` feature).
    #[cfg(feature = "json")]
    NDJson {
        /// NDJSON reader options.
        options: NDJsonReadOptions,
        /// Optional cloud options attached to this scan.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
    },
    /// Parquet scan (requires the `parquet` feature).
    #[cfg(feature = "parquet")]
    Parquet {
        /// Parquet reader options.
        options: ParquetOptions,
        /// Optional cloud options attached to this scan.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
        /// Optional file metadata. Skipped by serde, and not taken into account by the
        /// `PartialEq` / `Hash` impls of this type.
        #[cfg_attr(feature = "serde", serde(skip))]
        metadata: Option<FileMetadataRef>,
    },
    /// IPC scan (requires the `ipc` feature).
    #[cfg(feature = "ipc")]
    Ipc {
        /// IPC scan options.
        options: IpcScanOptions,
        /// Optional cloud options attached to this scan.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
        /// Optional file metadata. Skipped by serde, and not taken into account by the
        /// `PartialEq` / `Hash` impls of this type.
        #[cfg_attr(feature = "serde", serde(skip))]
        metadata: Option<Arc<arrow::io::ipc::read::FileMetadata>>,
    },
    /// Scan backed by a `dyn AnonymousScan` implementation.
    ///
    /// The whole variant is skipped by serde.
    #[cfg_attr(feature = "serde", serde(skip))]
    Anonymous {
        /// Options for the anonymous scan; the only field that feeds the `Hash` impl.
        options: Arc<AnonymousScanOptions>,
        /// The scan implementation itself.
        function: Arc<dyn AnonymousScan>,
    },
}
47
48impl PartialEq for FileScan {
49    fn eq(&self, other: &Self) -> bool {
50        match (self, other) {
51            #[cfg(feature = "csv")]
52            (
53                FileScan::Csv {
54                    options: l,
55                    cloud_options: c_l,
56                },
57                FileScan::Csv {
58                    options: r,
59                    cloud_options: c_r,
60                },
61            ) => l == r && c_l == c_r,
62            #[cfg(feature = "parquet")]
63            (
64                FileScan::Parquet {
65                    options: opt_l,
66                    cloud_options: c_l,
67                    ..
68                },
69                FileScan::Parquet {
70                    options: opt_r,
71                    cloud_options: c_r,
72                    ..
73                },
74            ) => opt_l == opt_r && c_l == c_r,
75            #[cfg(feature = "ipc")]
76            (
77                FileScan::Ipc {
78                    options: l,
79                    cloud_options: c_l,
80                    ..
81                },
82                FileScan::Ipc {
83                    options: r,
84                    cloud_options: c_r,
85                    ..
86                },
87            ) => l == r && c_l == c_r,
88            #[cfg(feature = "json")]
89            (
90                FileScan::NDJson {
91                    options: l,
92                    cloud_options: c_l,
93                },
94                FileScan::NDJson {
95                    options: r,
96                    cloud_options: c_r,
97                },
98            ) => l == r && c_l == c_r,
99            _ => false,
100        }
101    }
102}
103
// Marker impl only; equality itself is defined by the `PartialEq` impl for `FileScan`.
// NOTE(review): that impl returns `false` for `Anonymous` vs `Anonymous`, so `x == x` does
// not hold for that variant — confirm this is acceptable wherever `Eq` is relied upon.
impl Eq for FileScan {}
105
106impl Hash for FileScan {
107    fn hash<H: Hasher>(&self, state: &mut H) {
108        std::mem::discriminant(self).hash(state);
109        match self {
110            #[cfg(feature = "csv")]
111            FileScan::Csv {
112                options,
113                cloud_options,
114            } => {
115                options.hash(state);
116                cloud_options.hash(state);
117            },
118            #[cfg(feature = "parquet")]
119            FileScan::Parquet {
120                options,
121                cloud_options,
122                metadata: _,
123            } => {
124                options.hash(state);
125                cloud_options.hash(state);
126            },
127            #[cfg(feature = "ipc")]
128            FileScan::Ipc {
129                options,
130                cloud_options,
131                metadata: _,
132            } => {
133                options.hash(state);
134                cloud_options.hash(state);
135            },
136            #[cfg(feature = "json")]
137            FileScan::NDJson {
138                options,
139                cloud_options,
140            } => {
141                options.hash(state);
142                cloud_options.hash(state)
143            },
144            FileScan::Anonymous { options, .. } => options.hash(state),
145        }
146    }
147}
148
149impl FileScan {
150    pub(crate) fn remove_metadata(&mut self) {
151        match self {
152            #[cfg(feature = "parquet")]
153            Self::Parquet { metadata, .. } => {
154                *metadata = None;
155            },
156            #[cfg(feature = "ipc")]
157            Self::Ipc { metadata, .. } => {
158                *metadata = None;
159            },
160            _ => {},
161        }
162    }
163
164    pub(crate) fn sort_projection(&self, _file_options: &FileScanOptions) -> bool {
165        match self {
166            #[cfg(feature = "csv")]
167            Self::Csv { .. } => true,
168            #[cfg(feature = "ipc")]
169            Self::Ipc { .. } => _file_options.row_index.is_some(),
170            #[cfg(feature = "parquet")]
171            Self::Parquet { .. } => false,
172            #[allow(unreachable_patterns)]
173            _ => false,
174        }
175    }
176
177    pub fn streamable(&self) -> bool {
178        match self {
179            #[cfg(feature = "csv")]
180            Self::Csv { .. } => true,
181            #[cfg(feature = "ipc")]
182            Self::Ipc { .. } => false,
183            #[cfg(feature = "parquet")]
184            Self::Parquet { .. } => true,
185            #[cfg(feature = "json")]
186            Self::NDJson { .. } => false,
187            #[allow(unreachable_patterns)]
188            _ => false,
189        }
190    }
191}