1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
use std::hash::{Hash, Hasher};

#[cfg(feature = "csv")]
use polars_io::csv::read::CsvReadOptions;
#[cfg(feature = "ipc")]
use polars_io::ipc::IpcScanOptions;
#[cfg(feature = "parquet")]
use polars_io::parquet::metadata::FileMetaDataRef;
#[cfg(feature = "parquet")]
use polars_io::parquet::read::ParquetOptions;

use super::*;

/// Format-specific description of a scan, carrying the options for that format.
///
/// Each file-format variant only exists when the cargo feature of the same
/// name (`csv`, `parquet`, `ipc`, `json`) is enabled; `Anonymous` is always
/// compiled in.
#[derive(Clone, Debug, IntoStaticStr)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum FileScan {
    /// CSV scan (requires the `csv` feature).
    #[cfg(feature = "csv")]
    Csv {
        /// CSV reader options.
        options: CsvReadOptions,
        /// Optional cloud options; `None` when none were supplied.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
    },
    /// Parquet scan (requires the `parquet` feature).
    #[cfg(feature = "parquet")]
    Parquet {
        /// Parquet reader options.
        options: ParquetOptions,
        /// Optional cloud options; `None` when none were supplied.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
        /// Optional file metadata. It is skipped by serde (when the `serde`
        /// feature is on), ignored by the `PartialEq` and `Hash` impls, and
        /// reset to `None` by `remove_metadata`.
        #[cfg_attr(feature = "serde", serde(skip))]
        metadata: Option<FileMetaDataRef>,
    },
    /// IPC scan (requires the `ipc` feature).
    #[cfg(feature = "ipc")]
    Ipc {
        /// IPC scan options.
        options: IpcScanOptions,
        /// Optional cloud options; `None` when none were supplied.
        cloud_options: Option<polars_io::cloud::CloudOptions>,
        /// Optional file metadata. It is skipped by serde (when the `serde`
        /// feature is on), ignored by the `PartialEq` and `Hash` impls, and
        /// reset to `None` by `remove_metadata`.
        #[cfg_attr(feature = "serde", serde(skip))]
        metadata: Option<arrow::io::ipc::read::FileMetadata>,
    },
    /// Newline-delimited JSON scan (requires the `json` feature).
    #[cfg(feature = "json")]
    NDJson { options: NDJsonReadOptions },
    /// Scan backed by a `dyn AnonymousScan` trait object.
    ///
    /// The whole variant is skipped by serde when the `serde` feature is on.
    #[cfg_attr(feature = "serde", serde(skip))]
    Anonymous {
        /// Shared options for the anonymous scan; the only field fed into `Hash`.
        options: Arc<AnonymousScanOptions>,
        /// The scan implementation itself; takes no part in `PartialEq` or `Hash`.
        function: Arc<dyn AnonymousScan>,
    },
}

/// Equality over the scan configuration.
///
/// Two scans are equal only when they are the same file-format variant and
/// their `options` (plus `cloud_options`, where the variant has them) are
/// equal. The `metadata` fields are never compared. `Anonymous` has no arm of
/// its own, so an anonymous scan compares unequal to everything, including
/// itself.
impl PartialEq for FileScan {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            #[cfg(feature = "csv")]
            (
                Self::Csv {
                    options: lhs_options,
                    cloud_options: lhs_cloud,
                },
                Self::Csv {
                    options: rhs_options,
                    cloud_options: rhs_cloud,
                },
            ) => (lhs_options, lhs_cloud) == (rhs_options, rhs_cloud),
            #[cfg(feature = "parquet")]
            (
                Self::Parquet {
                    options: lhs_options,
                    cloud_options: lhs_cloud,
                    ..
                },
                Self::Parquet {
                    options: rhs_options,
                    cloud_options: rhs_cloud,
                    ..
                },
            ) => (lhs_options, lhs_cloud) == (rhs_options, rhs_cloud),
            #[cfg(feature = "ipc")]
            (
                Self::Ipc {
                    options: lhs_options,
                    cloud_options: lhs_cloud,
                    ..
                },
                Self::Ipc {
                    options: rhs_options,
                    cloud_options: rhs_cloud,
                    ..
                },
            ) => (lhs_options, lhs_cloud) == (rhs_options, rhs_cloud),
            #[cfg(feature = "json")]
            (
                Self::NDJson {
                    options: lhs_options,
                },
                Self::NDJson {
                    options: rhs_options,
                },
            ) => lhs_options == rhs_options,
            // Mismatched variants, and any pair involving `Anonymous`.
            _ => false,
        }
    }
}

// Marker impl declaring `FileScan` equality to be a full equivalence relation.
// NOTE(review): `PartialEq::eq` returns `false` for any pair of `Anonymous`
// scans, including a value compared with itself, so reflexivity does not hold
// for that variant — confirm this is intended.
impl Eq for FileScan {}

impl Hash for FileScan {
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(self).hash(state);
        match self {
            #[cfg(feature = "csv")]
            FileScan::Csv {
                options,
                cloud_options,
            } => {
                options.hash(state);
                cloud_options.hash(state);
            },
            #[cfg(feature = "parquet")]
            FileScan::Parquet {
                options,
                cloud_options,
                metadata: _,
            } => {
                options.hash(state);
                cloud_options.hash(state);
            },
            #[cfg(feature = "ipc")]
            FileScan::Ipc {
                options,
                cloud_options,
                metadata: _,
            } => {
                options.hash(state);
                cloud_options.hash(state);
            },
            #[cfg(feature = "json")]
            FileScan::NDJson { options } => options.hash(state),
            FileScan::Anonymous { options, .. } => options.hash(state),
        }
    }
}

impl FileScan {
    /// Resets the `metadata` field of `Parquet` and `Ipc` scans to `None`.
    ///
    /// Every other variant is left untouched.
    pub(crate) fn remove_metadata(&mut self) {
        match self {
            #[cfg(feature = "parquet")]
            FileScan::Parquet { metadata, .. } => *metadata = None,
            #[cfg(feature = "ipc")]
            FileScan::Ipc { metadata, .. } => *metadata = None,
            _ => {},
        }
    }

    /// Returns `true` for CSV scans, and for IPC / Parquet scans only when
    /// `_file_options.row_index` is set; `false` for every other variant.
    ///
    /// NOTE(review): presumably tells the caller whether the projection has to
    /// be sorted for this source — confirm against the call sites.
    pub(crate) fn sort_projection(&self, _file_options: &FileScanOptions) -> bool {
        // `_file_options` keeps its underscore because it is unused when
        // neither `ipc` nor `parquet` is enabled.
        match self {
            #[cfg(feature = "csv")]
            FileScan::Csv { .. } => true,
            #[cfg(feature = "ipc")]
            FileScan::Ipc { .. } => _file_options.row_index.is_some(),
            #[cfg(feature = "parquet")]
            FileScan::Parquet { .. } => _file_options.row_index.is_some(),
            // `Anonymous` (and `NDJson`, when enabled) always land here.
            _ => false,
        }
    }

    /// Returns `true` for CSV and Parquet scans and `false` for all others.
    ///
    /// NOTE(review): presumably reports whether the source can be used by the
    /// streaming engine — confirm against the call sites.
    pub fn streamable(&self) -> bool {
        match self {
            #[cfg(feature = "csv")]
            FileScan::Csv { .. } => true,
            #[cfg(feature = "parquet")]
            FileScan::Parquet { .. } => true,
            // IPC, NDJSON and anonymous scans all report `false`.
            _ => false,
        }
    }
}