// polars_parquet/arrow/write/file.rs
use std::io::Write;
use arrow::datatypes::ArrowSchema;
use polars_error::{PolarsError, PolarsResult};
use super::schema::schema_to_metadata_key;
use super::{to_parquet_schema, ThriftFileMetadata, WriteOptions};
use crate::parquet::metadata::{KeyValue, SchemaDescriptor};
use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};
/// Adds the metadata entry derived from `schema` to `key_value_metadata`.
///
/// If `key_value_metadata` is `Some`, the schema entry is pushed after the existing
/// entries; if it is `None`, a new vector holding only the schema entry is created.
/// The returned value is therefore always `Some`.
pub fn add_arrow_schema(
    schema: &ArrowSchema,
    key_value_metadata: Option<Vec<KeyValue>>,
) -> Option<Vec<KeyValue>> {
    let schema_entry = schema_to_metadata_key(schema);
    let entries = match key_value_metadata {
        Some(mut existing) => {
            existing.push(schema_entry);
            existing
        },
        None => vec![schema_entry],
    };
    Some(entries)
}
/// Parquet file writer that pairs the lower-level
/// `crate::parquet::write::FileWriter` with the [`ArrowSchema`] and
/// [`WriteOptions`] it was constructed with.
pub struct FileWriter<W: Write> {
/// Inner parquet writer; the write/end/into_inner operations are delegated to it.
writer: crate::parquet::write::FileWriter<W>,
/// Arrow schema supplied at construction; it is attached to the key-value
/// metadata when the file is ended.
schema: ArrowSchema,
/// Write options supplied at construction.
options: WriteOptions,
}
// Read-only accessors.
impl<W> FileWriter<W>
where
    W: Write,
{
    /// Returns a copy of the [`WriteOptions`] this writer was created with.
    pub fn options(&self) -> WriteOptions {
        self.options
    }

    /// Returns the [`SchemaDescriptor`] held by the inner parquet writer.
    pub fn parquet_schema(&self) -> &SchemaDescriptor {
        let inner = &self.writer;
        inner.schema()
    }

    /// Returns the [`ArrowSchema`] this writer was created with.
    pub fn schema(&self) -> &ArrowSchema {
        &self.schema
    }
}
impl<W> FileWriter<W>
where
    W: Write,
{
    /// Creates a new [`FileWriter`] that writes to `writer`.
    ///
    /// The parquet schema is derived from `schema`, and the file is tagged as
    /// created by `"Polars"`.
    ///
    /// # Errors
    /// Returns an error if `to_parquet_schema` fails for `schema`.
    pub fn try_new(writer: W, schema: ArrowSchema, options: WriteOptions) -> PolarsResult<Self> {
        let parquet_schema = to_parquet_schema(&schema)?;

        // Translate the Arrow-level options into the options of the inner writer.
        let file_options = FileWriteOptions {
            version: options.version,
            write_statistics: options.has_statistics(),
        };

        let inner = crate::parquet::write::FileWriter::new(
            writer,
            parquet_schema,
            file_options,
            Some("Polars".to_string()),
        );

        Ok(Self {
            writer: inner,
            schema,
            options,
        })
    }

    /// Hands `row_group` to the inner parquet writer.
    ///
    /// # Errors
    /// Propagates any error reported by the inner writer, converted into a
    /// [`PolarsError`].
    pub fn write(&mut self, row_group: RowGroupIterColumns<'_, PolarsError>) -> PolarsResult<()> {
        self.writer.write(row_group)?;
        Ok(())
    }

    /// Ends the file through the inner writer, after appending the Arrow schema
    /// entry to `key_value_metadata` (see [`add_arrow_schema`]).
    ///
    /// Returns the `u64` reported by the inner writer's `end`.
    /// NOTE(review): presumably the total number of bytes written — confirm
    /// against the inner writer.
    ///
    /// # Errors
    /// Propagates any error reported by the inner writer.
    pub fn end(&mut self, key_value_metadata: Option<Vec<KeyValue>>) -> PolarsResult<u64> {
        let metadata = add_arrow_schema(&self.schema, key_value_metadata);
        let reported = self.writer.end(metadata)?;
        Ok(reported)
    }

    /// Consumes this writer and returns the underlying `W`.
    pub fn into_inner(self) -> W {
        let Self { writer, .. } = self;
        writer.into_inner()
    }

    /// Consumes this writer and returns the underlying `W` together with the
    /// [`ThriftFileMetadata`] produced by the inner writer.
    ///
    /// NOTE(review): the inner writer may require `end` to have been called
    /// before the metadata is available — confirm against its documentation.
    pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {
        let Self { writer, .. } = self;
        writer.into_inner_and_metadata()
    }
}