use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
/// Controls the padding that the compact schema/field formatters insert
/// around nested fields.
pub enum Indentation {
    /// No padding is inserted.
    OneLine,
    /// Padding made of the given number of space characters.
    MultiLine(u8),
}

impl Indentation {
    /// Returns the padding string for this level: empty for `OneLine`,
    /// otherwise the configured number of spaces.
    fn value(&self) -> String {
        match self {
            Self::OneLine => String::new(),
            Self::MultiLine(spaces) => " ".repeat(usize::from(*spaces)),
        }
    }

    /// Returns the indentation to use one nesting level deeper.
    ///
    /// `OneLine` stays `OneLine`; `MultiLine` grows by two spaces. The width
    /// saturates at `u8::MAX` so that very deep nesting cannot overflow the
    /// counter (a plain `+ 2` panics in debug builds and wraps in release).
    fn deepen(&self) -> Self {
        match self {
            Self::OneLine => Self::OneLine,
            Self::MultiLine(spaces) => Self::MultiLine(spaces.saturating_add(2)),
        }
    }
}
/// Extension methods for an Arrow field.
pub trait FieldExt {
/// Renders the field as a compact string, using `indent` to decide the
/// padding placed around nested children.
fn to_compact_string(&self, indent: Indentation) -> String;
/// Reports whether the field is flagged as a packed struct.
///
/// The `Field` implementation in this file returns `true` when the field
/// metadata has a `"packed"` entry whose value equals `"true"` ignoring case.
fn is_packed_struct(&self) -> bool;
}
impl FieldExt for Field {
    /// Renders this field as `name: type`, followed by `?` when the field is
    /// nullable.
    ///
    /// Type rendering:
    /// - struct: `{`, the children separated by `,`, then `}`; the padding from
    ///   `indent` is written after the opening brace and after every child;
    /// - list, large list, list view, large list view: `[child]`;
    /// - fixed size list: `[child; dimension]`;
    /// - dictionary: `value_type@key_type`;
    /// - anything else: the `Display` form of the data type.
    ///
    /// Children are always rendered with `indent.deepen()`.
    fn to_compact_string(&self, indent: Indentation) -> String {
        let mut result = format!("{}: ", self.name());
        match self.data_type() {
            DataType::Struct(fields) => {
                // The padding does not change while iterating; build it once.
                let padding = indent.value();
                result.push('{');
                result.push_str(&padding);
                for (field_idx, field) in fields.iter().enumerate() {
                    result.push_str(&field.to_compact_string(indent.deepen()));
                    // A comma goes between children, never after the last one.
                    if field_idx + 1 < fields.len() {
                        result.push(',');
                    }
                    result.push_str(&padding);
                }
                result.push('}');
            }
            DataType::List(field)
            | DataType::LargeList(field)
            | DataType::ListView(field)
            | DataType::LargeListView(field) => {
                result.push('[');
                result.push_str(&field.to_compact_string(indent.deepen()));
                result.push(']');
            }
            DataType::FixedSizeList(child, dimension) => {
                result.push_str(&format!(
                    "[{}; {}]",
                    child.to_compact_string(indent.deepen()),
                    dimension
                ));
            }
            DataType::Dictionary(key_type, value_type) => {
                result.push_str(&value_type.to_string());
                result.push('@');
                result.push_str(&key_type.to_string());
            }
            other => {
                result.push_str(&other.to_string());
            }
        }
        if self.is_nullable() {
            result.push('?');
        }
        result
    }

    /// Returns `true` when the field metadata has a `"packed"` entry whose
    /// value equals `"true"` ignoring case; `false` when the entry is missing
    /// or holds anything else.
    fn is_packed_struct(&self) -> bool {
        self.metadata()
            .get("packed")
            // Compare in place instead of allocating a lowercased copy.
            .is_some_and(|v| v.eq_ignore_ascii_case("true"))
    }
}
/// Extension methods for an Arrow schema.
pub trait SchemaExt {
/// Returns a new schema with `field` appended as the last column.
///
/// The `Schema` implementation in this file returns an error when a column
/// with the same name already exists.
fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError>;
/// Returns a new schema with `field` inserted at position `index`.
///
/// The `Schema` implementation in this file returns an error when a column
/// with the same name already exists.
fn try_with_column_at(
&self,
index: usize,
field: Field,
) -> std::result::Result<Schema, ArrowError>;
/// Returns the names of the schema's fields, in schema order.
fn field_names(&self) -> Vec<&String>;
/// Returns a new schema that omits every field named `column_name`.
fn without_column(&self, column_name: &str) -> Schema;
/// Renders the schema as a compact string, using `indent` to decide the
/// padding placed around the fields.
fn to_compact_string(&self, indent: Indentation) -> String;
}
impl SchemaExt for Schema {
    /// Returns a copy of this schema with `field` appended as the last column.
    /// Schema metadata is carried over unchanged.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::SchemaError` when a column named like `field`
    /// already exists.
    fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError> {
        if self.column_with_name(field.name()).is_some() {
            return Err(ArrowError::SchemaError(format!(
                "Can not append column {} on schema: {:?}",
                field.name(),
                self
            )));
        }
        let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
        fields.push(FieldRef::new(field));
        Ok(Self::new_with_metadata(fields, self.metadata.clone()))
    }

    /// Returns a copy of this schema with `field` inserted at position
    /// `index`; `index == number of columns` appends. Schema metadata is
    /// carried over unchanged.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::SchemaError` when a column named like `field`
    /// already exists, or when `index` is greater than the number of columns.
    fn try_with_column_at(
        &self,
        index: usize,
        field: Field,
    ) -> std::result::Result<Schema, ArrowError> {
        if self.column_with_name(field.name()).is_some() {
            return Err(ArrowError::SchemaError(format!(
                "Failed to modify schema: Inserting column {} would create a duplicate column in schema: {:?}",
                field.name(),
                self
            )));
        }
        // `Vec::insert` panics when the index is past the end; this method is
        // fallible, so report the problem as an error instead.
        let column_count = self.fields().len();
        if index > column_count {
            return Err(ArrowError::SchemaError(format!(
                "Failed to modify schema: Cannot insert column {} at index {}: schema has only {} columns",
                field.name(),
                index,
                column_count
            )));
        }
        let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
        fields.insert(index, FieldRef::new(field));
        Ok(Self::new_with_metadata(fields, self.metadata.clone()))
    }

    /// Returns a copy of this schema without any field named `column_name`.
    /// Schema metadata is carried over unchanged; when no field matches, the
    /// result has the same fields as `self`.
    fn without_column(&self, column_name: &str) -> Schema {
        let fields: Vec<FieldRef> = self
            .fields()
            .iter()
            .filter(|f| f.name() != column_name)
            .cloned()
            .collect();
        Self::new_with_metadata(fields, self.metadata.clone())
    }

    /// Returns the names of all fields, in schema order.
    fn field_names(&self) -> Vec<&String> {
        self.fields().iter().map(|f| f.name()).collect()
    }

    /// Renders the schema as `{`, the fields separated by `,`, then `}`.
    ///
    /// The padding from `indent` is written after the opening brace and after
    /// every field; each field is rendered with `indent.deepen()`.
    fn to_compact_string(&self, indent: Indentation) -> String {
        // The padding does not change while iterating; build it once.
        let padding = indent.value();
        let mut result = String::from("{");
        result.push_str(&padding);
        for (field_idx, field) in self.fields.iter().enumerate() {
            result.push_str(&field.to_compact_string(indent.deepen()));
            // A comma goes between fields, never after the last one.
            if field_idx + 1 < self.fields.len() {
                result.push(',');
            }
            result.push_str(&padding);
        }
        result.push('}');
        result
    }
}