// Arrow schema: an ordered list of fields plus schema-level metadata.
use std::sync::Arc;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{Field, Metadata};
/// An ordered sequence of [`Field`]s with associated [`Metadata`].
///
/// [`ArrowSchema`] is an abstraction used to read from, and write to, Arrow IPC format,
/// Apache Parquet, and Apache Avro. All of these formats have a concept of a schema
/// with fields and metadata.
///
/// The [`Default`] schema has no fields and default (empty-equivalent) metadata.
/// Equality compares both the fields, in order, and the metadata.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ArrowSchema {
    /// The fields composing this schema, in schema order.
    pub fields: Vec<Field>,
    /// Metadata attached to the schema as a whole.
    ///
    /// This is not an `Option`: a schema built without explicit metadata
    /// carries `Metadata::default()`.
    pub metadata: Metadata,
}
/// A shared, reference-counted handle ([`Arc`]) to an [`ArrowSchema`].
pub type ArrowSchemaRef = Arc<ArrowSchema>;
impl ArrowSchema {
/// Attaches a [`Metadata`] to [`ArrowSchema`]
#[inline]
pub fn with_metadata(self, metadata: Metadata) -> Self {
Self {
fields: self.fields,
metadata,
}
}
#[inline]
pub fn len(&self) -> usize {
self.fields.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.fields.is_empty()
}
/// Returns a new [`ArrowSchema`] with a subset of all fields whose `predicate`
/// evaluates to true.
pub fn filter<F: Fn(usize, &Field) -> bool>(self, predicate: F) -> Self {
let fields = self
.fields
.into_iter()
.enumerate()
.filter_map(|(index, f)| {
if (predicate)(index, &f) {
Some(f)
} else {
None
}
})
.collect();
ArrowSchema {
fields,
metadata: self.metadata,
}
}
}
impl From<Vec<Field>> for ArrowSchema {
fn from(fields: Vec<Field>) -> Self {
Self {
fields,
..Default::default()
}
}
}