// polars_arrow/record_batch.rs
//! Contains [`RecordBatchT`], a container of [`Array`] where every array has the
//! same length.
use polars_error::{polars_ensure, PolarsResult};
use crate::array::{Array, ArrayRef};
/// A collection of [`Array`] trait objects that all share one length,
/// reported by [`RecordBatchT::len`].
///
/// The shared length is checked when the value is built through
/// [`RecordBatchT::try_new`] (or [`RecordBatchT::new`], which panics on a
/// mismatch).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecordBatchT<A: AsRef<dyn Array>> {
// Number of rows; `try_new` requires every entry of `arrays` to have this length.
height: usize,
// The columns, one array per column.
arrays: Vec<A>,
}
/// A [`RecordBatchT`] whose columns are stored as [`ArrayRef`].
pub type RecordBatch = RecordBatchT<ArrayRef>;
impl<A: AsRef<dyn Array>> RecordBatchT<A> {
    /// Builds a [`RecordBatchT`] from `arrays`, each of which must hold
    /// exactly `length` rows.
    ///
    /// This is the panicking counterpart of [`RecordBatchT::try_new`].
    ///
    /// # Panics
    ///
    /// Panics if any array in `arrays` has a length different from `length`.
    pub fn new(length: usize, arrays: Vec<A>) -> Self {
        let checked = Self::try_new(length, arrays);
        checked.unwrap()
    }

    /// Builds a [`RecordBatchT`] from `arrays`, each of which must hold
    /// exactly `height` rows.
    ///
    /// An empty `arrays` vector passes the check for any `height`.
    ///
    /// # Errors
    ///
    /// Returns an error (raised as `ComputeError`) if any array in `arrays`
    /// has a length different from `height`.
    pub fn try_new(height: usize, arrays: Vec<A>) -> PolarsResult<Self> {
        let lengths_agree = arrays
            .iter()
            .all(|column| column.as_ref().len() == height);
        polars_ensure!(
            lengths_agree,
            ComputeError: "RecordBatch requires all its arrays to have an equal number of rows",
        );
        Ok(Self { height, arrays })
    }

    /// Returns the [`Array`]s held by this [`RecordBatchT`] as a slice.
    pub fn arrays(&self) -> &[A] {
        self.arrays.as_slice()
    }

    /// Returns the [`Array`]s held by this [`RecordBatchT`] as a slice.
    ///
    /// Same result as [`RecordBatchT::arrays`].
    pub fn columns(&self) -> &[A] {
        self.arrays()
    }

    /// Returns the number of rows, i.e. the length shared by every array.
    ///
    /// Same result as [`RecordBatchT::height`].
    pub fn len(&self) -> usize {
        self.height()
    }

    /// Returns the number of rows, i.e. the length shared by every array.
    pub fn height(&self) -> usize {
        self.height
    }

    /// Returns the number of arrays (columns).
    pub fn width(&self) -> usize {
        self.arrays().len()
    }

    /// Returns `true` when the number of rows is zero.
    ///
    /// Only the row count is inspected; the number of arrays is not.
    pub fn is_empty(&self) -> bool {
        self.height == 0
    }

    /// Consumes the [`RecordBatchT`] and hands back its underlying arrays.
    ///
    /// The arrays were checked to share one length when the batch was built.
    pub fn into_arrays(self) -> Vec<A> {
        let Self { arrays, .. } = self;
        arrays
    }
}
impl<A: AsRef<dyn Array>> From<RecordBatchT<A>> for Vec<A> {
fn from(c: RecordBatchT<A>) -> Self {
c.into_arrays()
}
}
/// Lets a [`RecordBatchT`] be used wherever a slice of its arrays is
/// expected (indexing, iteration, slice methods).
impl<A: AsRef<dyn Array>> std::ops::Deref for RecordBatchT<A> {
    type Target = [A];

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.arrays.as_slice()
    }
}