lance_arrow

Trait RecordBatchExt

Source
pub trait RecordBatchExt {
    // Required methods
    fn try_with_column(
        &self,
        field: Field,
        arr: ArrayRef,
    ) -> Result<RecordBatch, ArrowError>;
    fn try_with_column_at(
        &self,
        index: usize,
        field: Field,
        arr: ArrayRef,
    ) -> Result<RecordBatch, ArrowError>;
    fn try_new_from_struct_array(
        &self,
        arr: StructArray,
    ) -> Result<RecordBatch, ArrowError>;
    fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>;
    fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>;
    fn replace_column_by_name(
        &self,
        name: &str,
        column: Arc<dyn Array>,
    ) -> Result<RecordBatch, ArrowError>;
    fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>;
    fn project_by_schema(
        &self,
        schema: &Schema,
    ) -> Result<RecordBatch, ArrowError>;
    fn metadata(&self) -> &HashMap<String, String>;
    fn with_metadata(
        &self,
        metadata: HashMap<String, String>,
    ) -> Result<RecordBatch, ArrowError>;
    fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>;

    // Provided method
    fn add_metadata(
        &self,
        key: String,
        value: String,
    ) -> Result<RecordBatch, ArrowError> { ... }
}
Expand description

Extends Arrow’s RecordBatch.

Required Methods§

Source

fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>

Appends a new column to this RecordBatch and returns a new RecordBatch.

use std::sync::Arc;
use arrow_array::{RecordBatch, Int32Array, StringArray};
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;

let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let record_batch = RecordBatch::try_new(schema, vec![int_arr.clone()]).unwrap();

let new_field = Field::new("s", DataType::Utf8, true);
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let new_record_batch = record_batch.try_with_column(new_field, str_arr.clone()).unwrap();

assert_eq!(
    new_record_batch,
    RecordBatch::try_new(
        Arc::new(Schema::new(
            vec![
                Field::new("a", DataType::Int32, true),
                Field::new("s", DataType::Utf8, true)
            ])
        ),
        vec![int_arr, str_arr],
    ).unwrap()
)
Source

fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>

Creates a new RecordBatch with the given column at the specified index.

Source

fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<RecordBatch, ArrowError>

Creates a new RecordBatch from the provided StructArray.

The fields of the StructArray need to match this RecordBatch's schema.

Source

fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>

Merges this RecordBatch with another one and returns a new RecordBatch.

use std::sync::Arc;
use arrow_array::*;
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;

let left_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let left = RecordBatch::try_new(left_schema, vec![int_arr.clone()]).unwrap();

let right_schema = Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let right = RecordBatch::try_new(right_schema, vec![str_arr.clone()]).unwrap();

let new_record_batch = left.merge(&right).unwrap();

assert_eq!(
    new_record_batch,
    RecordBatch::try_new(
        Arc::new(Schema::new(
            vec![
                Field::new("a", DataType::Int32, true),
                Field::new("s", DataType::Utf8, true)
            ])
        ),
        vec![int_arr, str_arr],
    ).unwrap()
)

TODO: add support for merging nested fields.

Source

fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>

Drops the column with the given name and returns the new RecordBatch.

If the named column does not exist, it returns a copy of this RecordBatch.

Source

fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>

Replace a column (specified by name) and return the new RecordBatch.

Source

fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>

Get (potentially nested) column by qualified name.

Source

fn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>

Project the schema over the RecordBatch.

Source

fn metadata(&self) -> &HashMap<String, String>

Returns the metadata of the schema.

Source

fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>

Replace the schema metadata with the provided one.

Source

fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>

Take selected rows from the RecordBatch.

Provided Methods§

Source

fn add_metadata( &self, key: String, value: String, ) -> Result<RecordBatch, ArrowError>

Add metadata to the schema.

Implementations on Foreign Types§

Source§

impl RecordBatchExt for RecordBatch

Source§

fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<Self, ArrowError>

Source§

fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<Self, ArrowError>

Source§

fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<Self, ArrowError>

Source§

fn merge(&self, other: &Self) -> Result<Self, ArrowError>

Source§

fn drop_column(&self, name: &str) -> Result<Self, ArrowError>

Source§

fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>

Source§

fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>

Source§

fn project_by_schema(&self, schema: &Schema) -> Result<Self, ArrowError>

Source§

fn metadata(&self) -> &HashMap<String, String>

Source§

fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>

Source§

fn take(&self, indices: &UInt32Array) -> Result<Self, ArrowError>

Implementors§