lance_arrow

Trait RecordBatchExt

Source
pub trait RecordBatchExt {
Show 13 methods // Required methods fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>; fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>; fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<RecordBatch, ArrowError>; fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>; fn merge_with_schema( &self, other: &RecordBatch, schema: &Schema, ) -> Result<RecordBatch, ArrowError>; fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>; fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>; fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>; fn project_by_schema( &self, schema: &Schema, ) -> Result<RecordBatch, ArrowError>; fn metadata(&self) -> &HashMap<String, String>; fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>; fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>; // Provided method fn add_metadata( &self, key: String, value: String, ) -> Result<RecordBatch, ArrowError> { ... }
}
Expand description

Extends Arrow’s RecordBatch.

Required Methods§

Source

fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>

Appends a new column to this RecordBatch and returns a new RecordBatch.

use std::sync::Arc;
use arrow_array::{RecordBatch, Int32Array, StringArray};
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;

let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let record_batch = RecordBatch::try_new(schema, vec![int_arr.clone()]).unwrap();

let new_field = Field::new("s", DataType::Utf8, true);
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let new_record_batch = record_batch.try_with_column(new_field, str_arr.clone()).unwrap();

assert_eq!(
    new_record_batch,
    RecordBatch::try_new(
        Arc::new(Schema::new(
            vec![
                Field::new("a", DataType::Int32, true),
                Field::new("s", DataType::Utf8, true)
            ])
        ),
        vec![int_arr, str_arr],
    ).unwrap()
)
Source

fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>

Creates a new RecordBatch with the given column at the specified index.

Source

fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<RecordBatch, ArrowError>

Creates a new RecordBatch from the provided StructArray.

The fields of the StructArray need to match this RecordBatch's schema.

Source

fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>

Merges this RecordBatch with another RecordBatch and returns a new one.

Fields are merged based on name. First we iterate the left columns. If a matching name is found in the right then we merge the two columns. If there is no match then we add the left column to the output.

To merge two columns we consider the type. If both arrays are struct arrays we recurse. Otherwise we use the left array.

Afterwards we add all non-matching right columns to the output.

Note: This method likely does not handle nested fields correctly; consider using RecordBatchExt::merge_with_schema instead.

use std::sync::Arc;
use arrow_array::*;
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;

let left_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let left = RecordBatch::try_new(left_schema, vec![int_arr.clone()]).unwrap();

let right_schema = Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let right = RecordBatch::try_new(right_schema, vec![str_arr.clone()]).unwrap();

let new_record_batch = left.merge(&right).unwrap();

assert_eq!(
    new_record_batch,
    RecordBatch::try_new(
        Arc::new(Schema::new(
            vec![
                Field::new("a", DataType::Int32, true),
                Field::new("s", DataType::Utf8, true)
            ])
        ),
        vec![int_arr, str_arr],
    ).unwrap()
)

TODO: add support for merging nested fields.

Source

fn merge_with_schema( &self, other: &RecordBatch, schema: &Schema, ) -> Result<RecordBatch, ArrowError>

Create a batch by merging columns between two batches with a given schema.

A reference schema is used to determine the proper ordering of nested fields.

For each field in the reference schema we look for corresponding fields in the left and right batches. If a field is found in both batches we recursively merge it.

If a field is only in the left or right batch we take it as it is.

Source

fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>

Drop one column specified with the name and return the new RecordBatch.

If the named column does not exist, it returns a copy of this RecordBatch.

Source

fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>

Replace a column (specified by name) and return the new RecordBatch.

Source

fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>

Get (potentially nested) column by qualified name.

Source

fn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>

Project the schema over the RecordBatch.

Source

fn metadata(&self) -> &HashMap<String, String>

Returns the metadata of the schema.

Source

fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>

Replace the schema metadata with the provided one.

Source

fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>

Take selected rows from the RecordBatch.

Provided Methods§

Source

fn add_metadata( &self, key: String, value: String, ) -> Result<RecordBatch, ArrowError>

Add metadata to the schema.

Implementations on Foreign Types§

Source§

impl RecordBatchExt for RecordBatch

Source§

fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<Self, ArrowError>

Source§

fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<Self, ArrowError>

Source§

fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<Self, ArrowError>

Source§

fn merge(&self, other: &Self) -> Result<Self, ArrowError>

Source§

fn merge_with_schema( &self, other: &RecordBatch, schema: &Schema, ) -> Result<RecordBatch, ArrowError>

Source§

fn drop_column(&self, name: &str) -> Result<Self, ArrowError>

Source§

fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>

Source§

fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>

Source§

fn project_by_schema(&self, schema: &Schema) -> Result<Self, ArrowError>

Source§

fn metadata(&self) -> &HashMap<String, String>

Source§

fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>

Source§

fn take(&self, indices: &UInt32Array) -> Result<Self, ArrowError>

Implementors§