pub trait RecordBatchExt {
Show 13 methods
// Required methods
fn try_with_column(
&self,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>;
fn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>;
fn try_new_from_struct_array(
&self,
arr: StructArray,
) -> Result<RecordBatch, ArrowError>;
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>;
fn merge_with_schema(
&self,
other: &RecordBatch,
schema: &Schema,
) -> Result<RecordBatch, ArrowError>;
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>;
fn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>,
) -> Result<RecordBatch, ArrowError>;
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>;
fn project_by_schema(
&self,
schema: &Schema,
) -> Result<RecordBatch, ArrowError>;
fn metadata(&self) -> &HashMap<String, String>;
fn with_metadata(
&self,
metadata: HashMap<String, String>,
) -> Result<RecordBatch, ArrowError>;
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>;
// Provided method
fn add_metadata(
&self,
key: String,
value: String,
) -> Result<RecordBatch, ArrowError> { ... }
}
Expand description
Extends Arrow’s RecordBatch.
Required Methods§
Sourcefn try_with_column(
&self,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>
fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>
Appends a new column to this RecordBatch and returns a new RecordBatch.
use std::sync::Arc;
use arrow_array::{RecordBatch, Int32Array, StringArray};
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let record_batch = RecordBatch::try_new(schema, vec![int_arr.clone()]).unwrap();
let new_field = Field::new("s", DataType::Utf8, true);
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let new_record_batch = record_batch.try_with_column(new_field, str_arr.clone()).unwrap();
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
Sourcefn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>
fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch with the given column at the specified index.
Sourcefn try_new_from_struct_array(
&self,
arr: StructArray,
) -> Result<RecordBatch, ArrowError>
fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch from the provided StructArray.
The fields on the StructArray need to match this RecordBatch's schema.
Sourcefn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
Merges with another RecordBatch and returns a new one.
Fields are merged based on name. First we iterate the left columns. If a matching name is found in the right then we merge the two columns. If there is no match then we add the left column to the output.
To merge two columns we consider the type. If both arrays are struct arrays we recurse. Otherwise we use the left array.
Afterwards we add all non-matching right columns to the output.
Note: This method likely does not handle nested fields correctly, and you may want to consider using merge_with_schema instead.
use std::sync::Arc;
use arrow_array::*;
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
let left_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let left = RecordBatch::try_new(left_schema, vec![int_arr.clone()]).unwrap();
let right_schema = Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let right = RecordBatch::try_new(right_schema, vec![str_arr.clone()]).unwrap();
let new_record_batch = left.merge(&right).unwrap();
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
TODO: add merge nested fields support.
Sourcefn merge_with_schema(
&self,
other: &RecordBatch,
schema: &Schema,
) -> Result<RecordBatch, ArrowError>
fn merge_with_schema( &self, other: &RecordBatch, schema: &Schema, ) -> Result<RecordBatch, ArrowError>
Create a batch by merging columns between two batches with a given schema.
A reference schema is used to determine the proper ordering of nested fields.
For each field in the reference schema we look for corresponding fields in the left and right batches. If a field is found in both batches we recursively merge it.
If a field is only in the left or right batch we take it as it is.
Sourcefn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
Drops the column with the specified name and returns the new RecordBatch.
If the named column does not exist, it returns a copy of this RecordBatch.
Sourcefn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>,
) -> Result<RecordBatch, ArrowError>
fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>
Replaces a column (specified by name) and returns the new RecordBatch.
Sourcefn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
Get (potentially nested) column by qualified name.
Sourcefn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
fn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
Project the schema over the RecordBatch.
Sourcefn with_metadata(
&self,
metadata: HashMap<String, String>,
) -> Result<RecordBatch, ArrowError>
fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>
Replace the schema metadata with the provided one.
Sourcefn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
Take selected rows from the RecordBatch.
Provided Methods§
Sourcefn add_metadata(
&self,
key: String,
value: String,
) -> Result<RecordBatch, ArrowError>
fn add_metadata( &self, key: String, value: String, ) -> Result<RecordBatch, ArrowError>
Add metadata to the schema.