/// Extension methods for Arrow's `RecordBatch`.
///
/// Every method borrows the batch through `&self`. The methods returning
/// `Result<RecordBatch, ArrowError>` produce a new batch on success and report
/// failure as an `ArrowError`.
pub trait RecordBatchExt {
// Required methods
/// Appends a new column, described by `field` and backed by `arr`, to this
/// `RecordBatch` and returns a new `RecordBatch`.
fn try_with_column(
&self,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>;
/// Creates a new `RecordBatch` with the column (`field` / `arr`) at `index`.
fn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>;
/// Creates a new `RecordBatch` from the provided `StructArray`.
///
/// The fields of the `StructArray` need to match this `RecordBatch`'s schema.
fn try_new_from_struct_array(
&self,
arr: StructArray,
) -> Result<RecordBatch, ArrowError>;
/// Merges this batch with `other` and returns a new `RecordBatch`.
///
/// TODO: add support for merging nested fields.
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>;
/// Drops the column called `name` and returns the new `RecordBatch`.
///
/// If the named column does not exist, a copy of this `RecordBatch` is returned.
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>;
/// Replaces the column called `name` with `column` and returns the new
/// `RecordBatch`.
fn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>,
) -> Result<RecordBatch, ArrowError>;
/// Gets a (potentially nested) column by its qualified name.
///
/// Returns an `Option`, so a lookup may yield `None`.
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>;
/// Projects `schema` over this `RecordBatch`.
fn project_by_schema(
&self,
schema: &Schema,
) -> Result<RecordBatch, ArrowError>;
/// Returns a borrowed string-to-string metadata map.
///
/// NOTE(review): this accessor carries no description; presumably it is the
/// schema metadata that `with_metadata` replaces. Confirm against the
/// implementation.
fn metadata(&self) -> &HashMap<String, String>;
/// Replaces the schema metadata with the provided `metadata` map.
fn with_metadata(
&self,
metadata: HashMap<String, String>,
) -> Result<RecordBatch, ArrowError>;
/// Takes the rows selected by `indices` from this `RecordBatch`.
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>;
// Provided method
/// Adds a `key` / `value` metadata entry to the schema.
///
/// This is the trait's only provided method; its default body is elided
/// here as `{ ... }`.
fn add_metadata(
&self,
key: String,
value: String,
) -> Result<RecordBatch, ArrowError> { ... }
}
Expand description
Extends Arrow’s RecordBatch.
Required Methods§
Sourcefn try_with_column(
&self,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>
fn try_with_column( &self, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>
Appends a new column to this RecordBatch and returns the result as a new RecordBatch.
use std::sync::Arc;
use arrow_array::{RecordBatch, Int32Array, StringArray};
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
// Start from a batch with a single nullable Int32 column "a" holding four rows.
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let record_batch = RecordBatch::try_new(schema, vec![int_arr.clone()]).unwrap();
// Describe and build the column to append: nullable Utf8 column "s" with four values.
let new_field = Field::new("s", DataType::Utf8, true);
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
// `try_with_column` takes `&self` and hands back a new batch.
let new_record_batch = record_batch.try_with_column(new_field, str_arr.clone()).unwrap();
// The new batch must equal one built directly with column "a" followed by column "s".
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
Sourcefn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef,
) -> Result<RecordBatch, ArrowError>
fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef, ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch with the given column at the specified index.
Sourcefn try_new_from_struct_array(
&self,
arr: StructArray,
) -> Result<RecordBatch, ArrowError>
fn try_new_from_struct_array( &self, arr: StructArray, ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch from the provided StructArray.
The fields of the StructArray need to match this RecordBatch's schema.
Sourcefn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
Merges this RecordBatch with another one and returns a new RecordBatch.
use std::sync::Arc;
use arrow_array::*;
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
// Left batch: a single nullable Int32 column "a" with four rows.
let left_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let left = RecordBatch::try_new(left_schema, vec![int_arr.clone()]).unwrap();
// Right batch: a single nullable Utf8 column "s" with the same number of rows.
let right_schema = Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let right = RecordBatch::try_new(right_schema, vec![str_arr.clone()]).unwrap();
// `merge` borrows both batches and returns a new one.
let new_record_batch = left.merge(&right).unwrap();
// The merged batch must equal one built directly with column "a" followed by column "s".
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
TODO: add support for merging nested fields.
Sourcefn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
Drops the column with the specified name and returns the new RecordBatch.
If the named column does not exist, it returns a copy of this RecordBatch.
Sourcefn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>,
) -> Result<RecordBatch, ArrowError>
fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array>, ) -> Result<RecordBatch, ArrowError>
Replaces a column (specified by name) and returns the new RecordBatch.
Sourcefn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
Gets a (potentially nested) column by its qualified name.
Sourcefn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
fn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
Project the schema over the RecordBatch.
Sourcefn with_metadata(
&self,
metadata: HashMap<String, String>,
) -> Result<RecordBatch, ArrowError>
fn with_metadata( &self, metadata: HashMap<String, String>, ) -> Result<RecordBatch, ArrowError>
Replace the schema metadata with the provided one.
Sourcefn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
Take selected rows from the RecordBatch.
Provided Methods§
Sourcefn add_metadata(
&self,
key: String,
value: String,
) -> Result<RecordBatch, ArrowError>
fn add_metadata( &self, key: String, value: String, ) -> Result<RecordBatch, ArrowError>
Add metadata to the schema.