Struct datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec

source ·

pub struct CoalesceBatchesExec { /* private fields */ }

Expand description

CoalesceBatchesExec combines small batches into larger batches for more efficient use of vectorized processing by later operators.

The operator buffers batches until it collects target_batch_size rows and then emits a single concatenated batch. When only a limited number of rows are necessary (specified by the fetch parameter), the operator will stop buffering and return the final batch once the number of collected rows reaches the fetch value.

§Background

Generally speaking, larger RecordBatches are more efficient to process than smaller record batches (until the CPU cache is exceeded) because there is fixed processing overhead per batch. This code concatenates multiple small record batches into larger ones to amortize this overhead.

┌────────────────────┐
│    RecordBatch     │
│   num_rows = 23    │
└────────────────────┘                 ┌────────────────────┐
                                       │                    │
┌────────────────────┐     Coalesce    │                    │
│                    │      Batches    │                    │
│    RecordBatch     │                 │                    │
│   num_rows = 50    │  ─ ─ ─ ─ ─ ─ ▶  │                    │
│                    │                 │    RecordBatch     │
│                    │                 │   num_rows = 106   │
└────────────────────┘                 │                    │
                                       │                    │
┌────────────────────┐                 │                    │
│                    │                 │                    │
│    RecordBatch     │                 │                    │
│   num_rows = 33    │                 └────────────────────┘
│                    │
└────────────────────┘

Struct datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec · Copy item path

§Background

Implementations§

impl CoalesceBatchesExec

pub fn new(input: Arc<dyn ExecutionPlan>, target_batch_size: usize) -> Self

pub fn with_fetch(self, fetch: Option<usize>) -> Self

pub fn input(&self) -> &Arc<dyn ExecutionPlan>

pub fn target_batch_size(&self) -> usize

Trait Implementations§

impl Debug for CoalesceBatchesExec

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl DisplayAs for CoalesceBatchesExec

fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result

impl ExecutionPlan for CoalesceBatchesExec

fn as_any(&self) -> &dyn Any

fn name(&self) -> &'static str

fn properties(&self) -> &PlanProperties

fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>>

fn maintains_input_order(&self) -> Vec<bool>

fn benefits_from_input_partitioning(&self) -> Vec<bool>

fn with_new_children( self: Arc<Self>, children: Vec<Arc<dyn ExecutionPlan>>, ) -> Result<Arc<dyn ExecutionPlan>>

fn execute( &self, partition: usize, context: Arc<TaskContext>, ) -> Result<SendableRecordBatchStream>

fn metrics(&self) -> Option<MetricsSet>

fn statistics(&self) -> Result<Statistics>

fn with_fetch(&self, limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>>

fn static_name() -> &'static str where Self: Sized,

fn schema(&self) -> SchemaRef

fn required_input_distribution(&self) -> Vec<Distribution>

fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>>

fn repartitioned( &self, _target_partitions: usize, _config: &ConfigOptions, ) -> Result<Option<Arc<dyn ExecutionPlan>>>

fn supports_limit_pushdown(&self) -> bool

Auto Trait Implementations§

impl Freeze for CoalesceBatchesExec

impl !RefUnwindSafe for CoalesceBatchesExec

impl Send for CoalesceBatchesExec

impl Sync for CoalesceBatchesExec

impl Unpin for CoalesceBatchesExec

impl !UnwindSafe for CoalesceBatchesExec

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec

fn static_name() -> &'static str
where Self: Sized,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,