deltalake_core::operations::merge

Struct MergeBuilder

Source
pub struct MergeBuilder { /* private fields */ }
Expand description

Merge records into a Delta Table.

Implementations§

Source§

impl MergeBuilder

Source

pub fn new<E: Into<Expression>>( log_store: LogStoreRef, snapshot: DeltaTableState, predicate: E, source: DataFrame, ) -> Self

Create a new MergeBuilder

Source

pub fn when_matched_update<F>(self, builder: F) -> DeltaResult<MergeBuilder>

Update a target record when it matches with a source record

The update expressions can specify both source and target columns.

Multiple match clauses can be specified, and their predicates are evaluated to determine whether the corresponding operations are performed. Only the first clause whose predicate is satisfied is executed, so the order of the match clauses matters.

# Example

let table = open_table("../path/to/table")?;
let (table, metrics) = DeltaOps(table)
    .merge(source, col("target.id").eq(col("source.id")))
    .with_source_alias("source")
    .with_target_alias("target")
    .when_matched_update(|update| {
        update
            .predicate(col("source.value").lt(lit(0)))
            .update("value", lit(0))
            .update("modified", col("source.modified"))
    })?
    .when_matched_update(|update| {
        update
            .update("value", col("source.value") + lit(1))
            .update("modified", col("source.modified"))
    })?
    .await?
Source

pub fn when_matched_delete<F>(self, builder: F) -> DeltaResult<MergeBuilder>

Delete a target record when it matches with a source record

Multiple match clauses can be specified, and their predicates are evaluated to determine whether the corresponding operations are performed. Only the first clause whose predicate is satisfied is executed, so the order of the match clauses matters.

# Example

let table = open_table("../path/to/table")?;
let (table, metrics) = DeltaOps(table)
    .merge(source, col("target.id").eq(col("source.id")))
    .with_source_alias("source")
    .with_target_alias("target")
    .when_matched_delete(|delete| {
        delete.predicate(col("source.delete"))
    })?
    .await?
Source

pub fn when_not_matched_insert<F>(self, builder: F) -> DeltaResult<MergeBuilder>

Insert a source record when it does not match with a target record

Multiple “not match” clauses can be specified, and their predicates are evaluated to determine whether the corresponding operations are performed. Only the first clause whose predicate is satisfied is executed, so the order of the “not match” clauses matters.

# Example

let table = open_table("../path/to/table")?;
let (table, metrics) = DeltaOps(table)
    .merge(source, col("target.id").eq(col("source.id")))
    .with_source_alias("source")
    .with_target_alias("target")
    .when_not_matched_insert(|insert| {
        insert
            .set("id", col("source.id"))
            .set("value", col("source.value"))
            .set("modified", col("source.modified"))
    })?
    .await?
Source

pub fn when_not_matched_by_source_update<F>( self, builder: F, ) -> DeltaResult<MergeBuilder>

Update a target record when it does not match with a source record

The update expressions can specify only target columns.

Multiple source “not match” clauses can be specified, and their predicates are evaluated to determine whether the corresponding operations are performed. Only the first clause whose predicate is satisfied is executed, so the order of the source “not match” clauses matters.

# Example

let table = open_table("../path/to/table")?;
let (table, metrics) = DeltaOps(table)
    .merge(source, col("target.id").eq(col("source.id")))
    .with_source_alias("source")
    .with_target_alias("target")
    .when_not_matched_by_source_update(|update| {
        update
            .update("active", lit(false))
            .update("to_dt", lit("2023-07-11"))
    })?
    .await?
Source

pub fn when_not_matched_by_source_delete<F>( self, builder: F, ) -> DeltaResult<MergeBuilder>

Delete a target record when it does not match with a source record

Multiple source “not match” clauses can be specified, and their predicates are evaluated to determine whether the corresponding operations are performed. Only the first clause whose predicate is satisfied is executed, so the order of the source “not match” clauses matters.

# Example

let table = open_table("../path/to/table")?;
let (table, metrics) = DeltaOps(table)
    .merge(source, col("target.id").eq(col("source.id")))
    .with_source_alias("source")
    .with_target_alias("target")
    .when_not_matched_by_source_delete(|delete| {
        delete
    })?
    .await?
Source

pub fn with_source_alias<S: ToString>(self, alias: S) -> Self

Rename the columns of the source dataset so that each one is prefixed with the alias, i.e. `alias`.`original column name`.

Source

pub fn with_target_alias<S: ToString>(self, alias: S) -> Self

Rename the columns of the target dataset so that each one is prefixed with the alias, i.e. `alias`.`original column name`.

Source

pub fn with_session_state(self, state: SessionState) -> Self

The DataFusion session state to use

Source

pub fn with_commit_properties(self, commit_properties: CommitProperties) -> Self

Additional metadata to be added to commit info

Source

pub fn with_writer_properties(self, writer_properties: WriterProperties) -> Self

Writer properties passed to the Parquet writer when files are rewritten

Source

pub fn with_safe_cast(self, safe_cast: bool) -> Self

Specify the cast options to use when casting columns that do not match the table’s schema. When cast_options.safe is set to true, any failure to cast a datatype will produce null instead of returning an error to the user.

Example (column’s type is int):

Input -> Output
123 -> 123
Test123 -> null

Trait Implementations§

Source§

impl IntoFuture for MergeBuilder

Source§

type Output = Result<(DeltaTable, MergeMetrics), DeltaTableError>

The output that the future will produce on completion.
Source§

type IntoFuture = Pin<Box<dyn Future<Output = <MergeBuilder as IntoFuture>::Output> + Send>>

Which kind of future are we turning this into?
Source§

fn into_future(self) -> Self::IntoFuture

Creates a future from a value. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T