datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::collections::HashSet;
21use std::fmt::{self, Display, Formatter, Write};
22use std::hash::{Hash, Hasher};
23use std::mem;
24use std::sync::Arc;
25
26use crate::expr_fn::binary_expr;
27use crate::logical_plan::Subquery;
28use crate::utils::expr_to_columns;
29use crate::Volatility;
30use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
31
32use arrow::datatypes::{DataType, FieldRef};
33use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
34use datafusion_common::tree_node::{
35    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
36};
37use datafusion_common::{
38    plan_err, Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
39};
40use datafusion_functions_window_common::field::WindowUDFFieldArgs;
41use sqlparser::ast::{
42    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
43    NullTreatment, RenameSelectItem, ReplaceSelectElement,
44};
45
46/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
47///
48/// For example the expression `A + 1` will be represented as
49///
50///```text
51///  BinaryExpr {
52///    left: Expr::Column("A"),
53///    op: Operator::Plus,
54///    right: Expr::Literal(ScalarValue::Int32(Some(1)))
55/// }
56/// ```
57///
58/// # Creating Expressions
59///
60/// `Expr`s can be created directly, but it is often easier and less verbose to
61/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
63///
64/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
65///
66/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
67///
68/// # Schema Access
69///
70/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
71/// of an `Expr`.
72///
73/// # Visiting and Rewriting `Expr`s
74///
/// The `Expr` enum implements the [`TreeNode`] trait for walking and
76/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
77/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
78/// the examples below and [`TreeNode`] for more information.
79///
80/// # Examples
81///
82/// ## Column references and literals
83///
84/// [`Expr::Column`] refer to the values of columns and are often created with
85/// the [`col`] function. For example to create an expression `c1` referring to
86/// column named "c1":
87///
88/// [`col`]: crate::expr_fn::col
89///
90/// ```
91/// # use datafusion_common::Column;
92/// # use datafusion_expr::{lit, col, Expr};
93/// let expr = col("c1");
94/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
95/// ```
96///
97/// [`Expr::Literal`] refer to literal, or constant, values. These are created
98/// with the [`lit`] function. For example to create an expression `42`:
99///
100/// [`lit`]: crate::lit
101///
102/// ```
103/// # use datafusion_common::{Column, ScalarValue};
104/// # use datafusion_expr::{lit, col, Expr};
105/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
106/// let expr = lit(42i64);
107/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
108/// // To make a (typed) NULL:
109/// let expr = Expr::Literal(ScalarValue::Int64(None));
110/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
111/// let expr = lit(ScalarValue::Null);
112/// ```
113///
114/// ## Binary Expressions
115///
116/// Exprs implement traits that allow easy to understand construction of more
117/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
118/// "c2" together
119///
120/// ```
121/// # use datafusion_expr::{lit, col, Operator, Expr};
122/// // Use the `+` operator to add two columns together
123/// let expr = col("c1") + col("c2");
124/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
125/// if let Expr::BinaryExpr(binary_expr) = expr {
126///   assert_eq!(*binary_expr.left, col("c1"));
127///   assert_eq!(*binary_expr.right, col("c2"));
128///   assert_eq!(binary_expr.op, Operator::Plus);
129/// }
130/// ```
131///
/// The expression `c1 = 42` compares the value in column "c1" to the
133/// literal value `42`:
134///
135/// ```
136/// # use datafusion_common::ScalarValue;
137/// # use datafusion_expr::{lit, col, Operator, Expr};
138/// let expr = col("c1").eq(lit(42_i32));
139/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
140/// if let Expr::BinaryExpr(binary_expr) = expr {
141///   assert_eq!(*binary_expr.left, col("c1"));
142///   let scalar = ScalarValue::Int32(Some(42));
143///   assert_eq!(*binary_expr.right, Expr::Literal(scalar));
144///   assert_eq!(binary_expr.op, Operator::Eq);
145/// }
146/// ```
147///
148/// Here is how to implement the equivalent of `SELECT *` to select all
149/// [`Expr::Column`] from a [`DFSchema`]'s columns:
150///
151/// ```
152/// # use arrow::datatypes::{DataType, Field, Schema};
153/// # use datafusion_common::{DFSchema, Column};
154/// # use datafusion_expr::Expr;
/// // Create a schema (c1 int, c2 float)
156/// let arrow_schema = Schema::new(vec![
157///    Field::new("c1", DataType::Int32, false),
158///    Field::new("c2", DataType::Float64, false),
159/// ]);
/// // DFSchema is an Arrow schema with optional relation name
161/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
162///   .unwrap();
163///
164/// // Form Vec<Expr> with an expression for each column in the schema
165/// let exprs: Vec<_> = df_schema.iter()
166///   .map(Expr::from)
167///   .collect();
168///
169/// assert_eq!(exprs, vec![
170///   Expr::from(Column::from_qualified_name("t1.c1")),
171///   Expr::from(Column::from_qualified_name("t1.c2")),
172/// ]);
173/// ```
174///
175/// # Visiting and Rewriting `Expr`s
176///
177/// Here is an example that finds all literals in an `Expr` tree:
178/// ```
179/// # use std::collections::{HashSet};
180/// use datafusion_common::ScalarValue;
181/// # use datafusion_expr::{col, Expr, lit};
182/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
183/// // Expression a = 5 AND b = 6
184/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals in a HashSet
186/// let mut scalars = HashSet::new();
187/// // apply recursively visits all nodes in the expression tree
188/// expr.apply(|e| {
189///    if let Expr::Literal(scalar) = e {
190///       scalars.insert(scalar);
191///    }
192///    // The return value controls whether to continue visiting the tree
193///    Ok(TreeNodeRecursion::Continue)
194/// }).unwrap();
195/// // All subtrees have been visited and literals found
196/// assert_eq!(scalars.len(), 2);
197/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
198/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
199/// ```
200///
/// Rewrite an expression, replacing references to column "a" with
/// the literal `42`:
203///
204///  ```
205/// # use datafusion_common::tree_node::{Transformed, TreeNode};
206/// # use datafusion_expr::{col, Expr, lit};
207/// // expression a = 5 AND b = 6
208/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
209/// // rewrite all references to column "a" to the literal 42
210/// let rewritten = expr.transform(|e| {
211///   if let Expr::Column(c) = &e {
212///     if &c.name == "a" {
213///       // return Transformed::yes to indicate the node was changed
214///       return Ok(Transformed::yes(lit(42)))
215///     }
216///   }
217///   // return Transformed::no to indicate the node was not changed
218///   Ok(Transformed::no(e))
219/// }).unwrap();
220/// // The expression has been rewritten
221/// assert!(rewritten.transformed);
222/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
// NOTE: variant order is significant for the derived `PartialOrd`; do not
// reorder variants.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(DataType, Vec<String>),
    /// A constant value.
    Literal(ScalarValue),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// LIKE expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression; the operand must be of a signed numeric data type
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// The CASE expression is similar to a series of nested if/else and there are two forms that
    /// can be used. The first form consists of a series of boolean "when" expressions with
    /// corresponding "then" expressions, and an optional "else" expression.
    ///
    /// ```text
    /// CASE WHEN condition THEN result
    ///      [WHEN ...]
    ///      [ELSE result]
    /// END
    /// ```
    ///
    /// The second form uses a base expression and then a series of "when" clauses that match on a
    /// literal value.
    ///
    /// ```text
    /// CASE expression
    ///     WHEN value THEN result
    ///     [WHEN ...]
    ///     [ELSE result]
    /// END
    /// ```
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Represents the call of a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Represents the call of a window function with arguments.
    WindowFunction(WindowFunction),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        qualifier: Option<TableReference>,
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated subqueries.
    OuterReferenceColumn(DataType, Column),
    /// Unnest expression
    Unnest(Unnest),
}
334
335impl Default for Expr {
336    fn default() -> Self {
337        Expr::Literal(ScalarValue::Null)
338    }
339}
340
341/// Create an [`Expr`] from a [`Column`]
342impl From<Column> for Expr {
343    fn from(value: Column) -> Self {
344        Expr::Column(value)
345    }
346}
347
348/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
349/// useful for creating [`Expr`] from a [`DFSchema`].
350///
351/// See example on [`Expr`]
352impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
353    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
354        Expr::from(Column::from(value))
355    }
356}
357
358impl<'a> TreeNodeContainer<'a, Self> for Expr {
359    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
360        &'a self,
361        mut f: F,
362    ) -> Result<TreeNodeRecursion> {
363        f(self)
364    }
365
366    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
367        self,
368        mut f: F,
369    ) -> Result<Transformed<Self>> {
370        f(self)
371    }
372}
373
/// UNNEST expression: the payload of [`Expr::Unnest`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Unnest {
    /// The expression being unnested
    pub expr: Box<Expr>,
}
379
380impl Unnest {
381    /// Create a new Unnest expression.
382    pub fn new(expr: Expr) -> Self {
383        Self {
384            expr: Box::new(expr),
385        }
386    }
387
388    /// Create a new Unnest expression.
389    pub fn new_boxed(boxed: Box<Expr>) -> Self {
390        Self { expr: boxed }
391    }
392}
393
/// Alias expression: gives `expr` a specific name, with an optional
/// relation qualifier.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Alias {
    /// The expression being named
    pub expr: Box<Expr>,
    /// Optional qualifier attached to the alias
    pub relation: Option<TableReference>,
    /// The name given to the expression
    pub name: String,
}
401
402impl Alias {
403    /// Create an alias with an optional schema/field qualifier.
404    pub fn new(
405        expr: Expr,
406        relation: Option<impl Into<TableReference>>,
407        name: impl Into<String>,
408    ) -> Self {
409        Self {
410            expr: Box::new(expr),
411            relation: relation.map(|r| r.into()),
412            name: name.into(),
413        }
414    }
415}
416
/// Binary expression: an [`Operator`] applied to a left and a right operand,
/// for example `A + 1` or `age > 21`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct BinaryExpr {
    /// Left-hand side of the expression
    pub left: Box<Expr>,
    /// The operator applied to the two operands (not necessarily a comparison)
    pub op: Operator,
    /// Right-hand side of the expression
    pub right: Box<Expr>,
}
427
428impl BinaryExpr {
429    /// Create a new binary expression
430    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
431        Self { left, op, right }
432    }
433}
434
435impl Display for BinaryExpr {
436    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
437        // Put parentheses around child binary expressions so that we can see the difference
438        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
439        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
440        // equivalent and the parentheses are not necessary.
441
442        fn write_child(
443            f: &mut Formatter<'_>,
444            expr: &Expr,
445            precedence: u8,
446        ) -> fmt::Result {
447            match expr {
448                Expr::BinaryExpr(child) => {
449                    let p = child.op.precedence();
450                    if p == 0 || p < precedence {
451                        write!(f, "({child})")?;
452                    } else {
453                        write!(f, "{child}")?;
454                    }
455                }
456                _ => write!(f, "{expr}")?,
457            }
458            Ok(())
459        }
460
461        let precedence = self.op.precedence();
462        write_child(f, self.left.as_ref(), precedence)?;
463        write!(f, " {} ", self.op)?;
464        write_child(f, self.right.as_ref(), precedence)
465    }
466}
467
/// CASE expression: the payload of [`Expr::Case`].
///
/// With `expr` set this is the `CASE expr WHEN value THEN result ...` form;
/// without it, the `CASE WHEN condition THEN result ...` form.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct Case {
    /// Optional base expression that can be compared to literal values in the "when" expressions
    pub expr: Option<Box<Expr>>,
    /// One or more when/then expressions, as `(when, then)` pairs
    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
    /// Optional "else" expression
    pub else_expr: Option<Box<Expr>>,
}
478
479impl Case {
480    /// Create a new Case expression
481    pub fn new(
482        expr: Option<Box<Expr>>,
483        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
484        else_expr: Option<Box<Expr>>,
485    ) -> Self {
486        Self {
487            expr,
488            when_then_expr,
489            else_expr,
490        }
491    }
492}
493
/// LIKE expression: the payload of both [`Expr::Like`] and [`Expr::SimilarTo`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Like {
    /// Whether the expression is negated
    pub negated: bool,
    /// The expression being matched
    pub expr: Box<Expr>,
    /// The pattern to match `expr` against
    pub pattern: Box<Expr>,
    /// Optional escape character
    pub escape_char: Option<char>,
    /// Whether to ignore case on comparing
    pub case_insensitive: bool,
}
504
505impl Like {
506    /// Create a new Like expression
507    pub fn new(
508        negated: bool,
509        expr: Box<Expr>,
510        pattern: Box<Expr>,
511        escape_char: Option<char>,
512        case_insensitive: bool,
513    ) -> Self {
514        Self {
515            negated,
516            expr,
517            pattern,
518            escape_char,
519            case_insensitive,
520        }
521    }
522}
523
/// BETWEEN expression: the payload of [`Expr::Between`], testing whether
/// `expr` lies in the range from `low` to `high`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Between {
    /// The value to compare
    pub expr: Box<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
    /// The low end of the range
    pub low: Box<Expr>,
    /// The high end of the range
    pub high: Box<Expr>,
}
536
537impl Between {
538    /// Create a new Between expression
539    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
540        Self {
541            expr,
542            negated,
543            low,
544            high,
545        }
546    }
547}
548
/// ScalarFunction expression invokes a scalar function
/// ([`ScalarUDF`](crate::ScalarUDF)) with a list of arguments
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct ScalarFunction {
    /// The function
    pub func: Arc<crate::ScalarUDF>,
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
}
557
558impl ScalarFunction {
559    // return the Function's name
560    pub fn name(&self) -> &str {
561        self.func.name()
562    }
563}
564
565impl ScalarFunction {
566    /// Create a new ScalarFunction expression with a user-defined function (UDF)
567    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
568        Self { func: udf, args }
569    }
570}
571
/// Access a sub field of a nested type, such as a struct or a list
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField { name: ScalarValue },
    /// Single list index, for example: `list[i]`
    ListIndex { key: Box<Expr> },
    /// List range with a stride, for example `list[i:j:k]`
    ListRange {
        start: Box<Expr>,
        stop: Box<Expr>,
        stride: Box<Expr>,
    },
}
586
/// Cast expression: the payload of [`Expr::Cast`], converting `expr` to
/// `data_type`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Cast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}
595
596impl Cast {
597    /// Create a new Cast expression
598    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
599        Self { expr, data_type }
600    }
601}
602
/// TryCast expression: the payload of [`Expr::TryCast`], converting `expr`
/// to `data_type`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct TryCast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}
611
612impl TryCast {
613    /// Create a new TryCast expression
614    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
615        Self { expr, data_type }
616    }
617}
618
/// SORT expression: an expression to order by, together with its direction
/// and NULL placement. Displayed as `<expr> ASC|DESC NULLS FIRST|LAST`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Sort {
    /// The expression to sort on
    pub expr: Expr,
    /// The direction of the sort (`true` for ascending)
    pub asc: bool,
    /// Whether to put Nulls before all other data values
    pub nulls_first: bool,
}
629
630impl Sort {
631    /// Create a new Sort expression
632    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
633        Self {
634            expr,
635            asc,
636            nulls_first,
637        }
638    }
639
640    /// Create a new Sort expression with the opposite sort direction
641    pub fn reverse(&self) -> Self {
642        Self {
643            expr: self.expr.clone(),
644            asc: !self.asc,
645            nulls_first: !self.nulls_first,
646        }
647    }
648
649    /// Replaces the Sort expressions with `expr`
650    pub fn with_expr(&self, expr: Expr) -> Self {
651        Self {
652            expr,
653            asc: self.asc,
654            nulls_first: self.nulls_first,
655        }
656    }
657}
658
659impl Display for Sort {
660    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
661        write!(f, "{}", self.expr)?;
662        if self.asc {
663            write!(f, " ASC")?;
664        } else {
665            write!(f, " DESC")?;
666        }
667        if self.nulls_first {
668            write!(f, " NULLS FIRST")?;
669        } else {
670            write!(f, " NULLS LAST")?;
671        }
672        Ok(())
673    }
674}
675
676impl<'a> TreeNodeContainer<'a, Expr> for Sort {
677    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
678        &'a self,
679        f: F,
680    ) -> Result<TreeNodeRecursion> {
681        self.expr.apply_elements(f)
682    }
683
684    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
685        self,
686        f: F,
687    ) -> Result<Transformed<Self>> {
688        self.expr
689            .map_elements(f)?
690            .map_data(|expr| Ok(Self { expr, ..self }))
691    }
692}
693
/// Aggregate function
///
/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunction {
    /// The aggregate function being called
    pub func: Arc<crate::AggregateUDF>,
    /// Arguments and modifiers (distinct, filter, ordering, null treatment)
    pub params: AggregateFunctionParams,
}
705
/// Arguments and modifiers of an [`AggregateFunction`] call
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunctionParams {
    /// List of expressions to feed to the function as arguments
    pub args: Vec<Expr>,
    /// Whether this is a DISTINCT aggregation or not
    pub distinct: bool,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ordering
    pub order_by: Option<Vec<Sort>>,
    /// Optional NULL treatment
    pub null_treatment: Option<NullTreatment>,
}
717
718impl AggregateFunction {
719    /// Create a new AggregateFunction expression with a user-defined function (UDF)
720    pub fn new_udf(
721        func: Arc<crate::AggregateUDF>,
722        args: Vec<Expr>,
723        distinct: bool,
724        filter: Option<Box<Expr>>,
725        order_by: Option<Vec<Sort>>,
726        null_treatment: Option<NullTreatment>,
727    ) -> Self {
728        Self {
729            func,
730            params: AggregateFunctionParams {
731                args,
732                distinct,
733                filter,
734                order_by,
735                null_treatment,
736            },
737        }
738    }
739}
740
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`](crate::AggregateUDF))
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<crate::AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
753
754impl WindowFunctionDefinition {
755    /// Returns the datatype of the window function
756    pub fn return_type(
757        &self,
758        input_expr_types: &[DataType],
759        _input_expr_nullable: &[bool],
760        display_name: &str,
761    ) -> Result<DataType> {
762        match self {
763            WindowFunctionDefinition::AggregateUDF(fun) => {
764                fun.return_type(input_expr_types)
765            }
766            WindowFunctionDefinition::WindowUDF(fun) => fun
767                .field(WindowUDFFieldArgs::new(input_expr_types, display_name))
768                .map(|field| field.data_type().clone()),
769        }
770    }
771
772    /// The signatures supported by the function `fun`.
773    pub fn signature(&self) -> Signature {
774        match self {
775            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
776            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
777        }
778    }
779
780    /// Function's name for display
781    pub fn name(&self) -> &str {
782        match self {
783            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
784            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
785        }
786    }
787}
788
789impl Display for WindowFunctionDefinition {
790    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
791        match self {
792            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
793            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
794        }
795    }
796}
797
798impl From<Arc<crate::AggregateUDF>> for WindowFunctionDefinition {
799    fn from(value: Arc<crate::AggregateUDF>) -> Self {
800        Self::AggregateUDF(value)
801    }
802}
803
804impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
805    fn from(value: Arc<WindowUDF>) -> Self {
806        Self::WindowUDF(value)
807    }
808}
809
/// Window function
///
/// Holds the actual function to call ([`WindowFunctionDefinition`]) as well
/// as its arguments (`args`) and the contents of the `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to call
    pub fun: WindowFunctionDefinition,
    /// Arguments and the contents of the `OVER` clause
    pub params: WindowFunctionParams,
}
828
/// Arguments and `OVER` clause contents of a [`WindowFunction`] call
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// List of partition by expressions
    pub partition_by: Vec<Expr>,
    /// List of order by expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Specifies how NULL value is treated: ignore or respect
    pub null_treatment: Option<NullTreatment>,
}
842
843impl WindowFunction {
844    /// Create a new Window expression with the specified argument an
845    /// empty `OVER` clause
846    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
847        Self {
848            fun: fun.into(),
849            params: WindowFunctionParams {
850                args,
851                partition_by: Vec::default(),
852                order_by: Vec::default(),
853                window_frame: WindowFrame::new(None),
854                null_treatment: None,
855            },
856        }
857    }
858}
859
/// EXISTS expression: the payload of [`Expr::Exists`]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Exists {
    /// Subquery that will produce a single column of data
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
868
869impl Exists {
870    // Create a new Exists expression.
871    pub fn new(subquery: Subquery, negated: bool) -> Self {
872        Self { subquery, negated }
873    }
874}
875
/// User Defined Aggregate Function call: a [`udaf::AggregateUDF`] together
/// with its arguments, optional filter and optional ordering.
///
/// See [`udaf::AggregateUDF`] for more information.
// NOTE(review): unlike the other expression structs in this file, this one
// does not derive `PartialOrd`.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AggregateUDF {
    /// The function
    pub fun: Arc<udaf::AggregateUDF>,
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ORDER BY applied prior to aggregating
    pub order_by: Option<Vec<Expr>>,
}
890
891impl AggregateUDF {
892    /// Create a new AggregateUDF expression
893    pub fn new(
894        fun: Arc<udaf::AggregateUDF>,
895        args: Vec<Expr>,
896        filter: Option<Box<Expr>>,
897        order_by: Option<Vec<Expr>>,
898    ) -> Self {
899        Self {
900            fun,
901            args,
902            filter,
903            order_by,
904        }
905    }
906}
907
/// InList expression: the payload of [`Expr::InList`], testing whether
/// `list` contains the value of `expr`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InList {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// The list of values to compare against
    pub list: Vec<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
}
918
919impl InList {
920    /// Create a new InList expression
921    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
922        Self {
923            expr,
924            list,
925            negated,
926        }
927    }
928}
929
/// IN subquery: the payload of [`Expr::InSubquery`]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InSubquery {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
940
941impl InSubquery {
942    /// Create a new InSubquery expression
943    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
944        Self {
945            expr,
946            subquery,
947            negated,
948        }
949    }
950}
951
952/// Placeholder, representing bind parameter values such as `$1` or `$name`.
953///
954/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
955/// or can be specified directly using `PREPARE` statements.
956#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
957pub struct Placeholder {
958    /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`)
959    pub id: String,
960    /// The type the parameter will be filled in with
961    pub data_type: Option<DataType>,
962}
963
964impl Placeholder {
965    /// Create a new Placeholder expression
966    pub fn new(id: String, data_type: Option<DataType>) -> Self {
967        Self { id, data_type }
968    }
969}
970
971/// Grouping sets
972///
973/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
974/// for Postgres definition.
975/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
976/// for Apache Spark definition.
977#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
978pub enum GroupingSet {
979    /// Rollup grouping sets
980    Rollup(Vec<Expr>),
981    /// Cube grouping sets
982    Cube(Vec<Expr>),
983    /// User-defined grouping sets
984    GroupingSets(Vec<Vec<Expr>>),
985}
986
987impl GroupingSet {
988    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
989    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
990    /// the exprs in the underlying sets.
991    pub fn distinct_expr(&self) -> Vec<&Expr> {
992        match self {
993            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
994                exprs.iter().collect()
995            }
996            GroupingSet::GroupingSets(groups) => {
997                let mut exprs: Vec<&Expr> = vec![];
998                for exp in groups.iter().flatten() {
999                    if !exprs.contains(&exp) {
1000                        exprs.push(exp);
1001                    }
1002                }
1003                exprs
1004            }
1005        }
1006    }
1007}
1008
1009/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
1010#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1011pub struct WildcardOptions {
1012    /// `[ILIKE...]`.
1013    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1014    pub ilike: Option<IlikeSelectItem>,
1015    /// `[EXCLUDE...]`.
1016    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1017    pub exclude: Option<ExcludeSelectItem>,
1018    /// `[EXCEPT...]`.
1019    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
1020    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
1021    pub except: Option<ExceptSelectItem>,
1022    /// `[REPLACE]`
1023    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
1024    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
1025    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1026    pub replace: Option<PlannedReplaceSelectItem>,
1027    /// `[RENAME ...]`.
1028    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1029    pub rename: Option<RenameSelectItem>,
1030}
1031
1032impl WildcardOptions {
1033    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1034        WildcardOptions {
1035            ilike: self.ilike,
1036            exclude: self.exclude,
1037            except: self.except,
1038            replace: Some(replace),
1039            rename: self.rename,
1040        }
1041    }
1042}
1043
1044impl Display for WildcardOptions {
1045    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1046        if let Some(ilike) = &self.ilike {
1047            write!(f, " {ilike}")?;
1048        }
1049        if let Some(exclude) = &self.exclude {
1050            write!(f, " {exclude}")?;
1051        }
1052        if let Some(except) = &self.except {
1053            write!(f, " {except}")?;
1054        }
1055        if let Some(replace) = &self.replace {
1056            write!(f, " {replace}")?;
1057        }
1058        if let Some(rename) = &self.rename {
1059            write!(f, " {rename}")?;
1060        }
1061        Ok(())
1062    }
1063}
1064
1065/// The planned expressions for `REPLACE`
1066#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1067pub struct PlannedReplaceSelectItem {
1068    /// The original ast nodes
1069    pub items: Vec<ReplaceSelectElement>,
1070    /// The expression planned from the ast nodes. They will be used when expanding the wildcard.
1071    pub planned_expressions: Vec<Expr>,
1072}
1073
1074impl Display for PlannedReplaceSelectItem {
1075    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1076        write!(f, "REPLACE")?;
1077        write!(f, " ({})", display_comma_separated(&self.items))?;
1078        Ok(())
1079    }
1080}
1081
1082impl PlannedReplaceSelectItem {
1083    pub fn items(&self) -> &[ReplaceSelectElement] {
1084        &self.items
1085    }
1086
1087    pub fn expressions(&self) -> &[Expr] {
1088        &self.planned_expressions
1089    }
1090}
1091
1092impl Expr {
1093    #[deprecated(since = "40.0.0", note = "use schema_name instead")]
1094    pub fn display_name(&self) -> Result<String> {
1095        Ok(self.schema_name().to_string())
1096    }
1097
1098    /// The name of the column (field) that this `Expr` will produce.
1099    ///
1100    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
1101    /// [`Schema`] will have a field with this name.
1102    ///
1103    /// Note that the resulting string is subtlety different from the `Display`
1104    /// representation for certain `Expr`. Some differences:
1105    ///
1106    /// 1. [`Expr::Alias`], which shows only the alias itself
1107    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
1108    ///
1109    /// # Example
1110    /// ```
1111    /// # use datafusion_expr::{col, lit};
1112    /// let expr = col("foo").eq(lit(42));
1113    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
1114    ///
1115    /// let expr = col("foo").alias("bar").eq(lit(11));
1116    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
1117    /// ```
1118    ///
1119    /// [`Schema`]: arrow::datatypes::Schema
1120    pub fn schema_name(&self) -> impl Display + '_ {
1121        SchemaDisplay(self)
1122    }
1123
1124    /// Returns the qualifier and the schema name of this expression.
1125    ///
1126    /// Used when the expression forms the output field of a certain plan.
1127    /// The result is the field's qualifier and field name in the plan's
1128    /// output schema. We can use this qualified name to reference the field.
1129    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1130        match self {
1131            Expr::Column(Column {
1132                relation,
1133                name,
1134                spans: _,
1135            }) => (relation.clone(), name.clone()),
1136            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1137            _ => (None, self.schema_name().to_string()),
1138        }
1139    }
1140
1141    /// Returns a full and complete string representation of this expression.
1142    #[deprecated(since = "42.0.0", note = "use format! instead")]
1143    pub fn canonical_name(&self) -> String {
1144        format!("{self}")
1145    }
1146
1147    /// Return String representation of the variant represented by `self`
1148    /// Useful for non-rust based bindings
1149    pub fn variant_name(&self) -> &str {
1150        match self {
1151            Expr::AggregateFunction { .. } => "AggregateFunction",
1152            Expr::Alias(..) => "Alias",
1153            Expr::Between { .. } => "Between",
1154            Expr::BinaryExpr { .. } => "BinaryExpr",
1155            Expr::Case { .. } => "Case",
1156            Expr::Cast { .. } => "Cast",
1157            Expr::Column(..) => "Column",
1158            Expr::OuterReferenceColumn(_, _) => "Outer",
1159            Expr::Exists { .. } => "Exists",
1160            Expr::GroupingSet(..) => "GroupingSet",
1161            Expr::InList { .. } => "InList",
1162            Expr::InSubquery(..) => "InSubquery",
1163            Expr::IsNotNull(..) => "IsNotNull",
1164            Expr::IsNull(..) => "IsNull",
1165            Expr::Like { .. } => "Like",
1166            Expr::SimilarTo { .. } => "RLike",
1167            Expr::IsTrue(..) => "IsTrue",
1168            Expr::IsFalse(..) => "IsFalse",
1169            Expr::IsUnknown(..) => "IsUnknown",
1170            Expr::IsNotTrue(..) => "IsNotTrue",
1171            Expr::IsNotFalse(..) => "IsNotFalse",
1172            Expr::IsNotUnknown(..) => "IsNotUnknown",
1173            Expr::Literal(..) => "Literal",
1174            Expr::Negative(..) => "Negative",
1175            Expr::Not(..) => "Not",
1176            Expr::Placeholder(_) => "Placeholder",
1177            Expr::ScalarFunction(..) => "ScalarFunction",
1178            Expr::ScalarSubquery { .. } => "ScalarSubquery",
1179            Expr::ScalarVariable(..) => "ScalarVariable",
1180            Expr::TryCast { .. } => "TryCast",
1181            Expr::WindowFunction { .. } => "WindowFunction",
1182            #[expect(deprecated)]
1183            Expr::Wildcard { .. } => "Wildcard",
1184            Expr::Unnest { .. } => "Unnest",
1185        }
1186    }
1187
1188    /// Return `self == other`
1189    pub fn eq(self, other: Expr) -> Expr {
1190        binary_expr(self, Operator::Eq, other)
1191    }
1192
1193    /// Return `self != other`
1194    pub fn not_eq(self, other: Expr) -> Expr {
1195        binary_expr(self, Operator::NotEq, other)
1196    }
1197
1198    /// Return `self > other`
1199    pub fn gt(self, other: Expr) -> Expr {
1200        binary_expr(self, Operator::Gt, other)
1201    }
1202
1203    /// Return `self >= other`
1204    pub fn gt_eq(self, other: Expr) -> Expr {
1205        binary_expr(self, Operator::GtEq, other)
1206    }
1207
1208    /// Return `self < other`
1209    pub fn lt(self, other: Expr) -> Expr {
1210        binary_expr(self, Operator::Lt, other)
1211    }
1212
1213    /// Return `self <= other`
1214    pub fn lt_eq(self, other: Expr) -> Expr {
1215        binary_expr(self, Operator::LtEq, other)
1216    }
1217
1218    /// Return `self && other`
1219    pub fn and(self, other: Expr) -> Expr {
1220        binary_expr(self, Operator::And, other)
1221    }
1222
1223    /// Return `self || other`
1224    pub fn or(self, other: Expr) -> Expr {
1225        binary_expr(self, Operator::Or, other)
1226    }
1227
1228    /// Return `self LIKE other`
1229    pub fn like(self, other: Expr) -> Expr {
1230        Expr::Like(Like::new(
1231            false,
1232            Box::new(self),
1233            Box::new(other),
1234            None,
1235            false,
1236        ))
1237    }
1238
1239    /// Return `self NOT LIKE other`
1240    pub fn not_like(self, other: Expr) -> Expr {
1241        Expr::Like(Like::new(
1242            true,
1243            Box::new(self),
1244            Box::new(other),
1245            None,
1246            false,
1247        ))
1248    }
1249
1250    /// Return `self ILIKE other`
1251    pub fn ilike(self, other: Expr) -> Expr {
1252        Expr::Like(Like::new(
1253            false,
1254            Box::new(self),
1255            Box::new(other),
1256            None,
1257            true,
1258        ))
1259    }
1260
1261    /// Return `self NOT ILIKE other`
1262    pub fn not_ilike(self, other: Expr) -> Expr {
1263        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1264    }
1265
1266    /// Return the name to use for the specific Expr
1267    pub fn name_for_alias(&self) -> Result<String> {
1268        Ok(self.schema_name().to_string())
1269    }
1270
1271    /// Ensure `expr` has the name as `original_name` by adding an
1272    /// alias if necessary.
1273    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1274        let new_name = self.name_for_alias()?;
1275        if new_name == original_name {
1276            return Ok(self);
1277        }
1278
1279        Ok(self.alias(original_name))
1280    }
1281
1282    /// Return `self AS name` alias expression
1283    pub fn alias(self, name: impl Into<String>) -> Expr {
1284        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1285    }
1286
1287    /// Return `self AS name` alias expression with a specific qualifier
1288    pub fn alias_qualified(
1289        self,
1290        relation: Option<impl Into<TableReference>>,
1291        name: impl Into<String>,
1292    ) -> Expr {
1293        Expr::Alias(Alias::new(self, relation, name.into()))
1294    }
1295
1296    /// Remove an alias from an expression if one exists.
1297    ///
1298    /// If the expression is not an alias, the expression is returned unchanged.
1299    /// This method does not remove aliases from nested expressions.
1300    ///
1301    /// # Example
1302    /// ```
1303    /// # use datafusion_expr::col;
1304    /// // `foo as "bar"` is unaliased to `foo`
1305    /// let expr = col("foo").alias("bar");
1306    /// assert_eq!(expr.unalias(), col("foo"));
1307    ///
1308    /// // `foo as "bar" + baz` is not unaliased
1309    /// let expr = col("foo").alias("bar") + col("baz");
1310    /// assert_eq!(expr.clone().unalias(), expr);
1311    ///
1312    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1313    /// let expr = col("foo").alias("bar").alias("baz");
1314    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1315    /// ```
1316    pub fn unalias(self) -> Expr {
1317        match self {
1318            Expr::Alias(alias) => *alias.expr,
1319            _ => self,
1320        }
1321    }
1322
1323    /// Recursively removed potentially multiple aliases from an expression.
1324    ///
1325    /// This method removes nested aliases and returns [`Transformed`]
1326    /// to signal if the expression was changed.
1327    ///
1328    /// # Example
1329    /// ```
1330    /// # use datafusion_expr::col;
1331    /// // `foo as "bar"` is unaliased to `foo`
1332    /// let expr = col("foo").alias("bar");
1333    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1334    ///
1335    /// // `foo as "bar" + baz` is  unaliased
1336    /// let expr = col("foo").alias("bar") + col("baz");
1337    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1338    ///
1339    /// // `foo as "bar" as "baz" is unalaised to foo
1340    /// let expr = col("foo").alias("bar").alias("baz");
1341    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1342    /// ```
1343    pub fn unalias_nested(self) -> Transformed<Expr> {
1344        self.transform_down_up(
1345            |expr| {
1346                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1347                let recursion = if matches!(
1348                    expr,
1349                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1350                ) {
1351                    // Subqueries could contain aliases so don't recurse into those
1352                    TreeNodeRecursion::Jump
1353                } else {
1354                    TreeNodeRecursion::Continue
1355                };
1356                Ok(Transformed::new(expr, false, recursion))
1357            },
1358            |expr| {
1359                // f_up: unalias on up so we can remove nested aliases like
1360                // `(x as foo) as bar`
1361                if let Expr::Alias(Alias { expr, .. }) = expr {
1362                    Ok(Transformed::yes(*expr))
1363                } else {
1364                    Ok(Transformed::no(expr))
1365                }
1366            },
1367        )
1368        // Unreachable code: internal closure doesn't return err
1369        .unwrap()
1370    }
1371
1372    /// Return `self IN <list>` if `negated` is false, otherwise
1373    /// return `self NOT IN <list>`.a
1374    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1375        Expr::InList(InList::new(Box::new(self), list, negated))
1376    }
1377
1378    /// Return `IsNull(Box(self))
1379    pub fn is_null(self) -> Expr {
1380        Expr::IsNull(Box::new(self))
1381    }
1382
1383    /// Return `IsNotNull(Box(self))
1384    pub fn is_not_null(self) -> Expr {
1385        Expr::IsNotNull(Box::new(self))
1386    }
1387
1388    /// Create a sort configuration from an existing expression.
1389    ///
1390    /// ```
1391    /// # use datafusion_expr::col;
1392    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1393    /// ```
1394    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1395        Sort::new(self, asc, nulls_first)
1396    }
1397
1398    /// Return `IsTrue(Box(self))`
1399    pub fn is_true(self) -> Expr {
1400        Expr::IsTrue(Box::new(self))
1401    }
1402
1403    /// Return `IsNotTrue(Box(self))`
1404    pub fn is_not_true(self) -> Expr {
1405        Expr::IsNotTrue(Box::new(self))
1406    }
1407
1408    /// Return `IsFalse(Box(self))`
1409    pub fn is_false(self) -> Expr {
1410        Expr::IsFalse(Box::new(self))
1411    }
1412
1413    /// Return `IsNotFalse(Box(self))`
1414    pub fn is_not_false(self) -> Expr {
1415        Expr::IsNotFalse(Box::new(self))
1416    }
1417
1418    /// Return `IsUnknown(Box(self))`
1419    pub fn is_unknown(self) -> Expr {
1420        Expr::IsUnknown(Box::new(self))
1421    }
1422
1423    /// Return `IsNotUnknown(Box(self))`
1424    pub fn is_not_unknown(self) -> Expr {
1425        Expr::IsNotUnknown(Box::new(self))
1426    }
1427
1428    /// return `self BETWEEN low AND high`
1429    pub fn between(self, low: Expr, high: Expr) -> Expr {
1430        Expr::Between(Between::new(
1431            Box::new(self),
1432            false,
1433            Box::new(low),
1434            Box::new(high),
1435        ))
1436    }
1437
1438    /// Return `self NOT BETWEEN low AND high`
1439    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1440        Expr::Between(Between::new(
1441            Box::new(self),
1442            true,
1443            Box::new(low),
1444            Box::new(high),
1445        ))
1446    }
1447
1448    #[deprecated(since = "39.0.0", note = "use try_as_col instead")]
1449    pub fn try_into_col(&self) -> Result<Column> {
1450        match self {
1451            Expr::Column(it) => Ok(it.clone()),
1452            _ => plan_err!("Could not coerce '{self}' into Column!"),
1453        }
1454    }
1455
1456    /// Return a reference to the inner `Column` if any
1457    ///
1458    /// returns `None` if the expression is not a `Column`
1459    ///
1460    /// Note: None may be returned for expressions that are not `Column` but
1461    /// are convertible to `Column` such as `Cast` expressions.
1462    ///
1463    /// Example
1464    /// ```
1465    /// # use datafusion_common::Column;
1466    /// use datafusion_expr::{col, Expr};
1467    /// let expr = col("foo");
1468    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1469    ///
1470    /// let expr = col("foo").alias("bar");
1471    /// assert_eq!(expr.try_as_col(), None);
1472    /// ```
1473    pub fn try_as_col(&self) -> Option<&Column> {
1474        if let Expr::Column(it) = self {
1475            Some(it)
1476        } else {
1477            None
1478        }
1479    }
1480
1481    /// Returns the inner `Column` if any. This is a specialized version of
1482    /// [`Self::try_as_col`] that take Cast expressions into account when the
1483    /// expression is as on condition for joins.
1484    ///
1485    /// Called this method when you are sure that the expression is a `Column`
1486    /// or a `Cast` expression that wraps a `Column`.
1487    pub fn get_as_join_column(&self) -> Option<&Column> {
1488        match self {
1489            Expr::Column(c) => Some(c),
1490            Expr::Cast(Cast { expr, .. }) => match &**expr {
1491                Expr::Column(c) => Some(c),
1492                _ => None,
1493            },
1494            _ => None,
1495        }
1496    }
1497
1498    /// Return all referenced columns of this expression.
1499    #[deprecated(since = "40.0.0", note = "use Expr::column_refs instead")]
1500    pub fn to_columns(&self) -> Result<HashSet<Column>> {
1501        let mut using_columns = HashSet::new();
1502        expr_to_columns(self, &mut using_columns)?;
1503
1504        Ok(using_columns)
1505    }
1506
1507    /// Return all references to columns in this expression.
1508    ///
1509    /// # Example
1510    /// ```
1511    /// # use std::collections::HashSet;
1512    /// # use datafusion_common::Column;
1513    /// # use datafusion_expr::col;
1514    /// // For an expression `a + (b * a)`
1515    /// let expr = col("a") + (col("b") * col("a"));
1516    /// let refs = expr.column_refs();
1517    /// // refs contains "a" and "b"
1518    /// assert_eq!(refs.len(), 2);
1519    /// assert!(refs.contains(&Column::new_unqualified("a")));
1520    /// assert!(refs.contains(&Column::new_unqualified("b")));
1521    /// ```
1522    pub fn column_refs(&self) -> HashSet<&Column> {
1523        let mut using_columns = HashSet::new();
1524        self.add_column_refs(&mut using_columns);
1525        using_columns
1526    }
1527
1528    /// Adds references to all columns in this expression to the set
1529    ///
1530    /// See [`Self::column_refs`] for details
1531    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1532        self.apply(|expr| {
1533            if let Expr::Column(col) = expr {
1534                set.insert(col);
1535            }
1536            Ok(TreeNodeRecursion::Continue)
1537        })
1538        .expect("traversal is infallible");
1539    }
1540
1541    /// Return all references to columns and their occurrence counts in the expression.
1542    ///
1543    /// # Example
1544    /// ```
1545    /// # use std::collections::HashMap;
1546    /// # use datafusion_common::Column;
1547    /// # use datafusion_expr::col;
1548    /// // For an expression `a + (b * a)`
1549    /// let expr = col("a") + (col("b") * col("a"));
1550    /// let mut refs = expr.column_refs_counts();
1551    /// // refs contains "a" and "b"
1552    /// assert_eq!(refs.len(), 2);
1553    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1554    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1555    /// ```
1556    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1557        let mut map = HashMap::new();
1558        self.add_column_ref_counts(&mut map);
1559        map
1560    }
1561
1562    /// Adds references to all columns and their occurrence counts in the expression to
1563    /// the map.
1564    ///
1565    /// See [`Self::column_refs_counts`] for details
1566    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1567        self.apply(|expr| {
1568            if let Expr::Column(col) = expr {
1569                *map.entry(col).or_default() += 1;
1570            }
1571            Ok(TreeNodeRecursion::Continue)
1572        })
1573        .expect("traversal is infallible");
1574    }
1575
1576    /// Returns true if there are any column references in this Expr
1577    pub fn any_column_refs(&self) -> bool {
1578        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
1579            .expect("exists closure is infallible")
1580    }
1581
1582    /// Return true if the expression contains out reference(correlated) expressions.
1583    pub fn contains_outer(&self) -> bool {
1584        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1585            .expect("exists closure is infallible")
1586    }
1587
1588    /// Returns true if the expression node is volatile, i.e. whether it can return
1589    /// different results when evaluated multiple times with the same input.
1590    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1591    /// - `rand()` returns `true`,
1592    /// - `a + rand()` returns `false`
1593    pub fn is_volatile_node(&self) -> bool {
1594        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
1595    }
1596
1597    /// Returns true if the expression is volatile, i.e. whether it can return different
1598    /// results when evaluated multiple times with the same input.
1599    ///
1600    /// For example the function call `RANDOM()` is volatile as each call will
1601    /// return a different value.
1602    ///
1603    /// See [`Volatility`] for more information.
1604    pub fn is_volatile(&self) -> bool {
1605        self.exists(|expr| Ok(expr.is_volatile_node()))
1606            .expect("exists closure is infallible")
1607    }
1608
1609    /// Recursively find all [`Expr::Placeholder`] expressions, and
1610    /// to infer their [`DataType`] from the context of their use.
1611    ///
1612    /// For example, given an expression like `<int32> = $0` will infer `$0` to
1613    /// have type `int32`.
1614    ///
1615    /// Returns transformed expression and flag that is true if expression contains
1616    /// at least one placeholder.
1617    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
1618        let mut has_placeholder = false;
1619        self.transform(|mut expr| {
1620            // Default to assuming the arguments are the same type
1621            if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = &mut expr {
1622                rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
1623                rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
1624            };
1625            if let Expr::Between(Between {
1626                expr,
1627                negated: _,
1628                low,
1629                high,
1630            }) = &mut expr
1631            {
1632                rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
1633                rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
1634            }
1635            if let Expr::Placeholder(_) = &expr {
1636                has_placeholder = true;
1637            }
1638            Ok(Transformed::yes(expr))
1639        })
1640        .data()
1641        .map(|data| (data, has_placeholder))
1642    }
1643
1644    /// Returns true if some of this `exprs` subexpressions may not be evaluated
1645    /// and thus any side effects (like divide by zero) may not be encountered
1646    pub fn short_circuits(&self) -> bool {
1647        match self {
1648            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
1649            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
1650                matches!(op, Operator::And | Operator::Or)
1651            }
1652            Expr::Case { .. } => true,
1653            // Use explicit pattern match instead of a default
1654            // implementation, so that in the future if someone adds
1655            // new Expr types, they will check here as well
1656            // TODO: remove the next line after `Expr::Wildcard` is removed
1657            #[expect(deprecated)]
1658            Expr::AggregateFunction(..)
1659            | Expr::Alias(..)
1660            | Expr::Between(..)
1661            | Expr::Cast(..)
1662            | Expr::Column(..)
1663            | Expr::Exists(..)
1664            | Expr::GroupingSet(..)
1665            | Expr::InList(..)
1666            | Expr::InSubquery(..)
1667            | Expr::IsFalse(..)
1668            | Expr::IsNotFalse(..)
1669            | Expr::IsNotNull(..)
1670            | Expr::IsNotTrue(..)
1671            | Expr::IsNotUnknown(..)
1672            | Expr::IsNull(..)
1673            | Expr::IsTrue(..)
1674            | Expr::IsUnknown(..)
1675            | Expr::Like(..)
1676            | Expr::ScalarSubquery(..)
1677            | Expr::ScalarVariable(_, _)
1678            | Expr::SimilarTo(..)
1679            | Expr::Not(..)
1680            | Expr::Negative(..)
1681            | Expr::OuterReferenceColumn(_, _)
1682            | Expr::TryCast(..)
1683            | Expr::Unnest(..)
1684            | Expr::Wildcard { .. }
1685            | Expr::WindowFunction(..)
1686            | Expr::Literal(..)
1687            | Expr::Placeholder(..) => false,
1688        }
1689    }
1690
1691    /// Returns a reference to the set of locations in the SQL query where this
1692    /// expression appears, if known. [`None`] is returned if the expression
1693    /// type doesn't support tracking locations yet.
1694    pub fn spans(&self) -> Option<&Spans> {
1695        match self {
1696            Expr::Column(col) => Some(&col.spans),
1697            _ => None,
1698        }
1699    }
1700}
1701
1702impl Normalizeable for Expr {
1703    fn can_normalize(&self) -> bool {
1704        #[allow(clippy::match_like_matches_macro)]
1705        match self {
1706            Expr::BinaryExpr(BinaryExpr {
1707                op:
1708                    _op @ (Operator::Plus
1709                    | Operator::Multiply
1710                    | Operator::BitwiseAnd
1711                    | Operator::BitwiseOr
1712                    | Operator::BitwiseXor
1713                    | Operator::Eq
1714                    | Operator::NotEq),
1715                ..
1716            }) => true,
1717            _ => false,
1718        }
1719    }
1720}
1721
1722impl NormalizeEq for Expr {
1723    fn normalize_eq(&self, other: &Self) -> bool {
1724        match (self, other) {
1725            (
1726                Expr::BinaryExpr(BinaryExpr {
1727                    left: self_left,
1728                    op: self_op,
1729                    right: self_right,
1730                }),
1731                Expr::BinaryExpr(BinaryExpr {
1732                    left: other_left,
1733                    op: other_op,
1734                    right: other_right,
1735                }),
1736            ) => {
1737                if self_op != other_op {
1738                    return false;
1739                }
1740
1741                if matches!(
1742                    self_op,
1743                    Operator::Plus
1744                        | Operator::Multiply
1745                        | Operator::BitwiseAnd
1746                        | Operator::BitwiseOr
1747                        | Operator::BitwiseXor
1748                        | Operator::Eq
1749                        | Operator::NotEq
1750                ) {
1751                    (self_left.normalize_eq(other_left)
1752                        && self_right.normalize_eq(other_right))
1753                        || (self_left.normalize_eq(other_right)
1754                            && self_right.normalize_eq(other_left))
1755                } else {
1756                    self_left.normalize_eq(other_left)
1757                        && self_right.normalize_eq(other_right)
1758                }
1759            }
1760            (
1761                Expr::Alias(Alias {
1762                    expr: self_expr,
1763                    relation: self_relation,
1764                    name: self_name,
1765                }),
1766                Expr::Alias(Alias {
1767                    expr: other_expr,
1768                    relation: other_relation,
1769                    name: other_name,
1770                }),
1771            ) => {
1772                self_name == other_name
1773                    && self_relation == other_relation
1774                    && self_expr.normalize_eq(other_expr)
1775            }
1776            (
1777                Expr::Like(Like {
1778                    negated: self_negated,
1779                    expr: self_expr,
1780                    pattern: self_pattern,
1781                    escape_char: self_escape_char,
1782                    case_insensitive: self_case_insensitive,
1783                }),
1784                Expr::Like(Like {
1785                    negated: other_negated,
1786                    expr: other_expr,
1787                    pattern: other_pattern,
1788                    escape_char: other_escape_char,
1789                    case_insensitive: other_case_insensitive,
1790                }),
1791            )
1792            | (
1793                Expr::SimilarTo(Like {
1794                    negated: self_negated,
1795                    expr: self_expr,
1796                    pattern: self_pattern,
1797                    escape_char: self_escape_char,
1798                    case_insensitive: self_case_insensitive,
1799                }),
1800                Expr::SimilarTo(Like {
1801                    negated: other_negated,
1802                    expr: other_expr,
1803                    pattern: other_pattern,
1804                    escape_char: other_escape_char,
1805                    case_insensitive: other_case_insensitive,
1806                }),
1807            ) => {
1808                self_negated == other_negated
1809                    && self_escape_char == other_escape_char
1810                    && self_case_insensitive == other_case_insensitive
1811                    && self_expr.normalize_eq(other_expr)
1812                    && self_pattern.normalize_eq(other_pattern)
1813            }
1814            (Expr::Not(self_expr), Expr::Not(other_expr))
1815            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
1816            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
1817            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
1818            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
1819            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
1820            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
1821            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
1822            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
1823            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
1824            | (
1825                Expr::Unnest(Unnest { expr: self_expr }),
1826                Expr::Unnest(Unnest { expr: other_expr }),
1827            ) => self_expr.normalize_eq(other_expr),
1828            (
1829                Expr::Between(Between {
1830                    expr: self_expr,
1831                    negated: self_negated,
1832                    low: self_low,
1833                    high: self_high,
1834                }),
1835                Expr::Between(Between {
1836                    expr: other_expr,
1837                    negated: other_negated,
1838                    low: other_low,
1839                    high: other_high,
1840                }),
1841            ) => {
1842                self_negated == other_negated
1843                    && self_expr.normalize_eq(other_expr)
1844                    && self_low.normalize_eq(other_low)
1845                    && self_high.normalize_eq(other_high)
1846            }
1847            (
1848                Expr::Cast(Cast {
1849                    expr: self_expr,
1850                    data_type: self_data_type,
1851                }),
1852                Expr::Cast(Cast {
1853                    expr: other_expr,
1854                    data_type: other_data_type,
1855                }),
1856            )
1857            | (
1858                Expr::TryCast(TryCast {
1859                    expr: self_expr,
1860                    data_type: self_data_type,
1861                }),
1862                Expr::TryCast(TryCast {
1863                    expr: other_expr,
1864                    data_type: other_data_type,
1865                }),
1866            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
1867            (
1868                Expr::ScalarFunction(ScalarFunction {
1869                    func: self_func,
1870                    args: self_args,
1871                }),
1872                Expr::ScalarFunction(ScalarFunction {
1873                    func: other_func,
1874                    args: other_args,
1875                }),
1876            ) => {
1877                self_func.name() == other_func.name()
1878                    && self_args.len() == other_args.len()
1879                    && self_args
1880                        .iter()
1881                        .zip(other_args.iter())
1882                        .all(|(a, b)| a.normalize_eq(b))
1883            }
1884            (
1885                Expr::AggregateFunction(AggregateFunction {
1886                    func: self_func,
1887                    params:
1888                        AggregateFunctionParams {
1889                            args: self_args,
1890                            distinct: self_distinct,
1891                            filter: self_filter,
1892                            order_by: self_order_by,
1893                            null_treatment: self_null_treatment,
1894                        },
1895                }),
1896                Expr::AggregateFunction(AggregateFunction {
1897                    func: other_func,
1898                    params:
1899                        AggregateFunctionParams {
1900                            args: other_args,
1901                            distinct: other_distinct,
1902                            filter: other_filter,
1903                            order_by: other_order_by,
1904                            null_treatment: other_null_treatment,
1905                        },
1906                }),
1907            ) => {
1908                self_func.name() == other_func.name()
1909                    && self_distinct == other_distinct
1910                    && self_null_treatment == other_null_treatment
1911                    && self_args.len() == other_args.len()
1912                    && self_args
1913                        .iter()
1914                        .zip(other_args.iter())
1915                        .all(|(a, b)| a.normalize_eq(b))
1916                    && match (self_filter, other_filter) {
1917                        (Some(self_filter), Some(other_filter)) => {
1918                            self_filter.normalize_eq(other_filter)
1919                        }
1920                        (None, None) => true,
1921                        _ => false,
1922                    }
1923                    && match (self_order_by, other_order_by) {
1924                        (Some(self_order_by), Some(other_order_by)) => self_order_by
1925                            .iter()
1926                            .zip(other_order_by.iter())
1927                            .all(|(a, b)| {
1928                                a.asc == b.asc
1929                                    && a.nulls_first == b.nulls_first
1930                                    && a.expr.normalize_eq(&b.expr)
1931                            }),
1932                        (None, None) => true,
1933                        _ => false,
1934                    }
1935            }
1936            (
1937                Expr::WindowFunction(WindowFunction {
1938                    fun: self_fun,
1939                    params: self_params,
1940                }),
1941                Expr::WindowFunction(WindowFunction {
1942                    fun: other_fun,
1943                    params: other_params,
1944                }),
1945            ) => {
1946                let (
1947                    WindowFunctionParams {
1948                        args: self_args,
1949                        window_frame: self_window_frame,
1950                        partition_by: self_partition_by,
1951                        order_by: self_order_by,
1952                        null_treatment: self_null_treatment,
1953                    },
1954                    WindowFunctionParams {
1955                        args: other_args,
1956                        window_frame: other_window_frame,
1957                        partition_by: other_partition_by,
1958                        order_by: other_order_by,
1959                        null_treatment: other_null_treatment,
1960                    },
1961                ) = (self_params, other_params);
1962
1963                self_fun.name() == other_fun.name()
1964                    && self_window_frame == other_window_frame
1965                    && self_null_treatment == other_null_treatment
1966                    && self_args.len() == other_args.len()
1967                    && self_args
1968                        .iter()
1969                        .zip(other_args.iter())
1970                        .all(|(a, b)| a.normalize_eq(b))
1971                    && self_partition_by
1972                        .iter()
1973                        .zip(other_partition_by.iter())
1974                        .all(|(a, b)| a.normalize_eq(b))
1975                    && self_order_by
1976                        .iter()
1977                        .zip(other_order_by.iter())
1978                        .all(|(a, b)| {
1979                            a.asc == b.asc
1980                                && a.nulls_first == b.nulls_first
1981                                && a.expr.normalize_eq(&b.expr)
1982                        })
1983            }
1984            (
1985                Expr::Exists(Exists {
1986                    subquery: self_subquery,
1987                    negated: self_negated,
1988                }),
1989                Expr::Exists(Exists {
1990                    subquery: other_subquery,
1991                    negated: other_negated,
1992                }),
1993            ) => {
1994                self_negated == other_negated
1995                    && self_subquery.normalize_eq(other_subquery)
1996            }
1997            (
1998                Expr::InSubquery(InSubquery {
1999                    expr: self_expr,
2000                    subquery: self_subquery,
2001                    negated: self_negated,
2002                }),
2003                Expr::InSubquery(InSubquery {
2004                    expr: other_expr,
2005                    subquery: other_subquery,
2006                    negated: other_negated,
2007                }),
2008            ) => {
2009                self_negated == other_negated
2010                    && self_expr.normalize_eq(other_expr)
2011                    && self_subquery.normalize_eq(other_subquery)
2012            }
2013            (
2014                Expr::ScalarSubquery(self_subquery),
2015                Expr::ScalarSubquery(other_subquery),
2016            ) => self_subquery.normalize_eq(other_subquery),
2017            (
2018                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2019                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2020            )
2021            | (
2022                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2023                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2024            ) => {
2025                self_exprs.len() == other_exprs.len()
2026                    && self_exprs
2027                        .iter()
2028                        .zip(other_exprs.iter())
2029                        .all(|(a, b)| a.normalize_eq(b))
2030            }
2031            (
2032                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2033                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2034            ) => {
2035                self_exprs.len() == other_exprs.len()
2036                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2037                        a.len() == b.len()
2038                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2039                    })
2040            }
2041            (
2042                Expr::InList(InList {
2043                    expr: self_expr,
2044                    list: self_list,
2045                    negated: self_negated,
2046                }),
2047                Expr::InList(InList {
2048                    expr: other_expr,
2049                    list: other_list,
2050                    negated: other_negated,
2051                }),
2052            ) => {
2053                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2054                self_negated == other_negated
2055                    && self_expr.normalize_eq(other_expr)
2056                    && self_list.len() == other_list.len()
2057                    && self_list
2058                        .iter()
2059                        .zip(other_list.iter())
2060                        .all(|(a, b)| a.normalize_eq(b))
2061            }
2062            (
2063                Expr::Case(Case {
2064                    expr: self_expr,
2065                    when_then_expr: self_when_then_expr,
2066                    else_expr: self_else_expr,
2067                }),
2068                Expr::Case(Case {
2069                    expr: other_expr,
2070                    when_then_expr: other_when_then_expr,
2071                    else_expr: other_else_expr,
2072                }),
2073            ) => {
2074                // TODO: normalize_eq for when_then_expr
2075                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2076                self_when_then_expr.len() == other_when_then_expr.len()
2077                    && self_when_then_expr
2078                        .iter()
2079                        .zip(other_when_then_expr.iter())
2080                        .all(|((self_when, self_then), (other_when, other_then))| {
2081                            self_when.normalize_eq(other_when)
2082                                && self_then.normalize_eq(other_then)
2083                        })
2084                    && match (self_expr, other_expr) {
2085                        (Some(self_expr), Some(other_expr)) => {
2086                            self_expr.normalize_eq(other_expr)
2087                        }
2088                        (None, None) => true,
2089                        (_, _) => false,
2090                    }
2091                    && match (self_else_expr, other_else_expr) {
2092                        (Some(self_else_expr), Some(other_else_expr)) => {
2093                            self_else_expr.normalize_eq(other_else_expr)
2094                        }
2095                        (None, None) => true,
2096                        (_, _) => false,
2097                    }
2098            }
2099            (_, _) => self == other,
2100        }
2101    }
2102}
2103
2104impl HashNode for Expr {
2105    /// As it is pretty easy to forget changing this method when `Expr` changes the
2106    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
2107    /// compile time.
2108    fn hash_node<H: Hasher>(&self, state: &mut H) {
2109        mem::discriminant(self).hash(state);
2110        match self {
2111            Expr::Alias(Alias {
2112                expr: _expr,
2113                relation,
2114                name,
2115            }) => {
2116                relation.hash(state);
2117                name.hash(state);
2118            }
2119            Expr::Column(column) => {
2120                column.hash(state);
2121            }
2122            Expr::ScalarVariable(data_type, name) => {
2123                data_type.hash(state);
2124                name.hash(state);
2125            }
2126            Expr::Literal(scalar_value) => {
2127                scalar_value.hash(state);
2128            }
2129            Expr::BinaryExpr(BinaryExpr {
2130                left: _left,
2131                op,
2132                right: _right,
2133            }) => {
2134                op.hash(state);
2135            }
2136            Expr::Like(Like {
2137                negated,
2138                expr: _expr,
2139                pattern: _pattern,
2140                escape_char,
2141                case_insensitive,
2142            })
2143            | Expr::SimilarTo(Like {
2144                negated,
2145                expr: _expr,
2146                pattern: _pattern,
2147                escape_char,
2148                case_insensitive,
2149            }) => {
2150                negated.hash(state);
2151                escape_char.hash(state);
2152                case_insensitive.hash(state);
2153            }
2154            Expr::Not(_expr)
2155            | Expr::IsNotNull(_expr)
2156            | Expr::IsNull(_expr)
2157            | Expr::IsTrue(_expr)
2158            | Expr::IsFalse(_expr)
2159            | Expr::IsUnknown(_expr)
2160            | Expr::IsNotTrue(_expr)
2161            | Expr::IsNotFalse(_expr)
2162            | Expr::IsNotUnknown(_expr)
2163            | Expr::Negative(_expr) => {}
2164            Expr::Between(Between {
2165                expr: _expr,
2166                negated,
2167                low: _low,
2168                high: _high,
2169            }) => {
2170                negated.hash(state);
2171            }
2172            Expr::Case(Case {
2173                expr: _expr,
2174                when_then_expr: _when_then_expr,
2175                else_expr: _else_expr,
2176            }) => {}
2177            Expr::Cast(Cast {
2178                expr: _expr,
2179                data_type,
2180            })
2181            | Expr::TryCast(TryCast {
2182                expr: _expr,
2183                data_type,
2184            }) => {
2185                data_type.hash(state);
2186            }
2187            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
2188                func.hash(state);
2189            }
2190            Expr::AggregateFunction(AggregateFunction {
2191                func,
2192                params:
2193                    AggregateFunctionParams {
2194                        args: _args,
2195                        distinct,
2196                        filter: _,
2197                        order_by: _,
2198                        null_treatment,
2199                    },
2200            }) => {
2201                func.hash(state);
2202                distinct.hash(state);
2203                null_treatment.hash(state);
2204            }
2205            Expr::WindowFunction(WindowFunction { fun, params }) => {
2206                let WindowFunctionParams {
2207                    args: _args,
2208                    partition_by: _,
2209                    order_by: _,
2210                    window_frame,
2211                    null_treatment,
2212                } = params;
2213                fun.hash(state);
2214                window_frame.hash(state);
2215                null_treatment.hash(state);
2216            }
2217            Expr::InList(InList {
2218                expr: _expr,
2219                list: _list,
2220                negated,
2221            }) => {
2222                negated.hash(state);
2223            }
2224            Expr::Exists(Exists { subquery, negated }) => {
2225                subquery.hash(state);
2226                negated.hash(state);
2227            }
2228            Expr::InSubquery(InSubquery {
2229                expr: _expr,
2230                subquery,
2231                negated,
2232            }) => {
2233                subquery.hash(state);
2234                negated.hash(state);
2235            }
2236            Expr::ScalarSubquery(subquery) => {
2237                subquery.hash(state);
2238            }
2239            #[expect(deprecated)]
2240            Expr::Wildcard { qualifier, options } => {
2241                qualifier.hash(state);
2242                options.hash(state);
2243            }
2244            Expr::GroupingSet(grouping_set) => {
2245                mem::discriminant(grouping_set).hash(state);
2246                match grouping_set {
2247                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
2248                    GroupingSet::GroupingSets(_exprs) => {}
2249                }
2250            }
2251            Expr::Placeholder(place_holder) => {
2252                place_holder.hash(state);
2253            }
2254            Expr::OuterReferenceColumn(data_type, column) => {
2255                data_type.hash(state);
2256                column.hash(state);
2257            }
2258            Expr::Unnest(Unnest { expr: _expr }) => {}
2259        };
2260    }
2261}
2262
2263// Modifies expr if it is a placeholder with datatype of right
2264fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2265    if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr {
2266        if data_type.is_none() {
2267            let other_dt = other.get_type(schema);
2268            match other_dt {
2269                Err(e) => {
2270                    Err(e.context(format!(
2271                        "Can not find type of {other} needed to infer type of {expr}"
2272                    )))?;
2273                }
2274                Ok(dt) => {
2275                    *data_type = Some(dt);
2276                }
2277            }
2278        };
2279    }
2280    Ok(())
2281}
2282
/// Renders every element of an iterable collection with its `Display`
/// implementation and joins the results with `", "`.
///
/// The argument must provide an `iter()` method; an empty collection yields
/// an empty string.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> = $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
2293
2294struct SchemaDisplay<'a>(&'a Expr);
2295impl Display for SchemaDisplay<'_> {
2296    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2297        match self.0 {
2298            // The same as Display
2299            // TODO: remove the next line after `Expr::Wildcard` is removed
2300            #[expect(deprecated)]
2301            Expr::Column(_)
2302            | Expr::Literal(_)
2303            | Expr::ScalarVariable(..)
2304            | Expr::OuterReferenceColumn(..)
2305            | Expr::Placeholder(_)
2306            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2307            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2308                match func.schema_name(params) {
2309                    Ok(name) => {
2310                        write!(f, "{name}")
2311                    }
2312                    Err(e) => {
2313                        write!(f, "got error from schema_name {}", e)
2314                    }
2315                }
2316            }
2317            // Expr is not shown since it is aliased
2318            Expr::Alias(Alias {
2319                name,
2320                relation: Some(relation),
2321                ..
2322            }) => write!(f, "{relation}.{name}"),
2323            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2324            Expr::Between(Between {
2325                expr,
2326                negated,
2327                low,
2328                high,
2329            }) => {
2330                if *negated {
2331                    write!(
2332                        f,
2333                        "{} NOT BETWEEN {} AND {}",
2334                        SchemaDisplay(expr),
2335                        SchemaDisplay(low),
2336                        SchemaDisplay(high),
2337                    )
2338                } else {
2339                    write!(
2340                        f,
2341                        "{} BETWEEN {} AND {}",
2342                        SchemaDisplay(expr),
2343                        SchemaDisplay(low),
2344                        SchemaDisplay(high),
2345                    )
2346                }
2347            }
2348            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2349                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2350            }
2351            Expr::Case(Case {
2352                expr,
2353                when_then_expr,
2354                else_expr,
2355            }) => {
2356                write!(f, "CASE ")?;
2357
2358                if let Some(e) = expr {
2359                    write!(f, "{} ", SchemaDisplay(e))?;
2360                }
2361
2362                for (when, then) in when_then_expr {
2363                    write!(
2364                        f,
2365                        "WHEN {} THEN {} ",
2366                        SchemaDisplay(when),
2367                        SchemaDisplay(then),
2368                    )?;
2369                }
2370
2371                if let Some(e) = else_expr {
2372                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2373                }
2374
2375                write!(f, "END")
2376            }
2377            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2378            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2379                write!(f, "{}", SchemaDisplay(expr))
2380            }
2381            Expr::InList(InList {
2382                expr,
2383                list,
2384                negated,
2385            }) => {
2386                let inlist_name = schema_name_from_exprs(list)?;
2387
2388                if *negated {
2389                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2390                } else {
2391                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2392                }
2393            }
2394            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2395            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2396            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2397                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2398            }
2399            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2400                write!(f, "GROUPING SETS (")?;
2401                for exprs in lists_of_exprs.iter() {
2402                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2403                }
2404                write!(f, ")")
2405            }
2406            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2407                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2408            }
2409            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2410            Expr::IsNotNull(expr) => {
2411                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2412            }
2413            Expr::IsUnknown(expr) => {
2414                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2415            }
2416            Expr::IsNotUnknown(expr) => {
2417                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2418            }
2419            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2420                write!(f, "NOT IN")
2421            }
2422            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2423            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2424            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2425            Expr::IsNotTrue(expr) => {
2426                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2427            }
2428            Expr::IsNotFalse(expr) => {
2429                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2430            }
2431            Expr::Like(Like {
2432                negated,
2433                expr,
2434                pattern,
2435                escape_char,
2436                case_insensitive,
2437            }) => {
2438                write!(
2439                    f,
2440                    "{} {}{} {}",
2441                    SchemaDisplay(expr),
2442                    if *negated { "NOT " } else { "" },
2443                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2444                    SchemaDisplay(pattern),
2445                )?;
2446
2447                if let Some(char) = escape_char {
2448                    write!(f, " CHAR '{char}'")?;
2449                }
2450
2451                Ok(())
2452            }
2453            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2454            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2455            Expr::Unnest(Unnest { expr }) => {
2456                write!(f, "UNNEST({})", SchemaDisplay(expr))
2457            }
2458            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2459                match func.schema_name(args) {
2460                    Ok(name) => {
2461                        write!(f, "{name}")
2462                    }
2463                    Err(e) => {
2464                        write!(f, "got error from schema_name {}", e)
2465                    }
2466                }
2467            }
2468            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2469                write!(f, "{}", subquery.schema().field(0).name())
2470            }
2471            Expr::SimilarTo(Like {
2472                negated,
2473                expr,
2474                pattern,
2475                escape_char,
2476                ..
2477            }) => {
2478                write!(
2479                    f,
2480                    "{} {} {}",
2481                    SchemaDisplay(expr),
2482                    if *negated {
2483                        "NOT SIMILAR TO"
2484                    } else {
2485                        "SIMILAR TO"
2486                    },
2487                    SchemaDisplay(pattern),
2488                )?;
2489                if let Some(char) = escape_char {
2490                    write!(f, " CHAR '{char}'")?;
2491                }
2492
2493                Ok(())
2494            }
2495            Expr::WindowFunction(WindowFunction { fun, params }) => match fun {
2496                WindowFunctionDefinition::AggregateUDF(fun) => {
2497                    match fun.window_function_schema_name(params) {
2498                        Ok(name) => {
2499                            write!(f, "{name}")
2500                        }
2501                        Err(e) => {
2502                            write!(f, "got error from window_function_schema_name {}", e)
2503                        }
2504                    }
2505                }
2506                _ => {
2507                    let WindowFunctionParams {
2508                        args,
2509                        partition_by,
2510                        order_by,
2511                        window_frame,
2512                        null_treatment,
2513                    } = params;
2514
2515                    write!(
2516                        f,
2517                        "{}({})",
2518                        fun,
2519                        schema_name_from_exprs_comma_separated_without_space(args)?
2520                    )?;
2521
2522                    if let Some(null_treatment) = null_treatment {
2523                        write!(f, " {}", null_treatment)?;
2524                    }
2525
2526                    if !partition_by.is_empty() {
2527                        write!(
2528                            f,
2529                            " PARTITION BY [{}]",
2530                            schema_name_from_exprs(partition_by)?
2531                        )?;
2532                    }
2533
2534                    if !order_by.is_empty() {
2535                        write!(f, " ORDER BY [{}]", schema_name_from_sorts(order_by)?)?;
2536                    };
2537
2538                    write!(f, " {window_frame}")
2539                }
2540            },
2541        }
2542    }
2543}
2544
2545/// Get schema_name for Vector of expressions
2546///
2547/// Internal usage. Please call `schema_name_from_exprs` instead
2548// TODO: Use ", " to standardize the formatting of Vec<Expr>,
2549// <https://github.com/apache/datafusion/issues/10364>
2550pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
2551    exprs: &[Expr],
2552) -> Result<String, fmt::Error> {
2553    schema_name_from_exprs_inner(exprs, ",")
2554}
2555
2556/// Get schema_name for Vector of expressions
2557pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
2558    schema_name_from_exprs_inner(exprs, ", ")
2559}
2560
2561fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
2562    let mut s = String::new();
2563    for (i, e) in exprs.iter().enumerate() {
2564        if i > 0 {
2565            write!(&mut s, "{sep}")?;
2566        }
2567        write!(&mut s, "{}", SchemaDisplay(e))?;
2568    }
2569
2570    Ok(s)
2571}
2572
2573pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
2574    let mut s = String::new();
2575    for (i, e) in sorts.iter().enumerate() {
2576        if i > 0 {
2577            write!(&mut s, ", ")?;
2578        }
2579        let ordering = if e.asc { "ASC" } else { "DESC" };
2580        let nulls_ordering = if e.nulls_first {
2581            "NULLS FIRST"
2582        } else {
2583            "NULLS LAST"
2584        };
2585        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
2586    }
2587
2588    Ok(s)
2589}
2590
/// Prefix written by the `Display` implementation of [`Expr`] for an
/// [`Expr::OuterReferenceColumn`], producing `outer_ref(<column>)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";

/// Prefix written by the `Display` implementation of [`Expr`] for an
/// [`Expr::Unnest`], producing `UNNEST(<expr>)`.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
2593
2594/// Format expressions for display as part of a logical plan. In many cases, this will produce
2595/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
2596impl Display for Expr {
2597    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
2598        match self {
2599            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
2600            Expr::Column(c) => write!(f, "{c}"),
2601            Expr::OuterReferenceColumn(_, c) => {
2602                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
2603            }
2604            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
2605            Expr::Literal(v) => write!(f, "{v:?}"),
2606            Expr::Case(case) => {
2607                write!(f, "CASE ")?;
2608                if let Some(e) = &case.expr {
2609                    write!(f, "{e} ")?;
2610                }
2611                for (w, t) in &case.when_then_expr {
2612                    write!(f, "WHEN {w} THEN {t} ")?;
2613                }
2614                if let Some(e) = &case.else_expr {
2615                    write!(f, "ELSE {e} ")?;
2616                }
2617                write!(f, "END")
2618            }
2619            Expr::Cast(Cast { expr, data_type }) => {
2620                write!(f, "CAST({expr} AS {data_type:?})")
2621            }
2622            Expr::TryCast(TryCast { expr, data_type }) => {
2623                write!(f, "TRY_CAST({expr} AS {data_type:?})")
2624            }
2625            Expr::Not(expr) => write!(f, "NOT {expr}"),
2626            Expr::Negative(expr) => write!(f, "(- {expr})"),
2627            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
2628            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
2629            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
2630            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
2631            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
2632            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
2633            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
2634            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
2635            Expr::Exists(Exists {
2636                subquery,
2637                negated: true,
2638            }) => write!(f, "NOT EXISTS ({subquery:?})"),
2639            Expr::Exists(Exists {
2640                subquery,
2641                negated: false,
2642            }) => write!(f, "EXISTS ({subquery:?})"),
2643            Expr::InSubquery(InSubquery {
2644                expr,
2645                subquery,
2646                negated: true,
2647            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
2648            Expr::InSubquery(InSubquery {
2649                expr,
2650                subquery,
2651                negated: false,
2652            }) => write!(f, "{expr} IN ({subquery:?})"),
2653            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
2654            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
2655            Expr::ScalarFunction(fun) => {
2656                fmt_function(f, fun.name(), false, &fun.args, true)
2657            }
2658            // TODO: use udf's display_name, need to fix the separator issue, <https://github.com/apache/datafusion/issues/10364>
2659            // Expr::ScalarFunction(ScalarFunction { func, args }) => {
2660            //     write!(f, "{}", func.display_name(args).unwrap())
2661            // }
2662            Expr::WindowFunction(WindowFunction { fun, params }) => match fun {
2663                WindowFunctionDefinition::AggregateUDF(fun) => {
2664                    match fun.window_function_display_name(params) {
2665                        Ok(name) => {
2666                            write!(f, "{}", name)
2667                        }
2668                        Err(e) => {
2669                            write!(f, "got error from window_function_display_name {}", e)
2670                        }
2671                    }
2672                }
2673                WindowFunctionDefinition::WindowUDF(fun) => {
2674                    let WindowFunctionParams {
2675                        args,
2676                        partition_by,
2677                        order_by,
2678                        window_frame,
2679                        null_treatment,
2680                    } = params;
2681
2682                    fmt_function(f, &fun.to_string(), false, args, true)?;
2683
2684                    if let Some(nt) = null_treatment {
2685                        write!(f, "{}", nt)?;
2686                    }
2687
2688                    if !partition_by.is_empty() {
2689                        write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
2690                    }
2691                    if !order_by.is_empty() {
2692                        write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
2693                    }
2694                    write!(
2695                        f,
2696                        " {} BETWEEN {} AND {}",
2697                        window_frame.units,
2698                        window_frame.start_bound,
2699                        window_frame.end_bound
2700                    )
2701                }
2702            },
2703            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2704                match func.display_name(params) {
2705                    Ok(name) => {
2706                        write!(f, "{}", name)
2707                    }
2708                    Err(e) => {
2709                        write!(f, "got error from display_name {}", e)
2710                    }
2711                }
2712            }
2713            Expr::Between(Between {
2714                expr,
2715                negated,
2716                low,
2717                high,
2718            }) => {
2719                if *negated {
2720                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
2721                } else {
2722                    write!(f, "{expr} BETWEEN {low} AND {high}")
2723                }
2724            }
2725            Expr::Like(Like {
2726                negated,
2727                expr,
2728                pattern,
2729                escape_char,
2730                case_insensitive,
2731            }) => {
2732                write!(f, "{expr}")?;
2733                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
2734                if *negated {
2735                    write!(f, " NOT")?;
2736                }
2737                if let Some(char) = escape_char {
2738                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
2739                } else {
2740                    write!(f, " {op_name} {pattern}")
2741                }
2742            }
2743            Expr::SimilarTo(Like {
2744                negated,
2745                expr,
2746                pattern,
2747                escape_char,
2748                case_insensitive: _,
2749            }) => {
2750                write!(f, "{expr}")?;
2751                if *negated {
2752                    write!(f, " NOT")?;
2753                }
2754                if let Some(char) = escape_char {
2755                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
2756                } else {
2757                    write!(f, " SIMILAR TO {pattern}")
2758                }
2759            }
2760            Expr::InList(InList {
2761                expr,
2762                list,
2763                negated,
2764            }) => {
2765                if *negated {
2766                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
2767                } else {
2768                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
2769                }
2770            }
2771            #[expect(deprecated)]
2772            Expr::Wildcard { qualifier, options } => match qualifier {
2773                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
2774                None => write!(f, "*{options}"),
2775            },
2776            Expr::GroupingSet(grouping_sets) => match grouping_sets {
2777                GroupingSet::Rollup(exprs) => {
2778                    // ROLLUP (c0, c1, c2)
2779                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
2780                }
2781                GroupingSet::Cube(exprs) => {
2782                    // CUBE (c0, c1, c2)
2783                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
2784                }
2785                GroupingSet::GroupingSets(lists_of_exprs) => {
2786                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
2787                    write!(
2788                        f,
2789                        "GROUPING SETS ({})",
2790                        lists_of_exprs
2791                            .iter()
2792                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
2793                            .collect::<Vec<String>>()
2794                            .join(", ")
2795                    )
2796                }
2797            },
2798            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
2799            Expr::Unnest(Unnest { expr }) => {
2800                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
2801            }
2802        }
2803    }
2804}
2805
2806fn fmt_function(
2807    f: &mut Formatter,
2808    fun: &str,
2809    distinct: bool,
2810    args: &[Expr],
2811    display: bool,
2812) -> fmt::Result {
2813    let args: Vec<String> = match display {
2814        true => args.iter().map(|arg| format!("{arg}")).collect(),
2815        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
2816    };
2817
2818    let distinct_str = match distinct {
2819        true => "DISTINCT ",
2820        false => "",
2821    };
2822    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
2823}
2824
2825/// The name of the column (field) that this `Expr` will produce in the physical plan.
2826/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
2827pub fn physical_name(expr: &Expr) -> Result<String> {
2828    match expr {
2829        Expr::Column(col) => Ok(col.name.clone()),
2830        Expr::Alias(alias) => Ok(alias.name.clone()),
2831        _ => Ok(expr.schema_name().to_string()),
2832    }
2833}
2834
#[cfg(test)]
mod test {
    use std::any::Any;

    use sqlparser::ast::{self, Ident, IdentWithAlias};

    // The glob import supplies `Expr`, `Cast`, `Result`, `ScalarValue`,
    // `DataType`, `Arc`, `Signature` and the wildcard option types used below.
    use super::*;
    use crate::expr_fn::col;
    use crate::{
        case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
        ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
    };

    /// Assemble a [`WildcardOptions`] from its five optional parts.
    fn wildcard_options(
        ilike: Option<IlikeSelectItem>,
        exclude: Option<ExcludeSelectItem>,
        except: Option<ExceptSelectItem>,
        replace: Option<PlannedReplaceSelectItem>,
        rename: Option<RenameSelectItem>,
    ) -> WildcardOptions {
        WildcardOptions {
            ilike,
            exclude,
            except,
            replace,
            rename,
        }
    }

    /// A `CASE` expression renders the same text through `canonical_name`
    /// and through `Display`.
    #[test]
    #[allow(deprecated)]
    fn format_case_when() -> Result<()> {
        let case_expr = case(col("a"))
            .when(lit(1), lit(true))
            .when(lit(0), lit(false))
            .otherwise(lit(ScalarValue::Null))?;
        let rendered = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
        assert_eq!(rendered, case_expr.canonical_name());
        assert_eq!(rendered, case_expr.to_string());
        Ok(())
    }

    /// A `CAST` is visible in the canonical name and `Display`, but not in
    /// the schema name.
    #[test]
    #[allow(deprecated)]
    fn format_cast() -> Result<()> {
        let literal = Expr::Literal(ScalarValue::Float32(Some(1.23)));
        let cast_expr = Expr::Cast(Cast {
            expr: Box::new(literal),
            data_type: DataType::Utf8,
        });
        let rendered = "CAST(Float32(1.23) AS Utf8)";
        assert_eq!(rendered, cast_expr.canonical_name());
        assert_eq!(rendered, cast_expr.to_string());
        // The schema name deliberately differs from the `Display` form:
        // CAST does not change the name of the expression it wraps.
        assert_eq!("Float32(1.23)", cast_expr.schema_name().to_string());
        Ok(())
    }

    /// Checks that a partial order exists for `Expr`; this is not an
    /// exhaustive test of the ordering.
    #[test]
    fn test_partial_ord() {
        let plus_one = col("a") + lit(1);
        let plus_two = col("a") + lit(2);
        let not_plus_two = !(col("a") + lit(2));

        assert!(plus_one < plus_two);
        assert!(not_plus_two > plus_two);
        assert!(plus_one < not_plus_two);
    }

    /// `column_refs` reports each referenced column exactly once.
    #[test]
    fn test_collect_expr() -> Result<()> {
        // single column
        let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
        let single = cast_expr.column_refs();
        assert_eq!(1, single.len());
        assert!(single.contains(&Column::from_name("a")));

        // multiple columns
        let sum_expr = col("a") + col("b") + lit(1);
        let multiple = sum_expr.column_refs();
        assert_eq!(2, multiple.len());
        assert!(multiple.contains(&Column::from_name("a")));
        assert!(multiple.contains(&Column::from_name("b")));

        Ok(())
    }

    /// Comparison and boolean builder methods display with the expected
    /// operator between the two operands.
    #[test]
    fn test_logical_ops() {
        let one = || lit(1u32);
        let two = || lit(2u32);

        let cases = [
            (one().eq(two()), "UInt32(1) = UInt32(2)"),
            (one().not_eq(two()), "UInt32(1) != UInt32(2)"),
            (one().gt(two()), "UInt32(1) > UInt32(2)"),
            (one().gt_eq(two()), "UInt32(1) >= UInt32(2)"),
            (one().lt(two()), "UInt32(1) < UInt32(2)"),
            (one().lt_eq(two()), "UInt32(1) <= UInt32(2)"),
            (one().and(two()), "UInt32(1) AND UInt32(2)"),
            (one().or(two()), "UInt32(1) OR UInt32(2)"),
        ];

        for (expr, expected) in cases {
            assert_eq!(expr.to_string(), expected);
        }
    }

    /// The volatility given to a UDF's signature is the one reported back
    /// through `ScalarUDF::signature`.
    #[test]
    fn test_is_volatile_scalar_func() {
        // UDF
        #[derive(Debug)]
        struct TestScalarUDF {
            signature: Signature,
        }

        impl ScalarUDFImpl for TestScalarUDF {
            fn as_any(&self) -> &dyn Any {
                self
            }

            fn name(&self) -> &str {
                "TestScalarUDF"
            }

            fn signature(&self) -> &Signature {
                &self.signature
            }

            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
                Ok(DataType::Utf8)
            }

            fn invoke_with_args(
                &self,
                _args: ScalarFunctionArgs,
            ) -> Result<ColumnarValue> {
                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
            }
        }

        // Builds the same UDF, varying only the signature's volatility.
        let udf_with = |volatility: Volatility| {
            Arc::new(ScalarUDF::from(TestScalarUDF {
                signature: Signature::uniform(1, vec![DataType::Float32], volatility),
            }))
        };

        let stable_udf = udf_with(Volatility::Stable);
        assert_ne!(stable_udf.signature().volatility, Volatility::Volatile);

        let volatile_udf = udf_with(Volatility::Volatile);
        assert_eq!(volatile_udf.signature().volatility, Volatility::Volatile);
    }

    /// Wildcards display as `*` / `<qualifier>.*`, followed by the rendering
    /// of whichever option is set.
    #[test]
    fn test_display_wildcard() {
        let render =
            |options: WildcardOptions| wildcard_with_options(options).to_string();

        assert_eq!(wildcard().to_string(), "*");
        assert_eq!(qualified_wildcard("t1").to_string(), "t1.*");

        let ilike = IlikeSelectItem {
            pattern: "c1".to_string(),
        };
        assert_eq!(
            render(wildcard_options(Some(ilike), None, None, None, None)),
            "* ILIKE 'c1'"
        );

        let exclude =
            ExcludeSelectItem::Multiple(vec![Ident::from("c1"), Ident::from("c2")]);
        assert_eq!(
            render(wildcard_options(None, Some(exclude), None, None, None)),
            "* EXCLUDE (c1, c2)"
        );

        let except = ExceptSelectItem {
            first_element: Ident::from("c1"),
            additional_elements: vec![Ident::from("c2")],
        };
        assert_eq!(
            render(wildcard_options(None, None, Some(except), None, None)),
            "* EXCEPT (c1, c2)"
        );

        let replace = PlannedReplaceSelectItem {
            items: vec![ReplaceSelectElement {
                expr: ast::Expr::Identifier(Ident::from("c1")),
                column_name: Ident::from("a1"),
                as_keyword: false,
            }],
            planned_expressions: vec![],
        };
        assert_eq!(
            render(wildcard_options(None, None, None, Some(replace), None)),
            "* REPLACE (c1 a1)"
        );

        let rename = RenameSelectItem::Multiple(vec![IdentWithAlias {
            ident: Ident::from("c1"),
            alias: Ident::from("a1"),
        }]);
        assert_eq!(
            render(wildcard_options(None, None, None, None, Some(rename))),
            "* RENAME (c1 AS a1)"
        );
    }

    /// A qualified alias shows up in the schema name as `<relation>.<name>`.
    #[test]
    fn test_schema_display_alias_with_relation() {
        let aliased = lit(1).alias_qualified("table_name".into(), "column_name");
        assert_eq!(SchemaDisplay(&aliased).to_string(), "table_name.column_name");
    }

    /// An alias without a relation shows up in the schema name as just the
    /// alias name.
    #[test]
    fn test_schema_display_alias_without_relation() {
        let aliased = lit(1).alias_qualified(None::<&str>, "column_name");
        assert_eq!(SchemaDisplay(&aliased).to_string(), "column_name");
    }
}