datafusion_common/
unnest.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`UnnestOptions`] for unnesting structured types
19
20use crate::Column;
21
/// Options for unnesting a column that contains a list type,
/// replicating the values of the other, non-nested columns.
24///
25/// Conceptually this operation is like joining each row with all the
26/// values in the list column.
27///
/// If `preserve_nulls` is false, nulls and empty lists
/// from the input column are not carried through to the output. This
/// is the default behavior for other systems such as ClickHouse and
/// DuckDB.
///
/// If `preserve_nulls` is true (the default), nulls from the input
/// column are carried through to the output. Empty lists still produce
/// no output rows, as the second example below shows.
35///
36/// # Examples
37///
38/// ## `Unnest(c1)`, preserve_nulls: false
39/// ```text
40///      ┌─────────┐ ┌─────┐                ┌─────────┐ ┌─────┐
41///      │ {1, 2}  │ │  A  │   Unnest       │    1    │ │  A  │
42///      ├─────────┤ ├─────┤                ├─────────┤ ├─────┤
43///      │  null   │ │  B  │                │    2    │ │  A  │
44///      ├─────────┤ ├─────┤ ────────────▶  ├─────────┤ ├─────┤
45///      │   {}    │ │  D  │                │    3    │ │  E  │
46///      ├─────────┤ ├─────┤                └─────────┘ └─────┘
47///      │   {3}   │ │  E  │                    c1        c2
48///      └─────────┘ └─────┘
49///        c1         c2
50/// ```
51///
52/// ## `Unnest(c1)`, preserve_nulls: true
53/// ```text
54///      ┌─────────┐ ┌─────┐                ┌─────────┐ ┌─────┐
55///      │ {1, 2}  │ │  A  │   Unnest       │    1    │ │  A  │
56///      ├─────────┤ ├─────┤                ├─────────┤ ├─────┤
57///      │  null   │ │  B  │                │    2    │ │  A  │
58///      ├─────────┤ ├─────┤ ────────────▶  ├─────────┤ ├─────┤
59///      │   {}    │ │  D  │                │  null   │ │  B  │
60///      ├─────────┤ ├─────┤                ├─────────┤ ├─────┤
61///      │   {3}   │ │  E  │                │    3    │ │  E  │
62///      └─────────┘ └─────┘                └─────────┘ └─────┘
63///        c1         c2                        c1        c2
64/// ```
65///
/// `recursions` specifies how individual columns should be unnested (e.g. unnesting
/// the same column multiple times, with depth = 1 and depth = 2). Any unnested column
/// not mentioned in these options is inferred to be unnested with depth = 1.
69#[derive(Debug, Clone, PartialEq, PartialOrd, Hash, Eq)]
70pub struct UnnestOptions {
71    /// Should nulls in the input be preserved? Defaults to true
72    pub preserve_nulls: bool,
73    /// If specific columns need to be unnested multiple times (e.g at different depth),
74    /// declare them here. Any unnested columns not being mentioned inside this option
75    /// will be unnested with depth = 1
76    pub recursions: Vec<RecursionUnnestOption>,
77}
78
79/// Instruction on how to unnest a column (mostly with a list type)
80/// such as how to name the output, and how many level it should be unnested
81#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
82pub struct RecursionUnnestOption {
83    pub input_column: Column,
84    pub output_column: Column,
85    pub depth: usize,
86}
87
88impl Default for UnnestOptions {
89    fn default() -> Self {
90        Self {
91            // default to true to maintain backwards compatible behavior
92            preserve_nulls: true,
93            recursions: vec![],
94        }
95    }
96}
97
98impl UnnestOptions {
99    /// Create a new [`UnnestOptions`] with default values
100    pub fn new() -> Self {
101        Default::default()
102    }
103
104    /// Set the behavior with nulls in the input as described on
105    /// [`Self`]
106    pub fn with_preserve_nulls(mut self, preserve_nulls: bool) -> Self {
107        self.preserve_nulls = preserve_nulls;
108        self
109    }
110
111    /// Set the recursions for the unnest operation
112    pub fn with_recursions(mut self, recursion: RecursionUnnestOption) -> Self {
113        self.recursions.push(recursion);
114        self
115    }
116}