datafusion_common/unnest.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`UnnestOptions`] for unnesting structured types
19
20use crate::Column;
21
22/// Options for unnesting a column that contains a list type,
23/// replicating values in the other, non nested rows.
24///
25/// Conceptually this operation is like joining each row with all the
26/// values in the list column.
27///
28/// If `preserve_nulls` is false, nulls and empty lists
29/// from the input column are not carried through to the output. This
30/// is the default behavior for other systems such as ClickHouse and
31/// DuckDB
32///
33/// If `preserve_nulls` is true (the default), nulls from the input
34/// column are carried through to the output.
35///
36/// # Examples
37///
38/// ## `Unnest(c1)`, preserve_nulls: false
39/// ```text
40/// ┌─────────┐ ┌─────┐ ┌─────────┐ ┌─────┐
41/// │ {1, 2} │ │ A │ Unnest │ 1 │ │ A │
42/// ├─────────┤ ├─────┤ ├─────────┤ ├─────┤
43/// │ null │ │ B │ │ 2 │ │ A │
44/// ├─────────┤ ├─────┤ ────────────▶ ├─────────┤ ├─────┤
45/// │ {} │ │ D │ │ 3 │ │ E │
46/// ├─────────┤ ├─────┤ └─────────┘ └─────┘
47/// │ {3} │ │ E │ c1 c2
48/// └─────────┘ └─────┘
49/// c1 c2
50/// ```
51///
52/// ## `Unnest(c1)`, preserve_nulls: true
53/// ```text
54/// ┌─────────┐ ┌─────┐ ┌─────────┐ ┌─────┐
55/// │ {1, 2} │ │ A │ Unnest │ 1 │ │ A │
56/// ├─────────┤ ├─────┤ ├─────────┤ ├─────┤
57/// │ null │ │ B │ │ 2 │ │ A │
58/// ├─────────┤ ├─────┤ ────────────▶ ├─────────┤ ├─────┤
59/// │ {} │ │ D │ │ null │ │ B │
60/// ├─────────┤ ├─────┤ ├─────────┤ ├─────┤
61/// │ {3} │ │ E │ │ 3 │ │ E │
62/// └─────────┘ └─────┘ └─────────┘ └─────┘
63/// c1 c2 c1 c2
64/// ```
65///
66/// `recursions` instruct how a column should be unnested (e.g unnesting a column multiple
67/// time, with depth = 1 and depth = 2). Any unnested column not being mentioned inside this
68/// options is inferred to be unnested with depth = 1
69#[derive(Debug, Clone, PartialEq, PartialOrd, Hash, Eq)]
70pub struct UnnestOptions {
71 /// Should nulls in the input be preserved? Defaults to true
72 pub preserve_nulls: bool,
73 /// If specific columns need to be unnested multiple times (e.g at different depth),
74 /// declare them here. Any unnested columns not being mentioned inside this option
75 /// will be unnested with depth = 1
76 pub recursions: Vec<RecursionUnnestOption>,
77}
78
79/// Instruction on how to unnest a column (mostly with a list type)
80/// such as how to name the output, and how many level it should be unnested
81#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
82pub struct RecursionUnnestOption {
83 pub input_column: Column,
84 pub output_column: Column,
85 pub depth: usize,
86}
87
88impl Default for UnnestOptions {
89 fn default() -> Self {
90 Self {
91 // default to true to maintain backwards compatible behavior
92 preserve_nulls: true,
93 recursions: vec![],
94 }
95 }
96}
97
98impl UnnestOptions {
99 /// Create a new [`UnnestOptions`] with default values
100 pub fn new() -> Self {
101 Default::default()
102 }
103
104 /// Set the behavior with nulls in the input as described on
105 /// [`Self`]
106 pub fn with_preserve_nulls(mut self, preserve_nulls: bool) -> Self {
107 self.preserve_nulls = preserve_nulls;
108 self
109 }
110
111 /// Set the recursions for the unnest operation
112 pub fn with_recursions(mut self, recursion: RecursionUnnestOption) -> Self {
113 self.recursions.push(recursion);
114 self
115 }
116}