datafusion_functions/datetime/
make_date.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::builder::PrimitiveBuilder;
22use arrow::array::cast::AsArray;
23use arrow::array::types::{Date32Type, Int32Type};
24use arrow::array::PrimitiveArray;
25use arrow::datatypes::DataType;
26use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf8View};
27use chrono::prelude::*;
28
29use datafusion_common::{exec_err, utils::take_function_args, Result, ScalarValue};
30use datafusion_expr::{
31    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_macros::user_doc;
34
/// Scalar UDF behind the SQL function `make_date(year, month, day)`.
///
/// The user-facing description, syntax and examples live in the `user_doc`
/// attribute below; the evaluation logic is in this type's `ScalarUDFImpl`
/// implementation.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Make a date from year/month/day component parts.",
    syntax_example = "make_date(year, month, day)",
    sql_example = r#"```sql
> select make_date(2023, 1, 31);
+-------------------------------------------+
| make_date(Int64(2023),Int64(1),Int64(31)) |
+-------------------------------------------+
| 2023-01-31                                |
+-------------------------------------------+
> select make_date('2023', '01', '31');
+-----------------------------------------------+
| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) |
+-----------------------------------------------+
| 2023-01-31                                    |
+-----------------------------------------------+
```

Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs)
"#,
    argument(
        name = "year",
        description = "Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "month",
        description = "Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "day",
        description = "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
    )
)]
#[derive(Debug)]
pub struct MakeDateFunc {
    /// Accepted argument types and volatility; populated by `MakeDateFunc::new`.
    signature: Signature,
}
73
74impl Default for MakeDateFunc {
75    fn default() -> Self {
76        Self::new()
77    }
78}
79
80impl MakeDateFunc {
81    pub fn new() -> Self {
82        Self {
83            signature: Signature::uniform(
84                3,
85                vec![Int32, Int64, UInt32, UInt64, Utf8, Utf8View],
86                Volatility::Immutable,
87            ),
88        }
89    }
90}
91
92impl ScalarUDFImpl for MakeDateFunc {
93    fn as_any(&self) -> &dyn Any {
94        self
95    }
96
97    fn name(&self) -> &str {
98        "make_date"
99    }
100
101    fn signature(&self) -> &Signature {
102        &self.signature
103    }
104
105    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
106        Ok(Date32)
107    }
108
109    fn invoke_with_args(
110        &self,
111        args: datafusion_expr::ScalarFunctionArgs,
112    ) -> Result<ColumnarValue> {
113        // first, identify if any of the arguments is an Array. If yes, store its `len`,
114        // as any scalar will need to be converted to an array of len `len`.
115        let args = args.args;
116        let len = args
117            .iter()
118            .fold(Option::<usize>::None, |acc, arg| match arg {
119                ColumnarValue::Scalar(_) => acc,
120                ColumnarValue::Array(a) => Some(a.len()),
121            });
122
123        let [years, months, days] = take_function_args(self.name(), args)?;
124
125        let years = years.cast_to(&Int32, None)?;
126        let months = months.cast_to(&Int32, None)?;
127        let days = days.cast_to(&Int32, None)?;
128
129        let scalar_value_fn = |col: &ColumnarValue| -> Result<i32> {
130            let ColumnarValue::Scalar(s) = col else {
131                return exec_err!("Expected scalar value");
132            };
133            let ScalarValue::Int32(Some(i)) = s else {
134                return exec_err!("Unable to parse date from null/empty value");
135            };
136            Ok(*i)
137        };
138
139        let value = if let Some(array_size) = len {
140            let to_primitive_array_fn =
141                |col: &ColumnarValue| -> PrimitiveArray<Int32Type> {
142                    match col {
143                        ColumnarValue::Array(a) => {
144                            a.as_primitive::<Int32Type>().to_owned()
145                        }
146                        _ => {
147                            let v = scalar_value_fn(col).unwrap();
148                            PrimitiveArray::<Int32Type>::from_value(v, array_size)
149                        }
150                    }
151                };
152
153            let years = to_primitive_array_fn(&years);
154            let months = to_primitive_array_fn(&months);
155            let days = to_primitive_array_fn(&days);
156
157            let mut builder: PrimitiveBuilder<Date32Type> =
158                PrimitiveArray::builder(array_size);
159            for i in 0..array_size {
160                make_date_inner(
161                    years.value(i),
162                    months.value(i),
163                    days.value(i),
164                    |days: i32| builder.append_value(days),
165                )?;
166            }
167
168            let arr = builder.finish();
169
170            ColumnarValue::Array(Arc::new(arr))
171        } else {
172            // For scalar only columns the operation is faster without using the PrimitiveArray.
173            // Also, keep the output as scalar since all inputs are scalar.
174            let mut value = 0;
175            make_date_inner(
176                scalar_value_fn(&years)?,
177                scalar_value_fn(&months)?,
178                scalar_value_fn(&days)?,
179                |days: i32| value = days,
180            )?;
181
182            ColumnarValue::Scalar(ScalarValue::Date32(Some(value)))
183        };
184
185        Ok(value)
186    }
187    fn documentation(&self) -> Option<&Documentation> {
188        self.doc()
189    }
190}
191
192/// Converts the year/month/day fields to an `i32` representing the days from
193/// the unix epoch and invokes `date_consumer_fn` with the value
194fn make_date_inner<F: FnMut(i32)>(
195    year: i32,
196    month: i32,
197    day: i32,
198    mut date_consumer_fn: F,
199) -> Result<()> {
200    let Ok(m) = u32::try_from(month) else {
201        return exec_err!("Month value '{month:?}' is out of range");
202    };
203    let Ok(d) = u32::try_from(day) else {
204        return exec_err!("Day value '{day:?}' is out of range");
205    };
206
207    if let Some(date) = NaiveDate::from_ymd_opt(year, m, d) {
208        // The number of days until the start of the unix epoch in the proleptic Gregorian calendar
209        // (with January 1, Year 1 (CE) as day 1). See [Datelike::num_days_from_ce].
210        const UNIX_DAYS_FROM_CE: i32 = 719_163;
211
212        // since the epoch for the date32 datatype is the unix epoch
213        // we need to subtract the unix epoch from the current date
214        // note that this can result in a negative value
215        date_consumer_fn(date.num_days_from_ce() - UNIX_DAYS_FROM_CE);
216        Ok(())
217    } else {
218        exec_err!("Unable to parse date from {year}, {month}, {day}")
219    }
220}
221
#[cfg(test)]
mod tests {
    use crate::datetime::make_date::MakeDateFunc;
    use arrow::array::{Array, Date32Array, Int32Array, Int64Array, UInt32Array};
    use arrow::datatypes::DataType;
    use datafusion_common::ScalarValue;
    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
    use std::sync::Arc;

    /// Invokes a fresh `MakeDateFunc` on `args`, declaring `number_rows` rows
    /// and a `Date32` return type.
    fn invoke(
        args: Vec<ColumnarValue>,
        number_rows: usize,
    ) -> datafusion_common::Result<ColumnarValue> {
        MakeDateFunc::new().invoke_with_args(ScalarFunctionArgs {
            args,
            number_rows,
            return_type: &DataType::Date32,
        })
    }

    /// Wraps each scalar value as a scalar function argument.
    fn scalars<const N: usize>(values: [ScalarValue; N]) -> Vec<ColumnarValue> {
        values.into_iter().map(ColumnarValue::Scalar).collect()
    }

    /// Runs a single-row invocation that must fail and returns its error text
    /// without the backtrace.
    fn error_text(args: Vec<ColumnarValue>) -> String {
        invoke(args, 1).err().unwrap().strip_backtrace()
    }

    #[test]
    fn test_make_date() {
        // Scalar arguments of mixed integer and string types describing
        // 2024-01-14 must all produce the same scalar date.
        let scalar_inputs = [
            [
                ScalarValue::Int32(Some(2024)),
                ScalarValue::Int64(Some(1)),
                ScalarValue::UInt32(Some(14)),
            ],
            [
                ScalarValue::Int64(Some(2024)),
                ScalarValue::UInt64(Some(1)),
                ScalarValue::UInt32(Some(14)),
            ],
            [
                ScalarValue::Utf8(Some("2024".to_string())),
                ScalarValue::LargeUtf8(Some("1".to_string())),
                ScalarValue::Utf8(Some("14".to_string())),
            ],
        ];
        for input in scalar_inputs {
            let res = invoke(scalars(input), 1)
                .expect("that make_date parsed values without error");
            match res {
                ColumnarValue::Scalar(ScalarValue::Date32(date)) => {
                    assert_eq!(19736, date.unwrap());
                }
                _ => panic!("Expected a scalar value"),
            }
        }

        // Array arguments of differing integer types produce an array result.
        let years = Arc::new((2021..2025).map(Some).collect::<Int64Array>());
        let months = Arc::new((1..5).map(Some).collect::<Int32Array>());
        let days = Arc::new((11..15).map(Some).collect::<UInt32Array>());
        let batch_len = years.len();
        let res = invoke(
            vec![
                ColumnarValue::Array(years),
                ColumnarValue::Array(months),
                ColumnarValue::Array(days),
            ],
            batch_len,
        )
        .expect("that make_date parsed values without error");

        let ColumnarValue::Array(array) = res else {
            panic!("Expected a columnar array")
        };
        assert_eq!(array.len(), 4);
        let expected =
            Date32Array::from_iter_values([18_638, 19_035, 19_429, 19_827]);
        assert_eq!(&expected as &dyn Array, array.as_ref());

        //
        // Fallible test cases
        //

        // invalid number of arguments
        assert_eq!(
            error_text(scalars([ScalarValue::Int32(Some(1))])),
            "Execution error: make_date function requires 3 arguments, got 1"
        );

        // invalid type
        assert_eq!(
            error_text(scalars([
                ScalarValue::IntervalYearMonth(Some(1)),
                ScalarValue::TimestampNanosecond(Some(1), None),
                ScalarValue::TimestampNanosecond(Some(1), None),
            ])),
            "Arrow error: Cast error: Casting from Interval(YearMonth) to Int32 not supported"
        );

        // overflow of month
        assert_eq!(
            error_text(scalars([
                ScalarValue::Int32(Some(2023)),
                ScalarValue::UInt64(Some(u64::MAX)),
                ScalarValue::Int32(Some(22)),
            ])),
            "Arrow error: Cast error: Can't cast value 18446744073709551615 to type Int32"
        );

        // overflow of day
        assert_eq!(
            error_text(scalars([
                ScalarValue::Int32(Some(2023)),
                ScalarValue::Int32(Some(22)),
                ScalarValue::UInt32(Some(u32::MAX)),
            ])),
            "Arrow error: Cast error: Can't cast value 4294967295 to type Int32"
        );
    }
}