datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32    doc_section(label = "Time and Date Functions"),
33    description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41    sql_example = r#"```sql
42> select to_date('2023-01-31'); 
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31                    |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31                                                          |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
57"#,
58    standard_argument(name = "expression", prefix = "String"),
59    argument(
60        name = "format_n",
61        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62  they appear with the first successful one being returned. If none of the formats successfully parse the expression
63  an error will be returned."
64    )
65)]
66#[derive(Debug)]
67pub struct ToDateFunc {
68    signature: Signature,
69}
70
71impl Default for ToDateFunc {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77impl ToDateFunc {
78    pub fn new() -> Self {
79        Self {
80            signature: Signature::variadic_any(Volatility::Immutable),
81        }
82    }
83
84    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85        match args.len() {
86            1 => handle::<Date32Type, _, Date32Type>(
87                args,
88                |s| match Date32Type::parse(s) {
89                    Some(v) => Ok(v),
90                    None => arrow_err!(ParseError(
91                        "Unable to cast to Date32 for converting from i64 to i32 failed"
92                            .to_string()
93                    )),
94                },
95                "to_date",
96            ),
97            2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98                args,
99                |s, format| {
100                    string_to_timestamp_millis_formatted(s, format)
101                        .map(|n| n / (24 * 60 * 60 * 1_000))
102                        .and_then(|v| {
103                            v.try_into().map_err(|_| {
104                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105                            })
106                        })
107                },
108                |n| n,
109                "to_date",
110            ),
111            0 => exec_err!("Unsupported 0 argument count for function to_date"),
112        }
113    }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117    fn as_any(&self) -> &dyn Any {
118        self
119    }
120
121    fn name(&self) -> &str {
122        "to_date"
123    }
124
125    fn signature(&self) -> &Signature {
126        &self.signature
127    }
128
129    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130        Ok(Date32)
131    }
132
133    fn invoke_with_args(
134        &self,
135        args: datafusion_expr::ScalarFunctionArgs,
136    ) -> Result<ColumnarValue> {
137        let args = args.args;
138        if args.is_empty() {
139            return exec_err!("to_date function requires 1 or more arguments, got 0");
140        }
141
142        // validate that any args after the first one are Utf8
143        if args.len() > 1 {
144            validate_data_types(&args, "to_date")?;
145        }
146
147        match args[0].data_type() {
148            Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149                args[0].cast_to(&Date32, None)
150            }
151            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152            other => {
153                exec_err!("Unsupported data type {:?} for function to_date", other)
154            }
155        }
156    }
157
158    fn documentation(&self) -> Option<&Documentation> {
159        self.doc()
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
166    use arrow::datatypes::DataType;
167    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
168    use datafusion_common::ScalarValue;
169    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
170    use std::sync::Arc;
171
172    use super::ToDateFunc;
173
174    #[test]
175    fn test_to_date_without_format() {
176        struct TestCase {
177            name: &'static str,
178            date_str: &'static str,
179        }
180
181        let test_cases = vec![
182            TestCase {
183                name: "Largest four-digit year (9999)",
184                date_str: "9999-12-31",
185            },
186            TestCase {
187                name: "Year 1 (0001)",
188                date_str: "0001-12-31",
189            },
190            TestCase {
191                name: "Year before epoch (1969)",
192                date_str: "1969-01-01",
193            },
194            TestCase {
195                name: "Switch Julian/Gregorian calendar (1582-10-10)",
196                date_str: "1582-10-10",
197            },
198        ];
199
200        for tc in &test_cases {
201            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
202            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
203            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
204
205            test_array::<GenericStringArray<i32>>(tc);
206            test_array::<GenericStringArray<i64>>(tc);
207            test_array::<StringViewArray>(tc);
208        }
209
210        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
211            let args = datafusion_expr::ScalarFunctionArgs {
212                args: vec![ColumnarValue::Scalar(sv)],
213                number_rows: 1,
214                return_type: &DataType::Date32,
215            };
216            let to_date_result = ToDateFunc::new().invoke_with_args(args);
217
218            match to_date_result {
219                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
220                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
221                    assert_eq!(
222                        date_val, expected,
223                        "{}: to_date created wrong value",
224                        tc.name
225                    );
226                }
227                _ => panic!("Could not convert '{}' to Date", tc.date_str),
228            }
229        }
230
231        fn test_array<A>(tc: &TestCase)
232        where
233            A: From<Vec<&'static str>> + Array + 'static,
234        {
235            let date_array = A::from(vec![tc.date_str]);
236            let batch_len = date_array.len();
237            let args = datafusion_expr::ScalarFunctionArgs {
238                args: vec![ColumnarValue::Array(Arc::new(date_array))],
239                number_rows: batch_len,
240                return_type: &DataType::Date32,
241            };
242            let to_date_result = ToDateFunc::new().invoke_with_args(args);
243
244            match to_date_result {
245                Ok(ColumnarValue::Array(a)) => {
246                    assert_eq!(a.len(), 1);
247
248                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
249                    let mut builder = Date32Array::builder(4);
250                    builder.append_value(expected.unwrap());
251
252                    assert_eq!(
253                        &builder.finish() as &dyn Array,
254                        a.as_ref(),
255                        "{}: to_date created wrong value",
256                        tc.name
257                    );
258                }
259                _ => panic!("Could not convert '{}' to Date", tc.date_str),
260            }
261        }
262    }
263
264    #[test]
265    fn test_to_date_with_format() {
266        struct TestCase {
267            name: &'static str,
268            date_str: &'static str,
269            format_str: &'static str,
270            formatted_date: &'static str,
271        }
272
273        let test_cases = vec![
274            TestCase {
275                name: "Largest four-digit year (9999)",
276                date_str: "9999-12-31",
277                format_str: "%Y%m%d",
278                formatted_date: "99991231",
279            },
280            TestCase {
281                name: "Smallest four-digit year (-9999)",
282                date_str: "-9999-12-31",
283                format_str: "%Y/%m/%d",
284                formatted_date: "-9999/12/31",
285            },
286            TestCase {
287                name: "Year 1 (0001)",
288                date_str: "0001-12-31",
289                format_str: "%Y%m%d",
290                formatted_date: "00011231",
291            },
292            TestCase {
293                name: "Year before epoch (1969)",
294                date_str: "1969-01-01",
295                format_str: "%Y%m%d",
296                formatted_date: "19690101",
297            },
298            TestCase {
299                name: "Switch Julian/Gregorian calendar (1582-10-10)",
300                date_str: "1582-10-10",
301                format_str: "%Y%m%d",
302                formatted_date: "15821010",
303            },
304            TestCase {
305                name: "Negative Year, BC (-42-01-01)",
306                date_str: "-42-01-01",
307                format_str: "%Y/%m/%d",
308                formatted_date: "-42/01/01",
309            },
310        ];
311
312        for tc in &test_cases {
313            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
314            test_scalar(
315                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
316                tc,
317            );
318            test_scalar(
319                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
320                tc,
321            );
322
323            test_array::<GenericStringArray<i32>>(tc);
324            test_array::<GenericStringArray<i64>>(tc);
325            test_array::<StringViewArray>(tc);
326        }
327
328        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
329            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
330
331            let args = datafusion_expr::ScalarFunctionArgs {
332                args: vec![
333                    ColumnarValue::Scalar(sv),
334                    ColumnarValue::Scalar(format_scalar),
335                ],
336                number_rows: 1,
337                return_type: &DataType::Date32,
338            };
339            let to_date_result = ToDateFunc::new().invoke_with_args(args);
340
341            match to_date_result {
342                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
343                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
344                    assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
345                }
346                _ => panic!(
347                    "Could not convert '{}' with format string '{}'to Date",
348                    tc.date_str, tc.format_str
349                ),
350            }
351        }
352
353        fn test_array<A>(tc: &TestCase)
354        where
355            A: From<Vec<&'static str>> + Array + 'static,
356        {
357            let date_array = A::from(vec![tc.formatted_date]);
358            let format_array = A::from(vec![tc.format_str]);
359            let batch_len = date_array.len();
360
361            let args = datafusion_expr::ScalarFunctionArgs {
362                args: vec![
363                    ColumnarValue::Array(Arc::new(date_array)),
364                    ColumnarValue::Array(Arc::new(format_array)),
365                ],
366                number_rows: batch_len,
367                return_type: &DataType::Date32,
368            };
369            let to_date_result = ToDateFunc::new().invoke_with_args(args);
370
371            match to_date_result {
372                Ok(ColumnarValue::Array(a)) => {
373                    assert_eq!(a.len(), 1);
374
375                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
376                    let mut builder = Date32Array::builder(4);
377                    builder.append_value(expected.unwrap());
378
379                    assert_eq!(
380                        &builder.finish() as &dyn Array, a.as_ref(),
381                        "{}: to_date created wrong value for date '{}' with format string '{}'",
382                        tc.name,
383                        tc.formatted_date,
384                        tc.format_str
385                    );
386                }
387                _ => panic!(
388                    "Could not convert '{}' with format string '{}'to Date: {:?}",
389                    tc.formatted_date, tc.format_str, to_date_result
390                ),
391            }
392        }
393    }
394
395    #[test]
396    fn test_to_date_multiple_format_strings() {
397        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
398        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
399        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
400
401        let args = datafusion_expr::ScalarFunctionArgs {
402            args: vec![
403                ColumnarValue::Scalar(formatted_date_scalar),
404                ColumnarValue::Scalar(format1_scalar),
405                ColumnarValue::Scalar(format2_scalar),
406            ],
407            number_rows: 1,
408            return_type: &DataType::Date32,
409        };
410        let to_date_result = ToDateFunc::new().invoke_with_args(args);
411
412        match to_date_result {
413            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
414                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
415                assert_eq!(
416                    date_val, expected,
417                    "to_date created wrong value for date with 2 format strings"
418                );
419            }
420            _ => panic!("Conversion failed",),
421        }
422    }
423
424    #[test]
425    fn test_to_date_from_timestamp() {
426        let test_cases = vec![
427            "2020-09-08T13:42:29Z",
428            "2020-09-08T13:42:29.190855-05:00",
429            "2020-09-08 12:13:29",
430        ];
431        for date_str in test_cases {
432            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
433
434            let args = datafusion_expr::ScalarFunctionArgs {
435                args: vec![ColumnarValue::Scalar(formatted_date_scalar)],
436                number_rows: 1,
437                return_type: &DataType::Date32,
438            };
439            let to_date_result = ToDateFunc::new().invoke_with_args(args);
440
441            match to_date_result {
442                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
443                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
444                    assert_eq!(date_val, expected, "to_date created wrong value");
445                }
446                _ => panic!("Conversion of {} failed", date_str),
447            }
448        }
449    }
450
451    #[test]
452    fn test_to_date_string_with_valid_number() {
453        let date_str = "20241231";
454        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
455
456        let args = datafusion_expr::ScalarFunctionArgs {
457            args: vec![ColumnarValue::Scalar(date_scalar)],
458            number_rows: 1,
459            return_type: &DataType::Date32,
460        };
461        let to_date_result = ToDateFunc::new().invoke_with_args(args);
462
463        match to_date_result {
464            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
465                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
466                assert_eq!(
467                    date_val, expected,
468                    "to_date created wrong value for {}",
469                    date_str
470                );
471            }
472            _ => panic!("Conversion of {} failed", date_str),
473        }
474    }
475
476    #[test]
477    fn test_to_date_string_with_invalid_number() {
478        let date_str = "202412311";
479        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
480
481        let args = datafusion_expr::ScalarFunctionArgs {
482            args: vec![ColumnarValue::Scalar(date_scalar)],
483            number_rows: 1,
484            return_type: &DataType::Date32,
485        };
486        let to_date_result = ToDateFunc::new().invoke_with_args(args);
487
488        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
489            panic!(
490                "Conversion of {} succeeded, but should have failed, ",
491                date_str
492            );
493        }
494    }
495}