1use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32 doc_section(label = "Time and Date Functions"),
33 description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41 sql_example = r#"```sql
42> select to_date('2023-01-31');
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31 |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31 |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
57"#,
58 standard_argument(name = "expression", prefix = "String"),
59 argument(
60 name = "format_n",
61 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62 they appear with the first successful one being returned. If none of the formats successfully parse the expression
63 an error will be returned."
64 )
65)]
66#[derive(Debug)]
67pub struct ToDateFunc {
68 signature: Signature,
69}
70
71impl Default for ToDateFunc {
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
77impl ToDateFunc {
78 pub fn new() -> Self {
79 Self {
80 signature: Signature::variadic_any(Volatility::Immutable),
81 }
82 }
83
84 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85 match args.len() {
86 1 => handle::<Date32Type, _, Date32Type>(
87 args,
88 |s| match Date32Type::parse(s) {
89 Some(v) => Ok(v),
90 None => arrow_err!(ParseError(
91 "Unable to cast to Date32 for converting from i64 to i32 failed"
92 .to_string()
93 )),
94 },
95 "to_date",
96 ),
97 2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98 args,
99 |s, format| {
100 string_to_timestamp_millis_formatted(s, format)
101 .map(|n| n / (24 * 60 * 60 * 1_000))
102 .and_then(|v| {
103 v.try_into().map_err(|_| {
104 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105 })
106 })
107 },
108 |n| n,
109 "to_date",
110 ),
111 0 => exec_err!("Unsupported 0 argument count for function to_date"),
112 }
113 }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117 fn as_any(&self) -> &dyn Any {
118 self
119 }
120
121 fn name(&self) -> &str {
122 "to_date"
123 }
124
125 fn signature(&self) -> &Signature {
126 &self.signature
127 }
128
129 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130 Ok(Date32)
131 }
132
133 fn invoke_with_args(
134 &self,
135 args: datafusion_expr::ScalarFunctionArgs,
136 ) -> Result<ColumnarValue> {
137 let args = args.args;
138 if args.is_empty() {
139 return exec_err!("to_date function requires 1 or more arguments, got 0");
140 }
141
142 if args.len() > 1 {
144 validate_data_types(&args, "to_date")?;
145 }
146
147 match args[0].data_type() {
148 Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149 args[0].cast_to(&Date32, None)
150 }
151 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152 other => {
153 exec_err!("Unsupported data type {:?} for function to_date", other)
154 }
155 }
156 }
157
158 fn documentation(&self) -> Option<&Documentation> {
159 self.doc()
160 }
161}
162
163#[cfg(test)]
164mod tests {
165 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
166 use arrow::datatypes::DataType;
167 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
168 use datafusion_common::ScalarValue;
169 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
170 use std::sync::Arc;
171
172 use super::ToDateFunc;
173
174 #[test]
175 fn test_to_date_without_format() {
176 struct TestCase {
177 name: &'static str,
178 date_str: &'static str,
179 }
180
181 let test_cases = vec![
182 TestCase {
183 name: "Largest four-digit year (9999)",
184 date_str: "9999-12-31",
185 },
186 TestCase {
187 name: "Year 1 (0001)",
188 date_str: "0001-12-31",
189 },
190 TestCase {
191 name: "Year before epoch (1969)",
192 date_str: "1969-01-01",
193 },
194 TestCase {
195 name: "Switch Julian/Gregorian calendar (1582-10-10)",
196 date_str: "1582-10-10",
197 },
198 ];
199
200 for tc in &test_cases {
201 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
202 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
203 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
204
205 test_array::<GenericStringArray<i32>>(tc);
206 test_array::<GenericStringArray<i64>>(tc);
207 test_array::<StringViewArray>(tc);
208 }
209
210 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
211 let args = datafusion_expr::ScalarFunctionArgs {
212 args: vec![ColumnarValue::Scalar(sv)],
213 number_rows: 1,
214 return_type: &DataType::Date32,
215 };
216 let to_date_result = ToDateFunc::new().invoke_with_args(args);
217
218 match to_date_result {
219 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
220 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
221 assert_eq!(
222 date_val, expected,
223 "{}: to_date created wrong value",
224 tc.name
225 );
226 }
227 _ => panic!("Could not convert '{}' to Date", tc.date_str),
228 }
229 }
230
231 fn test_array<A>(tc: &TestCase)
232 where
233 A: From<Vec<&'static str>> + Array + 'static,
234 {
235 let date_array = A::from(vec![tc.date_str]);
236 let batch_len = date_array.len();
237 let args = datafusion_expr::ScalarFunctionArgs {
238 args: vec![ColumnarValue::Array(Arc::new(date_array))],
239 number_rows: batch_len,
240 return_type: &DataType::Date32,
241 };
242 let to_date_result = ToDateFunc::new().invoke_with_args(args);
243
244 match to_date_result {
245 Ok(ColumnarValue::Array(a)) => {
246 assert_eq!(a.len(), 1);
247
248 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
249 let mut builder = Date32Array::builder(4);
250 builder.append_value(expected.unwrap());
251
252 assert_eq!(
253 &builder.finish() as &dyn Array,
254 a.as_ref(),
255 "{}: to_date created wrong value",
256 tc.name
257 );
258 }
259 _ => panic!("Could not convert '{}' to Date", tc.date_str),
260 }
261 }
262 }
263
264 #[test]
265 fn test_to_date_with_format() {
266 struct TestCase {
267 name: &'static str,
268 date_str: &'static str,
269 format_str: &'static str,
270 formatted_date: &'static str,
271 }
272
273 let test_cases = vec![
274 TestCase {
275 name: "Largest four-digit year (9999)",
276 date_str: "9999-12-31",
277 format_str: "%Y%m%d",
278 formatted_date: "99991231",
279 },
280 TestCase {
281 name: "Smallest four-digit year (-9999)",
282 date_str: "-9999-12-31",
283 format_str: "%Y/%m/%d",
284 formatted_date: "-9999/12/31",
285 },
286 TestCase {
287 name: "Year 1 (0001)",
288 date_str: "0001-12-31",
289 format_str: "%Y%m%d",
290 formatted_date: "00011231",
291 },
292 TestCase {
293 name: "Year before epoch (1969)",
294 date_str: "1969-01-01",
295 format_str: "%Y%m%d",
296 formatted_date: "19690101",
297 },
298 TestCase {
299 name: "Switch Julian/Gregorian calendar (1582-10-10)",
300 date_str: "1582-10-10",
301 format_str: "%Y%m%d",
302 formatted_date: "15821010",
303 },
304 TestCase {
305 name: "Negative Year, BC (-42-01-01)",
306 date_str: "-42-01-01",
307 format_str: "%Y/%m/%d",
308 formatted_date: "-42/01/01",
309 },
310 ];
311
312 for tc in &test_cases {
313 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
314 test_scalar(
315 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
316 tc,
317 );
318 test_scalar(
319 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
320 tc,
321 );
322
323 test_array::<GenericStringArray<i32>>(tc);
324 test_array::<GenericStringArray<i64>>(tc);
325 test_array::<StringViewArray>(tc);
326 }
327
328 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
329 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
330
331 let args = datafusion_expr::ScalarFunctionArgs {
332 args: vec![
333 ColumnarValue::Scalar(sv),
334 ColumnarValue::Scalar(format_scalar),
335 ],
336 number_rows: 1,
337 return_type: &DataType::Date32,
338 };
339 let to_date_result = ToDateFunc::new().invoke_with_args(args);
340
341 match to_date_result {
342 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
343 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
344 assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
345 }
346 _ => panic!(
347 "Could not convert '{}' with format string '{}'to Date",
348 tc.date_str, tc.format_str
349 ),
350 }
351 }
352
353 fn test_array<A>(tc: &TestCase)
354 where
355 A: From<Vec<&'static str>> + Array + 'static,
356 {
357 let date_array = A::from(vec![tc.formatted_date]);
358 let format_array = A::from(vec![tc.format_str]);
359 let batch_len = date_array.len();
360
361 let args = datafusion_expr::ScalarFunctionArgs {
362 args: vec![
363 ColumnarValue::Array(Arc::new(date_array)),
364 ColumnarValue::Array(Arc::new(format_array)),
365 ],
366 number_rows: batch_len,
367 return_type: &DataType::Date32,
368 };
369 let to_date_result = ToDateFunc::new().invoke_with_args(args);
370
371 match to_date_result {
372 Ok(ColumnarValue::Array(a)) => {
373 assert_eq!(a.len(), 1);
374
375 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
376 let mut builder = Date32Array::builder(4);
377 builder.append_value(expected.unwrap());
378
379 assert_eq!(
380 &builder.finish() as &dyn Array, a.as_ref(),
381 "{}: to_date created wrong value for date '{}' with format string '{}'",
382 tc.name,
383 tc.formatted_date,
384 tc.format_str
385 );
386 }
387 _ => panic!(
388 "Could not convert '{}' with format string '{}'to Date: {:?}",
389 tc.formatted_date, tc.format_str, to_date_result
390 ),
391 }
392 }
393 }
394
395 #[test]
396 fn test_to_date_multiple_format_strings() {
397 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
398 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
399 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
400
401 let args = datafusion_expr::ScalarFunctionArgs {
402 args: vec![
403 ColumnarValue::Scalar(formatted_date_scalar),
404 ColumnarValue::Scalar(format1_scalar),
405 ColumnarValue::Scalar(format2_scalar),
406 ],
407 number_rows: 1,
408 return_type: &DataType::Date32,
409 };
410 let to_date_result = ToDateFunc::new().invoke_with_args(args);
411
412 match to_date_result {
413 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
414 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
415 assert_eq!(
416 date_val, expected,
417 "to_date created wrong value for date with 2 format strings"
418 );
419 }
420 _ => panic!("Conversion failed",),
421 }
422 }
423
424 #[test]
425 fn test_to_date_from_timestamp() {
426 let test_cases = vec![
427 "2020-09-08T13:42:29Z",
428 "2020-09-08T13:42:29.190855-05:00",
429 "2020-09-08 12:13:29",
430 ];
431 for date_str in test_cases {
432 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
433
434 let args = datafusion_expr::ScalarFunctionArgs {
435 args: vec![ColumnarValue::Scalar(formatted_date_scalar)],
436 number_rows: 1,
437 return_type: &DataType::Date32,
438 };
439 let to_date_result = ToDateFunc::new().invoke_with_args(args);
440
441 match to_date_result {
442 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
443 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
444 assert_eq!(date_val, expected, "to_date created wrong value");
445 }
446 _ => panic!("Conversion of {} failed", date_str),
447 }
448 }
449 }
450
451 #[test]
452 fn test_to_date_string_with_valid_number() {
453 let date_str = "20241231";
454 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
455
456 let args = datafusion_expr::ScalarFunctionArgs {
457 args: vec![ColumnarValue::Scalar(date_scalar)],
458 number_rows: 1,
459 return_type: &DataType::Date32,
460 };
461 let to_date_result = ToDateFunc::new().invoke_with_args(args);
462
463 match to_date_result {
464 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
465 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
466 assert_eq!(
467 date_val, expected,
468 "to_date created wrong value for {}",
469 date_str
470 );
471 }
472 _ => panic!("Conversion of {} failed", date_str),
473 }
474 }
475
476 #[test]
477 fn test_to_date_string_with_invalid_number() {
478 let date_str = "202412311";
479 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
480
481 let args = datafusion_expr::ScalarFunctionArgs {
482 args: vec![ColumnarValue::Scalar(date_scalar)],
483 number_rows: 1,
484 return_type: &DataType::Date32,
485 };
486 let to_date_result = ToDateFunc::new().invoke_with_args(args);
487
488 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
489 panic!(
490 "Conversion of {} succeeded, but should have failed, ",
491 date_str
492 );
493 }
494 }
495}