1use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::cast::AsArray;
22use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
23use arrow::datatypes::DataType;
24use arrow::datatypes::DataType::{
25 Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
26};
27use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
28use arrow::error::ArrowError;
29use arrow::util::display::{ArrayFormatter, DurationFormat, FormatOptions};
30
31use datafusion_common::{exec_err, utils::take_function_args, Result, ScalarValue};
32use datafusion_expr::TypeSignature::Exact;
33use datafusion_expr::{
34 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
35};
36use datafusion_macros::user_doc;
37
38#[user_doc(
39 doc_section(label = "Time and Date Functions"),
40 description = "Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.",
41 syntax_example = "to_char(expression, format)",
42 sql_example = r#"```sql
43> select to_char('2023-03-01'::date, '%d-%m-%Y');
44+----------------------------------------------+
45| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) |
46+----------------------------------------------+
47| 01-03-2023 |
48+----------------------------------------------+
49```
50
51Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs)
52"#,
53 argument(
54 name = "expression",
55 description = "Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration."
56 ),
57 argument(
58 name = "format",
59 description = "A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression."
60 ),
61 argument(
62 name = "day",
63 description = "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
64 )
65)]
66#[derive(Debug)]
67pub struct ToCharFunc {
68 signature: Signature,
69 aliases: Vec<String>,
70}
71
72impl Default for ToCharFunc {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl ToCharFunc {
79 pub fn new() -> Self {
80 Self {
81 signature: Signature::one_of(
82 vec![
83 Exact(vec![Date32, Utf8]),
84 Exact(vec![Date64, Utf8]),
85 Exact(vec![Time64(Nanosecond), Utf8]),
86 Exact(vec![Time64(Microsecond), Utf8]),
87 Exact(vec![Time32(Millisecond), Utf8]),
88 Exact(vec![Time32(Second), Utf8]),
89 Exact(vec![
90 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
91 Utf8,
92 ]),
93 Exact(vec![Timestamp(Nanosecond, None), Utf8]),
94 Exact(vec![
95 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
96 Utf8,
97 ]),
98 Exact(vec![Timestamp(Microsecond, None), Utf8]),
99 Exact(vec![
100 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
101 Utf8,
102 ]),
103 Exact(vec![Timestamp(Millisecond, None), Utf8]),
104 Exact(vec![
105 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
106 Utf8,
107 ]),
108 Exact(vec![Timestamp(Second, None), Utf8]),
109 Exact(vec![Duration(Nanosecond), Utf8]),
110 Exact(vec![Duration(Microsecond), Utf8]),
111 Exact(vec![Duration(Millisecond), Utf8]),
112 Exact(vec![Duration(Second), Utf8]),
113 ],
114 Volatility::Immutable,
115 ),
116 aliases: vec![String::from("date_format")],
117 }
118 }
119}
120
121impl ScalarUDFImpl for ToCharFunc {
122 fn as_any(&self) -> &dyn Any {
123 self
124 }
125
126 fn name(&self) -> &str {
127 "to_char"
128 }
129
130 fn signature(&self) -> &Signature {
131 &self.signature
132 }
133
134 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
135 Ok(Utf8)
136 }
137
138 fn invoke_with_args(
139 &self,
140 args: datafusion_expr::ScalarFunctionArgs,
141 ) -> Result<ColumnarValue> {
142 let args = args.args;
143 let [date_time, format] = take_function_args(self.name(), &args)?;
144
145 match format {
146 ColumnarValue::Scalar(ScalarValue::Utf8(None))
147 | ColumnarValue::Scalar(ScalarValue::Null) => {
148 _to_char_scalar(date_time.clone(), None)
149 }
150 ColumnarValue::Scalar(ScalarValue::Utf8(Some(format))) => {
152 _to_char_scalar(date_time.clone(), Some(format))
154 }
155 ColumnarValue::Array(_) => _to_char_array(&args),
156 _ => {
157 exec_err!(
158 "Format for `to_char` must be non-null Utf8, received {:?}",
159 format.data_type()
160 )
161 }
162 }
163 }
164
165 fn aliases(&self) -> &[String] {
166 &self.aliases
167 }
168 fn documentation(&self) -> Option<&Documentation> {
169 self.doc()
170 }
171}
172
173fn _build_format_options<'a>(
174 data_type: &DataType,
175 format: Option<&'a str>,
176) -> Result<FormatOptions<'a>, Result<ColumnarValue>> {
177 let Some(format) = format else {
178 return Ok(FormatOptions::new());
179 };
180 let format_options = match data_type {
181 Date32 => FormatOptions::new().with_date_format(Some(format)),
182 Date64 => FormatOptions::new().with_datetime_format(Some(format)),
183 Time32(_) => FormatOptions::new().with_time_format(Some(format)),
184 Time64(_) => FormatOptions::new().with_time_format(Some(format)),
185 Timestamp(_, _) => FormatOptions::new()
186 .with_timestamp_format(Some(format))
187 .with_timestamp_tz_format(Some(format)),
188 Duration(_) => FormatOptions::new().with_duration_format(
189 if "ISO8601".eq_ignore_ascii_case(format) {
190 DurationFormat::ISO8601
191 } else {
192 DurationFormat::Pretty
193 },
194 ),
195 other => {
196 return Err(exec_err!(
197 "to_char only supports date, time, timestamp and duration data types, received {other:?}"
198 ));
199 }
200 };
201 Ok(format_options)
202}
203
204fn _to_char_scalar(
206 expression: ColumnarValue,
207 format: Option<&str>,
208) -> Result<ColumnarValue> {
209 let data_type = &expression.data_type();
212 let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
213 let array = expression.into_array(1)?;
214
215 if format.is_none() {
216 if is_scalar_expression {
217 return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
218 } else {
219 return Ok(ColumnarValue::Array(new_null_array(&Utf8, array.len())));
220 }
221 }
222
223 let format_options = match _build_format_options(data_type, format) {
224 Ok(value) => value,
225 Err(value) => return value,
226 };
227
228 let formatter = ArrayFormatter::try_new(array.as_ref(), &format_options)?;
229 let formatted: Result<Vec<Option<String>>, ArrowError> = (0..array.len())
230 .map(|i| {
231 if array.is_null(i) {
232 Ok(None)
233 } else {
234 formatter.value(i).try_to_string().map(Some)
235 }
236 })
237 .collect();
238
239 if let Ok(formatted) = formatted {
240 if is_scalar_expression {
241 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
242 formatted.first().unwrap().clone(),
243 )))
244 } else {
245 Ok(ColumnarValue::Array(
246 Arc::new(StringArray::from(formatted)) as ArrayRef
247 ))
248 }
249 } else {
250 exec_err!("{}", formatted.unwrap_err())
251 }
252}
253
254fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
255 let arrays = ColumnarValue::values_to_arrays(args)?;
256 let mut results: Vec<Option<String>> = vec![];
257 let format_array = arrays[1].as_string::<i32>();
258 let data_type = arrays[0].data_type();
259
260 for idx in 0..arrays[0].len() {
261 let format = if format_array.is_null(idx) {
262 None
263 } else {
264 Some(format_array.value(idx))
265 };
266 if format.is_none() {
267 results.push(None);
268 continue;
269 }
270 let format_options = match _build_format_options(data_type, format) {
271 Ok(value) => value,
272 Err(value) => return value,
273 };
274 let formatter = ArrayFormatter::try_new(arrays[0].as_ref(), &format_options)?;
277 let result = formatter.value(idx).try_to_string();
278 match result {
279 Ok(value) => results.push(Some(value)),
280 Err(e) => return exec_err!("{}", e),
281 }
282 }
283
284 match args[0] {
285 ColumnarValue::Array(_) => Ok(ColumnarValue::Array(Arc::new(StringArray::from(
286 results,
287 )) as ArrayRef)),
288 ColumnarValue::Scalar(_) => match results.first().unwrap() {
289 Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
290 value.to_string(),
291 )))),
292 None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
293 },
294 }
295}
296
// Unit tests for `ToCharFunc`, driven directly through `invoke_with_args`.
#[cfg(test)]
mod tests {
    use crate::datetime::to_char::ToCharFunc;
    use arrow::array::{
        Array, ArrayRef, Date32Array, Date64Array, StringArray, Time32MillisecondArray,
        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
        TimestampSecondArray,
    };
    use arrow::datatypes::DataType;
    use chrono::{NaiveDateTime, Timelike};
    use datafusion_common::ScalarValue;
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
    use std::sync::Arc;

    // Exercises all four scalar/array combinations of (value, format) and the
    // two error paths (wrong argument count, non-Utf8 format).
    #[test]
    fn test_to_char() {
        // Two reference instants with a small sub-second part, used to build
        // the Date64 and Timestamp fixtures below.
        let date = "2020-01-02T03:04:05"
            .parse::<NaiveDateTime>()
            .unwrap()
            .with_nanosecond(12345)
            .unwrap();
        let date2 = "2026-07-08T09:10:11"
            .parse::<NaiveDateTime>()
            .unwrap()
            .with_nanosecond(56789)
            .unwrap();

        // (scalar value, scalar format, expected string)
        let scalar_data = vec![
            (
                ScalarValue::Date32(Some(18506)),
                ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
                "2020::09::01".to_string(),
            ),
            (
                ScalarValue::Date64(Some(date.and_utc().timestamp_millis())),
                ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
                "2020::01::02".to_string(),
            ),
            (
                ScalarValue::Time32Second(Some(31851)),
                ScalarValue::Utf8(Some("%H-%M-%S".to_string())),
                "08-50-51".to_string(),
            ),
            (
                ScalarValue::Time32Millisecond(Some(18506000)),
                ScalarValue::Utf8(Some("%H-%M-%S".to_string())),
                "05-08-26".to_string(),
            ),
            (
                ScalarValue::Time64Microsecond(Some(12344567000)),
                ScalarValue::Utf8(Some("%H-%M-%S %f".to_string())),
                "03-25-44 567000000".to_string(),
            ),
            (
                ScalarValue::Time64Nanosecond(Some(12344567890000)),
                ScalarValue::Utf8(Some("%H-%M-%S %f".to_string())),
                "03-25-44 567890000".to_string(),
            ),
            (
                ScalarValue::TimestampSecond(Some(date.and_utc().timestamp()), None),
                ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H".to_string())),
                "2020::01::02 05::04::03".to_string(),
            ),
            (
                ScalarValue::TimestampMillisecond(
                    Some(date.and_utc().timestamp_millis()),
                    None,
                ),
                ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H".to_string())),
                "2020::01::02 05::04::03".to_string(),
            ),
            (
                ScalarValue::TimestampMicrosecond(
                    Some(date.and_utc().timestamp_micros()),
                    None,
                ),
                ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H %f".to_string())),
                "2020::01::02 05::04::03 000012000".to_string(),
            ),
            (
                ScalarValue::TimestampNanosecond(
                    Some(date.and_utc().timestamp_nanos_opt().unwrap()),
                    None,
                ),
                ScalarValue::Utf8(Some("%Y::%m::%d %S::%M::%H %f".to_string())),
                "2020::01::02 05::04::03 000012345".to_string(),
            ),
        ];

        // Scalar value + scalar format must produce a scalar result.
        for (value, format, expected) in scalar_data {
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![ColumnarValue::Scalar(value), ColumnarValue::Scalar(format)],
                number_rows: 1,
                return_type: &DataType::Utf8,
            };
            let result = ToCharFunc::new()
                .invoke_with_args(args)
                .expect("that to_char parsed values without error");

            if let ColumnarValue::Scalar(ScalarValue::Utf8(date)) = result {
                assert_eq!(expected, date.unwrap());
            } else {
                panic!("Expected a scalar value")
            }
        }

        // (scalar value, single-element format array, expected string)
        let scalar_array_data = vec![
            (
                ScalarValue::Date32(Some(18506)),
                StringArray::from(vec!["%Y::%m::%d".to_string()]),
                "2020::09::01".to_string(),
            ),
            (
                ScalarValue::Date64(Some(date.and_utc().timestamp_millis())),
                StringArray::from(vec!["%Y::%m::%d".to_string()]),
                "2020::01::02".to_string(),
            ),
            (
                ScalarValue::Time32Second(Some(31851)),
                StringArray::from(vec!["%H-%M-%S".to_string()]),
                "08-50-51".to_string(),
            ),
            (
                ScalarValue::Time32Millisecond(Some(18506000)),
                StringArray::from(vec!["%H-%M-%S".to_string()]),
                "05-08-26".to_string(),
            ),
            (
                ScalarValue::Time64Microsecond(Some(12344567000)),
                StringArray::from(vec!["%H-%M-%S %f".to_string()]),
                "03-25-44 567000000".to_string(),
            ),
            (
                ScalarValue::Time64Nanosecond(Some(12344567890000)),
                StringArray::from(vec!["%H-%M-%S %f".to_string()]),
                "03-25-44 567890000".to_string(),
            ),
            (
                ScalarValue::TimestampSecond(Some(date.and_utc().timestamp()), None),
                StringArray::from(vec!["%Y::%m::%d %S::%M::%H".to_string()]),
                "2020::01::02 05::04::03".to_string(),
            ),
            (
                ScalarValue::TimestampMillisecond(
                    Some(date.and_utc().timestamp_millis()),
                    None,
                ),
                StringArray::from(vec!["%Y::%m::%d %S::%M::%H".to_string()]),
                "2020::01::02 05::04::03".to_string(),
            ),
            (
                ScalarValue::TimestampMicrosecond(
                    Some(date.and_utc().timestamp_micros()),
                    None,
                ),
                StringArray::from(vec!["%Y::%m::%d %S::%M::%H %f".to_string()]),
                "2020::01::02 05::04::03 000012000".to_string(),
            ),
            (
                ScalarValue::TimestampNanosecond(
                    Some(date.and_utc().timestamp_nanos_opt().unwrap()),
                    None,
                ),
                StringArray::from(vec!["%Y::%m::%d %S::%M::%H %f".to_string()]),
                "2020::01::02 05::04::03 000012345".to_string(),
            ),
        ];

        // Scalar value + one-row format array is still expected to produce a
        // scalar result.
        for (value, format, expected) in scalar_array_data {
            let batch_len = format.len();
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(value),
                    ColumnarValue::Array(Arc::new(format) as ArrayRef),
                ],
                number_rows: batch_len,
                return_type: &DataType::Utf8,
            };
            let result = ToCharFunc::new()
                .invoke_with_args(args)
                .expect("that to_char parsed values without error");

            if let ColumnarValue::Scalar(ScalarValue::Utf8(date)) = result {
                assert_eq!(expected, date.unwrap());
            } else {
                panic!("Expected a scalar value")
            }
        }

        // (value array, scalar format, expected string array)
        let array_scalar_data = vec![
            (
                Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
                ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
                StringArray::from(vec!["2020::09::01", "2020::09::02"]),
            ),
            (
                Arc::new(Date64Array::from(vec![
                    date.and_utc().timestamp_millis(),
                    date2.and_utc().timestamp_millis(),
                ])) as ArrayRef,
                ScalarValue::Utf8(Some("%Y::%m::%d".to_string())),
                StringArray::from(vec!["2020::01::02", "2026::07::08"]),
            ),
        ];

        // (value array, per-row format array, expected string array); each
        // row uses a different format to check per-row formatting.
        let array_array_data = vec![
            (
                Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
                StringArray::from(vec!["%Y::%m::%d", "%d::%m::%Y"]),
                StringArray::from(vec!["2020::09::01", "02::09::2020"]),
            ),
            (
                Arc::new(Date64Array::from(vec![
                    date.and_utc().timestamp_millis(),
                    date2.and_utc().timestamp_millis(),
                ])) as ArrayRef,
                StringArray::from(vec!["%Y::%m::%d", "%d::%m::%Y"]),
                StringArray::from(vec!["2020::01::02", "08::07::2026"]),
            ),
            (
                Arc::new(Time32MillisecondArray::from(vec![1850600, 1860700]))
                    as ArrayRef,
                StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
                StringArray::from(vec!["00:30:50", "00::31::00"]),
            ),
            (
                Arc::new(Time32SecondArray::from(vec![18506, 18507])) as ArrayRef,
                StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
                StringArray::from(vec!["05:08:26", "05::08::27"]),
            ),
            (
                Arc::new(Time64MicrosecondArray::from(vec![12344567000, 22244567000]))
                    as ArrayRef,
                StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
                StringArray::from(vec!["03:25:44", "06::10::44"]),
            ),
            (
                Arc::new(Time64NanosecondArray::from(vec![
                    1234456789000,
                    2224456789000,
                ])) as ArrayRef,
                StringArray::from(vec!["%H:%M:%S", "%H::%M::%S"]),
                StringArray::from(vec!["00:20:34", "00::37::04"]),
            ),
            (
                Arc::new(TimestampSecondArray::from(vec![
                    date.and_utc().timestamp(),
                    date2.and_utc().timestamp(),
                ])) as ArrayRef,
                StringArray::from(vec!["%Y::%m::%d %S::%M::%H", "%d::%m::%Y %S-%M-%H"]),
                StringArray::from(vec![
                    "2020::01::02 05::04::03",
                    "08::07::2026 11-10-09",
                ]),
            ),
            (
                Arc::new(TimestampMillisecondArray::from(vec![
                    date.and_utc().timestamp_millis(),
                    date2.and_utc().timestamp_millis(),
                ])) as ArrayRef,
                StringArray::from(vec![
                    "%Y::%m::%d %S::%M::%H %f",
                    "%d::%m::%Y %S-%M-%H %f",
                ]),
                StringArray::from(vec![
                    "2020::01::02 05::04::03 000000000",
                    "08::07::2026 11-10-09 000000000",
                ]),
            ),
            (
                Arc::new(TimestampMicrosecondArray::from(vec![
                    date.and_utc().timestamp_micros(),
                    date2.and_utc().timestamp_micros(),
                ])) as ArrayRef,
                StringArray::from(vec![
                    "%Y::%m::%d %S::%M::%H %f",
                    "%d::%m::%Y %S-%M-%H %f",
                ]),
                StringArray::from(vec![
                    "2020::01::02 05::04::03 000012000",
                    "08::07::2026 11-10-09 000056000",
                ]),
            ),
            (
                Arc::new(TimestampNanosecondArray::from(vec![
                    date.and_utc().timestamp_nanos_opt().unwrap(),
                    date2.and_utc().timestamp_nanos_opt().unwrap(),
                ])) as ArrayRef,
                StringArray::from(vec![
                    "%Y::%m::%d %S::%M::%H %f",
                    "%d::%m::%Y %S-%M-%H %f",
                ]),
                StringArray::from(vec![
                    "2020::01::02 05::04::03 000012345",
                    "08::07::2026 11-10-09 000056789",
                ]),
            ),
        ];

        // Array value + scalar format must produce an array result.
        for (value, format, expected) in array_scalar_data {
            let batch_len = value.len();
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Array(value as ArrayRef),
                    ColumnarValue::Scalar(format),
                ],
                number_rows: batch_len,
                return_type: &DataType::Utf8,
            };
            let result = ToCharFunc::new()
                .invoke_with_args(args)
                .expect("that to_char parsed values without error");

            if let ColumnarValue::Array(result) = result {
                assert_eq!(result.len(), 2);
                assert_eq!(&expected as &dyn Array, result.as_ref());
            } else {
                panic!("Expected an array value")
            }
        }

        // Array value + array format must produce an array result.
        for (value, format, expected) in array_array_data {
            let batch_len = value.len();
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Array(value),
                    ColumnarValue::Array(Arc::new(format) as ArrayRef),
                ],
                number_rows: batch_len,
                return_type: &DataType::Utf8,
            };
            let result = ToCharFunc::new()
                .invoke_with_args(args)
                .expect("that to_char parsed values without error");

            if let ColumnarValue::Array(result) = result {
                assert_eq!(result.len(), 2);
                assert_eq!(&expected as &dyn Array, result.as_ref());
            } else {
                panic!("Expected an array value")
            }
        }

        // Error path: a single argument is rejected with an arity error.
        let args = datafusion_expr::ScalarFunctionArgs {
            args: vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))],
            number_rows: 1,
            return_type: &DataType::Utf8,
        };
        let result = ToCharFunc::new().invoke_with_args(args);
        assert_eq!(
            result.err().unwrap().strip_backtrace(),
            "Execution error: to_char function requires 2 arguments, got 1"
        );

        // Error path: a scalar format that is not Utf8 is rejected.
        let args = datafusion_expr::ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
                ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)),
            ],
            number_rows: 1,
            return_type: &DataType::Utf8,
        };
        let result = ToCharFunc::new().invoke_with_args(args);
        assert_eq!(
            result.err().unwrap().strip_backtrace(),
            "Execution error: Format for `to_char` must be non-null Utf8, received Timestamp(Nanosecond, None)"
        );
    }
}