1use std::any::Any;
19use std::sync::Arc;
20
21use arrow::datatypes::DataType::*;
22use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
23use arrow::datatypes::{
24 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
25 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
26};
27
28use crate::datetime::common::*;
29use datafusion_common::{exec_err, Result, ScalarType};
30use datafusion_expr::{
31 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_macros::user_doc;
34
35#[user_doc(
36 doc_section(label = "Time and Date Functions"),
37 description = r#"
38Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
39
40Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
41"#,
42 syntax_example = "to_timestamp(expression[, ..., format_n])",
43 sql_example = r#"```sql
44> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
45+-----------------------------------------------------------+
46| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
47+-----------------------------------------------------------+
48| 2023-01-31T14:26:56.123456789 |
49+-----------------------------------------------------------+
50> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
51+--------------------------------------------------------------------------------------------------------+
52| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
53+--------------------------------------------------------------------------------------------------------+
54| 2023-05-17T03:59:00.123456789 |
55+--------------------------------------------------------------------------------------------------------+
56```
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
58"#,
59 argument(
60 name = "expression",
61 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
62 ),
63 argument(
64 name = "format_n",
65 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
66 )
67)]
68#[derive(Debug)]
69pub struct ToTimestampFunc {
70 signature: Signature,
71}
72
73#[user_doc(
74 doc_section(label = "Time and Date Functions"),
75 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
76 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
77 sql_example = r#"```sql
78> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
79+-------------------------------------------------------------------+
80| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
81+-------------------------------------------------------------------+
82| 2023-01-31T14:26:56 |
83+-------------------------------------------------------------------+
84> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
85+----------------------------------------------------------------------------------------------------------------+
86| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
87+----------------------------------------------------------------------------------------------------------------+
88| 2023-05-17T03:59:00 |
89+----------------------------------------------------------------------------------------------------------------+
90```
91Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
92"#,
93 argument(
94 name = "expression",
95 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
96 ),
97 argument(
98 name = "format_n",
99 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
100 )
101)]
102#[derive(Debug)]
103pub struct ToTimestampSecondsFunc {
104 signature: Signature,
105}
106
107#[user_doc(
108 doc_section(label = "Time and Date Functions"),
109 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
110 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
111 sql_example = r#"```sql
112> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
113+------------------------------------------------------------------+
114| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
115+------------------------------------------------------------------+
116| 2023-01-31T14:26:56.123 |
117+------------------------------------------------------------------+
118> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
119+---------------------------------------------------------------------------------------------------------------+
120| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
121+---------------------------------------------------------------------------------------------------------------+
122| 2023-05-17T03:59:00.123 |
123+---------------------------------------------------------------------------------------------------------------+
124```
125Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
126"#,
127 argument(
128 name = "expression",
129 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
130 ),
131 argument(
132 name = "format_n",
133 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
134 )
135)]
136#[derive(Debug)]
137pub struct ToTimestampMillisFunc {
138 signature: Signature,
139}
140
141#[user_doc(
142 doc_section(label = "Time and Date Functions"),
143 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
144 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
145 sql_example = r#"```sql
146> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
147+------------------------------------------------------------------+
148| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
149+------------------------------------------------------------------+
150| 2023-01-31T14:26:56.123456 |
151+------------------------------------------------------------------+
152> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
153+---------------------------------------------------------------------------------------------------------------+
154| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
155+---------------------------------------------------------------------------------------------------------------+
156| 2023-05-17T03:59:00.123456 |
157+---------------------------------------------------------------------------------------------------------------+
158```
159Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
160"#,
161 argument(
162 name = "expression",
163 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
164 ),
165 argument(
166 name = "format_n",
167 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
168 )
169)]
170#[derive(Debug)]
171pub struct ToTimestampMicrosFunc {
172 signature: Signature,
173}
174
175#[user_doc(
176 doc_section(label = "Time and Date Functions"),
177 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
178 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
179 sql_example = r#"```sql
180> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
181+-----------------------------------------------------------------+
182| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
183+-----------------------------------------------------------------+
184| 2023-01-31T14:26:56.123456789 |
185+-----------------------------------------------------------------+
186> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
187+--------------------------------------------------------------------------------------------------------------+
188| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
189+--------------------------------------------------------------------------------------------------------------+
190| 2023-05-17T03:59:00.123456789 |
191+---------------------------------------------------------------------------------------------------------------+
192```
193Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
194"#,
195 argument(
196 name = "expression",
197 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
198 ),
199 argument(
200 name = "format_n",
201 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
202 )
203)]
204#[derive(Debug)]
205pub struct ToTimestampNanosFunc {
206 signature: Signature,
207}
208
209impl Default for ToTimestampFunc {
210 fn default() -> Self {
211 Self::new()
212 }
213}
214
215impl ToTimestampFunc {
216 pub fn new() -> Self {
217 Self {
218 signature: Signature::variadic_any(Volatility::Immutable),
219 }
220 }
221}
222
223impl Default for ToTimestampSecondsFunc {
224 fn default() -> Self {
225 Self::new()
226 }
227}
228
229impl ToTimestampSecondsFunc {
230 pub fn new() -> Self {
231 Self {
232 signature: Signature::variadic_any(Volatility::Immutable),
233 }
234 }
235}
236
237impl Default for ToTimestampMillisFunc {
238 fn default() -> Self {
239 Self::new()
240 }
241}
242
243impl ToTimestampMillisFunc {
244 pub fn new() -> Self {
245 Self {
246 signature: Signature::variadic_any(Volatility::Immutable),
247 }
248 }
249}
250
251impl Default for ToTimestampMicrosFunc {
252 fn default() -> Self {
253 Self::new()
254 }
255}
256
257impl ToTimestampMicrosFunc {
258 pub fn new() -> Self {
259 Self {
260 signature: Signature::variadic_any(Volatility::Immutable),
261 }
262 }
263}
264
265impl Default for ToTimestampNanosFunc {
266 fn default() -> Self {
267 Self::new()
268 }
269}
270
271impl ToTimestampNanosFunc {
272 pub fn new() -> Self {
273 Self {
274 signature: Signature::variadic_any(Volatility::Immutable),
275 }
276 }
277}
278
279impl ScalarUDFImpl for ToTimestampFunc {
286 fn as_any(&self) -> &dyn Any {
287 self
288 }
289
290 fn name(&self) -> &str {
291 "to_timestamp"
292 }
293
294 fn signature(&self) -> &Signature {
295 &self.signature
296 }
297
298 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
299 Ok(return_type_for(&arg_types[0], Nanosecond))
300 }
301
302 fn invoke_with_args(
303 &self,
304 args: datafusion_expr::ScalarFunctionArgs,
305 ) -> Result<ColumnarValue> {
306 let args = args.args;
307 if args.is_empty() {
308 return exec_err!(
309 "to_timestamp function requires 1 or more arguments, got {}",
310 args.len()
311 );
312 }
313
314 if args.len() > 1 {
316 validate_data_types(&args, "to_timestamp")?;
317 }
318
319 match args[0].data_type() {
320 Int32 | Int64 => args[0]
321 .cast_to(&Timestamp(Second, None), None)?
322 .cast_to(&Timestamp(Nanosecond, None), None),
323 Null | Float64 | Timestamp(_, None) => {
324 args[0].cast_to(&Timestamp(Nanosecond, None), None)
325 }
326 Timestamp(_, Some(tz)) => {
327 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
328 }
329 Utf8View | LargeUtf8 | Utf8 => {
330 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
331 }
332 other => {
333 exec_err!(
334 "Unsupported data type {:?} for function to_timestamp",
335 other
336 )
337 }
338 }
339 }
340 fn documentation(&self) -> Option<&Documentation> {
341 self.doc()
342 }
343}
344
345impl ScalarUDFImpl for ToTimestampSecondsFunc {
346 fn as_any(&self) -> &dyn Any {
347 self
348 }
349
350 fn name(&self) -> &str {
351 "to_timestamp_seconds"
352 }
353
354 fn signature(&self) -> &Signature {
355 &self.signature
356 }
357
358 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
359 Ok(return_type_for(&arg_types[0], Second))
360 }
361
362 fn invoke_with_args(
363 &self,
364 args: datafusion_expr::ScalarFunctionArgs,
365 ) -> Result<ColumnarValue> {
366 let args = args.args;
367 if args.is_empty() {
368 return exec_err!(
369 "to_timestamp_seconds function requires 1 or more arguments, got {}",
370 args.len()
371 );
372 }
373
374 if args.len() > 1 {
376 validate_data_types(&args, "to_timestamp")?;
377 }
378
379 match args[0].data_type() {
380 Null | Int32 | Int64 | Timestamp(_, None) => {
381 args[0].cast_to(&Timestamp(Second, None), None)
382 }
383 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
384 Utf8View | LargeUtf8 | Utf8 => {
385 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
386 }
387 other => {
388 exec_err!(
389 "Unsupported data type {:?} for function to_timestamp_seconds",
390 other
391 )
392 }
393 }
394 }
395 fn documentation(&self) -> Option<&Documentation> {
396 self.doc()
397 }
398}
399
400impl ScalarUDFImpl for ToTimestampMillisFunc {
401 fn as_any(&self) -> &dyn Any {
402 self
403 }
404
405 fn name(&self) -> &str {
406 "to_timestamp_millis"
407 }
408
409 fn signature(&self) -> &Signature {
410 &self.signature
411 }
412
413 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
414 Ok(return_type_for(&arg_types[0], Millisecond))
415 }
416
417 fn invoke_with_args(
418 &self,
419 args: datafusion_expr::ScalarFunctionArgs,
420 ) -> Result<ColumnarValue> {
421 let args = args.args;
422 if args.is_empty() {
423 return exec_err!(
424 "to_timestamp_millis function requires 1 or more arguments, got {}",
425 args.len()
426 );
427 }
428
429 if args.len() > 1 {
431 validate_data_types(&args, "to_timestamp")?;
432 }
433
434 match args[0].data_type() {
435 Null | Int32 | Int64 | Timestamp(_, None) => {
436 args[0].cast_to(&Timestamp(Millisecond, None), None)
437 }
438 Timestamp(_, Some(tz)) => {
439 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
440 }
441 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
442 &args,
443 "to_timestamp_millis",
444 ),
445 other => {
446 exec_err!(
447 "Unsupported data type {:?} for function to_timestamp_millis",
448 other
449 )
450 }
451 }
452 }
453 fn documentation(&self) -> Option<&Documentation> {
454 self.doc()
455 }
456}
457
458impl ScalarUDFImpl for ToTimestampMicrosFunc {
459 fn as_any(&self) -> &dyn Any {
460 self
461 }
462
463 fn name(&self) -> &str {
464 "to_timestamp_micros"
465 }
466
467 fn signature(&self) -> &Signature {
468 &self.signature
469 }
470
471 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
472 Ok(return_type_for(&arg_types[0], Microsecond))
473 }
474
475 fn invoke_with_args(
476 &self,
477 args: datafusion_expr::ScalarFunctionArgs,
478 ) -> Result<ColumnarValue> {
479 let args = args.args;
480 if args.is_empty() {
481 return exec_err!(
482 "to_timestamp_micros function requires 1 or more arguments, got {}",
483 args.len()
484 );
485 }
486
487 if args.len() > 1 {
489 validate_data_types(&args, "to_timestamp")?;
490 }
491
492 match args[0].data_type() {
493 Null | Int32 | Int64 | Timestamp(_, None) => {
494 args[0].cast_to(&Timestamp(Microsecond, None), None)
495 }
496 Timestamp(_, Some(tz)) => {
497 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
498 }
499 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
500 &args,
501 "to_timestamp_micros",
502 ),
503 other => {
504 exec_err!(
505 "Unsupported data type {:?} for function to_timestamp_micros",
506 other
507 )
508 }
509 }
510 }
511 fn documentation(&self) -> Option<&Documentation> {
512 self.doc()
513 }
514}
515
516impl ScalarUDFImpl for ToTimestampNanosFunc {
517 fn as_any(&self) -> &dyn Any {
518 self
519 }
520
521 fn name(&self) -> &str {
522 "to_timestamp_nanos"
523 }
524
525 fn signature(&self) -> &Signature {
526 &self.signature
527 }
528
529 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
530 Ok(return_type_for(&arg_types[0], Nanosecond))
531 }
532
533 fn invoke_with_args(
534 &self,
535 args: datafusion_expr::ScalarFunctionArgs,
536 ) -> Result<ColumnarValue> {
537 let args = args.args;
538 if args.is_empty() {
539 return exec_err!(
540 "to_timestamp_nanos function requires 1 or more arguments, got {}",
541 args.len()
542 );
543 }
544
545 if args.len() > 1 {
547 validate_data_types(&args, "to_timestamp")?;
548 }
549
550 match args[0].data_type() {
551 Null | Int32 | Int64 | Timestamp(_, None) => {
552 args[0].cast_to(&Timestamp(Nanosecond, None), None)
553 }
554 Timestamp(_, Some(tz)) => {
555 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
556 }
557 Utf8View | LargeUtf8 | Utf8 => {
558 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
559 }
560 other => {
561 exec_err!(
562 "Unsupported data type {:?} for function to_timestamp_nanos",
563 other
564 )
565 }
566 }
567 }
568 fn documentation(&self) -> Option<&Documentation> {
569 self.doc()
570 }
571}
572
573fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
576 match arg {
577 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
578 _ => Timestamp(unit, None),
579 }
580}
581
582fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
583 args: &[ColumnarValue],
584 name: &str,
585) -> Result<ColumnarValue> {
586 let factor = match T::UNIT {
587 Second => 1_000_000_000,
588 Millisecond => 1_000_000,
589 Microsecond => 1_000,
590 Nanosecond => 1,
591 };
592
593 match args.len() {
594 1 => handle::<T, _, T>(
595 args,
596 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
597 name,
598 ),
599 n if n >= 2 => handle_multiple::<T, _, T, _>(
600 args,
601 string_to_timestamp_nanos_formatted,
602 |n| n / factor,
603 name,
604 ),
605 _ => exec_err!("Unsupported 0 argument count for function {name}"),
606 }
607}
608
609#[cfg(test)]
610mod tests {
611 use std::sync::Arc;
612
613 use arrow::array::types::Int64Type;
614 use arrow::array::{
615 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
616 TimestampNanosecondArray, TimestampSecondArray,
617 };
618 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
619 use arrow::datatypes::TimeUnit;
620 use chrono::Utc;
621 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
622 use datafusion_expr::ScalarFunctionImplementation;
623
624 use super::*;
625
626 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
627 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
628 }
629
630 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
632 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
633 }
634
635 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
637 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
638 }
639
640 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
642 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
643 }
644
645 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
647 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
648 }
649
650 #[test]
651 fn to_timestamp_arrays_and_nulls() -> Result<()> {
652 let mut string_builder = StringBuilder::with_capacity(2, 1024);
655 let mut ts_builder = TimestampNanosecondArray::builder(2);
656
657 string_builder.append_value("2020-09-08T13:42:29.190855");
658 ts_builder.append_value(1599572549190855000);
659
660 string_builder.append_null();
661 ts_builder.append_null();
662 let expected_timestamps = &ts_builder.finish() as &dyn Array;
663
664 let string_array =
665 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
666 let parsed_timestamps = to_timestamp(&[string_array])
667 .expect("that to_timestamp parsed values without error");
668 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
669 assert_eq!(parsed_array.len(), 2);
670 assert_eq!(expected_timestamps, parsed_array.as_ref());
671 } else {
672 panic!("Expected a columnar array")
673 }
674 Ok(())
675 }
676
677 #[test]
678 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
679 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
682 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
683 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
684 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
685 let mut ts_builder = TimestampNanosecondArray::builder(2);
686
687 date_string_builder.append_null();
688 format1_builder.append_null();
689 format2_builder.append_null();
690 format3_builder.append_null();
691 ts_builder.append_null();
692
693 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
694 format1_builder.append_value("%s");
695 format2_builder.append_value("%c");
696 format3_builder.append_value("%+");
697 ts_builder.append_value(1599572549190850000);
698
699 let expected_timestamps = &ts_builder.finish() as &dyn Array;
700
701 let string_array = [
702 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
703 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
704 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
705 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
706 ];
707 let parsed_timestamps = to_timestamp(&string_array)
708 .expect("that to_timestamp with format args parsed values without error");
709 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
710 assert_eq!(parsed_array.len(), 2);
711 assert_eq!(expected_timestamps, parsed_array.as_ref());
712 } else {
713 panic!("Expected a columnar array")
714 }
715 Ok(())
716 }
717
718 #[test]
719 fn to_timestamp_invalid_input_type() -> Result<()> {
720 let mut builder = Int64Array::builder(1);
724 builder.append_value(1);
725 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
726
727 let expected_err =
728 "Execution error: Unsupported data type Int64 for function to_timestamp";
729 match to_timestamp(&[int64array]) {
730 Ok(_) => panic!("Expected error but got success"),
731 Err(e) => {
732 assert!(
733 e.to_string().contains(expected_err),
734 "Can not find expected error '{expected_err}'. Actual error '{e}'"
735 );
736 }
737 }
738 Ok(())
739 }
740
741 #[test]
742 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
743 let mut builder = Int64Array::builder(1);
747 builder.append_value(1);
748 let int64array = [
749 ColumnarValue::Array(Arc::new(builder.finish())),
750 ColumnarValue::Array(Arc::new(builder.finish())),
751 ];
752
753 let expected_err =
754 "Execution error: Unsupported data type Int64 for function to_timestamp";
755 match to_timestamp(&int64array) {
756 Ok(_) => panic!("Expected error but got success"),
757 Err(e) => {
758 assert!(
759 e.to_string().contains(expected_err),
760 "Can not find expected error '{expected_err}'. Actual error '{e}'"
761 );
762 }
763 }
764 Ok(())
765 }
766
767 #[test]
768 fn to_timestamp_with_unparseable_data() -> Result<()> {
769 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
770
771 date_string_builder.append_null();
772
773 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
774
775 let string_array =
776 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
777
778 let expected_err =
779 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
780 match to_timestamp(&[string_array]) {
781 Ok(_) => panic!("Expected error but got success"),
782 Err(e) => {
783 assert!(
784 e.to_string().contains(expected_err),
785 "Can not find expected error '{expected_err}'. Actual error '{e}'"
786 );
787 }
788 }
789 Ok(())
790 }
791
792 #[test]
793 fn to_timestamp_with_invalid_tz() -> Result<()> {
794 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
795
796 date_string_builder.append_null();
797
798 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
799
800 let string_array =
801 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
802
803 let expected_err =
804 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
805 match to_timestamp(&[string_array]) {
806 Ok(_) => panic!("Expected error but got success"),
807 Err(e) => {
808 assert!(
809 e.to_string().contains(expected_err),
810 "Can not find expected error '{expected_err}'. Actual error '{e}'"
811 );
812 }
813 }
814 Ok(())
815 }
816
817 #[test]
818 fn to_timestamp_with_no_matching_formats() -> Result<()> {
819 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
820 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
821 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
822 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
823
824 date_string_builder.append_null();
825 format1_builder.append_null();
826 format2_builder.append_null();
827 format3_builder.append_null();
828
829 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
830 format1_builder.append_value("%s");
831 format2_builder.append_value("%c");
832 format3_builder.append_value("%H:%M:%S");
833
834 let string_array = [
835 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
836 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
837 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
838 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
839 ];
840
841 let expected_err =
842 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
843 match to_timestamp(&string_array) {
844 Ok(_) => panic!("Expected error but got success"),
845 Err(e) => {
846 assert!(
847 e.to_string().contains(expected_err),
848 "Can not find expected error '{expected_err}'. Actual error '{e}'"
849 );
850 }
851 }
852 Ok(())
853 }
854
855 #[test]
856 fn string_to_timestamp_formatted() {
857 assert_eq!(
859 1599572549190855000,
860 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
861 );
862 assert_eq!(
863 1599572549190855000,
864 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
865 );
866 assert_eq!(
867 1599572549000000000,
868 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
869 ); assert_eq!(
871 1599590549190855000,
872 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
873 );
874 assert_eq!(
875 1599590549000000000,
876 parse_timestamp_formatted("1599590549", "%s").unwrap()
877 );
878 assert_eq!(
879 1599572549000000000,
880 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
881 .unwrap()
882 );
883 assert_eq!(
884 1642896000000000000,
885 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
886 );
887 }
888
889 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
890 let result = string_to_timestamp_nanos_formatted(s, format);
891 if let Err(e) = &result {
892 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
893 }
894 result
895 }
896
897 #[test]
898 fn string_to_timestamp_formatted_invalid() {
899 let cases = [
901 ("", "%Y%m%d %H%M%S", "premature end of input"),
902 ("SS", "%c", "premature end of input"),
903 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
904 (
905 "Wed, 18 Feb 2015 23:16:09 GMT",
906 "%XX",
907 "input contains invalid characters",
908 ),
909 (
910 "Wed, 18 Feb 2015 23:16:09 GMT",
911 "%Y%m%d %H%M%S",
912 "input contains invalid characters",
913 ),
914 ];
915
916 for (s, f, ctx) in cases {
917 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
918 let actual = string_to_datetime_formatted(&Utc, s, f)
919 .unwrap_err()
920 .to_string();
921 assert_eq!(actual, expected)
922 }
923 }
924
925 #[test]
926 fn string_to_timestamp_invalid_arguments() {
927 let cases = [
929 ("", "%Y%m%d %H%M%S", "premature end of input"),
930 ("SS", "%c", "premature end of input"),
931 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
932 (
933 "Wed, 18 Feb 2015 23:16:09 GMT",
934 "%XX",
935 "input contains invalid characters",
936 ),
937 (
938 "Wed, 18 Feb 2015 23:16:09 GMT",
939 "%Y%m%d %H%M%S",
940 "input contains invalid characters",
941 ),
942 ];
943
944 for (s, f, ctx) in cases {
945 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
946 let actual = string_to_datetime_formatted(&Utc, s, f)
947 .unwrap_err()
948 .to_string();
949 assert_eq!(actual, expected)
950 }
951 }
952
953 #[test]
954 fn test_tz() {
955 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
956 Box::new(ToTimestampFunc::new()),
957 Box::new(ToTimestampSecondsFunc::new()),
958 Box::new(ToTimestampMillisFunc::new()),
959 Box::new(ToTimestampNanosFunc::new()),
960 Box::new(ToTimestampSecondsFunc::new()),
961 ];
962
963 let mut nanos_builder = TimestampNanosecondArray::builder(2);
964 let mut millis_builder = TimestampMillisecondArray::builder(2);
965 let mut micros_builder = TimestampMicrosecondArray::builder(2);
966 let mut sec_builder = TimestampSecondArray::builder(2);
967
968 nanos_builder.append_value(1599572549190850000);
969 millis_builder.append_value(1599572549190);
970 micros_builder.append_value(1599572549190850);
971 sec_builder.append_value(1599572549);
972
973 let nanos_timestamps =
974 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
975 let millis_timestamps =
976 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
977 let micros_timestamps =
978 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
979 let sec_timestamps =
980 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
981
982 let arrays = &[
983 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
984 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
985 ColumnarValue::Array(Arc::clone(µs_timestamps)),
986 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
987 ];
988
989 for udf in &udfs {
990 for array in arrays {
991 let rt = udf.return_type(&[array.data_type()]).unwrap();
992 assert!(matches!(rt, Timestamp(_, Some(_))));
993 let args = datafusion_expr::ScalarFunctionArgs {
994 args: vec![array.clone()],
995 number_rows: 4,
996 return_type: &rt,
997 };
998 let res = udf
999 .invoke_with_args(args)
1000 .expect("that to_timestamp parsed values without error");
1001 let array = match res {
1002 ColumnarValue::Array(res) => res,
1003 _ => panic!("Expected a columnar array"),
1004 };
1005 let ty = array.data_type();
1006 assert!(matches!(ty, Timestamp(_, Some(_))));
1007 }
1008 }
1009
1010 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1011 let mut millis_builder = TimestampMillisecondArray::builder(2);
1012 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1013 let mut sec_builder = TimestampSecondArray::builder(2);
1014 let mut i64_builder = Int64Array::builder(2);
1015
1016 nanos_builder.append_value(1599572549190850000);
1017 millis_builder.append_value(1599572549190);
1018 micros_builder.append_value(1599572549190850);
1019 sec_builder.append_value(1599572549);
1020 i64_builder.append_value(1599572549);
1021
1022 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1023 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1024 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1025 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1026 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1027
1028 let arrays = &[
1029 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1030 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1031 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1032 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1033 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1034 ];
1035
1036 for udf in &udfs {
1037 for array in arrays {
1038 let rt = udf.return_type(&[array.data_type()]).unwrap();
1039 assert!(matches!(rt, Timestamp(_, None)));
1040 let args = datafusion_expr::ScalarFunctionArgs {
1041 args: vec![array.clone()],
1042 number_rows: 5,
1043 return_type: &rt,
1044 };
1045 let res = udf
1046 .invoke_with_args(args)
1047 .expect("that to_timestamp parsed values without error");
1048 let array = match res {
1049 ColumnarValue::Array(res) => res,
1050 _ => panic!("Expected a columnar array"),
1051 };
1052 let ty = array.data_type();
1053 assert!(matches!(ty, Timestamp(_, None)));
1054 }
1055 }
1056 }
1057
1058 #[test]
1059 fn test_to_timestamp_arg_validation() {
1060 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1061 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1062
1063 let data = date_string_builder.finish();
1064
1065 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1066 (Arc::new(to_timestamp), Nanosecond),
1067 (Arc::new(to_timestamp_micros), Microsecond),
1068 (Arc::new(to_timestamp_millis), Millisecond),
1069 (Arc::new(to_timestamp_nanos), Nanosecond),
1070 (Arc::new(to_timestamp_seconds), Second),
1071 ];
1072
1073 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1074 let mut millis_builder = TimestampMillisecondArray::builder(2);
1075 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1076 let mut sec_builder = TimestampSecondArray::builder(2);
1077
1078 nanos_builder.append_value(1599572549190850000);
1079 millis_builder.append_value(1599572549190);
1080 micros_builder.append_value(1599572549190850);
1081 sec_builder.append_value(1599572549);
1082
1083 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1084 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1085 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1086 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1087
1088 for (func, time_unit) in funcs {
1089 let string_array = [
1091 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1092 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1093 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1094 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1095 ];
1096 let parsed_timestamps = func(&string_array)
1097 .expect("that to_timestamp with format args parsed values without error");
1098 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1099 assert_eq!(parsed_array.len(), 1);
1100 match time_unit {
1101 Nanosecond => {
1102 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1103 }
1104 Millisecond => {
1105 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1106 }
1107 Microsecond => {
1108 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1109 }
1110 Second => {
1111 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1112 }
1113 };
1114 } else {
1115 panic!("Expected a columnar array")
1116 }
1117
1118 let string_array = [
1120 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1121 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1122 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1123 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1124 ];
1125 let parsed_timestamps = func(&string_array)
1126 .expect("that to_timestamp with format args parsed values without error");
1127 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1128 assert_eq!(parsed_array.len(), 1);
1129 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1130
1131 match time_unit {
1132 Nanosecond => {
1133 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1134 }
1135 Millisecond => {
1136 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1137 }
1138 Microsecond => {
1139 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1140 }
1141 Second => {
1142 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1143 }
1144 };
1145 } else {
1146 panic!("Expected a columnar array")
1147 }
1148
1149 let string_array = [
1151 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1152 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1153 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1154 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1155 ];
1156
1157 let expected = "Unsupported data type Int32 for function".to_string();
1158 let actual = func(&string_array).unwrap_err().to_string();
1159 assert_contains!(actual, expected);
1160
1161 let string_array = [
1163 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1164 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1165 vec![1i64].into(),
1166 None,
1167 )) as ArrayRef),
1168 ];
1169
1170 let expected = "Unsupported data type".to_string();
1171 let actual = func(&string_array).unwrap_err().to_string();
1172 assert_contains!(actual, expected);
1173 }
1174 }
1175}