1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::datatypes::DataType::*;
23use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
24use arrow::datatypes::{
25 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
26 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
27};
28use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
29use datafusion_expr::{
30 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
31};
32use datafusion_macros::user_doc;
33
34#[user_doc(
35 doc_section(label = "Time and Date Functions"),
36 description = r#"
37Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
38
39Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
40"#,
41 syntax_example = "to_timestamp(expression[, ..., format_n])",
42 sql_example = r#"```sql
43> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
44+-----------------------------------------------------------+
45| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
46+-----------------------------------------------------------+
47| 2023-01-31T14:26:56.123456789 |
48+-----------------------------------------------------------+
49> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
50+--------------------------------------------------------------------------------------------------------+
51| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
52+--------------------------------------------------------------------------------------------------------+
53| 2023-05-17T03:59:00.123456789 |
54+--------------------------------------------------------------------------------------------------------+
55```
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
57"#,
58 argument(
59 name = "expression",
60 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
61 ),
62 argument(
63 name = "format_n",
64 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
65 )
66)]
67#[derive(Debug)]
68pub struct ToTimestampFunc {
69 signature: Signature,
70}
71
72#[user_doc(
73 doc_section(label = "Time and Date Functions"),
74 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
75 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
76 sql_example = r#"```sql
77> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
78+-------------------------------------------------------------------+
79| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
80+-------------------------------------------------------------------+
81| 2023-01-31T14:26:56 |
82+-------------------------------------------------------------------+
83> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
84+----------------------------------------------------------------------------------------------------------------+
85| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
86+----------------------------------------------------------------------------------------------------------------+
87| 2023-05-17T03:59:00 |
88+----------------------------------------------------------------------------------------------------------------+
89```
90Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
91"#,
92 argument(
93 name = "expression",
94 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
95 ),
96 argument(
97 name = "format_n",
98 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
99 )
100)]
101#[derive(Debug)]
102pub struct ToTimestampSecondsFunc {
103 signature: Signature,
104}
105
106#[user_doc(
107 doc_section(label = "Time and Date Functions"),
108 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
109 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
110 sql_example = r#"```sql
111> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
112+------------------------------------------------------------------+
113| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
114+------------------------------------------------------------------+
115| 2023-01-31T14:26:56.123 |
116+------------------------------------------------------------------+
117> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
118+---------------------------------------------------------------------------------------------------------------+
119| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
120+---------------------------------------------------------------------------------------------------------------+
121| 2023-05-17T03:59:00.123 |
122+---------------------------------------------------------------------------------------------------------------+
123```
124Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
125"#,
126 argument(
127 name = "expression",
128 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
129 ),
130 argument(
131 name = "format_n",
132 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
133 )
134)]
135#[derive(Debug)]
136pub struct ToTimestampMillisFunc {
137 signature: Signature,
138}
139
140#[user_doc(
141 doc_section(label = "Time and Date Functions"),
142 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
143 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
144 sql_example = r#"```sql
145> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
146+------------------------------------------------------------------+
147| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
148+------------------------------------------------------------------+
149| 2023-01-31T14:26:56.123456 |
150+------------------------------------------------------------------+
151> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
152+---------------------------------------------------------------------------------------------------------------+
153| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
154+---------------------------------------------------------------------------------------------------------------+
155| 2023-05-17T03:59:00.123456 |
156+---------------------------------------------------------------------------------------------------------------+
157```
158Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
159"#,
160 argument(
161 name = "expression",
162 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
163 ),
164 argument(
165 name = "format_n",
166 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
167 )
168)]
169#[derive(Debug)]
170pub struct ToTimestampMicrosFunc {
171 signature: Signature,
172}
173
174#[user_doc(
175 doc_section(label = "Time and Date Functions"),
176 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
177 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
178 sql_example = r#"```sql
179> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
180+-----------------------------------------------------------------+
181| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
182+-----------------------------------------------------------------+
183| 2023-01-31T14:26:56.123456789 |
184+-----------------------------------------------------------------+
185> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
186+--------------------------------------------------------------------------------------------------------------+
187| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
188+--------------------------------------------------------------------------------------------------------------+
189| 2023-05-17T03:59:00.123456789 |
190+---------------------------------------------------------------------------------------------------------------+
191```
192Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
193"#,
194 argument(
195 name = "expression",
196 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
197 ),
198 argument(
199 name = "format_n",
200 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
201 )
202)]
203#[derive(Debug)]
204pub struct ToTimestampNanosFunc {
205 signature: Signature,
206}
207
208impl Default for ToTimestampFunc {
209 fn default() -> Self {
210 Self::new()
211 }
212}
213
214impl ToTimestampFunc {
215 pub fn new() -> Self {
216 Self {
217 signature: Signature::variadic_any(Volatility::Immutable),
218 }
219 }
220}
221
222impl Default for ToTimestampSecondsFunc {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
228impl ToTimestampSecondsFunc {
229 pub fn new() -> Self {
230 Self {
231 signature: Signature::variadic_any(Volatility::Immutable),
232 }
233 }
234}
235
236impl Default for ToTimestampMillisFunc {
237 fn default() -> Self {
238 Self::new()
239 }
240}
241
242impl ToTimestampMillisFunc {
243 pub fn new() -> Self {
244 Self {
245 signature: Signature::variadic_any(Volatility::Immutable),
246 }
247 }
248}
249
250impl Default for ToTimestampMicrosFunc {
251 fn default() -> Self {
252 Self::new()
253 }
254}
255
256impl ToTimestampMicrosFunc {
257 pub fn new() -> Self {
258 Self {
259 signature: Signature::variadic_any(Volatility::Immutable),
260 }
261 }
262}
263
264impl Default for ToTimestampNanosFunc {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
270impl ToTimestampNanosFunc {
271 pub fn new() -> Self {
272 Self {
273 signature: Signature::variadic_any(Volatility::Immutable),
274 }
275 }
276}
277
278impl ScalarUDFImpl for ToTimestampFunc {
285 fn as_any(&self) -> &dyn Any {
286 self
287 }
288
289 fn name(&self) -> &str {
290 "to_timestamp"
291 }
292
293 fn signature(&self) -> &Signature {
294 &self.signature
295 }
296
297 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
298 Ok(return_type_for(&arg_types[0], Nanosecond))
299 }
300
301 fn invoke_with_args(
302 &self,
303 args: datafusion_expr::ScalarFunctionArgs,
304 ) -> Result<ColumnarValue> {
305 let args = args.args;
306 if args.is_empty() {
307 return exec_err!(
308 "to_timestamp function requires 1 or more arguments, got {}",
309 args.len()
310 );
311 }
312
313 if args.len() > 1 {
315 validate_data_types(&args, "to_timestamp")?;
316 }
317
318 match args[0].data_type() {
319 Int32 | Int64 => args[0]
320 .cast_to(&Timestamp(Second, None), None)?
321 .cast_to(&Timestamp(Nanosecond, None), None),
322 Null | Float64 | Timestamp(_, None) => {
323 args[0].cast_to(&Timestamp(Nanosecond, None), None)
324 }
325 Timestamp(_, Some(tz)) => {
326 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
327 }
328 Utf8View | LargeUtf8 | Utf8 => {
329 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
330 }
331 Decimal128(_, _) => {
332 match &args[0] {
333 ColumnarValue::Scalar(ScalarValue::Decimal128(
334 Some(value),
335 _,
336 scale,
337 )) => {
338 let scale_factor = 10_i128.pow(*scale as u32);
340 let seconds = value / scale_factor;
341 let fraction = value % scale_factor;
342
343 let nanos = (fraction * 1_000_000_000) / scale_factor;
344
345 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
346
347 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
348 Some(timestamp_nanos as i64),
349 None,
350 )))
351 }
352 _ => exec_err!("Invalid decimal value"),
353 }
354 }
355 other => {
356 exec_err!(
357 "Unsupported data type {:?} for function to_timestamp",
358 other
359 )
360 }
361 }
362 }
363 fn documentation(&self) -> Option<&Documentation> {
364 self.doc()
365 }
366}
367
368impl ScalarUDFImpl for ToTimestampSecondsFunc {
369 fn as_any(&self) -> &dyn Any {
370 self
371 }
372
373 fn name(&self) -> &str {
374 "to_timestamp_seconds"
375 }
376
377 fn signature(&self) -> &Signature {
378 &self.signature
379 }
380
381 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
382 Ok(return_type_for(&arg_types[0], Second))
383 }
384
385 fn invoke_with_args(
386 &self,
387 args: datafusion_expr::ScalarFunctionArgs,
388 ) -> Result<ColumnarValue> {
389 let args = args.args;
390 if args.is_empty() {
391 return exec_err!(
392 "to_timestamp_seconds function requires 1 or more arguments, got {}",
393 args.len()
394 );
395 }
396
397 if args.len() > 1 {
399 validate_data_types(&args, "to_timestamp")?;
400 }
401
402 match args[0].data_type() {
403 Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
404 args[0].cast_to(&Timestamp(Second, None), None)
405 }
406 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
407 Utf8View | LargeUtf8 | Utf8 => {
408 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
409 }
410 other => {
411 exec_err!(
412 "Unsupported data type {:?} for function to_timestamp_seconds",
413 other
414 )
415 }
416 }
417 }
418 fn documentation(&self) -> Option<&Documentation> {
419 self.doc()
420 }
421}
422
423impl ScalarUDFImpl for ToTimestampMillisFunc {
424 fn as_any(&self) -> &dyn Any {
425 self
426 }
427
428 fn name(&self) -> &str {
429 "to_timestamp_millis"
430 }
431
432 fn signature(&self) -> &Signature {
433 &self.signature
434 }
435
436 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
437 Ok(return_type_for(&arg_types[0], Millisecond))
438 }
439
440 fn invoke_with_args(
441 &self,
442 args: datafusion_expr::ScalarFunctionArgs,
443 ) -> Result<ColumnarValue> {
444 let args = args.args;
445 if args.is_empty() {
446 return exec_err!(
447 "to_timestamp_millis function requires 1 or more arguments, got {}",
448 args.len()
449 );
450 }
451
452 if args.len() > 1 {
454 validate_data_types(&args, "to_timestamp")?;
455 }
456
457 match args[0].data_type() {
458 Null | Int32 | Int64 | Timestamp(_, None) => {
459 args[0].cast_to(&Timestamp(Millisecond, None), None)
460 }
461 Timestamp(_, Some(tz)) => {
462 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
463 }
464 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
465 &args,
466 "to_timestamp_millis",
467 ),
468 other => {
469 exec_err!(
470 "Unsupported data type {:?} for function to_timestamp_millis",
471 other
472 )
473 }
474 }
475 }
476 fn documentation(&self) -> Option<&Documentation> {
477 self.doc()
478 }
479}
480
481impl ScalarUDFImpl for ToTimestampMicrosFunc {
482 fn as_any(&self) -> &dyn Any {
483 self
484 }
485
486 fn name(&self) -> &str {
487 "to_timestamp_micros"
488 }
489
490 fn signature(&self) -> &Signature {
491 &self.signature
492 }
493
494 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
495 Ok(return_type_for(&arg_types[0], Microsecond))
496 }
497
498 fn invoke_with_args(
499 &self,
500 args: datafusion_expr::ScalarFunctionArgs,
501 ) -> Result<ColumnarValue> {
502 let args = args.args;
503 if args.is_empty() {
504 return exec_err!(
505 "to_timestamp_micros function requires 1 or more arguments, got {}",
506 args.len()
507 );
508 }
509
510 if args.len() > 1 {
512 validate_data_types(&args, "to_timestamp")?;
513 }
514
515 match args[0].data_type() {
516 Null | Int32 | Int64 | Timestamp(_, None) => {
517 args[0].cast_to(&Timestamp(Microsecond, None), None)
518 }
519 Timestamp(_, Some(tz)) => {
520 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
521 }
522 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
523 &args,
524 "to_timestamp_micros",
525 ),
526 other => {
527 exec_err!(
528 "Unsupported data type {:?} for function to_timestamp_micros",
529 other
530 )
531 }
532 }
533 }
534 fn documentation(&self) -> Option<&Documentation> {
535 self.doc()
536 }
537}
538
539impl ScalarUDFImpl for ToTimestampNanosFunc {
540 fn as_any(&self) -> &dyn Any {
541 self
542 }
543
544 fn name(&self) -> &str {
545 "to_timestamp_nanos"
546 }
547
548 fn signature(&self) -> &Signature {
549 &self.signature
550 }
551
552 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
553 Ok(return_type_for(&arg_types[0], Nanosecond))
554 }
555
556 fn invoke_with_args(
557 &self,
558 args: datafusion_expr::ScalarFunctionArgs,
559 ) -> Result<ColumnarValue> {
560 let args = args.args;
561 if args.is_empty() {
562 return exec_err!(
563 "to_timestamp_nanos function requires 1 or more arguments, got {}",
564 args.len()
565 );
566 }
567
568 if args.len() > 1 {
570 validate_data_types(&args, "to_timestamp")?;
571 }
572
573 match args[0].data_type() {
574 Null | Int32 | Int64 | Timestamp(_, None) => {
575 args[0].cast_to(&Timestamp(Nanosecond, None), None)
576 }
577 Timestamp(_, Some(tz)) => {
578 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
579 }
580 Utf8View | LargeUtf8 | Utf8 => {
581 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
582 }
583 other => {
584 exec_err!(
585 "Unsupported data type {:?} for function to_timestamp_nanos",
586 other
587 )
588 }
589 }
590 }
591 fn documentation(&self) -> Option<&Documentation> {
592 self.doc()
593 }
594}
595
596fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
599 match arg {
600 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
601 _ => Timestamp(unit, None),
602 }
603}
604
605fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
606 args: &[ColumnarValue],
607 name: &str,
608) -> Result<ColumnarValue> {
609 let factor = match T::UNIT {
610 Second => 1_000_000_000,
611 Millisecond => 1_000_000,
612 Microsecond => 1_000,
613 Nanosecond => 1,
614 };
615
616 match args.len() {
617 1 => handle::<T, _, T>(
618 args,
619 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
620 name,
621 ),
622 n if n >= 2 => handle_multiple::<T, _, T, _>(
623 args,
624 string_to_timestamp_nanos_formatted,
625 |n| n / factor,
626 name,
627 ),
628 _ => exec_err!("Unsupported 0 argument count for function {name}"),
629 }
630}
631
632#[cfg(test)]
633mod tests {
634 use std::sync::Arc;
635
636 use arrow::array::types::Int64Type;
637 use arrow::array::{
638 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
639 TimestampNanosecondArray, TimestampSecondArray,
640 };
641 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
642 use arrow::datatypes::TimeUnit;
643 use chrono::Utc;
644 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
645 use datafusion_expr::ScalarFunctionImplementation;
646
647 use super::*;
648
649 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
650 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
651 }
652
653 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
655 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
656 }
657
658 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
660 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
661 }
662
663 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
665 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
666 }
667
668 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
670 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
671 }
672
673 #[test]
674 fn to_timestamp_arrays_and_nulls() -> Result<()> {
675 let mut string_builder = StringBuilder::with_capacity(2, 1024);
678 let mut ts_builder = TimestampNanosecondArray::builder(2);
679
680 string_builder.append_value("2020-09-08T13:42:29.190855");
681 ts_builder.append_value(1599572549190855000);
682
683 string_builder.append_null();
684 ts_builder.append_null();
685 let expected_timestamps = &ts_builder.finish() as &dyn Array;
686
687 let string_array =
688 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
689 let parsed_timestamps = to_timestamp(&[string_array])
690 .expect("that to_timestamp parsed values without error");
691 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
692 assert_eq!(parsed_array.len(), 2);
693 assert_eq!(expected_timestamps, parsed_array.as_ref());
694 } else {
695 panic!("Expected a columnar array")
696 }
697 Ok(())
698 }
699
700 #[test]
701 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
702 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
705 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
706 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
707 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
708 let mut ts_builder = TimestampNanosecondArray::builder(2);
709
710 date_string_builder.append_null();
711 format1_builder.append_null();
712 format2_builder.append_null();
713 format3_builder.append_null();
714 ts_builder.append_null();
715
716 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
717 format1_builder.append_value("%s");
718 format2_builder.append_value("%c");
719 format3_builder.append_value("%+");
720 ts_builder.append_value(1599572549190850000);
721
722 let expected_timestamps = &ts_builder.finish() as &dyn Array;
723
724 let string_array = [
725 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
726 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
727 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
728 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
729 ];
730 let parsed_timestamps = to_timestamp(&string_array)
731 .expect("that to_timestamp with format args parsed values without error");
732 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
733 assert_eq!(parsed_array.len(), 2);
734 assert_eq!(expected_timestamps, parsed_array.as_ref());
735 } else {
736 panic!("Expected a columnar array")
737 }
738 Ok(())
739 }
740
741 #[test]
742 fn to_timestamp_invalid_input_type() -> Result<()> {
743 let mut builder = Int64Array::builder(1);
747 builder.append_value(1);
748 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
749
750 let expected_err =
751 "Execution error: Unsupported data type Int64 for function to_timestamp";
752 match to_timestamp(&[int64array]) {
753 Ok(_) => panic!("Expected error but got success"),
754 Err(e) => {
755 assert!(
756 e.to_string().contains(expected_err),
757 "Can not find expected error '{expected_err}'. Actual error '{e}'"
758 );
759 }
760 }
761 Ok(())
762 }
763
764 #[test]
765 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
766 let mut builder = Int64Array::builder(1);
770 builder.append_value(1);
771 let int64array = [
772 ColumnarValue::Array(Arc::new(builder.finish())),
773 ColumnarValue::Array(Arc::new(builder.finish())),
774 ];
775
776 let expected_err =
777 "Execution error: Unsupported data type Int64 for function to_timestamp";
778 match to_timestamp(&int64array) {
779 Ok(_) => panic!("Expected error but got success"),
780 Err(e) => {
781 assert!(
782 e.to_string().contains(expected_err),
783 "Can not find expected error '{expected_err}'. Actual error '{e}'"
784 );
785 }
786 }
787 Ok(())
788 }
789
790 #[test]
791 fn to_timestamp_with_unparseable_data() -> Result<()> {
792 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
793
794 date_string_builder.append_null();
795
796 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
797
798 let string_array =
799 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
800
801 let expected_err =
802 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
803 match to_timestamp(&[string_array]) {
804 Ok(_) => panic!("Expected error but got success"),
805 Err(e) => {
806 assert!(
807 e.to_string().contains(expected_err),
808 "Can not find expected error '{expected_err}'. Actual error '{e}'"
809 );
810 }
811 }
812 Ok(())
813 }
814
815 #[test]
816 fn to_timestamp_with_invalid_tz() -> Result<()> {
817 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
818
819 date_string_builder.append_null();
820
821 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
822
823 let string_array =
824 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
825
826 let expected_err =
827 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
828 match to_timestamp(&[string_array]) {
829 Ok(_) => panic!("Expected error but got success"),
830 Err(e) => {
831 assert!(
832 e.to_string().contains(expected_err),
833 "Can not find expected error '{expected_err}'. Actual error '{e}'"
834 );
835 }
836 }
837 Ok(())
838 }
839
840 #[test]
841 fn to_timestamp_with_no_matching_formats() -> Result<()> {
842 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
843 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
844 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
845 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
846
847 date_string_builder.append_null();
848 format1_builder.append_null();
849 format2_builder.append_null();
850 format3_builder.append_null();
851
852 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
853 format1_builder.append_value("%s");
854 format2_builder.append_value("%c");
855 format3_builder.append_value("%H:%M:%S");
856
857 let string_array = [
858 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
859 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
860 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
861 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
862 ];
863
864 let expected_err =
865 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
866 match to_timestamp(&string_array) {
867 Ok(_) => panic!("Expected error but got success"),
868 Err(e) => {
869 assert!(
870 e.to_string().contains(expected_err),
871 "Can not find expected error '{expected_err}'. Actual error '{e}'"
872 );
873 }
874 }
875 Ok(())
876 }
877
878 #[test]
879 fn string_to_timestamp_formatted() {
880 assert_eq!(
882 1599572549190855000,
883 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
884 );
885 assert_eq!(
886 1599572549190855000,
887 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
888 );
889 assert_eq!(
890 1599572549000000000,
891 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
892 ); assert_eq!(
894 1599590549190855000,
895 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
896 );
897 assert_eq!(
898 1599590549000000000,
899 parse_timestamp_formatted("1599590549", "%s").unwrap()
900 );
901 assert_eq!(
902 1599572549000000000,
903 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
904 .unwrap()
905 );
906 assert_eq!(
907 1642896000000000000,
908 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
909 );
910 }
911
912 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
913 let result = string_to_timestamp_nanos_formatted(s, format);
914 if let Err(e) = &result {
915 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
916 }
917 result
918 }
919
920 #[test]
921 fn string_to_timestamp_formatted_invalid() {
922 let cases = [
924 ("", "%Y%m%d %H%M%S", "premature end of input"),
925 ("SS", "%c", "premature end of input"),
926 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
927 (
928 "Wed, 18 Feb 2015 23:16:09 GMT",
929 "%XX",
930 "input contains invalid characters",
931 ),
932 (
933 "Wed, 18 Feb 2015 23:16:09 GMT",
934 "%Y%m%d %H%M%S",
935 "input contains invalid characters",
936 ),
937 ];
938
939 for (s, f, ctx) in cases {
940 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
941 let actual = string_to_datetime_formatted(&Utc, s, f)
942 .unwrap_err()
943 .to_string();
944 assert_eq!(actual, expected)
945 }
946 }
947
948 #[test]
949 fn string_to_timestamp_invalid_arguments() {
950 let cases = [
952 ("", "%Y%m%d %H%M%S", "premature end of input"),
953 ("SS", "%c", "premature end of input"),
954 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
955 (
956 "Wed, 18 Feb 2015 23:16:09 GMT",
957 "%XX",
958 "input contains invalid characters",
959 ),
960 (
961 "Wed, 18 Feb 2015 23:16:09 GMT",
962 "%Y%m%d %H%M%S",
963 "input contains invalid characters",
964 ),
965 ];
966
967 for (s, f, ctx) in cases {
968 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
969 let actual = string_to_datetime_formatted(&Utc, s, f)
970 .unwrap_err()
971 .to_string();
972 assert_eq!(actual, expected)
973 }
974 }
975
976 #[test]
977 fn test_tz() {
978 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
979 Box::new(ToTimestampFunc::new()),
980 Box::new(ToTimestampSecondsFunc::new()),
981 Box::new(ToTimestampMillisFunc::new()),
982 Box::new(ToTimestampNanosFunc::new()),
983 Box::new(ToTimestampSecondsFunc::new()),
984 ];
985
986 let mut nanos_builder = TimestampNanosecondArray::builder(2);
987 let mut millis_builder = TimestampMillisecondArray::builder(2);
988 let mut micros_builder = TimestampMicrosecondArray::builder(2);
989 let mut sec_builder = TimestampSecondArray::builder(2);
990
991 nanos_builder.append_value(1599572549190850000);
992 millis_builder.append_value(1599572549190);
993 micros_builder.append_value(1599572549190850);
994 sec_builder.append_value(1599572549);
995
996 let nanos_timestamps =
997 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
998 let millis_timestamps =
999 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1000 let micros_timestamps =
1001 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1002 let sec_timestamps =
1003 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1004
1005 let arrays = &[
1006 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1007 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1008 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1009 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1010 ];
1011
1012 for udf in &udfs {
1013 for array in arrays {
1014 let rt = udf.return_type(&[array.data_type()]).unwrap();
1015 assert!(matches!(rt, Timestamp(_, Some(_))));
1016 let args = datafusion_expr::ScalarFunctionArgs {
1017 args: vec![array.clone()],
1018 number_rows: 4,
1019 return_type: &rt,
1020 };
1021 let res = udf
1022 .invoke_with_args(args)
1023 .expect("that to_timestamp parsed values without error");
1024 let array = match res {
1025 ColumnarValue::Array(res) => res,
1026 _ => panic!("Expected a columnar array"),
1027 };
1028 let ty = array.data_type();
1029 assert!(matches!(ty, Timestamp(_, Some(_))));
1030 }
1031 }
1032
1033 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1034 let mut millis_builder = TimestampMillisecondArray::builder(2);
1035 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1036 let mut sec_builder = TimestampSecondArray::builder(2);
1037 let mut i64_builder = Int64Array::builder(2);
1038
1039 nanos_builder.append_value(1599572549190850000);
1040 millis_builder.append_value(1599572549190);
1041 micros_builder.append_value(1599572549190850);
1042 sec_builder.append_value(1599572549);
1043 i64_builder.append_value(1599572549);
1044
1045 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1046 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1047 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1048 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1049 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1050
1051 let arrays = &[
1052 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1053 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1054 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1055 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1056 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1057 ];
1058
1059 for udf in &udfs {
1060 for array in arrays {
1061 let rt = udf.return_type(&[array.data_type()]).unwrap();
1062 assert!(matches!(rt, Timestamp(_, None)));
1063 let args = datafusion_expr::ScalarFunctionArgs {
1064 args: vec![array.clone()],
1065 number_rows: 5,
1066 return_type: &rt,
1067 };
1068 let res = udf
1069 .invoke_with_args(args)
1070 .expect("that to_timestamp parsed values without error");
1071 let array = match res {
1072 ColumnarValue::Array(res) => res,
1073 _ => panic!("Expected a columnar array"),
1074 };
1075 let ty = array.data_type();
1076 assert!(matches!(ty, Timestamp(_, None)));
1077 }
1078 }
1079 }
1080
1081 #[test]
1082 fn test_to_timestamp_arg_validation() {
1083 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1084 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1085
1086 let data = date_string_builder.finish();
1087
1088 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1089 (Arc::new(to_timestamp), Nanosecond),
1090 (Arc::new(to_timestamp_micros), Microsecond),
1091 (Arc::new(to_timestamp_millis), Millisecond),
1092 (Arc::new(to_timestamp_nanos), Nanosecond),
1093 (Arc::new(to_timestamp_seconds), Second),
1094 ];
1095
1096 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1097 let mut millis_builder = TimestampMillisecondArray::builder(2);
1098 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1099 let mut sec_builder = TimestampSecondArray::builder(2);
1100
1101 nanos_builder.append_value(1599572549190850000);
1102 millis_builder.append_value(1599572549190);
1103 micros_builder.append_value(1599572549190850);
1104 sec_builder.append_value(1599572549);
1105
1106 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1107 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1108 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1109 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1110
1111 for (func, time_unit) in funcs {
1112 let string_array = [
1114 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1115 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1116 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1117 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1118 ];
1119 let parsed_timestamps = func(&string_array)
1120 .expect("that to_timestamp with format args parsed values without error");
1121 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1122 assert_eq!(parsed_array.len(), 1);
1123 match time_unit {
1124 Nanosecond => {
1125 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1126 }
1127 Millisecond => {
1128 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1129 }
1130 Microsecond => {
1131 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1132 }
1133 Second => {
1134 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1135 }
1136 };
1137 } else {
1138 panic!("Expected a columnar array")
1139 }
1140
1141 let string_array = [
1143 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1144 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1145 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1146 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1147 ];
1148 let parsed_timestamps = func(&string_array)
1149 .expect("that to_timestamp with format args parsed values without error");
1150 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1151 assert_eq!(parsed_array.len(), 1);
1152 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1153
1154 match time_unit {
1155 Nanosecond => {
1156 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1157 }
1158 Millisecond => {
1159 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1160 }
1161 Microsecond => {
1162 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1163 }
1164 Second => {
1165 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1166 }
1167 };
1168 } else {
1169 panic!("Expected a columnar array")
1170 }
1171
1172 let string_array = [
1174 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1175 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1176 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1177 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1178 ];
1179
1180 let expected = "Unsupported data type Int32 for function".to_string();
1181 let actual = func(&string_array).unwrap_err().to_string();
1182 assert_contains!(actual, expected);
1183
1184 let string_array = [
1186 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1187 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1188 vec![1i64].into(),
1189 None,
1190 )) as ArrayRef),
1191 ];
1192
1193 let expected = "Unsupported data type".to_string();
1194 let actual = func(&string_array).unwrap_err().to_string();
1195 assert_contains!(actual, expected);
1196 }
1197 }
1198}