datafusion_functions/datetime/
to_timestamp.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::datatypes::DataType::*;
23use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
24use arrow::datatypes::{
25    ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
26    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
27};
28use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
29use datafusion_expr::{
30    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
31};
32use datafusion_macros::user_doc;
33
34#[user_doc(
35    doc_section(label = "Time and Date Functions"),
36    description = r#"
37Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
38
39Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
40"#,
41    syntax_example = "to_timestamp(expression[, ..., format_n])",
42    sql_example = r#"```sql
43> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
44+-----------------------------------------------------------+
45| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
46+-----------------------------------------------------------+
47| 2023-01-31T14:26:56.123456789                             |
48+-----------------------------------------------------------+
49> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
50+--------------------------------------------------------------------------------------------------------+
51| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
52+--------------------------------------------------------------------------------------------------------+
53| 2023-05-17T03:59:00.123456789                                                                          |
54+--------------------------------------------------------------------------------------------------------+
55```
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
57"#,
58    argument(
59        name = "expression",
60        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
61    ),
62    argument(
63        name = "format_n",
64        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
65    )
66)]
67#[derive(Debug)]
68pub struct ToTimestampFunc {
69    signature: Signature,
70}
71
72#[user_doc(
73    doc_section(label = "Time and Date Functions"),
74    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
75    syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
76    sql_example = r#"```sql
77> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
78+-------------------------------------------------------------------+
79| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
80+-------------------------------------------------------------------+
81| 2023-01-31T14:26:56                                               |
82+-------------------------------------------------------------------+
83> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
84+----------------------------------------------------------------------------------------------------------------+
85| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
86+----------------------------------------------------------------------------------------------------------------+
87| 2023-05-17T03:59:00                                                                                            |
88+----------------------------------------------------------------------------------------------------------------+
89```
90Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
91"#,
92    argument(
93        name = "expression",
94        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
95    ),
96    argument(
97        name = "format_n",
98        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
99    )
100)]
101#[derive(Debug)]
102pub struct ToTimestampSecondsFunc {
103    signature: Signature,
104}
105
106#[user_doc(
107    doc_section(label = "Time and Date Functions"),
108    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
109    syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
110    sql_example = r#"```sql
111> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
112+------------------------------------------------------------------+
113| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
114+------------------------------------------------------------------+
115| 2023-01-31T14:26:56.123                                          |
116+------------------------------------------------------------------+
117> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
118+---------------------------------------------------------------------------------------------------------------+
119| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
120+---------------------------------------------------------------------------------------------------------------+
121| 2023-05-17T03:59:00.123                                                                                       |
122+---------------------------------------------------------------------------------------------------------------+
123```
124Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
125"#,
126    argument(
127        name = "expression",
128        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
129    ),
130    argument(
131        name = "format_n",
132        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
133    )
134)]
135#[derive(Debug)]
136pub struct ToTimestampMillisFunc {
137    signature: Signature,
138}
139
140#[user_doc(
141    doc_section(label = "Time and Date Functions"),
142    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
143    syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
144    sql_example = r#"```sql
145> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
146+------------------------------------------------------------------+
147| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
148+------------------------------------------------------------------+
149| 2023-01-31T14:26:56.123456                                       |
150+------------------------------------------------------------------+
151> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
152+---------------------------------------------------------------------------------------------------------------+
153| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
154+---------------------------------------------------------------------------------------------------------------+
155| 2023-05-17T03:59:00.123456                                                                                    |
156+---------------------------------------------------------------------------------------------------------------+
157```
158Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
159"#,
160    argument(
161        name = "expression",
162        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
163    ),
164    argument(
165        name = "format_n",
166        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
167    )
168)]
169#[derive(Debug)]
170pub struct ToTimestampMicrosFunc {
171    signature: Signature,
172}
173
174#[user_doc(
175    doc_section(label = "Time and Date Functions"),
176    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
177    syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
178    sql_example = r#"```sql
179> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
180+-----------------------------------------------------------------+
181| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
182+-----------------------------------------------------------------+
183| 2023-01-31T14:26:56.123456789                                   |
184+-----------------------------------------------------------------+
185> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
186+--------------------------------------------------------------------------------------------------------------+
187| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
188+--------------------------------------------------------------------------------------------------------------+
189| 2023-05-17T03:59:00.123456789                                                                                |
190+---------------------------------------------------------------------------------------------------------------+
191```
192Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
193"#,
194    argument(
195        name = "expression",
196        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
197    ),
198    argument(
199        name = "format_n",
200        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
201    )
202)]
203#[derive(Debug)]
204pub struct ToTimestampNanosFunc {
205    signature: Signature,
206}
207
208impl Default for ToTimestampFunc {
209    fn default() -> Self {
210        Self::new()
211    }
212}
213
214impl ToTimestampFunc {
215    pub fn new() -> Self {
216        Self {
217            signature: Signature::variadic_any(Volatility::Immutable),
218        }
219    }
220}
221
222impl Default for ToTimestampSecondsFunc {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
228impl ToTimestampSecondsFunc {
229    pub fn new() -> Self {
230        Self {
231            signature: Signature::variadic_any(Volatility::Immutable),
232        }
233    }
234}
235
236impl Default for ToTimestampMillisFunc {
237    fn default() -> Self {
238        Self::new()
239    }
240}
241
242impl ToTimestampMillisFunc {
243    pub fn new() -> Self {
244        Self {
245            signature: Signature::variadic_any(Volatility::Immutable),
246        }
247    }
248}
249
250impl Default for ToTimestampMicrosFunc {
251    fn default() -> Self {
252        Self::new()
253    }
254}
255
256impl ToTimestampMicrosFunc {
257    pub fn new() -> Self {
258        Self {
259            signature: Signature::variadic_any(Volatility::Immutable),
260        }
261    }
262}
263
264impl Default for ToTimestampNanosFunc {
265    fn default() -> Self {
266        Self::new()
267    }
268}
269
270impl ToTimestampNanosFunc {
271    pub fn new() -> Self {
272        Self {
273            signature: Signature::variadic_any(Volatility::Immutable),
274        }
275    }
276}
277
278/// to_timestamp SQL function
279///
280/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
281/// The supported range for integer input is between `-9223372037` and `9223372036`.
282/// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`.
283/// Please use `to_timestamp_seconds` for the input outside of supported bounds.
284impl ScalarUDFImpl for ToTimestampFunc {
285    fn as_any(&self) -> &dyn Any {
286        self
287    }
288
289    fn name(&self) -> &str {
290        "to_timestamp"
291    }
292
293    fn signature(&self) -> &Signature {
294        &self.signature
295    }
296
297    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
298        Ok(return_type_for(&arg_types[0], Nanosecond))
299    }
300
301    fn invoke_with_args(
302        &self,
303        args: datafusion_expr::ScalarFunctionArgs,
304    ) -> Result<ColumnarValue> {
305        let args = args.args;
306        if args.is_empty() {
307            return exec_err!(
308                "to_timestamp function requires 1 or more arguments, got {}",
309                args.len()
310            );
311        }
312
313        // validate that any args after the first one are Utf8
314        if args.len() > 1 {
315            validate_data_types(&args, "to_timestamp")?;
316        }
317
318        match args[0].data_type() {
319            Int32 | Int64 => args[0]
320                .cast_to(&Timestamp(Second, None), None)?
321                .cast_to(&Timestamp(Nanosecond, None), None),
322            Null | Float64 | Timestamp(_, None) => {
323                args[0].cast_to(&Timestamp(Nanosecond, None), None)
324            }
325            Timestamp(_, Some(tz)) => {
326                args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
327            }
328            Utf8View | LargeUtf8 | Utf8 => {
329                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
330            }
331            Decimal128(_, _) => {
332                match &args[0] {
333                    ColumnarValue::Scalar(ScalarValue::Decimal128(
334                        Some(value),
335                        _,
336                        scale,
337                    )) => {
338                        // Convert decimal to seconds and nanoseconds
339                        let scale_factor = 10_i128.pow(*scale as u32);
340                        let seconds = value / scale_factor;
341                        let fraction = value % scale_factor;
342
343                        let nanos = (fraction * 1_000_000_000) / scale_factor;
344
345                        let timestamp_nanos = seconds * 1_000_000_000 + nanos;
346
347                        Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
348                            Some(timestamp_nanos as i64),
349                            None,
350                        )))
351                    }
352                    _ => exec_err!("Invalid decimal value"),
353                }
354            }
355            other => {
356                exec_err!(
357                    "Unsupported data type {:?} for function to_timestamp",
358                    other
359                )
360            }
361        }
362    }
363    fn documentation(&self) -> Option<&Documentation> {
364        self.doc()
365    }
366}
367
368impl ScalarUDFImpl for ToTimestampSecondsFunc {
369    fn as_any(&self) -> &dyn Any {
370        self
371    }
372
373    fn name(&self) -> &str {
374        "to_timestamp_seconds"
375    }
376
377    fn signature(&self) -> &Signature {
378        &self.signature
379    }
380
381    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
382        Ok(return_type_for(&arg_types[0], Second))
383    }
384
385    fn invoke_with_args(
386        &self,
387        args: datafusion_expr::ScalarFunctionArgs,
388    ) -> Result<ColumnarValue> {
389        let args = args.args;
390        if args.is_empty() {
391            return exec_err!(
392                "to_timestamp_seconds function requires 1 or more arguments, got {}",
393                args.len()
394            );
395        }
396
397        // validate that any args after the first one are Utf8
398        if args.len() > 1 {
399            validate_data_types(&args, "to_timestamp")?;
400        }
401
402        match args[0].data_type() {
403            Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
404                args[0].cast_to(&Timestamp(Second, None), None)
405            }
406            Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
407            Utf8View | LargeUtf8 | Utf8 => {
408                to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
409            }
410            other => {
411                exec_err!(
412                    "Unsupported data type {:?} for function to_timestamp_seconds",
413                    other
414                )
415            }
416        }
417    }
418    fn documentation(&self) -> Option<&Documentation> {
419        self.doc()
420    }
421}
422
423impl ScalarUDFImpl for ToTimestampMillisFunc {
424    fn as_any(&self) -> &dyn Any {
425        self
426    }
427
428    fn name(&self) -> &str {
429        "to_timestamp_millis"
430    }
431
432    fn signature(&self) -> &Signature {
433        &self.signature
434    }
435
436    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
437        Ok(return_type_for(&arg_types[0], Millisecond))
438    }
439
440    fn invoke_with_args(
441        &self,
442        args: datafusion_expr::ScalarFunctionArgs,
443    ) -> Result<ColumnarValue> {
444        let args = args.args;
445        if args.is_empty() {
446            return exec_err!(
447                "to_timestamp_millis function requires 1 or more arguments, got {}",
448                args.len()
449            );
450        }
451
452        // validate that any args after the first one are Utf8
453        if args.len() > 1 {
454            validate_data_types(&args, "to_timestamp")?;
455        }
456
457        match args[0].data_type() {
458            Null | Int32 | Int64 | Timestamp(_, None) => {
459                args[0].cast_to(&Timestamp(Millisecond, None), None)
460            }
461            Timestamp(_, Some(tz)) => {
462                args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
463            }
464            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
465                &args,
466                "to_timestamp_millis",
467            ),
468            other => {
469                exec_err!(
470                    "Unsupported data type {:?} for function to_timestamp_millis",
471                    other
472                )
473            }
474        }
475    }
476    fn documentation(&self) -> Option<&Documentation> {
477        self.doc()
478    }
479}
480
481impl ScalarUDFImpl for ToTimestampMicrosFunc {
482    fn as_any(&self) -> &dyn Any {
483        self
484    }
485
486    fn name(&self) -> &str {
487        "to_timestamp_micros"
488    }
489
490    fn signature(&self) -> &Signature {
491        &self.signature
492    }
493
494    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
495        Ok(return_type_for(&arg_types[0], Microsecond))
496    }
497
498    fn invoke_with_args(
499        &self,
500        args: datafusion_expr::ScalarFunctionArgs,
501    ) -> Result<ColumnarValue> {
502        let args = args.args;
503        if args.is_empty() {
504            return exec_err!(
505                "to_timestamp_micros function requires 1 or more arguments, got {}",
506                args.len()
507            );
508        }
509
510        // validate that any args after the first one are Utf8
511        if args.len() > 1 {
512            validate_data_types(&args, "to_timestamp")?;
513        }
514
515        match args[0].data_type() {
516            Null | Int32 | Int64 | Timestamp(_, None) => {
517                args[0].cast_to(&Timestamp(Microsecond, None), None)
518            }
519            Timestamp(_, Some(tz)) => {
520                args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
521            }
522            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
523                &args,
524                "to_timestamp_micros",
525            ),
526            other => {
527                exec_err!(
528                    "Unsupported data type {:?} for function to_timestamp_micros",
529                    other
530                )
531            }
532        }
533    }
534    fn documentation(&self) -> Option<&Documentation> {
535        self.doc()
536    }
537}
538
539impl ScalarUDFImpl for ToTimestampNanosFunc {
540    fn as_any(&self) -> &dyn Any {
541        self
542    }
543
544    fn name(&self) -> &str {
545        "to_timestamp_nanos"
546    }
547
548    fn signature(&self) -> &Signature {
549        &self.signature
550    }
551
552    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
553        Ok(return_type_for(&arg_types[0], Nanosecond))
554    }
555
556    fn invoke_with_args(
557        &self,
558        args: datafusion_expr::ScalarFunctionArgs,
559    ) -> Result<ColumnarValue> {
560        let args = args.args;
561        if args.is_empty() {
562            return exec_err!(
563                "to_timestamp_nanos function requires 1 or more arguments, got {}",
564                args.len()
565            );
566        }
567
568        // validate that any args after the first one are Utf8
569        if args.len() > 1 {
570            validate_data_types(&args, "to_timestamp")?;
571        }
572
573        match args[0].data_type() {
574            Null | Int32 | Int64 | Timestamp(_, None) => {
575                args[0].cast_to(&Timestamp(Nanosecond, None), None)
576            }
577            Timestamp(_, Some(tz)) => {
578                args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
579            }
580            Utf8View | LargeUtf8 | Utf8 => {
581                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
582            }
583            other => {
584                exec_err!(
585                    "Unsupported data type {:?} for function to_timestamp_nanos",
586                    other
587                )
588            }
589        }
590    }
591    fn documentation(&self) -> Option<&Documentation> {
592        self.doc()
593    }
594}
595
596/// Returns the return type for the to_timestamp_* function, preserving
597/// the timezone if it exists.
598fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
599    match arg {
600        Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
601        _ => Timestamp(unit, None),
602    }
603}
604
605fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
606    args: &[ColumnarValue],
607    name: &str,
608) -> Result<ColumnarValue> {
609    let factor = match T::UNIT {
610        Second => 1_000_000_000,
611        Millisecond => 1_000_000,
612        Microsecond => 1_000,
613        Nanosecond => 1,
614    };
615
616    match args.len() {
617        1 => handle::<T, _, T>(
618            args,
619            |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
620            name,
621        ),
622        n if n >= 2 => handle_multiple::<T, _, T, _>(
623            args,
624            string_to_timestamp_nanos_formatted,
625            |n| n / factor,
626            name,
627        ),
628        _ => exec_err!("Unsupported 0 argument count for function {name}"),
629    }
630}
631
632#[cfg(test)]
633mod tests {
634    use std::sync::Arc;
635
636    use arrow::array::types::Int64Type;
637    use arrow::array::{
638        Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
639        TimestampNanosecondArray, TimestampSecondArray,
640    };
641    use arrow::array::{ArrayRef, Int64Array, StringBuilder};
642    use arrow::datatypes::TimeUnit;
643    use chrono::Utc;
644    use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
645    use datafusion_expr::ScalarFunctionImplementation;
646
647    use super::*;
648
649    fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
650        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
651    }
652
653    /// to_timestamp_millis SQL function
654    fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
655        to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
656    }
657
658    /// to_timestamp_micros SQL function
659    fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
660        to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
661    }
662
663    /// to_timestamp_nanos SQL function
664    fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
665        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
666    }
667
668    /// to_timestamp_seconds SQL function
669    fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
670        to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
671    }
672
673    #[test]
674    fn to_timestamp_arrays_and_nulls() -> Result<()> {
675        // ensure that arrow array implementation is wired up and handles nulls correctly
676
677        let mut string_builder = StringBuilder::with_capacity(2, 1024);
678        let mut ts_builder = TimestampNanosecondArray::builder(2);
679
680        string_builder.append_value("2020-09-08T13:42:29.190855");
681        ts_builder.append_value(1599572549190855000);
682
683        string_builder.append_null();
684        ts_builder.append_null();
685        let expected_timestamps = &ts_builder.finish() as &dyn Array;
686
687        let string_array =
688            ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
689        let parsed_timestamps = to_timestamp(&[string_array])
690            .expect("that to_timestamp parsed values without error");
691        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
692            assert_eq!(parsed_array.len(), 2);
693            assert_eq!(expected_timestamps, parsed_array.as_ref());
694        } else {
695            panic!("Expected a columnar array")
696        }
697        Ok(())
698    }
699
700    #[test]
701    fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
702        // ensure that arrow array implementation is wired up and handles nulls correctly
703
704        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
705        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
706        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
707        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
708        let mut ts_builder = TimestampNanosecondArray::builder(2);
709
710        date_string_builder.append_null();
711        format1_builder.append_null();
712        format2_builder.append_null();
713        format3_builder.append_null();
714        ts_builder.append_null();
715
716        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
717        format1_builder.append_value("%s");
718        format2_builder.append_value("%c");
719        format3_builder.append_value("%+");
720        ts_builder.append_value(1599572549190850000);
721
722        let expected_timestamps = &ts_builder.finish() as &dyn Array;
723
724        let string_array = [
725            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
726            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
727            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
728            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
729        ];
730        let parsed_timestamps = to_timestamp(&string_array)
731            .expect("that to_timestamp with format args parsed values without error");
732        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
733            assert_eq!(parsed_array.len(), 2);
734            assert_eq!(expected_timestamps, parsed_array.as_ref());
735        } else {
736            panic!("Expected a columnar array")
737        }
738        Ok(())
739    }
740
741    #[test]
742    fn to_timestamp_invalid_input_type() -> Result<()> {
743        // pass the wrong type of input array to to_timestamp and test
744        // that we get an error.
745
746        let mut builder = Int64Array::builder(1);
747        builder.append_value(1);
748        let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
749
750        let expected_err =
751            "Execution error: Unsupported data type Int64 for function to_timestamp";
752        match to_timestamp(&[int64array]) {
753            Ok(_) => panic!("Expected error but got success"),
754            Err(e) => {
755                assert!(
756                    e.to_string().contains(expected_err),
757                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
758                );
759            }
760        }
761        Ok(())
762    }
763
764    #[test]
765    fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
766        // pass the wrong type of input array to to_timestamp and test
767        // that we get an error.
768
769        let mut builder = Int64Array::builder(1);
770        builder.append_value(1);
771        let int64array = [
772            ColumnarValue::Array(Arc::new(builder.finish())),
773            ColumnarValue::Array(Arc::new(builder.finish())),
774        ];
775
776        let expected_err =
777            "Execution error: Unsupported data type Int64 for function to_timestamp";
778        match to_timestamp(&int64array) {
779            Ok(_) => panic!("Expected error but got success"),
780            Err(e) => {
781                assert!(
782                    e.to_string().contains(expected_err),
783                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
784                );
785            }
786        }
787        Ok(())
788    }
789
790    #[test]
791    fn to_timestamp_with_unparseable_data() -> Result<()> {
792        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
793
794        date_string_builder.append_null();
795
796        date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
797
798        let string_array =
799            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
800
801        let expected_err =
802            "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
803        match to_timestamp(&[string_array]) {
804            Ok(_) => panic!("Expected error but got success"),
805            Err(e) => {
806                assert!(
807                    e.to_string().contains(expected_err),
808                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
809                );
810            }
811        }
812        Ok(())
813    }
814
815    #[test]
816    fn to_timestamp_with_invalid_tz() -> Result<()> {
817        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
818
819        date_string_builder.append_null();
820
821        date_string_builder.append_value("2020-09-08T13:42:29ZZ");
822
823        let string_array =
824            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
825
826        let expected_err =
827            "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
828        match to_timestamp(&[string_array]) {
829            Ok(_) => panic!("Expected error but got success"),
830            Err(e) => {
831                assert!(
832                    e.to_string().contains(expected_err),
833                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
834                );
835            }
836        }
837        Ok(())
838    }
839
840    #[test]
841    fn to_timestamp_with_no_matching_formats() -> Result<()> {
842        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
843        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
844        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
845        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
846
847        date_string_builder.append_null();
848        format1_builder.append_null();
849        format2_builder.append_null();
850        format3_builder.append_null();
851
852        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
853        format1_builder.append_value("%s");
854        format2_builder.append_value("%c");
855        format3_builder.append_value("%H:%M:%S");
856
857        let string_array = [
858            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
859            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
860            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
861            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
862        ];
863
864        let expected_err =
865            "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
866        match to_timestamp(&string_array) {
867            Ok(_) => panic!("Expected error but got success"),
868            Err(e) => {
869                assert!(
870                    e.to_string().contains(expected_err),
871                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
872                );
873            }
874        }
875        Ok(())
876    }
877
878    #[test]
879    fn string_to_timestamp_formatted() {
880        // Explicit timezone
881        assert_eq!(
882            1599572549190855000,
883            parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
884        );
885        assert_eq!(
886            1599572549190855000,
887            parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
888        );
889        assert_eq!(
890            1599572549000000000,
891            parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
892        ); // no fractional part
893        assert_eq!(
894            1599590549190855000,
895            parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
896        );
897        assert_eq!(
898            1599590549000000000,
899            parse_timestamp_formatted("1599590549", "%s").unwrap()
900        );
901        assert_eq!(
902            1599572549000000000,
903            parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
904                .unwrap()
905        );
906        assert_eq!(
907            1642896000000000000,
908            parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
909        );
910    }
911
912    fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
913        let result = string_to_timestamp_nanos_formatted(s, format);
914        if let Err(e) = &result {
915            eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
916        }
917        result
918    }
919
920    #[test]
921    fn string_to_timestamp_formatted_invalid() {
922        // Test parsing invalid formats
923        let cases = [
924            ("", "%Y%m%d %H%M%S", "premature end of input"),
925            ("SS", "%c", "premature end of input"),
926            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
927            (
928                "Wed, 18 Feb 2015 23:16:09 GMT",
929                "%XX",
930                "input contains invalid characters",
931            ),
932            (
933                "Wed, 18 Feb 2015 23:16:09 GMT",
934                "%Y%m%d %H%M%S",
935                "input contains invalid characters",
936            ),
937        ];
938
939        for (s, f, ctx) in cases {
940            let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
941            let actual = string_to_datetime_formatted(&Utc, s, f)
942                .unwrap_err()
943                .to_string();
944            assert_eq!(actual, expected)
945        }
946    }
947
948    #[test]
949    fn string_to_timestamp_invalid_arguments() {
950        // Test parsing invalid formats
951        let cases = [
952            ("", "%Y%m%d %H%M%S", "premature end of input"),
953            ("SS", "%c", "premature end of input"),
954            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
955            (
956                "Wed, 18 Feb 2015 23:16:09 GMT",
957                "%XX",
958                "input contains invalid characters",
959            ),
960            (
961                "Wed, 18 Feb 2015 23:16:09 GMT",
962                "%Y%m%d %H%M%S",
963                "input contains invalid characters",
964            ),
965        ];
966
967        for (s, f, ctx) in cases {
968            let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
969            let actual = string_to_datetime_formatted(&Utc, s, f)
970                .unwrap_err()
971                .to_string();
972            assert_eq!(actual, expected)
973        }
974    }
975
976    #[test]
977    fn test_tz() {
978        let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
979            Box::new(ToTimestampFunc::new()),
980            Box::new(ToTimestampSecondsFunc::new()),
981            Box::new(ToTimestampMillisFunc::new()),
982            Box::new(ToTimestampNanosFunc::new()),
983            Box::new(ToTimestampSecondsFunc::new()),
984        ];
985
986        let mut nanos_builder = TimestampNanosecondArray::builder(2);
987        let mut millis_builder = TimestampMillisecondArray::builder(2);
988        let mut micros_builder = TimestampMicrosecondArray::builder(2);
989        let mut sec_builder = TimestampSecondArray::builder(2);
990
991        nanos_builder.append_value(1599572549190850000);
992        millis_builder.append_value(1599572549190);
993        micros_builder.append_value(1599572549190850);
994        sec_builder.append_value(1599572549);
995
996        let nanos_timestamps =
997            Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
998        let millis_timestamps =
999            Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1000        let micros_timestamps =
1001            Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1002        let sec_timestamps =
1003            Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1004
1005        let arrays = &[
1006            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1007            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1008            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1009            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1010        ];
1011
1012        for udf in &udfs {
1013            for array in arrays {
1014                let rt = udf.return_type(&[array.data_type()]).unwrap();
1015                assert!(matches!(rt, Timestamp(_, Some(_))));
1016                let args = datafusion_expr::ScalarFunctionArgs {
1017                    args: vec![array.clone()],
1018                    number_rows: 4,
1019                    return_type: &rt,
1020                };
1021                let res = udf
1022                    .invoke_with_args(args)
1023                    .expect("that to_timestamp parsed values without error");
1024                let array = match res {
1025                    ColumnarValue::Array(res) => res,
1026                    _ => panic!("Expected a columnar array"),
1027                };
1028                let ty = array.data_type();
1029                assert!(matches!(ty, Timestamp(_, Some(_))));
1030            }
1031        }
1032
1033        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1034        let mut millis_builder = TimestampMillisecondArray::builder(2);
1035        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1036        let mut sec_builder = TimestampSecondArray::builder(2);
1037        let mut i64_builder = Int64Array::builder(2);
1038
1039        nanos_builder.append_value(1599572549190850000);
1040        millis_builder.append_value(1599572549190);
1041        micros_builder.append_value(1599572549190850);
1042        sec_builder.append_value(1599572549);
1043        i64_builder.append_value(1599572549);
1044
1045        let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1046        let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1047        let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1048        let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1049        let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1050
1051        let arrays = &[
1052            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1053            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1054            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1055            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1056            ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1057        ];
1058
1059        for udf in &udfs {
1060            for array in arrays {
1061                let rt = udf.return_type(&[array.data_type()]).unwrap();
1062                assert!(matches!(rt, Timestamp(_, None)));
1063                let args = datafusion_expr::ScalarFunctionArgs {
1064                    args: vec![array.clone()],
1065                    number_rows: 5,
1066                    return_type: &rt,
1067                };
1068                let res = udf
1069                    .invoke_with_args(args)
1070                    .expect("that to_timestamp parsed values without error");
1071                let array = match res {
1072                    ColumnarValue::Array(res) => res,
1073                    _ => panic!("Expected a columnar array"),
1074                };
1075                let ty = array.data_type();
1076                assert!(matches!(ty, Timestamp(_, None)));
1077            }
1078        }
1079    }
1080
1081    #[test]
1082    fn test_to_timestamp_arg_validation() {
1083        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1084        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1085
1086        let data = date_string_builder.finish();
1087
1088        let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1089            (Arc::new(to_timestamp), Nanosecond),
1090            (Arc::new(to_timestamp_micros), Microsecond),
1091            (Arc::new(to_timestamp_millis), Millisecond),
1092            (Arc::new(to_timestamp_nanos), Nanosecond),
1093            (Arc::new(to_timestamp_seconds), Second),
1094        ];
1095
1096        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1097        let mut millis_builder = TimestampMillisecondArray::builder(2);
1098        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1099        let mut sec_builder = TimestampSecondArray::builder(2);
1100
1101        nanos_builder.append_value(1599572549190850000);
1102        millis_builder.append_value(1599572549190);
1103        micros_builder.append_value(1599572549190850);
1104        sec_builder.append_value(1599572549);
1105
1106        let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1107        let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1108        let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
1109        let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1110
1111        for (func, time_unit) in funcs {
1112            // test UTF8
1113            let string_array = [
1114                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1115                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1116                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1117                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1118            ];
1119            let parsed_timestamps = func(&string_array)
1120                .expect("that to_timestamp with format args parsed values without error");
1121            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1122                assert_eq!(parsed_array.len(), 1);
1123                match time_unit {
1124                    Nanosecond => {
1125                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1126                    }
1127                    Millisecond => {
1128                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1129                    }
1130                    Microsecond => {
1131                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1132                    }
1133                    Second => {
1134                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1135                    }
1136                };
1137            } else {
1138                panic!("Expected a columnar array")
1139            }
1140
1141            // test LargeUTF8
1142            let string_array = [
1143                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1144                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1145                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1146                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1147            ];
1148            let parsed_timestamps = func(&string_array)
1149                .expect("that to_timestamp with format args parsed values without error");
1150            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1151                assert_eq!(parsed_array.len(), 1);
1152                assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1153
1154                match time_unit {
1155                    Nanosecond => {
1156                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1157                    }
1158                    Millisecond => {
1159                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1160                    }
1161                    Microsecond => {
1162                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1163                    }
1164                    Second => {
1165                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1166                    }
1167                };
1168            } else {
1169                panic!("Expected a columnar array")
1170            }
1171
1172            // test other types
1173            let string_array = [
1174                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1175                ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1176                ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1177                ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1178            ];
1179
1180            let expected = "Unsupported data type Int32 for function".to_string();
1181            let actual = func(&string_array).unwrap_err().to_string();
1182            assert_contains!(actual, expected);
1183
1184            // test other types
1185            let string_array = [
1186                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1187                ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1188                    vec![1i64].into(),
1189                    None,
1190                )) as ArrayRef),
1191            ];
1192
1193            let expected = "Unsupported data type".to_string();
1194            let actual = func(&string_array).unwrap_err().to_string();
1195            assert_contains!(actual, expected);
1196        }
1197    }
1198}