datafusion_functions/datetime/
to_timestamp.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::datatypes::DataType::*;
22use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
23use arrow::datatypes::{
24    ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
25    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
26};
27
28use crate::datetime::common::*;
29use datafusion_common::{exec_err, Result, ScalarType};
30use datafusion_expr::{
31    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_macros::user_doc;
34
35#[user_doc(
36    doc_section(label = "Time and Date Functions"),
37    description = r#"
38Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
39
40Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
41"#,
42    syntax_example = "to_timestamp(expression[, ..., format_n])",
43    sql_example = r#"```sql
44> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
45+-----------------------------------------------------------+
46| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
47+-----------------------------------------------------------+
48| 2023-01-31T14:26:56.123456789                             |
49+-----------------------------------------------------------+
50> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
51+--------------------------------------------------------------------------------------------------------+
52| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
53+--------------------------------------------------------------------------------------------------------+
54| 2023-05-17T03:59:00.123456789                                                                          |
55+--------------------------------------------------------------------------------------------------------+
56```
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
58"#,
59    argument(
60        name = "expression",
61        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
62    ),
63    argument(
64        name = "format_n",
65        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
66    )
67)]
68#[derive(Debug)]
69pub struct ToTimestampFunc {
70    signature: Signature,
71}
72
73#[user_doc(
74    doc_section(label = "Time and Date Functions"),
75    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
76    syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
77    sql_example = r#"```sql
78> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
79+-------------------------------------------------------------------+
80| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
81+-------------------------------------------------------------------+
82| 2023-01-31T14:26:56                                               |
83+-------------------------------------------------------------------+
84> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
85+----------------------------------------------------------------------------------------------------------------+
86| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
87+----------------------------------------------------------------------------------------------------------------+
88| 2023-05-17T03:59:00                                                                                            |
89+----------------------------------------------------------------------------------------------------------------+
90```
91Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
92"#,
93    argument(
94        name = "expression",
95        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
96    ),
97    argument(
98        name = "format_n",
99        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
100    )
101)]
102#[derive(Debug)]
103pub struct ToTimestampSecondsFunc {
104    signature: Signature,
105}
106
107#[user_doc(
108    doc_section(label = "Time and Date Functions"),
109    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
110    syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
111    sql_example = r#"```sql
112> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
113+------------------------------------------------------------------+
114| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
115+------------------------------------------------------------------+
116| 2023-01-31T14:26:56.123                                          |
117+------------------------------------------------------------------+
118> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
119+---------------------------------------------------------------------------------------------------------------+
120| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
121+---------------------------------------------------------------------------------------------------------------+
122| 2023-05-17T03:59:00.123                                                                                       |
123+---------------------------------------------------------------------------------------------------------------+
124```
125Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
126"#,
127    argument(
128        name = "expression",
129        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
130    ),
131    argument(
132        name = "format_n",
133        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
134    )
135)]
136#[derive(Debug)]
137pub struct ToTimestampMillisFunc {
138    signature: Signature,
139}
140
141#[user_doc(
142    doc_section(label = "Time and Date Functions"),
143    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
144    syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
145    sql_example = r#"```sql
146> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
147+------------------------------------------------------------------+
148| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
149+------------------------------------------------------------------+
150| 2023-01-31T14:26:56.123456                                       |
151+------------------------------------------------------------------+
152> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
153+---------------------------------------------------------------------------------------------------------------+
154| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
155+---------------------------------------------------------------------------------------------------------------+
156| 2023-05-17T03:59:00.123456                                                                                    |
157+---------------------------------------------------------------------------------------------------------------+
158```
159Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
160"#,
161    argument(
162        name = "expression",
163        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
164    ),
165    argument(
166        name = "format_n",
167        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
168    )
169)]
170#[derive(Debug)]
171pub struct ToTimestampMicrosFunc {
172    signature: Signature,
173}
174
175#[user_doc(
176    doc_section(label = "Time and Date Functions"),
177    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
178    syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
179    sql_example = r#"```sql
180> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
181+-----------------------------------------------------------------+
182| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
183+-----------------------------------------------------------------+
184| 2023-01-31T14:26:56.123456789                                   |
185+-----------------------------------------------------------------+
186> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
187+--------------------------------------------------------------------------------------------------------------+
188| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
189+--------------------------------------------------------------------------------------------------------------+
190| 2023-05-17T03:59:00.123456789                                                                                |
191+---------------------------------------------------------------------------------------------------------------+
192```
193Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
194"#,
195    argument(
196        name = "expression",
197        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
198    ),
199    argument(
200        name = "format_n",
201        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
202    )
203)]
204#[derive(Debug)]
205pub struct ToTimestampNanosFunc {
206    signature: Signature,
207}
208
209impl Default for ToTimestampFunc {
210    fn default() -> Self {
211        Self::new()
212    }
213}
214
215impl ToTimestampFunc {
216    pub fn new() -> Self {
217        Self {
218            signature: Signature::variadic_any(Volatility::Immutable),
219        }
220    }
221}
222
223impl Default for ToTimestampSecondsFunc {
224    fn default() -> Self {
225        Self::new()
226    }
227}
228
229impl ToTimestampSecondsFunc {
230    pub fn new() -> Self {
231        Self {
232            signature: Signature::variadic_any(Volatility::Immutable),
233        }
234    }
235}
236
237impl Default for ToTimestampMillisFunc {
238    fn default() -> Self {
239        Self::new()
240    }
241}
242
243impl ToTimestampMillisFunc {
244    pub fn new() -> Self {
245        Self {
246            signature: Signature::variadic_any(Volatility::Immutable),
247        }
248    }
249}
250
251impl Default for ToTimestampMicrosFunc {
252    fn default() -> Self {
253        Self::new()
254    }
255}
256
257impl ToTimestampMicrosFunc {
258    pub fn new() -> Self {
259        Self {
260            signature: Signature::variadic_any(Volatility::Immutable),
261        }
262    }
263}
264
265impl Default for ToTimestampNanosFunc {
266    fn default() -> Self {
267        Self::new()
268    }
269}
270
271impl ToTimestampNanosFunc {
272    pub fn new() -> Self {
273        Self {
274            signature: Signature::variadic_any(Volatility::Immutable),
275        }
276    }
277}
278
279/// to_timestamp SQL function
280///
281/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
282/// The supported range for integer input is between `-9223372037` and `9223372036`.
283/// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`.
284/// Please use `to_timestamp_seconds` for the input outside of supported bounds.
285impl ScalarUDFImpl for ToTimestampFunc {
286    fn as_any(&self) -> &dyn Any {
287        self
288    }
289
290    fn name(&self) -> &str {
291        "to_timestamp"
292    }
293
294    fn signature(&self) -> &Signature {
295        &self.signature
296    }
297
298    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
299        Ok(return_type_for(&arg_types[0], Nanosecond))
300    }
301
302    fn invoke_with_args(
303        &self,
304        args: datafusion_expr::ScalarFunctionArgs,
305    ) -> Result<ColumnarValue> {
306        let args = args.args;
307        if args.is_empty() {
308            return exec_err!(
309                "to_timestamp function requires 1 or more arguments, got {}",
310                args.len()
311            );
312        }
313
314        // validate that any args after the first one are Utf8
315        if args.len() > 1 {
316            validate_data_types(&args, "to_timestamp")?;
317        }
318
319        match args[0].data_type() {
320            Int32 | Int64 => args[0]
321                .cast_to(&Timestamp(Second, None), None)?
322                .cast_to(&Timestamp(Nanosecond, None), None),
323            Null | Float64 | Timestamp(_, None) => {
324                args[0].cast_to(&Timestamp(Nanosecond, None), None)
325            }
326            Timestamp(_, Some(tz)) => {
327                args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
328            }
329            Utf8View | LargeUtf8 | Utf8 => {
330                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
331            }
332            other => {
333                exec_err!(
334                    "Unsupported data type {:?} for function to_timestamp",
335                    other
336                )
337            }
338        }
339    }
340    fn documentation(&self) -> Option<&Documentation> {
341        self.doc()
342    }
343}
344
345impl ScalarUDFImpl for ToTimestampSecondsFunc {
346    fn as_any(&self) -> &dyn Any {
347        self
348    }
349
350    fn name(&self) -> &str {
351        "to_timestamp_seconds"
352    }
353
354    fn signature(&self) -> &Signature {
355        &self.signature
356    }
357
358    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
359        Ok(return_type_for(&arg_types[0], Second))
360    }
361
362    fn invoke_with_args(
363        &self,
364        args: datafusion_expr::ScalarFunctionArgs,
365    ) -> Result<ColumnarValue> {
366        let args = args.args;
367        if args.is_empty() {
368            return exec_err!(
369                "to_timestamp_seconds function requires 1 or more arguments, got {}",
370                args.len()
371            );
372        }
373
374        // validate that any args after the first one are Utf8
375        if args.len() > 1 {
376            validate_data_types(&args, "to_timestamp")?;
377        }
378
379        match args[0].data_type() {
380            Null | Int32 | Int64 | Timestamp(_, None) => {
381                args[0].cast_to(&Timestamp(Second, None), None)
382            }
383            Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
384            Utf8View | LargeUtf8 | Utf8 => {
385                to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
386            }
387            other => {
388                exec_err!(
389                    "Unsupported data type {:?} for function to_timestamp_seconds",
390                    other
391                )
392            }
393        }
394    }
395    fn documentation(&self) -> Option<&Documentation> {
396        self.doc()
397    }
398}
399
400impl ScalarUDFImpl for ToTimestampMillisFunc {
401    fn as_any(&self) -> &dyn Any {
402        self
403    }
404
405    fn name(&self) -> &str {
406        "to_timestamp_millis"
407    }
408
409    fn signature(&self) -> &Signature {
410        &self.signature
411    }
412
413    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
414        Ok(return_type_for(&arg_types[0], Millisecond))
415    }
416
417    fn invoke_with_args(
418        &self,
419        args: datafusion_expr::ScalarFunctionArgs,
420    ) -> Result<ColumnarValue> {
421        let args = args.args;
422        if args.is_empty() {
423            return exec_err!(
424                "to_timestamp_millis function requires 1 or more arguments, got {}",
425                args.len()
426            );
427        }
428
429        // validate that any args after the first one are Utf8
430        if args.len() > 1 {
431            validate_data_types(&args, "to_timestamp")?;
432        }
433
434        match args[0].data_type() {
435            Null | Int32 | Int64 | Timestamp(_, None) => {
436                args[0].cast_to(&Timestamp(Millisecond, None), None)
437            }
438            Timestamp(_, Some(tz)) => {
439                args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
440            }
441            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
442                &args,
443                "to_timestamp_millis",
444            ),
445            other => {
446                exec_err!(
447                    "Unsupported data type {:?} for function to_timestamp_millis",
448                    other
449                )
450            }
451        }
452    }
453    fn documentation(&self) -> Option<&Documentation> {
454        self.doc()
455    }
456}
457
458impl ScalarUDFImpl for ToTimestampMicrosFunc {
459    fn as_any(&self) -> &dyn Any {
460        self
461    }
462
463    fn name(&self) -> &str {
464        "to_timestamp_micros"
465    }
466
467    fn signature(&self) -> &Signature {
468        &self.signature
469    }
470
471    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
472        Ok(return_type_for(&arg_types[0], Microsecond))
473    }
474
475    fn invoke_with_args(
476        &self,
477        args: datafusion_expr::ScalarFunctionArgs,
478    ) -> Result<ColumnarValue> {
479        let args = args.args;
480        if args.is_empty() {
481            return exec_err!(
482                "to_timestamp_micros function requires 1 or more arguments, got {}",
483                args.len()
484            );
485        }
486
487        // validate that any args after the first one are Utf8
488        if args.len() > 1 {
489            validate_data_types(&args, "to_timestamp")?;
490        }
491
492        match args[0].data_type() {
493            Null | Int32 | Int64 | Timestamp(_, None) => {
494                args[0].cast_to(&Timestamp(Microsecond, None), None)
495            }
496            Timestamp(_, Some(tz)) => {
497                args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
498            }
499            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
500                &args,
501                "to_timestamp_micros",
502            ),
503            other => {
504                exec_err!(
505                    "Unsupported data type {:?} for function to_timestamp_micros",
506                    other
507                )
508            }
509        }
510    }
511    fn documentation(&self) -> Option<&Documentation> {
512        self.doc()
513    }
514}
515
516impl ScalarUDFImpl for ToTimestampNanosFunc {
517    fn as_any(&self) -> &dyn Any {
518        self
519    }
520
521    fn name(&self) -> &str {
522        "to_timestamp_nanos"
523    }
524
525    fn signature(&self) -> &Signature {
526        &self.signature
527    }
528
529    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
530        Ok(return_type_for(&arg_types[0], Nanosecond))
531    }
532
533    fn invoke_with_args(
534        &self,
535        args: datafusion_expr::ScalarFunctionArgs,
536    ) -> Result<ColumnarValue> {
537        let args = args.args;
538        if args.is_empty() {
539            return exec_err!(
540                "to_timestamp_nanos function requires 1 or more arguments, got {}",
541                args.len()
542            );
543        }
544
545        // validate that any args after the first one are Utf8
546        if args.len() > 1 {
547            validate_data_types(&args, "to_timestamp")?;
548        }
549
550        match args[0].data_type() {
551            Null | Int32 | Int64 | Timestamp(_, None) => {
552                args[0].cast_to(&Timestamp(Nanosecond, None), None)
553            }
554            Timestamp(_, Some(tz)) => {
555                args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
556            }
557            Utf8View | LargeUtf8 | Utf8 => {
558                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
559            }
560            other => {
561                exec_err!(
562                    "Unsupported data type {:?} for function to_timestamp_nanos",
563                    other
564                )
565            }
566        }
567    }
568    fn documentation(&self) -> Option<&Documentation> {
569        self.doc()
570    }
571}
572
573/// Returns the return type for the to_timestamp_* function, preserving
574/// the timezone if it exists.
575fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
576    match arg {
577        Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
578        _ => Timestamp(unit, None),
579    }
580}
581
582fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
583    args: &[ColumnarValue],
584    name: &str,
585) -> Result<ColumnarValue> {
586    let factor = match T::UNIT {
587        Second => 1_000_000_000,
588        Millisecond => 1_000_000,
589        Microsecond => 1_000,
590        Nanosecond => 1,
591    };
592
593    match args.len() {
594        1 => handle::<T, _, T>(
595            args,
596            |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
597            name,
598        ),
599        n if n >= 2 => handle_multiple::<T, _, T, _>(
600            args,
601            string_to_timestamp_nanos_formatted,
602            |n| n / factor,
603            name,
604        ),
605        _ => exec_err!("Unsupported 0 argument count for function {name}"),
606    }
607}
608
609#[cfg(test)]
610mod tests {
611    use std::sync::Arc;
612
613    use arrow::array::types::Int64Type;
614    use arrow::array::{
615        Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
616        TimestampNanosecondArray, TimestampSecondArray,
617    };
618    use arrow::array::{ArrayRef, Int64Array, StringBuilder};
619    use arrow::datatypes::TimeUnit;
620    use chrono::Utc;
621    use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
622    use datafusion_expr::ScalarFunctionImplementation;
623
624    use super::*;
625
626    fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
627        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
628    }
629
630    /// to_timestamp_millis SQL function
631    fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
632        to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
633    }
634
635    /// to_timestamp_micros SQL function
636    fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
637        to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
638    }
639
640    /// to_timestamp_nanos SQL function
641    fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
642        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
643    }
644
645    /// to_timestamp_seconds SQL function
646    fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
647        to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
648    }
649
650    #[test]
651    fn to_timestamp_arrays_and_nulls() -> Result<()> {
652        // ensure that arrow array implementation is wired up and handles nulls correctly
653
654        let mut string_builder = StringBuilder::with_capacity(2, 1024);
655        let mut ts_builder = TimestampNanosecondArray::builder(2);
656
657        string_builder.append_value("2020-09-08T13:42:29.190855");
658        ts_builder.append_value(1599572549190855000);
659
660        string_builder.append_null();
661        ts_builder.append_null();
662        let expected_timestamps = &ts_builder.finish() as &dyn Array;
663
664        let string_array =
665            ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
666        let parsed_timestamps = to_timestamp(&[string_array])
667            .expect("that to_timestamp parsed values without error");
668        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
669            assert_eq!(parsed_array.len(), 2);
670            assert_eq!(expected_timestamps, parsed_array.as_ref());
671        } else {
672            panic!("Expected a columnar array")
673        }
674        Ok(())
675    }
676
677    #[test]
678    fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
679        // ensure that arrow array implementation is wired up and handles nulls correctly
680
681        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
682        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
683        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
684        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
685        let mut ts_builder = TimestampNanosecondArray::builder(2);
686
687        date_string_builder.append_null();
688        format1_builder.append_null();
689        format2_builder.append_null();
690        format3_builder.append_null();
691        ts_builder.append_null();
692
693        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
694        format1_builder.append_value("%s");
695        format2_builder.append_value("%c");
696        format3_builder.append_value("%+");
697        ts_builder.append_value(1599572549190850000);
698
699        let expected_timestamps = &ts_builder.finish() as &dyn Array;
700
701        let string_array = [
702            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
703            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
704            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
705            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
706        ];
707        let parsed_timestamps = to_timestamp(&string_array)
708            .expect("that to_timestamp with format args parsed values without error");
709        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
710            assert_eq!(parsed_array.len(), 2);
711            assert_eq!(expected_timestamps, parsed_array.as_ref());
712        } else {
713            panic!("Expected a columnar array")
714        }
715        Ok(())
716    }
717
718    #[test]
719    fn to_timestamp_invalid_input_type() -> Result<()> {
720        // pass the wrong type of input array to to_timestamp and test
721        // that we get an error.
722
723        let mut builder = Int64Array::builder(1);
724        builder.append_value(1);
725        let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
726
727        let expected_err =
728            "Execution error: Unsupported data type Int64 for function to_timestamp";
729        match to_timestamp(&[int64array]) {
730            Ok(_) => panic!("Expected error but got success"),
731            Err(e) => {
732                assert!(
733                    e.to_string().contains(expected_err),
734                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
735                );
736            }
737        }
738        Ok(())
739    }
740
741    #[test]
742    fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
743        // pass the wrong type of input array to to_timestamp and test
744        // that we get an error.
745
746        let mut builder = Int64Array::builder(1);
747        builder.append_value(1);
748        let int64array = [
749            ColumnarValue::Array(Arc::new(builder.finish())),
750            ColumnarValue::Array(Arc::new(builder.finish())),
751        ];
752
753        let expected_err =
754            "Execution error: Unsupported data type Int64 for function to_timestamp";
755        match to_timestamp(&int64array) {
756            Ok(_) => panic!("Expected error but got success"),
757            Err(e) => {
758                assert!(
759                    e.to_string().contains(expected_err),
760                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
761                );
762            }
763        }
764        Ok(())
765    }
766
767    #[test]
768    fn to_timestamp_with_unparseable_data() -> Result<()> {
769        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
770
771        date_string_builder.append_null();
772
773        date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
774
775        let string_array =
776            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
777
778        let expected_err =
779            "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
780        match to_timestamp(&[string_array]) {
781            Ok(_) => panic!("Expected error but got success"),
782            Err(e) => {
783                assert!(
784                    e.to_string().contains(expected_err),
785                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
786                );
787            }
788        }
789        Ok(())
790    }
791
792    #[test]
793    fn to_timestamp_with_invalid_tz() -> Result<()> {
794        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
795
796        date_string_builder.append_null();
797
798        date_string_builder.append_value("2020-09-08T13:42:29ZZ");
799
800        let string_array =
801            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
802
803        let expected_err =
804            "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
805        match to_timestamp(&[string_array]) {
806            Ok(_) => panic!("Expected error but got success"),
807            Err(e) => {
808                assert!(
809                    e.to_string().contains(expected_err),
810                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
811                );
812            }
813        }
814        Ok(())
815    }
816
817    #[test]
818    fn to_timestamp_with_no_matching_formats() -> Result<()> {
819        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
820        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
821        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
822        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
823
824        date_string_builder.append_null();
825        format1_builder.append_null();
826        format2_builder.append_null();
827        format3_builder.append_null();
828
829        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
830        format1_builder.append_value("%s");
831        format2_builder.append_value("%c");
832        format3_builder.append_value("%H:%M:%S");
833
834        let string_array = [
835            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
836            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
837            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
838            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
839        ];
840
841        let expected_err =
842            "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
843        match to_timestamp(&string_array) {
844            Ok(_) => panic!("Expected error but got success"),
845            Err(e) => {
846                assert!(
847                    e.to_string().contains(expected_err),
848                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
849                );
850            }
851        }
852        Ok(())
853    }
854
855    #[test]
856    fn string_to_timestamp_formatted() {
857        // Explicit timezone
858        assert_eq!(
859            1599572549190855000,
860            parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
861        );
862        assert_eq!(
863            1599572549190855000,
864            parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
865        );
866        assert_eq!(
867            1599572549000000000,
868            parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
869        ); // no fractional part
870        assert_eq!(
871            1599590549190855000,
872            parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
873        );
874        assert_eq!(
875            1599590549000000000,
876            parse_timestamp_formatted("1599590549", "%s").unwrap()
877        );
878        assert_eq!(
879            1599572549000000000,
880            parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
881                .unwrap()
882        );
883        assert_eq!(
884            1642896000000000000,
885            parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
886        );
887    }
888
889    fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
890        let result = string_to_timestamp_nanos_formatted(s, format);
891        if let Err(e) = &result {
892            eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
893        }
894        result
895    }
896
897    #[test]
898    fn string_to_timestamp_formatted_invalid() {
899        // Test parsing invalid formats
900        let cases = [
901            ("", "%Y%m%d %H%M%S", "premature end of input"),
902            ("SS", "%c", "premature end of input"),
903            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
904            (
905                "Wed, 18 Feb 2015 23:16:09 GMT",
906                "%XX",
907                "input contains invalid characters",
908            ),
909            (
910                "Wed, 18 Feb 2015 23:16:09 GMT",
911                "%Y%m%d %H%M%S",
912                "input contains invalid characters",
913            ),
914        ];
915
916        for (s, f, ctx) in cases {
917            let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
918            let actual = string_to_datetime_formatted(&Utc, s, f)
919                .unwrap_err()
920                .to_string();
921            assert_eq!(actual, expected)
922        }
923    }
924
925    #[test]
926    fn string_to_timestamp_invalid_arguments() {
927        // Test parsing invalid formats
928        let cases = [
929            ("", "%Y%m%d %H%M%S", "premature end of input"),
930            ("SS", "%c", "premature end of input"),
931            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
932            (
933                "Wed, 18 Feb 2015 23:16:09 GMT",
934                "%XX",
935                "input contains invalid characters",
936            ),
937            (
938                "Wed, 18 Feb 2015 23:16:09 GMT",
939                "%Y%m%d %H%M%S",
940                "input contains invalid characters",
941            ),
942        ];
943
944        for (s, f, ctx) in cases {
945            let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
946            let actual = string_to_datetime_formatted(&Utc, s, f)
947                .unwrap_err()
948                .to_string();
949            assert_eq!(actual, expected)
950        }
951    }
952
953    #[test]
954    fn test_tz() {
955        let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
956            Box::new(ToTimestampFunc::new()),
957            Box::new(ToTimestampSecondsFunc::new()),
958            Box::new(ToTimestampMillisFunc::new()),
959            Box::new(ToTimestampNanosFunc::new()),
960            Box::new(ToTimestampSecondsFunc::new()),
961        ];
962
963        let mut nanos_builder = TimestampNanosecondArray::builder(2);
964        let mut millis_builder = TimestampMillisecondArray::builder(2);
965        let mut micros_builder = TimestampMicrosecondArray::builder(2);
966        let mut sec_builder = TimestampSecondArray::builder(2);
967
968        nanos_builder.append_value(1599572549190850000);
969        millis_builder.append_value(1599572549190);
970        micros_builder.append_value(1599572549190850);
971        sec_builder.append_value(1599572549);
972
973        let nanos_timestamps =
974            Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
975        let millis_timestamps =
976            Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
977        let micros_timestamps =
978            Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
979        let sec_timestamps =
980            Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
981
982        let arrays = &[
983            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
984            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
985            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
986            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
987        ];
988
989        for udf in &udfs {
990            for array in arrays {
991                let rt = udf.return_type(&[array.data_type()]).unwrap();
992                assert!(matches!(rt, Timestamp(_, Some(_))));
993                let args = datafusion_expr::ScalarFunctionArgs {
994                    args: vec![array.clone()],
995                    number_rows: 4,
996                    return_type: &rt,
997                };
998                let res = udf
999                    .invoke_with_args(args)
1000                    .expect("that to_timestamp parsed values without error");
1001                let array = match res {
1002                    ColumnarValue::Array(res) => res,
1003                    _ => panic!("Expected a columnar array"),
1004                };
1005                let ty = array.data_type();
1006                assert!(matches!(ty, Timestamp(_, Some(_))));
1007            }
1008        }
1009
1010        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1011        let mut millis_builder = TimestampMillisecondArray::builder(2);
1012        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1013        let mut sec_builder = TimestampSecondArray::builder(2);
1014        let mut i64_builder = Int64Array::builder(2);
1015
1016        nanos_builder.append_value(1599572549190850000);
1017        millis_builder.append_value(1599572549190);
1018        micros_builder.append_value(1599572549190850);
1019        sec_builder.append_value(1599572549);
1020        i64_builder.append_value(1599572549);
1021
1022        let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1023        let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1024        let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1025        let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1026        let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1027
1028        let arrays = &[
1029            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1030            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1031            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1032            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1033            ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1034        ];
1035
1036        for udf in &udfs {
1037            for array in arrays {
1038                let rt = udf.return_type(&[array.data_type()]).unwrap();
1039                assert!(matches!(rt, Timestamp(_, None)));
1040                let args = datafusion_expr::ScalarFunctionArgs {
1041                    args: vec![array.clone()],
1042                    number_rows: 5,
1043                    return_type: &rt,
1044                };
1045                let res = udf
1046                    .invoke_with_args(args)
1047                    .expect("that to_timestamp parsed values without error");
1048                let array = match res {
1049                    ColumnarValue::Array(res) => res,
1050                    _ => panic!("Expected a columnar array"),
1051                };
1052                let ty = array.data_type();
1053                assert!(matches!(ty, Timestamp(_, None)));
1054            }
1055        }
1056    }
1057
1058    #[test]
1059    fn test_to_timestamp_arg_validation() {
1060        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1061        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1062
1063        let data = date_string_builder.finish();
1064
1065        let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1066            (Arc::new(to_timestamp), Nanosecond),
1067            (Arc::new(to_timestamp_micros), Microsecond),
1068            (Arc::new(to_timestamp_millis), Millisecond),
1069            (Arc::new(to_timestamp_nanos), Nanosecond),
1070            (Arc::new(to_timestamp_seconds), Second),
1071        ];
1072
1073        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1074        let mut millis_builder = TimestampMillisecondArray::builder(2);
1075        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1076        let mut sec_builder = TimestampSecondArray::builder(2);
1077
1078        nanos_builder.append_value(1599572549190850000);
1079        millis_builder.append_value(1599572549190);
1080        micros_builder.append_value(1599572549190850);
1081        sec_builder.append_value(1599572549);
1082
1083        let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1084        let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1085        let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
1086        let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1087
1088        for (func, time_unit) in funcs {
1089            // test UTF8
1090            let string_array = [
1091                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1092                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1093                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1094                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1095            ];
1096            let parsed_timestamps = func(&string_array)
1097                .expect("that to_timestamp with format args parsed values without error");
1098            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1099                assert_eq!(parsed_array.len(), 1);
1100                match time_unit {
1101                    Nanosecond => {
1102                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1103                    }
1104                    Millisecond => {
1105                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1106                    }
1107                    Microsecond => {
1108                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1109                    }
1110                    Second => {
1111                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1112                    }
1113                };
1114            } else {
1115                panic!("Expected a columnar array")
1116            }
1117
1118            // test LargeUTF8
1119            let string_array = [
1120                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1121                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1122                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1123                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1124            ];
1125            let parsed_timestamps = func(&string_array)
1126                .expect("that to_timestamp with format args parsed values without error");
1127            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1128                assert_eq!(parsed_array.len(), 1);
1129                assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1130
1131                match time_unit {
1132                    Nanosecond => {
1133                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1134                    }
1135                    Millisecond => {
1136                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1137                    }
1138                    Microsecond => {
1139                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1140                    }
1141                    Second => {
1142                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1143                    }
1144                };
1145            } else {
1146                panic!("Expected a columnar array")
1147            }
1148
1149            // test other types
1150            let string_array = [
1151                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1152                ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1153                ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1154                ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1155            ];
1156
1157            let expected = "Unsupported data type Int32 for function".to_string();
1158            let actual = func(&string_array).unwrap_err().to_string();
1159            assert_contains!(actual, expected);
1160
1161            // test other types
1162            let string_array = [
1163                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1164                ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1165                    vec![1i64].into(),
1166                    None,
1167                )) as ArrayRef),
1168            ];
1169
1170            let expected = "Unsupported data type".to_string();
1171            let actual = func(&string_array).unwrap_err().to_string();
1172            assert_contains!(actual, expected);
1173        }
1174    }
1175}