datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::ops::{Add, Sub};
20use std::str::FromStr;
21use std::sync::Arc;
22
23use arrow::array::temporal_conversions::{
24    as_datetime_with_timezone, timestamp_ns_to_datetime,
25};
26use arrow::array::timezone::Tz;
27use arrow::array::types::{
28    ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
29    TimestampNanosecondType, TimestampSecondType,
30};
31use arrow::array::{Array, PrimitiveArray};
32use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
33use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
34use datafusion_common::cast::as_primitive_array;
35use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue};
36use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
37use datafusion_expr::TypeSignature::Exact;
38use datafusion_expr::{
39    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
40};
41use datafusion_macros::user_doc;
42
43use chrono::{
44    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
45};
46
47#[user_doc(
48    doc_section(label = "Time and Date Functions"),
49    description = "Truncates a timestamp value to a specified precision.",
50    syntax_example = "date_trunc(precision, expression)",
51    argument(
52        name = "precision",
53        description = r#"Time precision to truncate to. The following precisions are supported:
54
55    - year / YEAR
56    - quarter / QUARTER
57    - month / MONTH
58    - week / WEEK
59    - day / DAY
60    - hour / HOUR
61    - minute / MINUTE
62    - second / SECOND
63"#
64    ),
65    argument(
66        name = "expression",
67        description = "Time expression to operate on. Can be a constant, column, or function."
68    )
69)]
70#[derive(Debug)]
71pub struct DateTruncFunc {
72    signature: Signature,
73    aliases: Vec<String>,
74}
75
76impl Default for DateTruncFunc {
77    fn default() -> Self {
78        Self::new()
79    }
80}
81
82impl DateTruncFunc {
83    pub fn new() -> Self {
84        Self {
85            signature: Signature::one_of(
86                vec![
87                    Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
88                    Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
89                    Exact(vec![
90                        Utf8,
91                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
92                    ]),
93                    Exact(vec![
94                        Utf8View,
95                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
96                    ]),
97                    Exact(vec![Utf8, Timestamp(Microsecond, None)]),
98                    Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
99                    Exact(vec![
100                        Utf8,
101                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
102                    ]),
103                    Exact(vec![
104                        Utf8View,
105                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
106                    ]),
107                    Exact(vec![Utf8, Timestamp(Millisecond, None)]),
108                    Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
109                    Exact(vec![
110                        Utf8,
111                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
112                    ]),
113                    Exact(vec![
114                        Utf8View,
115                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
116                    ]),
117                    Exact(vec![Utf8, Timestamp(Second, None)]),
118                    Exact(vec![Utf8View, Timestamp(Second, None)]),
119                    Exact(vec![
120                        Utf8,
121                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
122                    ]),
123                    Exact(vec![
124                        Utf8View,
125                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
126                    ]),
127                ],
128                Volatility::Immutable,
129            ),
130            aliases: vec![String::from("datetrunc")],
131        }
132    }
133}
134
135impl ScalarUDFImpl for DateTruncFunc {
136    fn as_any(&self) -> &dyn Any {
137        self
138    }
139
140    fn name(&self) -> &str {
141        "date_trunc"
142    }
143
144    fn signature(&self) -> &Signature {
145        &self.signature
146    }
147
148    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
149        match &arg_types[1] {
150            Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
151                Ok(Timestamp(Nanosecond, None))
152            }
153            Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
154            Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
155            Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
156            Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
157            _ => plan_err!(
158                "The date_trunc function can only accept timestamp as the second arg."
159            ),
160        }
161    }
162
163    fn invoke_with_args(
164        &self,
165        args: datafusion_expr::ScalarFunctionArgs,
166    ) -> Result<ColumnarValue> {
167        let args = args.args;
168        let (granularity, array) = (&args[0], &args[1]);
169
170        let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
171            granularity
172        {
173            v.to_lowercase()
174        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
175        {
176            v.to_lowercase()
177        } else {
178            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
179        };
180
181        fn process_array<T: ArrowTimestampType>(
182            array: &dyn Array,
183            granularity: String,
184            tz_opt: &Option<Arc<str>>,
185        ) -> Result<ColumnarValue> {
186            let parsed_tz = parse_tz(tz_opt)?;
187            let array = as_primitive_array::<T>(array)?;
188            let array: PrimitiveArray<T> = array
189                .try_unary(|x| {
190                    general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
191                })?
192                .with_timezone_opt(tz_opt.clone());
193            Ok(ColumnarValue::Array(Arc::new(array)))
194        }
195
196        fn process_scalar<T: ArrowTimestampType>(
197            v: &Option<i64>,
198            granularity: String,
199            tz_opt: &Option<Arc<str>>,
200        ) -> Result<ColumnarValue> {
201            let parsed_tz = parse_tz(tz_opt)?;
202            let value = if let Some(v) = v {
203                Some(general_date_trunc(
204                    T::UNIT,
205                    *v,
206                    parsed_tz,
207                    granularity.as_str(),
208                )?)
209            } else {
210                None
211            };
212            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
213            Ok(ColumnarValue::Scalar(value))
214        }
215
216        Ok(match array {
217            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
218                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
219            }
220            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
221                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
222            }
223            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
224                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
225            }
226            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
227                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
228            }
229            ColumnarValue::Array(array) => {
230                let array_type = array.data_type();
231                if let Timestamp(unit, tz_opt) = array_type {
232                    match unit {
233                        Second => process_array::<TimestampSecondType>(
234                            array,
235                            granularity,
236                            tz_opt,
237                        )?,
238                        Millisecond => process_array::<TimestampMillisecondType>(
239                            array,
240                            granularity,
241                            tz_opt,
242                        )?,
243                        Microsecond => process_array::<TimestampMicrosecondType>(
244                            array,
245                            granularity,
246                            tz_opt,
247                        )?,
248                        Nanosecond => process_array::<TimestampNanosecondType>(
249                            array,
250                            granularity,
251                            tz_opt,
252                        )?,
253                    }
254                } else {
255                    return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
256                }
257            }
258            _ => {
259                return exec_err!(
260                    "second argument of `date_trunc` must be timestamp scalar or array"
261                );
262            }
263        })
264    }
265
266    fn aliases(&self) -> &[String] {
267        &self.aliases
268    }
269
270    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
271        // The DATE_TRUNC function preserves the order of its second argument.
272        let precision = &input[0];
273        let date_value = &input[1];
274
275        if precision.sort_properties.eq(&SortProperties::Singleton) {
276            Ok(date_value.sort_properties)
277        } else {
278            Ok(SortProperties::Unordered)
279        }
280    }
281    fn documentation(&self) -> Option<&Documentation> {
282        self.doc()
283    }
284}
285
286fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
287where
288    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
289{
290    let value = match granularity {
291        "millisecond" => value,
292        "microsecond" => value,
293        "second" => value.and_then(|d| d.with_nanosecond(0)),
294        "minute" => value
295            .and_then(|d| d.with_nanosecond(0))
296            .and_then(|d| d.with_second(0)),
297        "hour" => value
298            .and_then(|d| d.with_nanosecond(0))
299            .and_then(|d| d.with_second(0))
300            .and_then(|d| d.with_minute(0)),
301        "day" => value
302            .and_then(|d| d.with_nanosecond(0))
303            .and_then(|d| d.with_second(0))
304            .and_then(|d| d.with_minute(0))
305            .and_then(|d| d.with_hour(0)),
306        "week" => value
307            .and_then(|d| d.with_nanosecond(0))
308            .and_then(|d| d.with_second(0))
309            .and_then(|d| d.with_minute(0))
310            .and_then(|d| d.with_hour(0))
311            .map(|d| {
312                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
313            }),
314        "month" => value
315            .and_then(|d| d.with_nanosecond(0))
316            .and_then(|d| d.with_second(0))
317            .and_then(|d| d.with_minute(0))
318            .and_then(|d| d.with_hour(0))
319            .and_then(|d| d.with_day0(0)),
320        "quarter" => value
321            .and_then(|d| d.with_nanosecond(0))
322            .and_then(|d| d.with_second(0))
323            .and_then(|d| d.with_minute(0))
324            .and_then(|d| d.with_hour(0))
325            .and_then(|d| d.with_day0(0))
326            .and_then(|d| d.with_month(quarter_month(&d))),
327        "year" => value
328            .and_then(|d| d.with_nanosecond(0))
329            .and_then(|d| d.with_second(0))
330            .and_then(|d| d.with_minute(0))
331            .and_then(|d| d.with_hour(0))
332            .and_then(|d| d.with_day0(0))
333            .and_then(|d| d.with_month0(0)),
334        unsupported => {
335            return exec_err!("Unsupported date_trunc granularity: {unsupported}");
336        }
337    };
338    Ok(value)
339}
340
341fn quarter_month<T>(date: &T) -> u32
342where
343    T: Datelike,
344{
345    1 + 3 * ((date.month() - 1) / 3)
346}
347
348fn _date_trunc_coarse_with_tz(
349    granularity: &str,
350    value: Option<DateTime<Tz>>,
351) -> Result<Option<i64>> {
352    if let Some(value) = value {
353        let local = value.naive_local();
354        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
355        let truncated = truncated.and_then(|truncated| {
356            match truncated.and_local_timezone(value.timezone()) {
357                LocalResult::None => {
358                    // This can happen if the date_trunc operation moves the time into
359                    // an hour that doesn't exist due to daylight savings. On known example where
360                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
361                    // To account for this adjust the time by a few hours, convert to local time,
362                    // and then adjust the time back.
363                    truncated
364                        .sub(TimeDelta::try_hours(3).unwrap())
365                        .and_local_timezone(value.timezone())
366                        .single()
367                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
368                }
369                LocalResult::Single(datetime) => Some(datetime),
370                LocalResult::Ambiguous(datetime1, datetime2) => {
371                    // Because we are truncating from an equally or more specific time
372                    // the original time must have been within the ambiguous local time
373                    // period. Therefore the offset of one of these times should match the
374                    // offset of the original time.
375                    if datetime1.offset().fix() == value.offset().fix() {
376                        Some(datetime1)
377                    } else {
378                        Some(datetime2)
379                    }
380                }
381            }
382        });
383        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
384    } else {
385        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
386        Ok(None)
387    }
388}
389
390fn _date_trunc_coarse_without_tz(
391    granularity: &str,
392    value: Option<NaiveDateTime>,
393) -> Result<Option<i64>> {
394    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
395    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
396}
397
398/// Truncates the single `value`, expressed in nanoseconds since the
399/// epoch, for granularities greater than 1 second, in taking into
400/// account that some granularities are not uniform durations of time
401/// (e.g. months are not always the same lengths, leap seconds, etc)
402fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i64> {
403    let value = match tz {
404        Some(tz) => {
405            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
406            // and NaiveDateTime (ISO 8601) has no concept of timezones
407            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
408                .ok_or(DataFusionError::Execution(format!(
409                "Timestamp {value} out of range"
410            )))?;
411            _date_trunc_coarse_with_tz(granularity, Some(value))
412        }
413        None => {
414            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
415            let value = timestamp_ns_to_datetime(value).ok_or_else(|| {
416                DataFusionError::Execution(format!("Timestamp {value} out of range"))
417            })?;
418            _date_trunc_coarse_without_tz(granularity, Some(value))
419        }
420    }?;
421
422    // `with_x(0)` are infallible because `0` are always a valid
423    Ok(value.unwrap())
424}
425
426// truncates a single value with the given timeunit to the specified granularity
427fn general_date_trunc(
428    tu: TimeUnit,
429    value: i64,
430    tz: Option<Tz>,
431    granularity: &str,
432) -> Result<i64, DataFusionError> {
433    let scale = match tu {
434        Second => 1_000_000_000,
435        Millisecond => 1_000_000,
436        Microsecond => 1_000,
437        Nanosecond => 1,
438    };
439
440    // convert to nanoseconds
441    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
442
443    let result = match tu {
444        Second => match granularity {
445            "minute" => nano / 1_000_000_000 / 60 * 60,
446            _ => nano / 1_000_000_000,
447        },
448        Millisecond => match granularity {
449            "minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
450            "second" => nano / 1_000_000 / 1_000 * 1_000,
451            _ => nano / 1_000_000,
452        },
453        Microsecond => match granularity {
454            "minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000,
455            "second" => nano / 1_000 / 1_000_000 * 1_000_000,
456            "millisecond" => nano / 1_000 / 1_000 * 1_000,
457            _ => nano / 1_000,
458        },
459        _ => match granularity {
460            "minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60,
461            "second" => nano / 1_000_000_000 * 1_000_000_000,
462            "millisecond" => nano / 1_000_000 * 1_000_000,
463            "microsecond" => nano / 1_000 * 1_000,
464            _ => nano,
465        },
466    };
467    Ok(result)
468}
469
470fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
471    tz.as_ref()
472        .map(|tz| {
473            Tz::from_str(tz).map_err(|op| {
474                DataFusionError::Execution(format!("failed on timezone {tz}: {:?}", op))
475            })
476        })
477        .transpose()
478}
479
480#[cfg(test)]
481mod tests {
482    use std::sync::Arc;
483
484    use crate::datetime::date_trunc::{date_trunc_coarse, DateTruncFunc};
485
486    use arrow::array::cast::as_primitive_array;
487    use arrow::array::types::TimestampNanosecondType;
488    use arrow::array::{Array, TimestampNanosecondArray};
489    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
490    use arrow::datatypes::{DataType, TimeUnit};
491    use datafusion_common::ScalarValue;
492    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
493
494    #[test]
495    fn date_trunc_test() {
496        let cases = vec![
497            (
498                "2020-09-08T13:42:29.190855Z",
499                "second",
500                "2020-09-08T13:42:29.000000Z",
501            ),
502            (
503                "2020-09-08T13:42:29.190855Z",
504                "minute",
505                "2020-09-08T13:42:00.000000Z",
506            ),
507            (
508                "2020-09-08T13:42:29.190855Z",
509                "hour",
510                "2020-09-08T13:00:00.000000Z",
511            ),
512            (
513                "2020-09-08T13:42:29.190855Z",
514                "day",
515                "2020-09-08T00:00:00.000000Z",
516            ),
517            (
518                "2020-09-08T13:42:29.190855Z",
519                "week",
520                "2020-09-07T00:00:00.000000Z",
521            ),
522            (
523                "2020-09-08T13:42:29.190855Z",
524                "month",
525                "2020-09-01T00:00:00.000000Z",
526            ),
527            (
528                "2020-09-08T13:42:29.190855Z",
529                "year",
530                "2020-01-01T00:00:00.000000Z",
531            ),
532            // week
533            (
534                "2021-01-01T13:42:29.190855Z",
535                "week",
536                "2020-12-28T00:00:00.000000Z",
537            ),
538            (
539                "2020-01-01T13:42:29.190855Z",
540                "week",
541                "2019-12-30T00:00:00.000000Z",
542            ),
543            // quarter
544            (
545                "2020-01-01T13:42:29.190855Z",
546                "quarter",
547                "2020-01-01T00:00:00.000000Z",
548            ),
549            (
550                "2020-02-01T13:42:29.190855Z",
551                "quarter",
552                "2020-01-01T00:00:00.000000Z",
553            ),
554            (
555                "2020-03-01T13:42:29.190855Z",
556                "quarter",
557                "2020-01-01T00:00:00.000000Z",
558            ),
559            (
560                "2020-04-01T13:42:29.190855Z",
561                "quarter",
562                "2020-04-01T00:00:00.000000Z",
563            ),
564            (
565                "2020-08-01T13:42:29.190855Z",
566                "quarter",
567                "2020-07-01T00:00:00.000000Z",
568            ),
569            (
570                "2020-11-01T13:42:29.190855Z",
571                "quarter",
572                "2020-10-01T00:00:00.000000Z",
573            ),
574            (
575                "2020-12-01T13:42:29.190855Z",
576                "quarter",
577                "2020-10-01T00:00:00.000000Z",
578            ),
579        ];
580
581        cases.iter().for_each(|(original, granularity, expected)| {
582            let left = string_to_timestamp_nanos(original).unwrap();
583            let right = string_to_timestamp_nanos(expected).unwrap();
584            let result = date_trunc_coarse(granularity, left, None).unwrap();
585            assert_eq!(result, right, "{original} = {expected}");
586        });
587    }
588
589    #[test]
590    fn test_date_trunc_timezones() {
591        let cases = vec![
592            (
593                vec![
594                    "2020-09-08T00:00:00Z",
595                    "2020-09-08T01:00:00Z",
596                    "2020-09-08T02:00:00Z",
597                    "2020-09-08T03:00:00Z",
598                    "2020-09-08T04:00:00Z",
599                ],
600                Some("+00".into()),
601                vec![
602                    "2020-09-08T00:00:00Z",
603                    "2020-09-08T00:00:00Z",
604                    "2020-09-08T00:00:00Z",
605                    "2020-09-08T00:00:00Z",
606                    "2020-09-08T00:00:00Z",
607                ],
608            ),
609            (
610                vec![
611                    "2020-09-08T00:00:00Z",
612                    "2020-09-08T01:00:00Z",
613                    "2020-09-08T02:00:00Z",
614                    "2020-09-08T03:00:00Z",
615                    "2020-09-08T04:00:00Z",
616                ],
617                None,
618                vec![
619                    "2020-09-08T00:00:00Z",
620                    "2020-09-08T00:00:00Z",
621                    "2020-09-08T00:00:00Z",
622                    "2020-09-08T00:00:00Z",
623                    "2020-09-08T00:00:00Z",
624                ],
625            ),
626            (
627                vec![
628                    "2020-09-08T00:00:00Z",
629                    "2020-09-08T01:00:00Z",
630                    "2020-09-08T02:00:00Z",
631                    "2020-09-08T03:00:00Z",
632                    "2020-09-08T04:00:00Z",
633                ],
634                Some("-02".into()),
635                vec![
636                    "2020-09-07T02:00:00Z",
637                    "2020-09-07T02:00:00Z",
638                    "2020-09-08T02:00:00Z",
639                    "2020-09-08T02:00:00Z",
640                    "2020-09-08T02:00:00Z",
641                ],
642            ),
643            (
644                vec![
645                    "2020-09-08T00:00:00+05",
646                    "2020-09-08T01:00:00+05",
647                    "2020-09-08T02:00:00+05",
648                    "2020-09-08T03:00:00+05",
649                    "2020-09-08T04:00:00+05",
650                ],
651                Some("+05".into()),
652                vec![
653                    "2020-09-08T00:00:00+05",
654                    "2020-09-08T00:00:00+05",
655                    "2020-09-08T00:00:00+05",
656                    "2020-09-08T00:00:00+05",
657                    "2020-09-08T00:00:00+05",
658                ],
659            ),
660            (
661                vec![
662                    "2020-09-08T00:00:00+08",
663                    "2020-09-08T01:00:00+08",
664                    "2020-09-08T02:00:00+08",
665                    "2020-09-08T03:00:00+08",
666                    "2020-09-08T04:00:00+08",
667                ],
668                Some("+08".into()),
669                vec![
670                    "2020-09-08T00:00:00+08",
671                    "2020-09-08T00:00:00+08",
672                    "2020-09-08T00:00:00+08",
673                    "2020-09-08T00:00:00+08",
674                    "2020-09-08T00:00:00+08",
675                ],
676            ),
677            (
678                vec![
679                    "2024-10-26T23:00:00Z",
680                    "2024-10-27T00:00:00Z",
681                    "2024-10-27T01:00:00Z",
682                    "2024-10-27T02:00:00Z",
683                ],
684                Some("Europe/Berlin".into()),
685                vec![
686                    "2024-10-27T00:00:00+02",
687                    "2024-10-27T00:00:00+02",
688                    "2024-10-27T00:00:00+02",
689                    "2024-10-27T00:00:00+02",
690                ],
691            ),
692            (
693                vec![
694                    "2018-02-18T00:00:00Z",
695                    "2018-02-18T01:00:00Z",
696                    "2018-02-18T02:00:00Z",
697                    "2018-02-18T03:00:00Z",
698                    "2018-11-04T01:00:00Z",
699                    "2018-11-04T02:00:00Z",
700                    "2018-11-04T03:00:00Z",
701                    "2018-11-04T04:00:00Z",
702                ],
703                Some("America/Sao_Paulo".into()),
704                vec![
705                    "2018-02-17T00:00:00-02",
706                    "2018-02-17T00:00:00-02",
707                    "2018-02-17T00:00:00-02",
708                    "2018-02-18T00:00:00-03",
709                    "2018-11-03T00:00:00-03",
710                    "2018-11-03T00:00:00-03",
711                    "2018-11-04T01:00:00-02",
712                    "2018-11-04T01:00:00-02",
713                ],
714            ),
715        ];
716
717        cases.iter().for_each(|(original, tz_opt, expected)| {
718            let input = original
719                .iter()
720                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
721                .collect::<TimestampNanosecondArray>()
722                .with_timezone_opt(tz_opt.clone());
723            let right = expected
724                .iter()
725                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
726                .collect::<TimestampNanosecondArray>()
727                .with_timezone_opt(tz_opt.clone());
728            let batch_len = input.len();
729            let args = datafusion_expr::ScalarFunctionArgs {
730                args: vec![
731                    ColumnarValue::Scalar(ScalarValue::from("day")),
732                    ColumnarValue::Array(Arc::new(input)),
733                ],
734                number_rows: batch_len,
735                return_type: &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
736            };
737            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
738            if let ColumnarValue::Array(result) = result {
739                assert_eq!(
740                    result.data_type(),
741                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
742                );
743                let left = as_primitive_array::<TimestampNanosecondType>(&result);
744                assert_eq!(left, &right);
745            } else {
746                panic!("unexpected column type");
747            }
748        });
749    }
750
751    #[test]
752    fn test_date_trunc_hour_timezones() {
753        let cases = vec![
754            (
755                vec![
756                    "2020-09-08T00:30:00Z",
757                    "2020-09-08T01:30:00Z",
758                    "2020-09-08T02:30:00Z",
759                    "2020-09-08T03:30:00Z",
760                    "2020-09-08T04:30:00Z",
761                ],
762                Some("+00".into()),
763                vec![
764                    "2020-09-08T00:00:00Z",
765                    "2020-09-08T01:00:00Z",
766                    "2020-09-08T02:00:00Z",
767                    "2020-09-08T03:00:00Z",
768                    "2020-09-08T04:00:00Z",
769                ],
770            ),
771            (
772                vec![
773                    "2020-09-08T00:30:00Z",
774                    "2020-09-08T01:30:00Z",
775                    "2020-09-08T02:30:00Z",
776                    "2020-09-08T03:30:00Z",
777                    "2020-09-08T04:30:00Z",
778                ],
779                None,
780                vec![
781                    "2020-09-08T00:00:00Z",
782                    "2020-09-08T01:00:00Z",
783                    "2020-09-08T02:00:00Z",
784                    "2020-09-08T03:00:00Z",
785                    "2020-09-08T04:00:00Z",
786                ],
787            ),
788            (
789                vec![
790                    "2020-09-08T00:30:00Z",
791                    "2020-09-08T01:30:00Z",
792                    "2020-09-08T02:30:00Z",
793                    "2020-09-08T03:30:00Z",
794                    "2020-09-08T04:30:00Z",
795                ],
796                Some("-02".into()),
797                vec![
798                    "2020-09-08T00:00:00Z",
799                    "2020-09-08T01:00:00Z",
800                    "2020-09-08T02:00:00Z",
801                    "2020-09-08T03:00:00Z",
802                    "2020-09-08T04:00:00Z",
803                ],
804            ),
805            (
806                vec![
807                    "2020-09-08T00:30:00+05",
808                    "2020-09-08T01:30:00+05",
809                    "2020-09-08T02:30:00+05",
810                    "2020-09-08T03:30:00+05",
811                    "2020-09-08T04:30:00+05",
812                ],
813                Some("+05".into()),
814                vec![
815                    "2020-09-08T00:00:00+05",
816                    "2020-09-08T01:00:00+05",
817                    "2020-09-08T02:00:00+05",
818                    "2020-09-08T03:00:00+05",
819                    "2020-09-08T04:00:00+05",
820                ],
821            ),
822            (
823                vec![
824                    "2020-09-08T00:30:00+08",
825                    "2020-09-08T01:30:00+08",
826                    "2020-09-08T02:30:00+08",
827                    "2020-09-08T03:30:00+08",
828                    "2020-09-08T04:30:00+08",
829                ],
830                Some("+08".into()),
831                vec![
832                    "2020-09-08T00:00:00+08",
833                    "2020-09-08T01:00:00+08",
834                    "2020-09-08T02:00:00+08",
835                    "2020-09-08T03:00:00+08",
836                    "2020-09-08T04:00:00+08",
837                ],
838            ),
839            (
840                vec![
841                    "2024-10-26T23:30:00Z",
842                    "2024-10-27T00:30:00Z",
843                    "2024-10-27T01:30:00Z",
844                    "2024-10-27T02:30:00Z",
845                ],
846                Some("Europe/Berlin".into()),
847                vec![
848                    "2024-10-27T01:00:00+02",
849                    "2024-10-27T02:00:00+02",
850                    "2024-10-27T02:00:00+01",
851                    "2024-10-27T03:00:00+01",
852                ],
853            ),
854            (
855                vec![
856                    "2018-02-18T00:30:00Z",
857                    "2018-02-18T01:30:00Z",
858                    "2018-02-18T02:30:00Z",
859                    "2018-02-18T03:30:00Z",
860                    "2018-11-04T01:00:00Z",
861                    "2018-11-04T02:00:00Z",
862                    "2018-11-04T03:00:00Z",
863                    "2018-11-04T04:00:00Z",
864                ],
865                Some("America/Sao_Paulo".into()),
866                vec![
867                    "2018-02-17T22:00:00-02",
868                    "2018-02-17T23:00:00-02",
869                    "2018-02-17T23:00:00-03",
870                    "2018-02-18T00:00:00-03",
871                    "2018-11-03T22:00:00-03",
872                    "2018-11-03T23:00:00-03",
873                    "2018-11-04T01:00:00-02",
874                    "2018-11-04T02:00:00-02",
875                ],
876            ),
877        ];
878
879        cases.iter().for_each(|(original, tz_opt, expected)| {
880            let input = original
881                .iter()
882                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
883                .collect::<TimestampNanosecondArray>()
884                .with_timezone_opt(tz_opt.clone());
885            let right = expected
886                .iter()
887                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
888                .collect::<TimestampNanosecondArray>()
889                .with_timezone_opt(tz_opt.clone());
890            let batch_len = input.len();
891            let args = datafusion_expr::ScalarFunctionArgs {
892                args: vec![
893                    ColumnarValue::Scalar(ScalarValue::from("hour")),
894                    ColumnarValue::Array(Arc::new(input)),
895                ],
896                number_rows: batch_len,
897                return_type: &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
898            };
899            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
900            if let ColumnarValue::Array(result) = result {
901                assert_eq!(
902                    result.data_type(),
903                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
904                );
905                let left = as_primitive_array::<TimestampNanosecondType>(&result);
906                assert_eq!(left, &right);
907            } else {
908                panic!("unexpected column type");
909            }
910        });
911    }
912}