arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count
///
/// Each entry has the offset `O` subtracted (wrapping) before being used as a
/// decimal digit, so callers can pass either raw ASCII (`O = b'0'`) or
/// already-decoded digit values (`O = 0`). The `N` digits are interpreted as the
/// leading digits of a 9-digit fraction, i.e. the value is scaled by `10^(9 - N)`.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &raw in &digits[..N] {
        value = value * 10 + u32::from(raw.wrapping_sub(O));
    }
    // Pad on the right so that e.g. ".123" becomes 123_000_000 ns
    let padding = 10_u32.pow((9 - N) as u32);
    value * padding
}
38
39/// Helper for parsing RFC3339 timestamps
40struct TimestampParser {
41    /// The timestamp bytes to parse minus `b'0'`
42    ///
43    /// This makes interpretation as an integer inexpensive
44    digits: [u8; 32],
45    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
46    mask: u32,
47}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
411/// Specialized parsing implementations to convert strings to Arrow types.
412///
413/// This is used by csv and json reader and can be used directly as well.
414///
415/// # Example
416///
417/// To parse a string to a [`Date32Type`]:
418///
419/// ```
420/// use arrow_cast::parse::Parser;
421/// use arrow_array::types::Date32Type;
422/// let date = Date32Type::parse("2021-01-01").unwrap();
423/// assert_eq!(date, 18628);
424/// ```
425///
426/// To parse a string to a [`TimestampNanosecondType`]:
427///
428/// ```
429/// use arrow_cast::parse::Parser;
430/// use arrow_array::types::TimestampNanosecondType;
431/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
432/// assert_eq!(ts, 1609459200123456789);
433/// ```
434pub trait Parser: ArrowPrimitiveType {
435    /// Parse a string to the native type
436    fn parse(string: &str) -> Option<Self::Native>;
437
438    /// Parse a string to the native type with a format string
439    ///
440    /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse)
441    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
442        Self::parse(string)
443    }
444}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
/// Returns `true` if `opt` is `Some` and its value satisfies `f`
///
/// Stand-in for `Option::is_some_and`, which is only stable since 1.70 and so
/// can't be used while the current MSRV is lower
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476    ($t:ty) => {
477        impl Parser for $t {
478            fn parse(string: &str) -> Option<Self::Native> {
479                if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480                    return None;
481                }
482                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483                    string.as_bytes(),
484                ) {
485                    (Some(n), x) if x == string.len() => Some(n),
486                    _ => None,
487                }
488            }
489        }
490    };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506    fn parse(string: &str) -> Option<i64> {
507        string_to_timestamp_nanos(string).ok()
508    }
509}
510
511impl Parser for TimestampMicrosecondType {
512    fn parse(string: &str) -> Option<i64> {
513        let nanos = string_to_timestamp_nanos(string).ok();
514        nanos.map(|x| x / 1000)
515    }
516}
517
518impl Parser for TimestampMillisecondType {
519    fn parse(string: &str) -> Option<i64> {
520        let nanos = string_to_timestamp_nanos(string).ok();
521        nanos.map(|x| x / 1_000_000)
522    }
523}
524
525impl Parser for TimestampSecondType {
526    fn parse(string: &str) -> Option<i64> {
527        let nanos = string_to_timestamp_nanos(string).ok();
528        nanos.map(|x| x / 1_000_000_000)
529    }
530}
531
532impl Parser for Time64NanosecondType {
533    // Will truncate any fractions of a nanosecond
534    fn parse(string: &str) -> Option<Self::Native> {
535        string_to_time_nanoseconds(string)
536            .ok()
537            .or_else(|| string.parse::<Self::Native>().ok())
538    }
539
540    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541        let nt = NaiveTime::parse_from_str(string, format).ok()?;
542        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543    }
544}
545
546impl Parser for Time64MicrosecondType {
547    // Will truncate any fractions of a microsecond
548    fn parse(string: &str) -> Option<Self::Native> {
549        string_to_time_nanoseconds(string)
550            .ok()
551            .map(|nanos| nanos / 1_000)
552            .or_else(|| string.parse::<Self::Native>().ok())
553    }
554
555    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556        let nt = NaiveTime::parse_from_str(string, format).ok()?;
557        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558    }
559}
560
561impl Parser for Time32MillisecondType {
562    // Will truncate any fractions of a millisecond
563    fn parse(string: &str) -> Option<Self::Native> {
564        string_to_time_nanoseconds(string)
565            .ok()
566            .map(|nanos| (nanos / 1_000_000) as i32)
567            .or_else(|| string.parse::<Self::Native>().ok())
568    }
569
570    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571        let nt = NaiveTime::parse_from_str(string, format).ok()?;
572        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573    }
574}
575
576impl Parser for Time32SecondType {
577    // Will truncate any fractions of a second
578    fn parse(string: &str) -> Option<Self::Native> {
579        string_to_time_nanoseconds(string)
580            .ok()
581            .map(|nanos| (nanos / 1_000_000_000) as i32)
582            .or_else(|| string.parse::<Self::Native>().ok())
583    }
584
585    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586        let nt = NaiveTime::parse_from_str(string, format).ok()?;
587        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588    }
589}
590
/// Number of days between 0001-01-01 and 1970-01-01
///
/// Subtracted from chrono's `num_days_from_ce()` by the `Date32Type` parser to
/// obtain a day count relative to the UNIX epoch.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message if nanosecond conversion request beyond supported interval
///
/// Used by `to_timestamp_nanos` when a datetime has no `i64` nanosecond representation.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
599    //
600    // According to [ISO 8601], years have:
601    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
602    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
603    //
604    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
605    if string.starts_with('+') || string.starts_with('-') {
606        // Skip the sign and look for the hyphen that terminates the year digits.
607        // According to ISO 8601 the unsigned part must be at least 4 digits.
608        let rest = &string[1..];
609        let hyphen = rest.find('-')?;
610        if hyphen < 4 {
611            return None;
612        }
613        // The year substring is the sign and the digits (but not the separator)
614        // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999"
615        let year: i32 = string[..hyphen + 1].parse().ok()?;
616        // The remainder should begin with a '-' which we strip off, leaving the month-day part.
617        let remainder = string[hyphen + 1..].strip_prefix('-')?;
618        let mut parts = remainder.splitn(2, '-');
619        let month: u32 = parts.next()?.parse().ok()?;
620        let day: u32 = parts.next()?.parse().ok()?;
621        return NaiveDate::from_ymd_opt(year, month, day);
622    }
623
624    if string.len() > 10 {
625        // Try to parse as datetime and return just the date part
626        return string_to_datetime(&Utc, string)
627            .map(|dt| dt.date_naive())
628            .ok();
629    };
630    let mut digits = [0; 10];
631    let mut mask = 0;
632
633    // Treating all bytes the same way, helps LLVM vectorise this correctly
634    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
635        *o = i.wrapping_sub(b'0');
636        mask |= ((*o < 10) as u16) << idx
637    }
638
639    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
640
641    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
642    if digits[4] != HYPHEN {
643        let (year, month, day) = match (mask, string.len()) {
644            (0b11111111, 8) => (
645                digits[0] as u16 * 1000
646                    + digits[1] as u16 * 100
647                    + digits[2] as u16 * 10
648                    + digits[3] as u16,
649                digits[4] * 10 + digits[5],
650                digits[6] * 10 + digits[7],
651            ),
652            _ => return None,
653        };
654        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
655    }
656
657    let (month, day) = match mask {
658        0b1101101111 => {
659            if digits[7] != HYPHEN {
660                return None;
661            }
662            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
663        }
664        0b101101111 => {
665            if digits[7] != HYPHEN {
666                return None;
667            }
668            (digits[5] * 10 + digits[6], digits[8])
669        }
670        0b110101111 => {
671            if digits[6] != HYPHEN {
672                return None;
673            }
674            (digits[5], digits[7] * 10 + digits[8])
675        }
676        0b10101111 => {
677            if digits[6] != HYPHEN {
678                return None;
679            }
680            (digits[5], digits[7])
681        }
682        _ => return None,
683    };
684
685    let year =
686        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
687
688    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
689}
690
691impl Parser for Date32Type {
692    fn parse(string: &str) -> Option<i32> {
693        let date = parse_date(string)?;
694        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
695    }
696
697    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
698        let date = NaiveDate::parse_from_str(string, format).ok()?;
699        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
700    }
701}
702
703impl Parser for Date64Type {
704    fn parse(string: &str) -> Option<i64> {
705        if string.len() <= 10 {
706            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
707            Some(datetime.and_utc().timestamp_millis())
708        } else {
709            let date_time = string_to_datetime(&Utc, string).ok()?;
710            Some(date_time.timestamp_millis())
711        }
712    }
713
714    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
715        use chrono::format::Fixed;
716        use chrono::format::StrftimeItems;
717        let fmt = StrftimeItems::new(format);
718        let has_zone = fmt.into_iter().any(|item| match item {
719            chrono::format::Item::Fixed(fixed_item) => matches!(
720                fixed_item,
721                Fixed::RFC2822
722                    | Fixed::RFC3339
723                    | Fixed::TimezoneName
724                    | Fixed::TimezoneOffsetColon
725                    | Fixed::TimezoneOffsetColonZ
726                    | Fixed::TimezoneOffset
727                    | Fixed::TimezoneOffsetZ
728            ),
729            _ => false,
730        });
731        if has_zone {
732            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
733            Some(date_time.timestamp_millis())
734        } else {
735            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
736            Some(date_time.and_utc().timestamp_millis())
737        }
738    }
739}
740
741fn parse_e_notation<T: DecimalType>(
742    s: &str,
743    mut digits: u16,
744    mut fractionals: i16,
745    mut result: T::Native,
746    index: usize,
747    precision: u16,
748    scale: i16,
749) -> Result<T::Native, ArrowError> {
750    let mut exp: i16 = 0;
751    let base = T::Native::usize_as(10);
752
753    let mut exp_start: bool = false;
754    // e has a plus sign
755    let mut pos_shift_direction: bool = true;
756
757    // skip to point or exponent index
758    let mut bs;
759    if fractionals > 0 {
760        // it's a fraction, so the point index needs to be skipped, so +1
761        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
762    } else {
763        // it's actually an integer that is already written into the result, so let's skip on to e
764        bs = s.as_bytes().iter().skip(index);
765    }
766
767    while let Some(b) = bs.next() {
768        match b {
769            b'0'..=b'9' => {
770                result = result.mul_wrapping(base);
771                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
772                if fractionals > 0 {
773                    fractionals += 1;
774                }
775                digits += 1;
776            }
777            &b'e' | &b'E' => {
778                exp_start = true;
779            }
780            _ => {
781                return Err(ArrowError::ParseError(format!(
782                    "can't parse the string value {s} to decimal"
783                )));
784            }
785        };
786
787        if exp_start {
788            pos_shift_direction = match bs.next() {
789                Some(&b'-') => false,
790                Some(&b'+') => true,
791                Some(b) => {
792                    if !b.is_ascii_digit() {
793                        return Err(ArrowError::ParseError(format!(
794                            "can't parse the string value {s} to decimal"
795                        )));
796                    }
797
798                    exp *= 10;
799                    exp += (b - b'0') as i16;
800
801                    true
802                }
803                None => {
804                    return Err(ArrowError::ParseError(format!(
805                        "can't parse the string value {s} to decimal"
806                    )))
807                }
808            };
809
810            for b in bs.by_ref() {
811                if !b.is_ascii_digit() {
812                    return Err(ArrowError::ParseError(format!(
813                        "can't parse the string value {s} to decimal"
814                    )));
815                }
816                exp *= 10;
817                exp += (b - b'0') as i16;
818            }
819        }
820    }
821
822    if digits == 0 && fractionals == 0 && exp == 0 {
823        return Err(ArrowError::ParseError(format!(
824            "can't parse the string value {s} to decimal"
825        )));
826    }
827
828    if !pos_shift_direction {
829        // exponent has a large negative sign
830        // 1.12345e-30 => 0.0{29}12345, scale = 5
831        if exp - (digits as i16 + scale) > 0 {
832            return Ok(T::Native::usize_as(0));
833        }
834        exp *= -1;
835    }
836
837    // point offset
838    exp = fractionals - exp;
839    // We have zeros on the left, we need to count them
840    if !pos_shift_direction && exp > digits as i16 {
841        digits = exp as u16;
842    }
843    // Number of numbers to be removed or added
844    exp = scale - exp;
845
846    if (digits as i16 + exp) as u16 > precision {
847        return Err(ArrowError::ParseError(format!(
848            "parse decimal overflow ({s})"
849        )));
850    }
851
852    if exp < 0 {
853        result = result.div_wrapping(base.pow_wrapping(-exp as _));
854    } else {
855        result = result.mul_wrapping(base.pow_wrapping(exp as _));
856    }
857
858    Ok(result)
859}
860
861/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
862/// The result value can't be out of bounds.
863pub fn parse_decimal<T: DecimalType>(
864    s: &str,
865    precision: u8,
866    scale: i8,
867) -> Result<T::Native, ArrowError> {
868    let mut result = T::Native::usize_as(0);
869    let mut fractionals: i8 = 0;
870    let mut digits: u8 = 0;
871    let base = T::Native::usize_as(10);
872
873    let bs = s.as_bytes();
874    let (signed, negative) = match bs.first() {
875        Some(b'-') => (true, true),
876        Some(b'+') => (true, false),
877        _ => (false, false),
878    };
879
880    if bs.is_empty() || signed && bs.len() == 1 {
881        return Err(ArrowError::ParseError(format!(
882            "can't parse the string value {s} to decimal"
883        )));
884    }
885
886    // Iterate over the raw input bytes, skipping the sign if any
887    let mut bs = bs.iter().enumerate().skip(signed as usize);
888
889    let mut is_e_notation = false;
890
891    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
892    // Thus, if we validate the precision correctly, we can skip overflow checks.
893    while let Some((index, b)) = bs.next() {
894        match b {
895            b'0'..=b'9' => {
896                if digits == 0 && *b == b'0' {
897                    // Ignore leading zeros.
898                    continue;
899                }
900                digits += 1;
901                result = result.mul_wrapping(base);
902                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
903            }
904            b'.' => {
905                let point_index = index;
906
907                for (_, b) in bs.by_ref() {
908                    if !b.is_ascii_digit() {
909                        if *b == b'e' || *b == b'E' {
910                            result = parse_e_notation::<T>(
911                                s,
912                                digits as u16,
913                                fractionals as i16,
914                                result,
915                                point_index,
916                                precision as u16,
917                                scale as i16,
918                            )?;
919
920                            is_e_notation = true;
921
922                            break;
923                        }
924                        return Err(ArrowError::ParseError(format!(
925                            "can't parse the string value {s} to decimal"
926                        )));
927                    }
928                    if fractionals == scale && scale != 0 {
929                        // We have processed all the digits that we need. All that
930                        // is left is to validate that the rest of the string contains
931                        // valid digits.
932                        continue;
933                    }
934                    fractionals += 1;
935                    digits += 1;
936                    result = result.mul_wrapping(base);
937                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
938                }
939
940                if is_e_notation {
941                    break;
942                }
943
944                // Fail on "."
945                if digits == 0 {
946                    return Err(ArrowError::ParseError(format!(
947                        "can't parse the string value {s} to decimal"
948                    )));
949                }
950            }
951            b'e' | b'E' => {
952                result = parse_e_notation::<T>(
953                    s,
954                    digits as u16,
955                    fractionals as i16,
956                    result,
957                    index,
958                    precision as u16,
959                    scale as i16,
960                )?;
961
962                is_e_notation = true;
963
964                break;
965            }
966            _ => {
967                return Err(ArrowError::ParseError(format!(
968                    "can't parse the string value {s} to decimal"
969                )));
970            }
971        }
972    }
973
974    if !is_e_notation {
975        if fractionals < scale {
976            let exp = scale - fractionals;
977            if exp as u8 + digits > precision {
978                return Err(ArrowError::ParseError(format!(
979                    "parse decimal overflow ({s})"
980                )));
981            }
982            let mul = base.pow_wrapping(exp as _);
983            result = result.mul_wrapping(mul);
984        } else if digits > precision {
985            return Err(ArrowError::ParseError(format!(
986                "parse decimal overflow ({s})"
987            )));
988        }
989    }
990
991    Ok(if negative {
992        result.neg_wrapping()
993    } else {
994        result
995    })
996}
997
998/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
999pub fn parse_interval_year_month(
1000    value: &str,
1001) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
1002    let config = IntervalParseConfig::new(IntervalUnit::Year);
1003    let interval = Interval::parse(value, &config)?;
1004
1005    let months = interval.to_year_months().map_err(|_| {
1006        ArrowError::CastError(format!(
1007            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
1008        ))
1009    })?;
1010
1011    Ok(IntervalYearMonthType::make_value(0, months))
1012}
1013
1014/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
1015pub fn parse_interval_day_time(
1016    value: &str,
1017) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1018    let config = IntervalParseConfig::new(IntervalUnit::Day);
1019    let interval = Interval::parse(value, &config)?;
1020
1021    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1022        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1023    )))?;
1024
1025    Ok(IntervalDayTimeType::make_value(days, millis))
1026}
1027
1028/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1029pub fn parse_interval_month_day_nano_config(
1030    value: &str,
1031    config: IntervalParseConfig,
1032) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1033    let interval = Interval::parse(value, &config)?;
1034
1035    let (months, days, nanos) = interval.to_month_day_nanos();
1036
1037    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1038}
1039
1040/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1041pub fn parse_interval_month_day_nano(
1042    value: &str,
1043) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1044    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1045}
1046
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Number of nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Number of nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Number of nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Number of nanoseconds in one 24-hour day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1053
1054/// Config to parse interval strings
1055///
1056/// Currently stores the `default_unit` to use if the string doesn't have one specified
1057#[derive(Debug, Clone)]
1058pub struct IntervalParseConfig {
1059    /// The default unit to use if none is specified
1060    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
1061    default_unit: IntervalUnit,
1062}
1063
1064impl IntervalParseConfig {
1065    /// Create a new [IntervalParseConfig] with the given default unit
1066    pub fn new(default_unit: IntervalUnit) -> Self {
1067        Self { default_unit }
1068    }
1069}
1070
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
/// The units an interval amount can be expressed in.
///
/// Every variant occupies its own bit, so a set of units can be tracked
/// in a single `u16` bitfield.
pub enum IntervalUnit {
    /// Century (bit 0)
    Century     = 1 << 0,
    /// Decade (bit 1)
    Decade      = 1 << 1,
    /// Year (bit 2)
    Year        = 1 << 2,
    /// Month (bit 3)
    Month       = 1 << 3,
    /// Week (bit 4)
    Week        = 1 << 4,
    /// Day (bit 5)
    Day         = 1 << 5,
    /// Hour (bit 6)
    Hour        = 1 << 6,
    /// Minute (bit 7)
    Minute      = 1 << 7,
    /// Second (bit 8)
    Second      = 1 << 8,
    /// Millisecond (bit 9)
    Millisecond = 1 << 9,
    /// Microsecond (bit 10)
    Microsecond = 1 << 10,
    /// Nanosecond (bit 11)
    Nanosecond  = 1 << 11,
}
1102
1103/// Logic for parsing interval unit strings
1104///
1105/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1106/// for a list of unit names supported by PostgreSQL which we try to match here.
1107impl FromStr for IntervalUnit {
1108    type Err = ArrowError;
1109
1110    fn from_str(s: &str) -> Result<Self, ArrowError> {
1111        match s.to_lowercase().as_str() {
1112            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1113            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1114            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1115            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1116            "w" | "week" | "weeks" => Ok(Self::Week),
1117            "d" | "day" | "days" => Ok(Self::Day),
1118            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1119            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1120            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1121            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1122                Ok(Self::Millisecond)
1123            }
1124            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1125                Ok(Self::Microsecond)
1126            }
1127            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1128            _ => Err(ArrowError::InvalidArgumentError(format!(
1129                "Unknown interval type: {s}"
1130            ))),
1131        }
1132    }
1133}
1134
1135impl IntervalUnit {
1136    fn from_str_or_config(
1137        s: Option<&str>,
1138        config: &IntervalParseConfig,
1139    ) -> Result<Self, ArrowError> {
1140        match s {
1141            Some(s) => s.parse(),
1142            None => Ok(config.default_unit),
1143        }
1144    }
1145}
1146
/// The components of an interval as a `(months, days, nanoseconds)` tuple,
/// stored as `(i32, i32, i64)`
pub type MonthDayNano = (i32, i32, i64);
1149
/// Number of decimal digits kept for the fractional part of an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
const INTERVAL_PRECISION: u32 = 15;
1152
/// A numeric interval amount split into its whole and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// Whole-number part of the amount
    integer: i64,
    /// Fractional part of the amount, scaled up by 10^INTERVAL_PRECISION
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the two parts as they are stored.
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1167
1168impl FromStr for IntervalAmount {
1169    type Err = ArrowError;
1170
1171    fn from_str(s: &str) -> Result<Self, Self::Err> {
1172        match s.split_once('.') {
1173            Some((integer, frac))
1174                if frac.len() <= INTERVAL_PRECISION as usize
1175                    && !frac.is_empty()
1176                    && !frac.starts_with('-') =>
1177            {
1178                // integer will be "" for values like ".5"
1179                // and "-" for values like "-.5"
1180                let explicit_neg = integer.starts_with('-');
1181                let integer = if integer.is_empty() || integer == "-" {
1182                    Ok(0)
1183                } else {
1184                    integer.parse::<i64>().map_err(|_| {
1185                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1186                    })
1187                }?;
1188
1189                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1190                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1191                })?;
1192
1193                // scale fractional part by interval precision
1194                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1195
1196                // propagate the sign of the integer part to the fractional part
1197                let frac = if integer < 0 || explicit_neg {
1198                    -frac
1199                } else {
1200                    frac
1201                };
1202
1203                let result = Self { integer, frac };
1204
1205                Ok(result)
1206            }
1207            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1208                "Failed to parse {s} as interval amount"
1209            ))),
1210            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1211                Err(ArrowError::ParseError(format!(
1212                    "{s} exceeds the precision available for interval amount"
1213                )))
1214            }
1215            Some(_) | None => {
1216                let integer = s.parse::<i64>().map_err(|_| {
1217                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1218                })?;
1219
1220                let result = Self { integer, frac: 0 };
1221                Ok(result)
1222            }
1223        }
1224    }
1225}
1226
/// An interval held as separate month, day and nanosecond parts.
///
/// The default value has all three parts set to zero.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Month part of the interval
    months: i32,
    /// Day part of the interval
    days: i32,
    /// Nanosecond part of the interval
    nanos: i64,
}
1233
1234impl Interval {
1235    fn new(months: i32, days: i32, nanos: i64) -> Self {
1236        Self {
1237            months,
1238            days,
1239            nanos,
1240        }
1241    }
1242
1243    fn to_year_months(&self) -> Result<i32, ArrowError> {
1244        match (self.months, self.days, self.nanos) {
1245            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1246            _ => Err(ArrowError::InvalidArgumentError(format!(
1247                "Unable to represent interval with days and nanos as year-months: {:?}",
1248                self
1249            ))),
1250        }
1251    }
1252
1253    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1254        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1255
1256        match self.nanos {
1257            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1258                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1259                    ArrowError::InvalidArgumentError(format!(
1260                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1261                        self.nanos
1262                    ))
1263                })?;
1264
1265                Ok((days, millis))
1266            }
1267            nanos => Err(ArrowError::InvalidArgumentError(format!(
1268                "Unable to represent {nanos} as milliseconds"
1269            ))),
1270        }
1271    }
1272
1273    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1274        (self.months, self.days, self.nanos)
1275    }
1276
1277    /// Parse string value in traditional Postgres format such as
1278    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1279    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1280        let components = parse_interval_components(value, config)?;
1281
1282        components
1283            .into_iter()
1284            .try_fold(Self::default(), |result, (amount, unit)| {
1285                result.add(amount, unit)
1286            })
1287    }
1288
1289    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1290    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1291    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1292    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1293    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1294    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1295    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1296        let result = match unit {
1297            IntervalUnit::Century => {
1298                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1299                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1300                let months = months_int
1301                    .add_checked(month_frac)?
1302                    .try_into()
1303                    .map_err(|_| {
1304                        ArrowError::ParseError(format!(
1305                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1306                            &amount.integer
1307                        ))
1308                    })?;
1309
1310                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1311            }
1312            IntervalUnit::Decade => {
1313                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1314
1315                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1316                let months = months_int
1317                    .add_checked(month_frac)?
1318                    .try_into()
1319                    .map_err(|_| {
1320                        ArrowError::ParseError(format!(
1321                            "Unable to represent {} decades as months in a signed 32-bit integer",
1322                            &amount.integer
1323                        ))
1324                    })?;
1325
1326                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1327            }
1328            IntervalUnit::Year => {
1329                let months_int = amount.integer.mul_checked(12)?;
1330                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1331                let months = months_int
1332                    .add_checked(month_frac)?
1333                    .try_into()
1334                    .map_err(|_| {
1335                        ArrowError::ParseError(format!(
1336                            "Unable to represent {} years as months in a signed 32-bit integer",
1337                            &amount.integer
1338                        ))
1339                    })?;
1340
1341                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1342            }
1343            IntervalUnit::Month => {
1344                let months = amount.integer.try_into().map_err(|_| {
1345                    ArrowError::ParseError(format!(
1346                        "Unable to represent {} months in a signed 32-bit integer",
1347                        &amount.integer
1348                    ))
1349                })?;
1350
1351                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1352                let days = days.try_into().map_err(|_| {
1353                    ArrowError::ParseError(format!(
1354                        "Unable to represent {} months as days in a signed 32-bit integer",
1355                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1356                    ))
1357                })?;
1358
1359                Self::new(
1360                    self.months.add_checked(months)?,
1361                    self.days.add_checked(days)?,
1362                    self.nanos,
1363                )
1364            }
1365            IntervalUnit::Week => {
1366                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1367                    ArrowError::ParseError(format!(
1368                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1369                        &amount.integer
1370                    ))
1371                })?;
1372
1373                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1374
1375                Self::new(
1376                    self.months,
1377                    self.days.add_checked(days)?,
1378                    self.nanos.add_checked(nanos)?,
1379                )
1380            }
1381            IntervalUnit::Day => {
1382                let days = amount.integer.try_into().map_err(|_| {
1383                    ArrowError::InvalidArgumentError(format!(
1384                        "Unable to represent {} days in a signed 32-bit integer",
1385                        amount.integer
1386                    ))
1387                })?;
1388
1389                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1390
1391                Self::new(
1392                    self.months,
1393                    self.days.add_checked(days)?,
1394                    self.nanos.add_checked(nanos)?,
1395                )
1396            }
1397            IntervalUnit::Hour => {
1398                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1399                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1400                let nanos = nanos_int.add_checked(nanos_frac)?;
1401
1402                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1403            }
1404            IntervalUnit::Minute => {
1405                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1406                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1407
1408                let nanos = nanos_int.add_checked(nanos_frac)?;
1409
1410                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1411            }
1412            IntervalUnit::Second => {
1413                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1414                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1415                let nanos = nanos_int.add_checked(nanos_frac)?;
1416
1417                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1418            }
1419            IntervalUnit::Millisecond => {
1420                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1421                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1422                let nanos = nanos_int.add_checked(nanos_frac)?;
1423
1424                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1425            }
1426            IntervalUnit::Microsecond => {
1427                let nanos_int = amount.integer.mul_checked(1_000)?;
1428                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1429                let nanos = nanos_int.add_checked(nanos_frac)?;
1430
1431                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1432            }
1433            IntervalUnit::Nanosecond => {
1434                let nanos_int = amount.integer;
1435                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1436                let nanos = nanos_int.add_checked(nanos_frac)?;
1437
1438                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1439            }
1440        };
1441
1442        Ok(result)
1443    }
1444}
1445
1446/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1447fn parse_interval_components(
1448    value: &str,
1449    config: &IntervalParseConfig,
1450) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1451    let raw_pairs = split_interval_components(value);
1452
1453    // parse amounts and units
1454    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1455        .iter()
1456        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1457        .collect()
1458    else {
1459        return Err(ArrowError::ParseError(format!(
1460            "Invalid input syntax for type interval: {value:?}"
1461        )));
1462    };
1463
1464    // collect parsed results
1465    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1466
1467    // duplicate units?
1468    let mut observed_interval_types = 0;
1469    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1470        if observed_interval_types & (*unit as u16) != 0 {
1471            return Err(ArrowError::ParseError(format!(
1472                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1473                value,
1474                raw_unit.unwrap_or_default(),
1475            )));
1476        }
1477
1478        observed_interval_types |= *unit as u16;
1479    }
1480
1481    let result = amounts.iter().copied().zip(units.iter().copied());
1482
1483    Ok(result.collect::<Vec<_>>())
1484}
1485
1486/// Split an interval into a vec of amounts and units.
1487///
1488/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1489///
1490/// This should match the behavior of PostgreSQL's interval parser.
1491fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1492    let mut result = vec![];
1493    let mut words = value.split(char::is_whitespace);
1494    while let Some(word) = words.next() {
1495        if let Some(split_word_at) = word.find(not_interval_amount) {
1496            let (amount, unit) = word.split_at(split_word_at);
1497            result.push((amount, Some(unit)));
1498        } else if let Some(unit) = words.next() {
1499            result.push((word, Some(unit)));
1500        } else {
1501            result.push((word, None));
1502            break;
1503        }
1504    }
1505    result
1506}
1507
/// Returns `true` when `c` cannot appear in an interval's numeric amount,
/// i.e. it is neither an ASCII digit nor `.` nor `-`
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1512
1513#[cfg(test)]
1514mod tests {
1515    use super::*;
1516    use arrow_array::temporal_conversions::date32_to_datetime;
1517    use arrow_buffer::i256;
1518
1519    #[test]
1520    fn test_parse_nanos() {
1521        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1522        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1523        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1524    }
1525
1526    #[test]
1527    fn string_to_timestamp_timezone() {
1528        // Explicit timezone
1529        assert_eq!(
1530            1599572549190855000,
1531            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1532        );
1533        assert_eq!(
1534            1599572549190855000,
1535            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1536        );
1537        assert_eq!(
1538            1599572549000000000,
1539            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1540        ); // no fractional part
1541        assert_eq!(
1542            1599590549190855000,
1543            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1544        );
1545    }
1546
1547    #[test]
1548    fn string_to_timestamp_timezone_space() {
1549        // Ensure space rather than T between time and date is accepted
1550        assert_eq!(
1551            1599572549190855000,
1552            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1553        );
1554        assert_eq!(
1555            1599572549190855000,
1556            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1557        );
1558        assert_eq!(
1559            1599572549000000000,
1560            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1561        ); // no fractional part
1562        assert_eq!(
1563            1599590549190855000,
1564            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1565        );
1566    }
1567
1568    #[test]
1569    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1570    fn string_to_timestamp_no_timezone() {
1571        // This test is designed to succeed in regardless of the local
1572        // timezone the test machine is running. Thus it is still
1573        // somewhat susceptible to bugs in the use of chrono
1574        let naive_datetime = NaiveDateTime::new(
1575            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1576            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1577        );
1578
1579        // Ensure both T and ' ' variants work
1580        assert_eq!(
1581            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1582            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1583        );
1584
1585        assert_eq!(
1586            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1587            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1588        );
1589
1590        // Also ensure that parsing timestamps with no fractional
1591        // second part works as well
1592        let datetime_whole_secs = NaiveDateTime::new(
1593            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1594            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1595        )
1596        .and_utc();
1597
1598        // Ensure both T and ' ' variants work
1599        assert_eq!(
1600            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1601            parse_timestamp("2020-09-08T13:42:29").unwrap()
1602        );
1603
1604        assert_eq!(
1605            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1606            parse_timestamp("2020-09-08 13:42:29").unwrap()
1607        );
1608
1609        // ensure without time work
1610        // no time, should be the nano second at
1611        // 2020-09-08 0:0:0
1612        let datetime_no_time = NaiveDateTime::new(
1613            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1614            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1615        )
1616        .and_utc();
1617
1618        assert_eq!(
1619            datetime_no_time.timestamp_nanos_opt().unwrap(),
1620            parse_timestamp("2020-09-08").unwrap()
1621        )
1622    }
1623
1624    #[test]
1625    fn string_to_timestamp_chrono() {
1626        let cases = [
1627            "2020-09-08T13:42:29Z",
1628            "1969-01-01T00:00:00.1Z",
1629            "2020-09-08T12:00:12.12345678+00:00",
1630            "2020-09-08T12:00:12+00:00",
1631            "2020-09-08T12:00:12.1+00:00",
1632            "2020-09-08T12:00:12.12+00:00",
1633            "2020-09-08T12:00:12.123+00:00",
1634            "2020-09-08T12:00:12.1234+00:00",
1635            "2020-09-08T12:00:12.12345+00:00",
1636            "2020-09-08T12:00:12.123456+00:00",
1637            "2020-09-08T12:00:12.1234567+00:00",
1638            "2020-09-08T12:00:12.12345678+00:00",
1639            "2020-09-08T12:00:12.123456789+00:00",
1640            "2020-09-08T12:00:12.12345678912z",
1641            "2020-09-08T12:00:12.123456789123Z",
1642            "2020-09-08T12:00:12.123456789123+02:00",
1643            "2020-09-08T12:00:12.12345678912345Z",
1644            "2020-09-08T12:00:12.1234567891234567+02:00",
1645            "2020-09-08T12:00:60Z",
1646            "2020-09-08T12:00:60.123Z",
1647            "2020-09-08T12:00:60.123456+02:00",
1648            "2020-09-08T12:00:60.1234567891234567+02:00",
1649            "2020-09-08T12:00:60.999999999+02:00",
1650            "2020-09-08t12:00:12.12345678+00:00",
1651            "2020-09-08t12:00:12+00:00",
1652            "2020-09-08t12:00:12Z",
1653        ];
1654
1655        for case in cases {
1656            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1657            let chrono_utc = chrono.with_timezone(&Utc);
1658
1659            let custom = string_to_datetime(&Utc, case).unwrap();
1660            assert_eq!(chrono_utc, custom)
1661        }
1662    }
1663
1664    #[test]
1665    fn string_to_timestamp_naive() {
1666        let cases = [
1667            "2018-11-13T17:11:10.011375885995",
1668            "2030-12-04T17:11:10.123",
1669            "2030-12-04T17:11:10.1234",
1670            "2030-12-04T17:11:10.123456",
1671        ];
1672        for case in cases {
1673            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1674            let custom = string_to_datetime(&Utc, case).unwrap();
1675            assert_eq!(chrono, custom.naive_utc())
1676        }
1677    }
1678
1679    #[test]
1680    fn string_to_timestamp_invalid() {
1681        // Test parsing invalid formats
1682        let cases = [
1683            ("", "timestamp must contain at least 10 characters"),
1684            ("SS", "timestamp must contain at least 10 characters"),
1685            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1686            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1687            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1688            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1689            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1690            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1691            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1692            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1693            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1694            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1695            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1696            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1697            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1698            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1699            ("1997-01-31T092656.123Z", "error parsing time"),
1700            ("1997-01-10T12:00:06.", "error parsing time"),
1701            ("1997-01-10T12:00:06. ", "error parsing time"),
1702        ];
1703
1704        for (s, ctx) in cases {
1705            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1706            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1707            assert_eq!(actual, expected)
1708        }
1709    }
1710
1711    // Parse a timestamp to timestamp int with a useful human readable error message
1712    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1713        let result = string_to_timestamp_nanos(s);
1714        if let Err(e) = &result {
1715            eprintln!("Error parsing timestamp '{s}': {e:?}");
1716        }
1717        result
1718    }
1719
1720    #[test]
1721    fn string_without_timezone_to_timestamp() {
1722        // string without timezone should always output the same regardless the local or session timezone
1723
1724        let naive_datetime = NaiveDateTime::new(
1725            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1726            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1727        );
1728
1729        // Ensure both T and ' ' variants work
1730        assert_eq!(
1731            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1732            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1733        );
1734
1735        assert_eq!(
1736            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1737            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1738        );
1739
1740        let naive_datetime = NaiveDateTime::new(
1741            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1742            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1743        );
1744
1745        // Ensure both T and ' ' variants work
1746        assert_eq!(
1747            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1748            parse_timestamp("2020-09-08T13:42:29").unwrap()
1749        );
1750
1751        assert_eq!(
1752            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1753            parse_timestamp("2020-09-08 13:42:29").unwrap()
1754        );
1755
1756        let tz: Tz = "+02:00".parse().unwrap();
1757        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1758        let utc = date.naive_utc().to_string();
1759        assert_eq!(utc, "2020-09-08 11:42:29");
1760        let local = date.naive_local().to_string();
1761        assert_eq!(local, "2020-09-08 13:42:29");
1762
1763        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1764        let utc = date.naive_utc().to_string();
1765        assert_eq!(utc, "2020-09-08 13:42:29");
1766        let local = date.naive_local().to_string();
1767        assert_eq!(local, "2020-09-08 15:42:29");
1768
1769        let dt =
1770            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1771        let local: Tz = "+08:00".parse().unwrap();
1772
1773        // Parsed as offset from UTC
1774        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1775        assert_eq!(dt, date.naive_utc());
1776        assert_ne!(dt, date.naive_local());
1777
1778        // Parsed as offset from local
1779        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1780        assert_eq!(dt, date.naive_local());
1781        assert_ne!(dt, date.naive_utc());
1782    }
1783
1784    #[test]
1785    fn parse_date32() {
1786        let cases = [
1787            "2020-09-08",
1788            "2020-9-8",
1789            "2020-09-8",
1790            "2020-9-08",
1791            "2020-12-1",
1792            "1690-2-5",
1793            "2020-09-08 01:02:03",
1794        ];
1795        for case in cases {
1796            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1797            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1798                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1799                .unwrap();
1800            assert_eq!(v.date(), expected);
1801        }
1802
1803        let err_cases = [
1804            "",
1805            "80-01-01",
1806            "342",
1807            "Foo",
1808            "2020-09-08-03",
1809            "2020--04-03",
1810            "2020--",
1811            "2020-09-08 01",
1812            "2020-09-08 01:02",
1813            "2020-09-08 01-02-03",
1814            "2020-9-8 01:02:03",
1815            "2020-09-08 1:2:3",
1816        ];
1817        for case in err_cases {
1818            assert_eq!(Date32Type::parse(case), None);
1819        }
1820    }
1821
1822    #[test]
1823    fn parse_time64_nanos() {
1824        assert_eq!(
1825            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1826            Some(7_801_123_456_789)
1827        );
1828        assert_eq!(
1829            Time64NanosecondType::parse("02:10:01.1234567"),
1830            Some(7_801_123_456_700)
1831        );
1832        assert_eq!(
1833            Time64NanosecondType::parse("2:10:01.1234567"),
1834            Some(7_801_123_456_700)
1835        );
1836        assert_eq!(
1837            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1838            Some(601_123_456_789)
1839        );
1840        assert_eq!(
1841            Time64NanosecondType::parse("12:10:01.123456789 am"),
1842            Some(601_123_456_789)
1843        );
1844        assert_eq!(
1845            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1846            Some(51_001_123_456_780)
1847        );
1848        assert_eq!(
1849            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1850            Some(51_001_123_456_780)
1851        );
1852        assert_eq!(
1853            Time64NanosecondType::parse("02:10:01"),
1854            Some(7_801_000_000_000)
1855        );
1856        assert_eq!(
1857            Time64NanosecondType::parse("2:10:01"),
1858            Some(7_801_000_000_000)
1859        );
1860        assert_eq!(
1861            Time64NanosecondType::parse("12:10:01 AM"),
1862            Some(601_000_000_000)
1863        );
1864        assert_eq!(
1865            Time64NanosecondType::parse("12:10:01 am"),
1866            Some(601_000_000_000)
1867        );
1868        assert_eq!(
1869            Time64NanosecondType::parse("2:10:01 PM"),
1870            Some(51_001_000_000_000)
1871        );
1872        assert_eq!(
1873            Time64NanosecondType::parse("2:10:01 pm"),
1874            Some(51_001_000_000_000)
1875        );
1876        assert_eq!(
1877            Time64NanosecondType::parse("02:10"),
1878            Some(7_800_000_000_000)
1879        );
1880        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1881        assert_eq!(
1882            Time64NanosecondType::parse("12:10 AM"),
1883            Some(600_000_000_000)
1884        );
1885        assert_eq!(
1886            Time64NanosecondType::parse("12:10 am"),
1887            Some(600_000_000_000)
1888        );
1889        assert_eq!(
1890            Time64NanosecondType::parse("2:10 PM"),
1891            Some(51_000_000_000_000)
1892        );
1893        assert_eq!(
1894            Time64NanosecondType::parse("2:10 pm"),
1895            Some(51_000_000_000_000)
1896        );
1897
1898        // parse directly as nanoseconds
1899        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1900
1901        // leap second
1902        assert_eq!(
1903            Time64NanosecondType::parse("23:59:60"),
1904            Some(86_400_000_000_000)
1905        );
1906
1907        // custom format
1908        assert_eq!(
1909            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1910            Some(7_801_123_456_700)
1911        );
1912    }
1913
1914    #[test]
1915    fn parse_time64_micros() {
1916        // expected formats
1917        assert_eq!(
1918            Time64MicrosecondType::parse("02:10:01.1234"),
1919            Some(7_801_123_400)
1920        );
1921        assert_eq!(
1922            Time64MicrosecondType::parse("2:10:01.1234"),
1923            Some(7_801_123_400)
1924        );
1925        assert_eq!(
1926            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1927            Some(601_123_456)
1928        );
1929        assert_eq!(
1930            Time64MicrosecondType::parse("12:10:01.123456 am"),
1931            Some(601_123_456)
1932        );
1933        assert_eq!(
1934            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1935            Some(51_001_123_450)
1936        );
1937        assert_eq!(
1938            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1939            Some(51_001_123_450)
1940        );
1941        assert_eq!(
1942            Time64MicrosecondType::parse("02:10:01"),
1943            Some(7_801_000_000)
1944        );
1945        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1946        assert_eq!(
1947            Time64MicrosecondType::parse("12:10:01 AM"),
1948            Some(601_000_000)
1949        );
1950        assert_eq!(
1951            Time64MicrosecondType::parse("12:10:01 am"),
1952            Some(601_000_000)
1953        );
1954        assert_eq!(
1955            Time64MicrosecondType::parse("2:10:01 PM"),
1956            Some(51_001_000_000)
1957        );
1958        assert_eq!(
1959            Time64MicrosecondType::parse("2:10:01 pm"),
1960            Some(51_001_000_000)
1961        );
1962        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1963        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1964        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1965        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1966        assert_eq!(
1967            Time64MicrosecondType::parse("2:10 PM"),
1968            Some(51_000_000_000)
1969        );
1970        assert_eq!(
1971            Time64MicrosecondType::parse("2:10 pm"),
1972            Some(51_000_000_000)
1973        );
1974
1975        // parse directly as microseconds
1976        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1977
1978        // leap second
1979        assert_eq!(
1980            Time64MicrosecondType::parse("23:59:60"),
1981            Some(86_400_000_000)
1982        );
1983
1984        // custom format
1985        assert_eq!(
1986            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1987            Some(7_801_123_400)
1988        );
1989    }
1990
1991    #[test]
1992    fn parse_time32_millis() {
1993        // expected formats
1994        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1995        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1996        assert_eq!(
1997            Time32MillisecondType::parse("12:10:01.123 AM"),
1998            Some(601_123)
1999        );
2000        assert_eq!(
2001            Time32MillisecondType::parse("12:10:01.123 am"),
2002            Some(601_123)
2003        );
2004        assert_eq!(
2005            Time32MillisecondType::parse("2:10:01.12 PM"),
2006            Some(51_001_120)
2007        );
2008        assert_eq!(
2009            Time32MillisecondType::parse("2:10:01.12 pm"),
2010            Some(51_001_120)
2011        );
2012        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2013        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2014        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2015        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2016        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2017        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2018        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2019        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2020        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2021        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2022        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2023        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2024
2025        // parse directly as milliseconds
2026        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2027
2028        // leap second
2029        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2030
2031        // custom format
2032        assert_eq!(
2033            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2034            Some(7_801_100)
2035        );
2036    }
2037
2038    #[test]
2039    fn parse_time32_secs() {
2040        // expected formats
2041        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2042        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2043        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2044        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2045        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2046        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2047        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2048        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2049        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2050        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2051        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2052        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2053        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2054
2055        // parse directly as seconds
2056        assert_eq!(Time32SecondType::parse("1"), Some(1));
2057
2058        // leap second
2059        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2060
2061        // custom format
2062        assert_eq!(
2063            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2064            Some(7_801)
2065        );
2066    }
2067
2068    #[test]
2069    fn test_string_to_time_invalid() {
2070        let cases = [
2071            "25:00",
2072            "9:00:",
2073            "009:00",
2074            "09:0:00",
2075            "25:00:00",
2076            "13:00 AM",
2077            "13:00 PM",
2078            "12:00. AM",
2079            "09:0:00",
2080            "09:01:0",
2081            "09:01:1",
2082            "9:1:0",
2083            "09:01:0",
2084            "1:00.123",
2085            "1:00:00.123f",
2086            " 9:00:00",
2087            ":09:00",
2088            "T9:00:00",
2089            "AM",
2090        ];
2091        for case in cases {
2092            assert!(string_to_time(case).is_none(), "{case}");
2093        }
2094    }
2095
2096    #[test]
2097    fn test_string_to_time_chrono() {
2098        let cases = [
2099            ("1:00", "%H:%M"),
2100            ("12:00", "%H:%M"),
2101            ("13:00", "%H:%M"),
2102            ("24:00", "%H:%M"),
2103            ("1:00:00", "%H:%M:%S"),
2104            ("12:00:30", "%H:%M:%S"),
2105            ("13:00:59", "%H:%M:%S"),
2106            ("24:00:60", "%H:%M:%S"),
2107            ("09:00:00", "%H:%M:%S%.f"),
2108            ("0:00:30.123456", "%H:%M:%S%.f"),
2109            ("0:00 AM", "%I:%M %P"),
2110            ("1:00 AM", "%I:%M %P"),
2111            ("12:00 AM", "%I:%M %P"),
2112            ("13:00 AM", "%I:%M %P"),
2113            ("0:00 PM", "%I:%M %P"),
2114            ("1:00 PM", "%I:%M %P"),
2115            ("12:00 PM", "%I:%M %P"),
2116            ("13:00 PM", "%I:%M %P"),
2117            ("1:00 pM", "%I:%M %P"),
2118            ("1:00 Pm", "%I:%M %P"),
2119            ("1:00 aM", "%I:%M %P"),
2120            ("1:00 Am", "%I:%M %P"),
2121            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2122            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2123            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2124            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2125            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2126            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2127            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2128        ];
2129        for (s, format) in cases {
2130            let chrono = NaiveTime::parse_from_str(s, format).ok();
2131            let custom = string_to_time(s);
2132            assert_eq!(chrono, custom, "{s}");
2133        }
2134    }
2135
2136    #[test]
2137    fn test_parse_interval() {
2138        let config = IntervalParseConfig::new(IntervalUnit::Month);
2139
2140        assert_eq!(
2141            Interval::new(1i32, 0i32, 0i64),
2142            Interval::parse("1 month", &config).unwrap(),
2143        );
2144
2145        assert_eq!(
2146            Interval::new(2i32, 0i32, 0i64),
2147            Interval::parse("2 month", &config).unwrap(),
2148        );
2149
2150        assert_eq!(
2151            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2152            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2153        );
2154
2155        assert_eq!(
2156            Interval::new(0i32, 15i32, 0),
2157            Interval::parse("0.5 months", &config).unwrap(),
2158        );
2159
2160        assert_eq!(
2161            Interval::new(0i32, 15i32, 0),
2162            Interval::parse(".5 months", &config).unwrap(),
2163        );
2164
2165        assert_eq!(
2166            Interval::new(0i32, -15i32, 0),
2167            Interval::parse("-0.5 months", &config).unwrap(),
2168        );
2169
2170        assert_eq!(
2171            Interval::new(0i32, -15i32, 0),
2172            Interval::parse("-.5 months", &config).unwrap(),
2173        );
2174
2175        assert_eq!(
2176            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2177            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2178        );
2179
2180        assert_eq!(
2181            Interval::parse("1 centurys 1 month", &config)
2182                .unwrap_err()
2183                .to_string(),
2184            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2185        );
2186
2187        assert_eq!(
2188            Interval::new(37i32, 0i32, 0i64),
2189            Interval::parse("3 year 1 month", &config).unwrap(),
2190        );
2191
2192        assert_eq!(
2193            Interval::new(35i32, 0i32, 0i64),
2194            Interval::parse("3 year -1 month", &config).unwrap(),
2195        );
2196
2197        assert_eq!(
2198            Interval::new(-37i32, 0i32, 0i64),
2199            Interval::parse("-3 year -1 month", &config).unwrap(),
2200        );
2201
2202        assert_eq!(
2203            Interval::new(-35i32, 0i32, 0i64),
2204            Interval::parse("-3 year 1 month", &config).unwrap(),
2205        );
2206
2207        assert_eq!(
2208            Interval::new(0i32, 5i32, 0i64),
2209            Interval::parse("5 days", &config).unwrap(),
2210        );
2211
2212        assert_eq!(
2213            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2214            Interval::parse("7 days 3 hours", &config).unwrap(),
2215        );
2216
2217        assert_eq!(
2218            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2219            Interval::parse("7 days 5 minutes", &config).unwrap(),
2220        );
2221
2222        assert_eq!(
2223            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2224            Interval::parse("7 days -5 minutes", &config).unwrap(),
2225        );
2226
2227        assert_eq!(
2228            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2229            Interval::parse("-7 days 5 hours", &config).unwrap(),
2230        );
2231
2232        assert_eq!(
2233            Interval::new(
2234                0i32,
2235                -7i32,
2236                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2237            ),
2238            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2239        );
2240
2241        assert_eq!(
2242            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2243            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2244        );
2245
2246        assert_eq!(
2247            Interval::new(
2248                12i32,
2249                1i32,
2250                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2251            ),
2252            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2253        );
2254
2255        assert_eq!(
2256            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2257            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2258        );
2259
2260        assert_eq!(
2261            Interval::new(12i32, 1i32, 1000i64),
2262            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2263        );
2264
2265        assert_eq!(
2266            Interval::new(12i32, 1i32, 1i64),
2267            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2268        );
2269
2270        assert_eq!(
2271            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2272            Interval::parse("1 month -1 second", &config).unwrap(),
2273        );
2274
2275        assert_eq!(
2276            Interval::new(
2277                -13i32,
2278                -8i32,
2279                -NANOS_PER_HOUR
2280                    - NANOS_PER_MINUTE
2281                    - NANOS_PER_SECOND
2282                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2283            ),
2284            Interval::parse(
2285                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2286                &config
2287            )
2288            .unwrap(),
2289        );
2290
2291        // no units
2292        assert_eq!(
2293            Interval::new(1, 0, 0),
2294            Interval::parse("1", &config).unwrap()
2295        );
2296        assert_eq!(
2297            Interval::new(42, 0, 0),
2298            Interval::parse("42", &config).unwrap()
2299        );
2300        assert_eq!(
2301            Interval::new(0, 0, 42_000_000_000),
2302            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2303        );
2304
2305        // shorter units
2306        assert_eq!(
2307            Interval::new(1, 0, 0),
2308            Interval::parse("1 mon", &config).unwrap()
2309        );
2310        assert_eq!(
2311            Interval::new(1, 0, 0),
2312            Interval::parse("1 mons", &config).unwrap()
2313        );
2314        assert_eq!(
2315            Interval::new(0, 0, 1_000_000),
2316            Interval::parse("1 ms", &config).unwrap()
2317        );
2318        assert_eq!(
2319            Interval::new(0, 0, 1_000),
2320            Interval::parse("1 us", &config).unwrap()
2321        );
2322
2323        // no space
2324        assert_eq!(
2325            Interval::new(0, 0, 1_000),
2326            Interval::parse("1us", &config).unwrap()
2327        );
2328        assert_eq!(
2329            Interval::new(0, 0, NANOS_PER_SECOND),
2330            Interval::parse("1s", &config).unwrap()
2331        );
2332        assert_eq!(
2333            Interval::new(1, 2, 10_864_000_000_000),
2334            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2335        );
2336
2337        assert_eq!(
2338            Interval::new(
2339                -13i32,
2340                -8i32,
2341                -NANOS_PER_HOUR
2342                    - NANOS_PER_MINUTE
2343                    - NANOS_PER_SECOND
2344                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2345            ),
2346            Interval::parse(
2347                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2348                &config
2349            )
2350            .unwrap(),
2351        );
2352
2353        assert_eq!(
2354            Interval::parse("1h s", &config).unwrap_err().to_string(),
2355            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2356        );
2357
2358        assert_eq!(
2359            Interval::parse("1XX", &config).unwrap_err().to_string(),
2360            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2361        );
2362    }
2363
2364    #[test]
2365    fn test_duplicate_interval_type() {
2366        let config = IntervalParseConfig::new(IntervalUnit::Month);
2367
2368        let err = Interval::parse("1 month 1 second 1 second", &config)
2369            .expect_err("parsing interval should have failed");
2370        assert_eq!(
2371            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2372            format!("{err:?}")
2373        );
2374
2375        // test with singular and plural forms
2376        let err = Interval::parse("1 century 2 centuries", &config)
2377            .expect_err("parsing interval should have failed");
2378        assert_eq!(
2379            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2380            format!("{err:?}")
2381        );
2382    }
2383
2384    #[test]
2385    fn test_interval_amount_parsing() {
2386        // integer
2387        let result = IntervalAmount::from_str("123").unwrap();
2388        let expected = IntervalAmount::new(123, 0);
2389
2390        assert_eq!(result, expected);
2391
2392        // positive w/ fractional
2393        let result = IntervalAmount::from_str("0.3").unwrap();
2394        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2395
2396        assert_eq!(result, expected);
2397
2398        // negative w/ fractional
2399        let result = IntervalAmount::from_str("-3.5").unwrap();
2400        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2401
2402        assert_eq!(result, expected);
2403
2404        // invalid: missing fractional
2405        let result = IntervalAmount::from_str("3.");
2406        assert!(result.is_err());
2407
2408        // invalid: sign in fractional
2409        let result = IntervalAmount::from_str("3.-5");
2410        assert!(result.is_err());
2411    }
2412
2413    #[test]
2414    fn test_interval_precision() {
2415        let config = IntervalParseConfig::new(IntervalUnit::Month);
2416
2417        let result = Interval::parse("100000.1 days", &config).unwrap();
2418        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2419
2420        assert_eq!(result, expected);
2421    }
2422
2423    #[test]
2424    fn test_interval_addition() {
2425        // add 4.1 centuries
2426        let start = Interval::new(1, 2, 3);
2427        let expected = Interval::new(4921, 2, 3);
2428
2429        let result = start
2430            .add(
2431                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2432                IntervalUnit::Century,
2433            )
2434            .unwrap();
2435
2436        assert_eq!(result, expected);
2437
2438        // add 10.25 decades
2439        let start = Interval::new(1, 2, 3);
2440        let expected = Interval::new(1231, 2, 3);
2441
2442        let result = start
2443            .add(
2444                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2445                IntervalUnit::Decade,
2446            )
2447            .unwrap();
2448
2449        assert_eq!(result, expected);
2450
2451        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2452        let start = Interval::new(1, 2, 3);
2453        let expected = Interval::new(364, 2, 3);
2454
2455        let result = start
2456            .add(
2457                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2458                IntervalUnit::Year,
2459            )
2460            .unwrap();
2461
2462        assert_eq!(result, expected);
2463
2464        // add 1.5 months
2465        let start = Interval::new(1, 2, 3);
2466        let expected = Interval::new(2, 17, 3);
2467
2468        let result = start
2469            .add(
2470                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2471                IntervalUnit::Month,
2472            )
2473            .unwrap();
2474
2475        assert_eq!(result, expected);
2476
2477        // add -2 weeks
2478        let start = Interval::new(1, 25, 3);
2479        let expected = Interval::new(1, 11, 3);
2480
2481        let result = start
2482            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2483            .unwrap();
2484
2485        assert_eq!(result, expected);
2486
2487        // add 2.2 days
2488        let start = Interval::new(12, 15, 3);
2489        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2490
2491        let result = start
2492            .add(
2493                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2494                IntervalUnit::Day,
2495            )
2496            .unwrap();
2497
2498        assert_eq!(result, expected);
2499
2500        // add 12.5 hours
2501        let start = Interval::new(1, 2, 3);
2502        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2503
2504        let result = start
2505            .add(
2506                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2507                IntervalUnit::Hour,
2508            )
2509            .unwrap();
2510
2511        assert_eq!(result, expected);
2512
2513        // add -1.5 minutes
2514        let start = Interval::new(0, 0, -3);
2515        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2516
2517        let result = start
2518            .add(
2519                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2520                IntervalUnit::Minute,
2521            )
2522            .unwrap();
2523
2524        assert_eq!(result, expected);
2525    }
2526
2527    #[test]
2528    fn string_to_timestamp_old() {
2529        parse_timestamp("1677-06-14T07:29:01.256")
2530            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2531            .unwrap_err();
2532    }
2533
2534    #[test]
2535    fn test_parse_decimal_with_parameter() {
2536        let tests = [
2537            ("0", 0i128),
2538            ("123.123", 123123i128),
2539            ("123.1234", 123123i128),
2540            ("123.1", 123100i128),
2541            ("123", 123000i128),
2542            ("-123.123", -123123i128),
2543            ("-123.1234", -123123i128),
2544            ("-123.1", -123100i128),
2545            ("-123", -123000i128),
2546            ("0.0000123", 0i128),
2547            ("12.", 12000i128),
2548            ("-12.", -12000i128),
2549            ("00.1", 100i128),
2550            ("-00.1", -100i128),
2551            ("12345678912345678.1234", 12345678912345678123i128),
2552            ("-12345678912345678.1234", -12345678912345678123i128),
2553            ("99999999999999999.999", 99999999999999999999i128),
2554            ("-99999999999999999.999", -99999999999999999999i128),
2555            (".123", 123i128),
2556            ("-.123", -123i128),
2557            ("123.", 123000i128),
2558            ("-123.", -123000i128),
2559        ];
2560        for (s, i) in tests {
2561            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2562            assert_eq!(i, result_128.unwrap());
2563            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2564            assert_eq!(i256::from_i128(i), result_256.unwrap());
2565        }
2566
2567        let e_notation_tests = [
2568            ("1.23e3", "1230.0", 2),
2569            ("5.6714e+2", "567.14", 4),
2570            ("5.6714e-2", "0.056714", 4),
2571            ("5.6714e-2", "0.056714", 3),
2572            ("5.6741214125e2", "567.41214125", 4),
2573            ("8.91E4", "89100.0", 2),
2574            ("3.14E+5", "314000.0", 2),
2575            ("2.718e0", "2.718", 2),
2576            ("9.999999e-1", "0.9999999", 4),
2577            ("1.23e+3", "1230", 2),
2578            ("1.234559e+3", "1234.559", 2),
2579            ("1.00E-10", "0.0000000001", 11),
2580            ("1.23e-4", "0.000123", 2),
2581            ("9.876e7", "98760000.0", 2),
2582            ("5.432E+8", "543200000.0", 10),
2583            ("1.234567e9", "1234567000.0", 2),
2584            ("1.234567e2", "123.45670000", 2),
2585            ("4749.3e-5", "0.047493", 10),
2586            ("4749.3e+5", "474930000", 10),
2587            ("4749.3e-5", "0.047493", 1),
2588            ("4749.3e+5", "474930000", 1),
2589            ("0E-8", "0", 10),
2590            ("0E+6", "0", 10),
2591            ("1E-8", "0.00000001", 10),
2592            ("12E+6", "12000000", 10),
2593            ("12E-6", "0.000012", 10),
2594            ("0.1e-6", "0.0000001", 10),
2595            ("0.1e+6", "100000", 10),
2596            ("0.12e-6", "0.00000012", 10),
2597            ("0.12e+6", "120000", 10),
2598            ("000000000001e0", "000000000001", 3),
2599            ("000001.1034567002e0", "000001.1034567002", 3),
2600            ("1.234e16", "12340000000000000", 0),
2601            ("123.4e16", "1234000000000000000", 0),
2602        ];
2603        for (e, d, scale) in e_notation_tests {
2604            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2605            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2606            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2607            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2608            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2609            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2610        }
2611        let can_not_parse_tests = [
2612            "123,123",
2613            ".",
2614            "123.123.123",
2615            "",
2616            "+",
2617            "-",
2618            "e",
2619            "1.3e+e3",
2620            "5.6714ee-2",
2621            "4.11ee-+4",
2622            "4.11e++4",
2623            "1.1e.12",
2624            "1.23e+3.",
2625            "1.23e+3.1",
2626        ];
2627        for s in can_not_parse_tests {
2628            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2629            assert_eq!(
2630                format!("Parser error: can't parse the string value {s} to decimal"),
2631                result_128.unwrap_err().to_string()
2632            );
2633            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2634            assert_eq!(
2635                format!("Parser error: can't parse the string value {s} to decimal"),
2636                result_256.unwrap_err().to_string()
2637            );
2638        }
2639        let overflow_parse_tests = [
2640            ("12345678", 3),
2641            ("1.2345678e7", 3),
2642            ("12345678.9", 3),
2643            ("1.23456789e+7", 3),
2644            ("99999999.99", 3),
2645            ("9.999999999e7", 3),
2646            ("12345678908765.123456", 3),
2647            ("123456789087651234.56e-4", 3),
2648            ("1234560000000", 0),
2649            ("1.23456e12", 0),
2650        ];
2651        for (s, scale) in overflow_parse_tests {
2652            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2653            let expected_128 = "Parser error: parse decimal overflow";
2654            let actual_128 = result_128.unwrap_err().to_string();
2655
2656            assert!(
2657                actual_128.contains(expected_128),
2658                "actual: '{actual_128}', expected: '{expected_128}'"
2659            );
2660
2661            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2662            let expected_256 = "Parser error: parse decimal overflow";
2663            let actual_256 = result_256.unwrap_err().to_string();
2664
2665            assert!(
2666                actual_256.contains(expected_256),
2667                "actual: '{actual_256}', expected: '{expected_256}'"
2668            );
2669        }
2670
2671        let edge_tests_128 = [
2672            (
2673                "99999999999999999999999999999999999999",
2674                99999999999999999999999999999999999999i128,
2675                0,
2676            ),
2677            (
2678                "999999999999999999999999999999999999.99",
2679                99999999999999999999999999999999999999i128,
2680                2,
2681            ),
2682            (
2683                "9999999999999999999999999.9999999999999",
2684                99999999999999999999999999999999999999i128,
2685                13,
2686            ),
2687            (
2688                "9999999999999999999999999",
2689                99999999999999999999999990000000000000i128,
2690                13,
2691            ),
2692            (
2693                "0.99999999999999999999999999999999999999",
2694                99999999999999999999999999999999999999i128,
2695                38,
2696            ),
2697            (
2698                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2699                0i128,
2700                15,
2701            ),
2702            (
2703                "1.016744e-320",
2704                0i128,
2705                15,
2706            ),
2707            (
2708                "-1e3",
2709                -1000000000i128,
2710                6,
2711            ),
2712            (
2713                "+1e3",
2714                1000000000i128,
2715                6,
2716            ),
2717            (
2718                "-1e31",
2719                -10000000000000000000000000000000000000i128,
2720                6,
2721            ),
2722        ];
2723        for (s, i, scale) in edge_tests_128 {
2724            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2725            assert_eq!(i, result_128.unwrap());
2726        }
2727        let edge_tests_256 = [
2728            (
2729                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2730                i256::from_string(
2731                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2732                )
2733                .unwrap(),
2734                0,
2735            ),
2736            (
2737                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2738                i256::from_string(
2739                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2740                )
2741                .unwrap(),
2742                4,
2743            ),
2744            (
2745                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2746                i256::from_string(
2747                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2748                )
2749                .unwrap(),
2750                26,
2751            ),
2752            (
2753                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2754                i256::from_string(
2755                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2756                )
2757                .unwrap(),
2758                26,
2759            ),
2760            (
2761                "99999999999999999999999999999999999999999999999999",
2762                i256::from_string(
2763                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2764                )
2765                .unwrap(),
2766                26,
2767            ),
2768            (
2769                "9.9999999999999999999999999999999999999999999999999e+49",
2770                i256::from_string(
2771                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2772                )
2773                .unwrap(),
2774                26,
2775            ),
2776        ];
2777        for (s, i, scale) in edge_tests_256 {
2778            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2779            assert_eq!(i, result.unwrap());
2780        }
2781    }
2782
2783    #[test]
2784    fn test_parse_empty() {
2785        assert_eq!(Int32Type::parse(""), None);
2786        assert_eq!(Int64Type::parse(""), None);
2787        assert_eq!(UInt32Type::parse(""), None);
2788        assert_eq!(UInt64Type::parse(""), None);
2789        assert_eq!(Float32Type::parse(""), None);
2790        assert_eq!(Float64Type::parse(""), None);
2791        assert_eq!(Int32Type::parse("+"), None);
2792        assert_eq!(Int64Type::parse("+"), None);
2793        assert_eq!(UInt32Type::parse("+"), None);
2794        assert_eq!(UInt64Type::parse("+"), None);
2795        assert_eq!(Float32Type::parse("+"), None);
2796        assert_eq!(Float64Type::parse("+"), None);
2797        assert_eq!(TimestampNanosecondType::parse(""), None);
2798        assert_eq!(Date32Type::parse(""), None);
2799    }
2800
2801    #[test]
2802    fn test_parse_interval_month_day_nano_config() {
2803        let interval = parse_interval_month_day_nano_config(
2804            "1",
2805            IntervalParseConfig::new(IntervalUnit::Second),
2806        )
2807        .unwrap();
2808        assert_eq!(interval.months, 0);
2809        assert_eq!(interval.days, 0);
2810        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2811    }
2812}