1use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Reads the first `N` entries of `digits` as the leading decimal places of a fraction of a
/// second and returns that fraction in nanoseconds.
///
/// `O` is subtracted (wrapping) from every entry before it is used: pass `b'0'` for raw ASCII
/// text and `0` for values that have already been decoded.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for digit in &digits[..N] {
        value = value * 10 + u32::from(digit.wrapping_sub(O));
    }
    // Pad on the right so that e.g. three digits are interpreted as milliseconds.
    value * 10_u32.pow((9 - N) as u32)
}
38
/// Scratch state used to decode a timestamp from (at most) the first 32 bytes of a string.
struct TimestampParser {
    /// The input bytes with `b'0'` subtracted (wrapping), so that an ASCII digit is stored as
    /// its numeric value. Positions beyond the end of the input remain `0`.
    digits: [u8; 32],
    /// Bit `i` is set when byte `i` of the input is an ASCII digit.
    mask: u32,
}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
/// Conversion from text into the native value of an [`ArrowPrimitiveType`].
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string`, returning `None` when it cannot be interpreted as this type.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` with a caller-supplied `format`.
    ///
    /// The provided implementation ignores `_format` and forwards to [`Self::parse`].
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
/// Returns `true` only when `opt` holds a value for which `f` returns `true`.
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476 ($t:ty) => {
477 impl Parser for $t {
478 fn parse(string: &str) -> Option<Self::Native> {
479 if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480 return None;
481 }
482 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483 string.as_bytes(),
484 ) {
485 (Some(n), x) if x == string.len() => Some(n),
486 _ => None,
487 }
488 }
489 }
490 };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506 fn parse(string: &str) -> Option<i64> {
507 string_to_timestamp_nanos(string).ok()
508 }
509}
510
511impl Parser for TimestampMicrosecondType {
512 fn parse(string: &str) -> Option<i64> {
513 let nanos = string_to_timestamp_nanos(string).ok();
514 nanos.map(|x| x / 1000)
515 }
516}
517
518impl Parser for TimestampMillisecondType {
519 fn parse(string: &str) -> Option<i64> {
520 let nanos = string_to_timestamp_nanos(string).ok();
521 nanos.map(|x| x / 1_000_000)
522 }
523}
524
525impl Parser for TimestampSecondType {
526 fn parse(string: &str) -> Option<i64> {
527 let nanos = string_to_timestamp_nanos(string).ok();
528 nanos.map(|x| x / 1_000_000_000)
529 }
530}
531
532impl Parser for Time64NanosecondType {
533 fn parse(string: &str) -> Option<Self::Native> {
535 string_to_time_nanoseconds(string)
536 .ok()
537 .or_else(|| string.parse::<Self::Native>().ok())
538 }
539
540 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541 let nt = NaiveTime::parse_from_str(string, format).ok()?;
542 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543 }
544}
545
546impl Parser for Time64MicrosecondType {
547 fn parse(string: &str) -> Option<Self::Native> {
549 string_to_time_nanoseconds(string)
550 .ok()
551 .map(|nanos| nanos / 1_000)
552 .or_else(|| string.parse::<Self::Native>().ok())
553 }
554
555 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556 let nt = NaiveTime::parse_from_str(string, format).ok()?;
557 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558 }
559}
560
561impl Parser for Time32MillisecondType {
562 fn parse(string: &str) -> Option<Self::Native> {
564 string_to_time_nanoseconds(string)
565 .ok()
566 .map(|nanos| (nanos / 1_000_000) as i32)
567 .or_else(|| string.parse::<Self::Native>().ok())
568 }
569
570 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571 let nt = NaiveTime::parse_from_str(string, format).ok()?;
572 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573 }
574}
575
576impl Parser for Time32SecondType {
577 fn parse(string: &str) -> Option<Self::Native> {
579 string_to_time_nanoseconds(string)
580 .ok()
581 .map(|nanos| (nanos / 1_000_000_000) as i32)
582 .or_else(|| string.parse::<Self::Native>().ok())
583 }
584
585 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586 let nt = NaiveTime::parse_from_str(string, format).ok()?;
587 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588 }
589}
590
/// Subtracted from chrono's `num_days_from_ce()` to turn a date into a day count relative to
/// the Unix epoch (1970-01-01).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error text used when a timestamp cannot be expressed as `i64` nanoseconds.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598 if string.starts_with('+') || string.starts_with('-') {
606 let rest = &string[1..];
609 let hyphen = rest.find('-')?;
610 if hyphen < 4 {
611 return None;
612 }
613 let year: i32 = string[..hyphen + 1].parse().ok()?;
616 let remainder = string[hyphen + 1..].strip_prefix('-')?;
618 let mut parts = remainder.splitn(2, '-');
619 let month: u32 = parts.next()?.parse().ok()?;
620 let day: u32 = parts.next()?.parse().ok()?;
621 return NaiveDate::from_ymd_opt(year, month, day);
622 }
623
624 if string.len() > 10 {
625 return string_to_datetime(&Utc, string)
627 .map(|dt| dt.date_naive())
628 .ok();
629 };
630 let mut digits = [0; 10];
631 let mut mask = 0;
632
633 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
635 *o = i.wrapping_sub(b'0');
636 mask |= ((*o < 10) as u16) << idx
637 }
638
639 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
640
641 if digits[4] != HYPHEN {
643 let (year, month, day) = match (mask, string.len()) {
644 (0b11111111, 8) => (
645 digits[0] as u16 * 1000
646 + digits[1] as u16 * 100
647 + digits[2] as u16 * 10
648 + digits[3] as u16,
649 digits[4] * 10 + digits[5],
650 digits[6] * 10 + digits[7],
651 ),
652 _ => return None,
653 };
654 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
655 }
656
657 let (month, day) = match mask {
658 0b1101101111 => {
659 if digits[7] != HYPHEN {
660 return None;
661 }
662 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
663 }
664 0b101101111 => {
665 if digits[7] != HYPHEN {
666 return None;
667 }
668 (digits[5] * 10 + digits[6], digits[8])
669 }
670 0b110101111 => {
671 if digits[6] != HYPHEN {
672 return None;
673 }
674 (digits[5], digits[7] * 10 + digits[8])
675 }
676 0b10101111 => {
677 if digits[6] != HYPHEN {
678 return None;
679 }
680 (digits[5], digits[7])
681 }
682 _ => return None,
683 };
684
685 let year =
686 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
687
688 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
689}
690
691impl Parser for Date32Type {
692 fn parse(string: &str) -> Option<i32> {
693 let date = parse_date(string)?;
694 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
695 }
696
697 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
698 let date = NaiveDate::parse_from_str(string, format).ok()?;
699 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
700 }
701}
702
703impl Parser for Date64Type {
704 fn parse(string: &str) -> Option<i64> {
705 if string.len() <= 10 {
706 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
707 Some(datetime.and_utc().timestamp_millis())
708 } else {
709 let date_time = string_to_datetime(&Utc, string).ok()?;
710 Some(date_time.timestamp_millis())
711 }
712 }
713
714 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
715 use chrono::format::Fixed;
716 use chrono::format::StrftimeItems;
717 let fmt = StrftimeItems::new(format);
718 let has_zone = fmt.into_iter().any(|item| match item {
719 chrono::format::Item::Fixed(fixed_item) => matches!(
720 fixed_item,
721 Fixed::RFC2822
722 | Fixed::RFC3339
723 | Fixed::TimezoneName
724 | Fixed::TimezoneOffsetColon
725 | Fixed::TimezoneOffsetColonZ
726 | Fixed::TimezoneOffset
727 | Fixed::TimezoneOffsetZ
728 ),
729 _ => false,
730 });
731 if has_zone {
732 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
733 Some(date_time.timestamp_millis())
734 } else {
735 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
736 Some(date_time.and_utc().timestamp_millis())
737 }
738 }
739}
740
741fn parse_e_notation<T: DecimalType>(
742 s: &str,
743 mut digits: u16,
744 mut fractionals: i16,
745 mut result: T::Native,
746 index: usize,
747 precision: u16,
748 scale: i16,
749) -> Result<T::Native, ArrowError> {
750 let mut exp: i16 = 0;
751 let base = T::Native::usize_as(10);
752
753 let mut exp_start: bool = false;
754 let mut pos_shift_direction: bool = true;
756
757 let mut bs;
759 if fractionals > 0 {
760 bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
762 } else {
763 bs = s.as_bytes().iter().skip(index);
765 }
766
767 while let Some(b) = bs.next() {
768 match b {
769 b'0'..=b'9' => {
770 result = result.mul_wrapping(base);
771 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
772 if fractionals > 0 {
773 fractionals += 1;
774 }
775 digits += 1;
776 }
777 &b'e' | &b'E' => {
778 exp_start = true;
779 }
780 _ => {
781 return Err(ArrowError::ParseError(format!(
782 "can't parse the string value {s} to decimal"
783 )));
784 }
785 };
786
787 if exp_start {
788 pos_shift_direction = match bs.next() {
789 Some(&b'-') => false,
790 Some(&b'+') => true,
791 Some(b) => {
792 if !b.is_ascii_digit() {
793 return Err(ArrowError::ParseError(format!(
794 "can't parse the string value {s} to decimal"
795 )));
796 }
797
798 exp *= 10;
799 exp += (b - b'0') as i16;
800
801 true
802 }
803 None => {
804 return Err(ArrowError::ParseError(format!(
805 "can't parse the string value {s} to decimal"
806 )))
807 }
808 };
809
810 for b in bs.by_ref() {
811 if !b.is_ascii_digit() {
812 return Err(ArrowError::ParseError(format!(
813 "can't parse the string value {s} to decimal"
814 )));
815 }
816 exp *= 10;
817 exp += (b - b'0') as i16;
818 }
819 }
820 }
821
822 if digits == 0 && fractionals == 0 && exp == 0 {
823 return Err(ArrowError::ParseError(format!(
824 "can't parse the string value {s} to decimal"
825 )));
826 }
827
828 if !pos_shift_direction {
829 if exp - (digits as i16 + scale) > 0 {
832 return Ok(T::Native::usize_as(0));
833 }
834 exp *= -1;
835 }
836
837 exp = fractionals - exp;
839 if !pos_shift_direction && exp > digits as i16 {
841 digits = exp as u16;
842 }
843 exp = scale - exp;
845
846 if (digits as i16 + exp) as u16 > precision {
847 return Err(ArrowError::ParseError(format!(
848 "parse decimal overflow ({s})"
849 )));
850 }
851
852 if exp < 0 {
853 result = result.div_wrapping(base.pow_wrapping(-exp as _));
854 } else {
855 result = result.mul_wrapping(base.pow_wrapping(exp as _));
856 }
857
858 Ok(result)
859}
860
861pub fn parse_decimal<T: DecimalType>(
864 s: &str,
865 precision: u8,
866 scale: i8,
867) -> Result<T::Native, ArrowError> {
868 let mut result = T::Native::usize_as(0);
869 let mut fractionals: i8 = 0;
870 let mut digits: u8 = 0;
871 let base = T::Native::usize_as(10);
872
873 let bs = s.as_bytes();
874 let (signed, negative) = match bs.first() {
875 Some(b'-') => (true, true),
876 Some(b'+') => (true, false),
877 _ => (false, false),
878 };
879
880 if bs.is_empty() || signed && bs.len() == 1 {
881 return Err(ArrowError::ParseError(format!(
882 "can't parse the string value {s} to decimal"
883 )));
884 }
885
886 let mut bs = bs.iter().enumerate().skip(signed as usize);
888
889 let mut is_e_notation = false;
890
891 while let Some((index, b)) = bs.next() {
894 match b {
895 b'0'..=b'9' => {
896 if digits == 0 && *b == b'0' {
897 continue;
899 }
900 digits += 1;
901 result = result.mul_wrapping(base);
902 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
903 }
904 b'.' => {
905 let point_index = index;
906
907 for (_, b) in bs.by_ref() {
908 if !b.is_ascii_digit() {
909 if *b == b'e' || *b == b'E' {
910 result = parse_e_notation::<T>(
911 s,
912 digits as u16,
913 fractionals as i16,
914 result,
915 point_index,
916 precision as u16,
917 scale as i16,
918 )?;
919
920 is_e_notation = true;
921
922 break;
923 }
924 return Err(ArrowError::ParseError(format!(
925 "can't parse the string value {s} to decimal"
926 )));
927 }
928 if fractionals == scale && scale != 0 {
929 continue;
933 }
934 fractionals += 1;
935 digits += 1;
936 result = result.mul_wrapping(base);
937 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
938 }
939
940 if is_e_notation {
941 break;
942 }
943
944 if digits == 0 {
946 return Err(ArrowError::ParseError(format!(
947 "can't parse the string value {s} to decimal"
948 )));
949 }
950 }
951 b'e' | b'E' => {
952 result = parse_e_notation::<T>(
953 s,
954 digits as u16,
955 fractionals as i16,
956 result,
957 index,
958 precision as u16,
959 scale as i16,
960 )?;
961
962 is_e_notation = true;
963
964 break;
965 }
966 _ => {
967 return Err(ArrowError::ParseError(format!(
968 "can't parse the string value {s} to decimal"
969 )));
970 }
971 }
972 }
973
974 if !is_e_notation {
975 if fractionals < scale {
976 let exp = scale - fractionals;
977 if exp as u8 + digits > precision {
978 return Err(ArrowError::ParseError(format!(
979 "parse decimal overflow ({s})"
980 )));
981 }
982 let mul = base.pow_wrapping(exp as _);
983 result = result.mul_wrapping(mul);
984 } else if digits > precision {
985 return Err(ArrowError::ParseError(format!(
986 "parse decimal overflow ({s})"
987 )));
988 }
989 }
990
991 Ok(if negative {
992 result.neg_wrapping()
993 } else {
994 result
995 })
996}
997
998pub fn parse_interval_year_month(
1000 value: &str,
1001) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
1002 let config = IntervalParseConfig::new(IntervalUnit::Year);
1003 let interval = Interval::parse(value, &config)?;
1004
1005 let months = interval.to_year_months().map_err(|_| {
1006 ArrowError::CastError(format!(
1007 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
1008 ))
1009 })?;
1010
1011 Ok(IntervalYearMonthType::make_value(0, months))
1012}
1013
1014pub fn parse_interval_day_time(
1016 value: &str,
1017) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1018 let config = IntervalParseConfig::new(IntervalUnit::Day);
1019 let interval = Interval::parse(value, &config)?;
1020
1021 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1022 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1023 )))?;
1024
1025 Ok(IntervalDayTimeType::make_value(days, millis))
1026}
1027
1028pub fn parse_interval_month_day_nano_config(
1030 value: &str,
1031 config: IntervalParseConfig,
1032) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1033 let interval = Interval::parse(value, &config)?;
1034
1035 let (months, days, nanos) = interval.to_month_day_nanos();
1036
1037 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1038}
1039
1040pub fn parse_interval_month_day_nano(
1042 value: &str,
1043) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1044 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1045}
1046
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one 24 hour day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1053
/// Options that control how interval strings are parsed.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit used for an amount that is not followed by a unit name.
    default_unit: IntervalUnit,
}

impl IntervalParseConfig {
    /// Creates a configuration in which amounts without a unit are measured in `default_unit`.
    pub fn new(default_unit: IntervalUnit) -> Self {
        Self { default_unit }
    }
}
1070
/// Units of time that may appear in an interval string.
///
/// Every variant's discriminant is a distinct single bit.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// Handled as 100 years.
    Century     = 0b_0000_0000_0001,
    /// Handled as 10 years.
    Decade      = 0b_0000_0000_0010,
    /// Handled as 12 months.
    Year        = 0b_0000_0000_0100,
    /// A calendar month.
    Month       = 0b_0000_0000_1000,
    /// Handled as 7 days.
    Week        = 0b_0000_0001_0000,
    /// A day.
    Day         = 0b_0000_0010_0000,
    /// An hour.
    Hour        = 0b_0000_0100_0000,
    /// A minute.
    Minute      = 0b_0000_1000_0000,
    /// A second.
    Second      = 0b_0001_0000_0000,
    /// A millisecond.
    Millisecond = 0b_0010_0000_0000,
    /// A microsecond.
    Microsecond = 0b_0100_0000_0000,
    /// A nanosecond.
    Nanosecond  = 0b_1000_0000_0000,
}
1102
1103impl FromStr for IntervalUnit {
1108 type Err = ArrowError;
1109
1110 fn from_str(s: &str) -> Result<Self, ArrowError> {
1111 match s.to_lowercase().as_str() {
1112 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1113 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1114 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1115 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1116 "w" | "week" | "weeks" => Ok(Self::Week),
1117 "d" | "day" | "days" => Ok(Self::Day),
1118 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1119 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1120 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1121 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1122 Ok(Self::Millisecond)
1123 }
1124 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1125 Ok(Self::Microsecond)
1126 }
1127 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1128 _ => Err(ArrowError::InvalidArgumentError(format!(
1129 "Unknown interval type: {s}"
1130 ))),
1131 }
1132 }
1133}
1134
1135impl IntervalUnit {
1136 fn from_str_or_config(
1137 s: Option<&str>,
1138 config: &IntervalParseConfig,
1139 ) -> Result<Self, ArrowError> {
1140 match s {
1141 Some(s) => s.parse(),
1142 None => Ok(config.default_unit),
1143 }
1144 }
1145}
1146
/// Plain tuple form of an interval; presumably `(months, days, nanoseconds)`, to be
/// confirmed against its users.
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal places kept for the fractional part of an interval amount.
const INTERVAL_PRECISION: u32 = 15;
1152
/// A possibly fractional quantity read from an interval string, before a unit is applied.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// Whole part of the quantity.
    integer: i64,
    /// Fractional part, scaled by `10^INTERVAL_PRECISION` and negative when the quantity is
    /// negative.
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Builds an amount from its two parts without any validation; only compiled for tests.
    fn new(integer: i64, frac: i64) -> Self {
        Self { integer, frac }
    }
}
1167
1168impl FromStr for IntervalAmount {
1169 type Err = ArrowError;
1170
1171 fn from_str(s: &str) -> Result<Self, Self::Err> {
1172 match s.split_once('.') {
1173 Some((integer, frac))
1174 if frac.len() <= INTERVAL_PRECISION as usize
1175 && !frac.is_empty()
1176 && !frac.starts_with('-') =>
1177 {
1178 let explicit_neg = integer.starts_with('-');
1181 let integer = if integer.is_empty() || integer == "-" {
1182 Ok(0)
1183 } else {
1184 integer.parse::<i64>().map_err(|_| {
1185 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1186 })
1187 }?;
1188
1189 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1190 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1191 })?;
1192
1193 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1195
1196 let frac = if integer < 0 || explicit_neg {
1198 -frac
1199 } else {
1200 frac
1201 };
1202
1203 let result = Self { integer, frac };
1204
1205 Ok(result)
1206 }
1207 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1208 "Failed to parse {s} as interval amount"
1209 ))),
1210 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1211 Err(ArrowError::ParseError(format!(
1212 "{s} exceeds the precision available for interval amount"
1213 )))
1214 }
1215 Some(_) | None => {
1216 let integer = s.parse::<i64>().map_err(|_| {
1217 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1218 })?;
1219
1220 let result = Self { integer, frac: 0 };
1221 Ok(result)
1222 }
1223 }
1224 }
1225}
1226
/// An interval accumulated from parsed components, kept as separate month, day and
/// nanosecond counts.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Number of months.
    months: i32,
    /// Number of days.
    days: i32,
    /// Number of nanoseconds.
    nanos: i64,
}
1233
1234impl Interval {
1235 fn new(months: i32, days: i32, nanos: i64) -> Self {
1236 Self {
1237 months,
1238 days,
1239 nanos,
1240 }
1241 }
1242
1243 fn to_year_months(&self) -> Result<i32, ArrowError> {
1244 match (self.months, self.days, self.nanos) {
1245 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1246 _ => Err(ArrowError::InvalidArgumentError(format!(
1247 "Unable to represent interval with days and nanos as year-months: {:?}",
1248 self
1249 ))),
1250 }
1251 }
1252
1253 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1254 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1255
1256 match self.nanos {
1257 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1258 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1259 ArrowError::InvalidArgumentError(format!(
1260 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1261 self.nanos
1262 ))
1263 })?;
1264
1265 Ok((days, millis))
1266 }
1267 nanos => Err(ArrowError::InvalidArgumentError(format!(
1268 "Unable to represent {nanos} as milliseconds"
1269 ))),
1270 }
1271 }
1272
1273 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1274 (self.months, self.days, self.nanos)
1275 }
1276
1277 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1280 let components = parse_interval_components(value, config)?;
1281
1282 components
1283 .into_iter()
1284 .try_fold(Self::default(), |result, (amount, unit)| {
1285 result.add(amount, unit)
1286 })
1287 }
1288
1289 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1296 let result = match unit {
1297 IntervalUnit::Century => {
1298 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1299 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1300 let months = months_int
1301 .add_checked(month_frac)?
1302 .try_into()
1303 .map_err(|_| {
1304 ArrowError::ParseError(format!(
1305 "Unable to represent {} centuries as months in a signed 32-bit integer",
1306 &amount.integer
1307 ))
1308 })?;
1309
1310 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1311 }
1312 IntervalUnit::Decade => {
1313 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1314
1315 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1316 let months = months_int
1317 .add_checked(month_frac)?
1318 .try_into()
1319 .map_err(|_| {
1320 ArrowError::ParseError(format!(
1321 "Unable to represent {} decades as months in a signed 32-bit integer",
1322 &amount.integer
1323 ))
1324 })?;
1325
1326 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1327 }
1328 IntervalUnit::Year => {
1329 let months_int = amount.integer.mul_checked(12)?;
1330 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1331 let months = months_int
1332 .add_checked(month_frac)?
1333 .try_into()
1334 .map_err(|_| {
1335 ArrowError::ParseError(format!(
1336 "Unable to represent {} years as months in a signed 32-bit integer",
1337 &amount.integer
1338 ))
1339 })?;
1340
1341 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1342 }
1343 IntervalUnit::Month => {
1344 let months = amount.integer.try_into().map_err(|_| {
1345 ArrowError::ParseError(format!(
1346 "Unable to represent {} months in a signed 32-bit integer",
1347 &amount.integer
1348 ))
1349 })?;
1350
1351 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1352 let days = days.try_into().map_err(|_| {
1353 ArrowError::ParseError(format!(
1354 "Unable to represent {} months as days in a signed 32-bit integer",
1355 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1356 ))
1357 })?;
1358
1359 Self::new(
1360 self.months.add_checked(months)?,
1361 self.days.add_checked(days)?,
1362 self.nanos,
1363 )
1364 }
1365 IntervalUnit::Week => {
1366 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1367 ArrowError::ParseError(format!(
1368 "Unable to represent {} weeks as days in a signed 32-bit integer",
1369 &amount.integer
1370 ))
1371 })?;
1372
1373 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1374
1375 Self::new(
1376 self.months,
1377 self.days.add_checked(days)?,
1378 self.nanos.add_checked(nanos)?,
1379 )
1380 }
1381 IntervalUnit::Day => {
1382 let days = amount.integer.try_into().map_err(|_| {
1383 ArrowError::InvalidArgumentError(format!(
1384 "Unable to represent {} days in a signed 32-bit integer",
1385 amount.integer
1386 ))
1387 })?;
1388
1389 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1390
1391 Self::new(
1392 self.months,
1393 self.days.add_checked(days)?,
1394 self.nanos.add_checked(nanos)?,
1395 )
1396 }
1397 IntervalUnit::Hour => {
1398 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1399 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1400 let nanos = nanos_int.add_checked(nanos_frac)?;
1401
1402 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1403 }
1404 IntervalUnit::Minute => {
1405 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1406 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1407
1408 let nanos = nanos_int.add_checked(nanos_frac)?;
1409
1410 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1411 }
1412 IntervalUnit::Second => {
1413 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1414 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1415 let nanos = nanos_int.add_checked(nanos_frac)?;
1416
1417 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1418 }
1419 IntervalUnit::Millisecond => {
1420 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1421 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1422 let nanos = nanos_int.add_checked(nanos_frac)?;
1423
1424 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1425 }
1426 IntervalUnit::Microsecond => {
1427 let nanos_int = amount.integer.mul_checked(1_000)?;
1428 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1429 let nanos = nanos_int.add_checked(nanos_frac)?;
1430
1431 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1432 }
1433 IntervalUnit::Nanosecond => {
1434 let nanos_int = amount.integer;
1435 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1436 let nanos = nanos_int.add_checked(nanos_frac)?;
1437
1438 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1439 }
1440 };
1441
1442 Ok(result)
1443 }
1444}
1445
1446fn parse_interval_components(
1448 value: &str,
1449 config: &IntervalParseConfig,
1450) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1451 let raw_pairs = split_interval_components(value);
1452
1453 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1455 .iter()
1456 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1457 .collect()
1458 else {
1459 return Err(ArrowError::ParseError(format!(
1460 "Invalid input syntax for type interval: {value:?}"
1461 )));
1462 };
1463
1464 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1466
1467 let mut observed_interval_types = 0;
1469 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1470 if observed_interval_types & (*unit as u16) != 0 {
1471 return Err(ArrowError::ParseError(format!(
1472 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1473 value,
1474 raw_unit.unwrap_or_default(),
1475 )));
1476 }
1477
1478 observed_interval_types |= *unit as u16;
1479 }
1480
1481 let result = amounts.iter().copied().zip(units.iter().copied());
1482
1483 Ok(result.collect::<Vec<_>>())
1484}
1485
/// Splits an interval string into `(amount, unit)` string pairs.
///
/// Pairs are separated by whitespace. Inside a pair the amount and the unit may be
/// written together (`"1mon"`) or as two separate words (`"1 mon"`). A trailing amount
/// that is not followed by a unit yields `(amount, None)`.
///
/// Runs of whitespace, as well as leading and trailing whitespace, act as a single
/// separator, so `" 1  month "` splits exactly like `"1 month"`.
///
/// Input that contains no words at all (empty or whitespace only) yields the single pair
/// `(value, None)` rather than an empty list, so the caller still receives a component
/// and can reject it (presumably the amount fails to parse; confirm against
/// `IntervalAmount::from_str`).
fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
    let mut result = vec![];
    // `split_whitespace` never yields empty words, unlike `split(char::is_whitespace)`.
    let mut words = value.split_whitespace();
    // Not a `for` loop: the body may consume the following word as the unit.
    while let Some(word) = words.next() {
        if let Some(split_word_at) = word.find(not_interval_amount) {
            // Amount and unit are glued together, e.g. "1mon".
            let (amount, unit) = word.split_at(split_word_at);
            result.push((amount, Some(unit)));
        } else if let Some(unit) = words.next() {
            // The whole word is an amount; the next word is its unit.
            result.push((word, Some(unit)));
        } else {
            // Trailing amount without a unit.
            result.push((word, None));
            break;
        }
    }
    if result.is_empty() {
        result.push((value, None));
    }
    result
}

/// Returns `true` if `c` cannot be part of an interval amount, i.e. it is neither an
/// ASCII digit, a `.` nor a `-`.
fn not_interval_amount(c: char) -> bool {
    !c.is_ascii_digit() && c != '.' && c != '-'
}
1512
1513#[cfg(test)]
1514mod tests {
1515 use super::*;
1516 use arrow_array::temporal_conversions::date32_to_datetime;
1517 use arrow_buffer::i256;
1518
1519 #[test]
1520 fn test_parse_nanos() {
1521 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1522 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1523 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1524 }
1525
1526 #[test]
1527 fn string_to_timestamp_timezone() {
1528 assert_eq!(
1530 1599572549190855000,
1531 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1532 );
1533 assert_eq!(
1534 1599572549190855000,
1535 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1536 );
1537 assert_eq!(
1538 1599572549000000000,
1539 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1540 ); assert_eq!(
1542 1599590549190855000,
1543 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1544 );
1545 }
1546
1547 #[test]
1548 fn string_to_timestamp_timezone_space() {
1549 assert_eq!(
1551 1599572549190855000,
1552 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1553 );
1554 assert_eq!(
1555 1599572549190855000,
1556 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1557 );
1558 assert_eq!(
1559 1599572549000000000,
1560 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1561 ); assert_eq!(
1563 1599590549190855000,
1564 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1565 );
1566 }
1567
1568 #[test]
1569 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1571 let naive_datetime = NaiveDateTime::new(
1575 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1576 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1577 );
1578
1579 assert_eq!(
1581 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1582 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1583 );
1584
1585 assert_eq!(
1586 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1587 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1588 );
1589
1590 let datetime_whole_secs = NaiveDateTime::new(
1593 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1594 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1595 )
1596 .and_utc();
1597
1598 assert_eq!(
1600 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1601 parse_timestamp("2020-09-08T13:42:29").unwrap()
1602 );
1603
1604 assert_eq!(
1605 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1606 parse_timestamp("2020-09-08 13:42:29").unwrap()
1607 );
1608
1609 let datetime_no_time = NaiveDateTime::new(
1613 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1614 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1615 )
1616 .and_utc();
1617
1618 assert_eq!(
1619 datetime_no_time.timestamp_nanos_opt().unwrap(),
1620 parse_timestamp("2020-09-08").unwrap()
1621 )
1622 }
1623
1624 #[test]
1625 fn string_to_timestamp_chrono() {
1626 let cases = [
1627 "2020-09-08T13:42:29Z",
1628 "1969-01-01T00:00:00.1Z",
1629 "2020-09-08T12:00:12.12345678+00:00",
1630 "2020-09-08T12:00:12+00:00",
1631 "2020-09-08T12:00:12.1+00:00",
1632 "2020-09-08T12:00:12.12+00:00",
1633 "2020-09-08T12:00:12.123+00:00",
1634 "2020-09-08T12:00:12.1234+00:00",
1635 "2020-09-08T12:00:12.12345+00:00",
1636 "2020-09-08T12:00:12.123456+00:00",
1637 "2020-09-08T12:00:12.1234567+00:00",
1638 "2020-09-08T12:00:12.12345678+00:00",
1639 "2020-09-08T12:00:12.123456789+00:00",
1640 "2020-09-08T12:00:12.12345678912z",
1641 "2020-09-08T12:00:12.123456789123Z",
1642 "2020-09-08T12:00:12.123456789123+02:00",
1643 "2020-09-08T12:00:12.12345678912345Z",
1644 "2020-09-08T12:00:12.1234567891234567+02:00",
1645 "2020-09-08T12:00:60Z",
1646 "2020-09-08T12:00:60.123Z",
1647 "2020-09-08T12:00:60.123456+02:00",
1648 "2020-09-08T12:00:60.1234567891234567+02:00",
1649 "2020-09-08T12:00:60.999999999+02:00",
1650 "2020-09-08t12:00:12.12345678+00:00",
1651 "2020-09-08t12:00:12+00:00",
1652 "2020-09-08t12:00:12Z",
1653 ];
1654
1655 for case in cases {
1656 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1657 let chrono_utc = chrono.with_timezone(&Utc);
1658
1659 let custom = string_to_datetime(&Utc, case).unwrap();
1660 assert_eq!(chrono_utc, custom)
1661 }
1662 }
1663
1664 #[test]
1665 fn string_to_timestamp_naive() {
1666 let cases = [
1667 "2018-11-13T17:11:10.011375885995",
1668 "2030-12-04T17:11:10.123",
1669 "2030-12-04T17:11:10.1234",
1670 "2030-12-04T17:11:10.123456",
1671 ];
1672 for case in cases {
1673 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1674 let custom = string_to_datetime(&Utc, case).unwrap();
1675 assert_eq!(chrono, custom.naive_utc())
1676 }
1677 }
1678
1679 #[test]
1680 fn string_to_timestamp_invalid() {
1681 let cases = [
1683 ("", "timestamp must contain at least 10 characters"),
1684 ("SS", "timestamp must contain at least 10 characters"),
1685 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1686 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1687 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1688 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1689 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1690 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1691 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1692 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1693 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1694 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1695 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1696 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1697 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1698 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1699 ("1997-01-31T092656.123Z", "error parsing time"),
1700 ("1997-01-10T12:00:06.", "error parsing time"),
1701 ("1997-01-10T12:00:06. ", "error parsing time"),
1702 ];
1703
1704 for (s, ctx) in cases {
1705 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1706 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1707 assert_eq!(actual, expected)
1708 }
1709 }
1710
1711 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1713 let result = string_to_timestamp_nanos(s);
1714 if let Err(e) = &result {
1715 eprintln!("Error parsing timestamp '{s}': {e:?}");
1716 }
1717 result
1718 }
1719
1720 #[test]
1721 fn string_without_timezone_to_timestamp() {
1722 let naive_datetime = NaiveDateTime::new(
1725 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1726 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1727 );
1728
1729 assert_eq!(
1731 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1732 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1733 );
1734
1735 assert_eq!(
1736 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1737 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1738 );
1739
1740 let naive_datetime = NaiveDateTime::new(
1741 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1742 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1743 );
1744
1745 assert_eq!(
1747 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1748 parse_timestamp("2020-09-08T13:42:29").unwrap()
1749 );
1750
1751 assert_eq!(
1752 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1753 parse_timestamp("2020-09-08 13:42:29").unwrap()
1754 );
1755
1756 let tz: Tz = "+02:00".parse().unwrap();
1757 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1758 let utc = date.naive_utc().to_string();
1759 assert_eq!(utc, "2020-09-08 11:42:29");
1760 let local = date.naive_local().to_string();
1761 assert_eq!(local, "2020-09-08 13:42:29");
1762
1763 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1764 let utc = date.naive_utc().to_string();
1765 assert_eq!(utc, "2020-09-08 13:42:29");
1766 let local = date.naive_local().to_string();
1767 assert_eq!(local, "2020-09-08 15:42:29");
1768
1769 let dt =
1770 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1771 let local: Tz = "+08:00".parse().unwrap();
1772
1773 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1775 assert_eq!(dt, date.naive_utc());
1776 assert_ne!(dt, date.naive_local());
1777
1778 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1780 assert_eq!(dt, date.naive_local());
1781 assert_ne!(dt, date.naive_utc());
1782 }
1783
1784 #[test]
1785 fn parse_date32() {
1786 let cases = [
1787 "2020-09-08",
1788 "2020-9-8",
1789 "2020-09-8",
1790 "2020-9-08",
1791 "2020-12-1",
1792 "1690-2-5",
1793 "2020-09-08 01:02:03",
1794 ];
1795 for case in cases {
1796 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1797 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1798 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1799 .unwrap();
1800 assert_eq!(v.date(), expected);
1801 }
1802
1803 let err_cases = [
1804 "",
1805 "80-01-01",
1806 "342",
1807 "Foo",
1808 "2020-09-08-03",
1809 "2020--04-03",
1810 "2020--",
1811 "2020-09-08 01",
1812 "2020-09-08 01:02",
1813 "2020-09-08 01-02-03",
1814 "2020-9-8 01:02:03",
1815 "2020-09-08 1:2:3",
1816 ];
1817 for case in err_cases {
1818 assert_eq!(Date32Type::parse(case), None);
1819 }
1820 }
1821
1822 #[test]
1823 fn parse_time64_nanos() {
1824 assert_eq!(
1825 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1826 Some(7_801_123_456_789)
1827 );
1828 assert_eq!(
1829 Time64NanosecondType::parse("02:10:01.1234567"),
1830 Some(7_801_123_456_700)
1831 );
1832 assert_eq!(
1833 Time64NanosecondType::parse("2:10:01.1234567"),
1834 Some(7_801_123_456_700)
1835 );
1836 assert_eq!(
1837 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1838 Some(601_123_456_789)
1839 );
1840 assert_eq!(
1841 Time64NanosecondType::parse("12:10:01.123456789 am"),
1842 Some(601_123_456_789)
1843 );
1844 assert_eq!(
1845 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1846 Some(51_001_123_456_780)
1847 );
1848 assert_eq!(
1849 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1850 Some(51_001_123_456_780)
1851 );
1852 assert_eq!(
1853 Time64NanosecondType::parse("02:10:01"),
1854 Some(7_801_000_000_000)
1855 );
1856 assert_eq!(
1857 Time64NanosecondType::parse("2:10:01"),
1858 Some(7_801_000_000_000)
1859 );
1860 assert_eq!(
1861 Time64NanosecondType::parse("12:10:01 AM"),
1862 Some(601_000_000_000)
1863 );
1864 assert_eq!(
1865 Time64NanosecondType::parse("12:10:01 am"),
1866 Some(601_000_000_000)
1867 );
1868 assert_eq!(
1869 Time64NanosecondType::parse("2:10:01 PM"),
1870 Some(51_001_000_000_000)
1871 );
1872 assert_eq!(
1873 Time64NanosecondType::parse("2:10:01 pm"),
1874 Some(51_001_000_000_000)
1875 );
1876 assert_eq!(
1877 Time64NanosecondType::parse("02:10"),
1878 Some(7_800_000_000_000)
1879 );
1880 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1881 assert_eq!(
1882 Time64NanosecondType::parse("12:10 AM"),
1883 Some(600_000_000_000)
1884 );
1885 assert_eq!(
1886 Time64NanosecondType::parse("12:10 am"),
1887 Some(600_000_000_000)
1888 );
1889 assert_eq!(
1890 Time64NanosecondType::parse("2:10 PM"),
1891 Some(51_000_000_000_000)
1892 );
1893 assert_eq!(
1894 Time64NanosecondType::parse("2:10 pm"),
1895 Some(51_000_000_000_000)
1896 );
1897
1898 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1900
1901 assert_eq!(
1903 Time64NanosecondType::parse("23:59:60"),
1904 Some(86_400_000_000_000)
1905 );
1906
1907 assert_eq!(
1909 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1910 Some(7_801_123_456_700)
1911 );
1912 }
1913
1914 #[test]
1915 fn parse_time64_micros() {
1916 assert_eq!(
1918 Time64MicrosecondType::parse("02:10:01.1234"),
1919 Some(7_801_123_400)
1920 );
1921 assert_eq!(
1922 Time64MicrosecondType::parse("2:10:01.1234"),
1923 Some(7_801_123_400)
1924 );
1925 assert_eq!(
1926 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1927 Some(601_123_456)
1928 );
1929 assert_eq!(
1930 Time64MicrosecondType::parse("12:10:01.123456 am"),
1931 Some(601_123_456)
1932 );
1933 assert_eq!(
1934 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1935 Some(51_001_123_450)
1936 );
1937 assert_eq!(
1938 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1939 Some(51_001_123_450)
1940 );
1941 assert_eq!(
1942 Time64MicrosecondType::parse("02:10:01"),
1943 Some(7_801_000_000)
1944 );
1945 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1946 assert_eq!(
1947 Time64MicrosecondType::parse("12:10:01 AM"),
1948 Some(601_000_000)
1949 );
1950 assert_eq!(
1951 Time64MicrosecondType::parse("12:10:01 am"),
1952 Some(601_000_000)
1953 );
1954 assert_eq!(
1955 Time64MicrosecondType::parse("2:10:01 PM"),
1956 Some(51_001_000_000)
1957 );
1958 assert_eq!(
1959 Time64MicrosecondType::parse("2:10:01 pm"),
1960 Some(51_001_000_000)
1961 );
1962 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1963 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1964 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1965 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1966 assert_eq!(
1967 Time64MicrosecondType::parse("2:10 PM"),
1968 Some(51_000_000_000)
1969 );
1970 assert_eq!(
1971 Time64MicrosecondType::parse("2:10 pm"),
1972 Some(51_000_000_000)
1973 );
1974
1975 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1977
1978 assert_eq!(
1980 Time64MicrosecondType::parse("23:59:60"),
1981 Some(86_400_000_000)
1982 );
1983
1984 assert_eq!(
1986 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1987 Some(7_801_123_400)
1988 );
1989 }
1990
1991 #[test]
1992 fn parse_time32_millis() {
1993 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1995 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1996 assert_eq!(
1997 Time32MillisecondType::parse("12:10:01.123 AM"),
1998 Some(601_123)
1999 );
2000 assert_eq!(
2001 Time32MillisecondType::parse("12:10:01.123 am"),
2002 Some(601_123)
2003 );
2004 assert_eq!(
2005 Time32MillisecondType::parse("2:10:01.12 PM"),
2006 Some(51_001_120)
2007 );
2008 assert_eq!(
2009 Time32MillisecondType::parse("2:10:01.12 pm"),
2010 Some(51_001_120)
2011 );
2012 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2013 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2014 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2015 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2016 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2017 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2018 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2019 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2020 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2021 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2022 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2023 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2024
2025 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2027
2028 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2030
2031 assert_eq!(
2033 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2034 Some(7_801_100)
2035 );
2036 }
2037
2038 #[test]
2039 fn parse_time32_secs() {
2040 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2042 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2043 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2044 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2045 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2046 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2047 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2048 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2049 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2050 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2051 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2052 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2053 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2054
2055 assert_eq!(Time32SecondType::parse("1"), Some(1));
2057
2058 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2060
2061 assert_eq!(
2063 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2064 Some(7_801)
2065 );
2066 }
2067
2068 #[test]
2069 fn test_string_to_time_invalid() {
2070 let cases = [
2071 "25:00",
2072 "9:00:",
2073 "009:00",
2074 "09:0:00",
2075 "25:00:00",
2076 "13:00 AM",
2077 "13:00 PM",
2078 "12:00. AM",
2079 "09:0:00",
2080 "09:01:0",
2081 "09:01:1",
2082 "9:1:0",
2083 "09:01:0",
2084 "1:00.123",
2085 "1:00:00.123f",
2086 " 9:00:00",
2087 ":09:00",
2088 "T9:00:00",
2089 "AM",
2090 ];
2091 for case in cases {
2092 assert!(string_to_time(case).is_none(), "{case}");
2093 }
2094 }
2095
2096 #[test]
2097 fn test_string_to_time_chrono() {
2098 let cases = [
2099 ("1:00", "%H:%M"),
2100 ("12:00", "%H:%M"),
2101 ("13:00", "%H:%M"),
2102 ("24:00", "%H:%M"),
2103 ("1:00:00", "%H:%M:%S"),
2104 ("12:00:30", "%H:%M:%S"),
2105 ("13:00:59", "%H:%M:%S"),
2106 ("24:00:60", "%H:%M:%S"),
2107 ("09:00:00", "%H:%M:%S%.f"),
2108 ("0:00:30.123456", "%H:%M:%S%.f"),
2109 ("0:00 AM", "%I:%M %P"),
2110 ("1:00 AM", "%I:%M %P"),
2111 ("12:00 AM", "%I:%M %P"),
2112 ("13:00 AM", "%I:%M %P"),
2113 ("0:00 PM", "%I:%M %P"),
2114 ("1:00 PM", "%I:%M %P"),
2115 ("12:00 PM", "%I:%M %P"),
2116 ("13:00 PM", "%I:%M %P"),
2117 ("1:00 pM", "%I:%M %P"),
2118 ("1:00 Pm", "%I:%M %P"),
2119 ("1:00 aM", "%I:%M %P"),
2120 ("1:00 Am", "%I:%M %P"),
2121 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2122 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2123 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2124 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2125 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2126 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2127 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2128 ];
2129 for (s, format) in cases {
2130 let chrono = NaiveTime::parse_from_str(s, format).ok();
2131 let custom = string_to_time(s);
2132 assert_eq!(chrono, custom, "{s}");
2133 }
2134 }
2135
2136 #[test]
2137 fn test_parse_interval() {
2138 let config = IntervalParseConfig::new(IntervalUnit::Month);
2139
2140 assert_eq!(
2141 Interval::new(1i32, 0i32, 0i64),
2142 Interval::parse("1 month", &config).unwrap(),
2143 );
2144
2145 assert_eq!(
2146 Interval::new(2i32, 0i32, 0i64),
2147 Interval::parse("2 month", &config).unwrap(),
2148 );
2149
2150 assert_eq!(
2151 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2152 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2153 );
2154
2155 assert_eq!(
2156 Interval::new(0i32, 15i32, 0),
2157 Interval::parse("0.5 months", &config).unwrap(),
2158 );
2159
2160 assert_eq!(
2161 Interval::new(0i32, 15i32, 0),
2162 Interval::parse(".5 months", &config).unwrap(),
2163 );
2164
2165 assert_eq!(
2166 Interval::new(0i32, -15i32, 0),
2167 Interval::parse("-0.5 months", &config).unwrap(),
2168 );
2169
2170 assert_eq!(
2171 Interval::new(0i32, -15i32, 0),
2172 Interval::parse("-.5 months", &config).unwrap(),
2173 );
2174
2175 assert_eq!(
2176 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2177 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2178 );
2179
2180 assert_eq!(
2181 Interval::parse("1 centurys 1 month", &config)
2182 .unwrap_err()
2183 .to_string(),
2184 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2185 );
2186
2187 assert_eq!(
2188 Interval::new(37i32, 0i32, 0i64),
2189 Interval::parse("3 year 1 month", &config).unwrap(),
2190 );
2191
2192 assert_eq!(
2193 Interval::new(35i32, 0i32, 0i64),
2194 Interval::parse("3 year -1 month", &config).unwrap(),
2195 );
2196
2197 assert_eq!(
2198 Interval::new(-37i32, 0i32, 0i64),
2199 Interval::parse("-3 year -1 month", &config).unwrap(),
2200 );
2201
2202 assert_eq!(
2203 Interval::new(-35i32, 0i32, 0i64),
2204 Interval::parse("-3 year 1 month", &config).unwrap(),
2205 );
2206
2207 assert_eq!(
2208 Interval::new(0i32, 5i32, 0i64),
2209 Interval::parse("5 days", &config).unwrap(),
2210 );
2211
2212 assert_eq!(
2213 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2214 Interval::parse("7 days 3 hours", &config).unwrap(),
2215 );
2216
2217 assert_eq!(
2218 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2219 Interval::parse("7 days 5 minutes", &config).unwrap(),
2220 );
2221
2222 assert_eq!(
2223 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2224 Interval::parse("7 days -5 minutes", &config).unwrap(),
2225 );
2226
2227 assert_eq!(
2228 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2229 Interval::parse("-7 days 5 hours", &config).unwrap(),
2230 );
2231
2232 assert_eq!(
2233 Interval::new(
2234 0i32,
2235 -7i32,
2236 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2237 ),
2238 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2239 );
2240
2241 assert_eq!(
2242 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2243 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2244 );
2245
2246 assert_eq!(
2247 Interval::new(
2248 12i32,
2249 1i32,
2250 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2251 ),
2252 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2253 );
2254
2255 assert_eq!(
2256 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2257 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2258 );
2259
2260 assert_eq!(
2261 Interval::new(12i32, 1i32, 1000i64),
2262 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2263 );
2264
2265 assert_eq!(
2266 Interval::new(12i32, 1i32, 1i64),
2267 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2268 );
2269
2270 assert_eq!(
2271 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2272 Interval::parse("1 month -1 second", &config).unwrap(),
2273 );
2274
2275 assert_eq!(
2276 Interval::new(
2277 -13i32,
2278 -8i32,
2279 -NANOS_PER_HOUR
2280 - NANOS_PER_MINUTE
2281 - NANOS_PER_SECOND
2282 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2283 ),
2284 Interval::parse(
2285 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2286 &config
2287 )
2288 .unwrap(),
2289 );
2290
2291 assert_eq!(
2293 Interval::new(1, 0, 0),
2294 Interval::parse("1", &config).unwrap()
2295 );
2296 assert_eq!(
2297 Interval::new(42, 0, 0),
2298 Interval::parse("42", &config).unwrap()
2299 );
2300 assert_eq!(
2301 Interval::new(0, 0, 42_000_000_000),
2302 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2303 );
2304
2305 assert_eq!(
2307 Interval::new(1, 0, 0),
2308 Interval::parse("1 mon", &config).unwrap()
2309 );
2310 assert_eq!(
2311 Interval::new(1, 0, 0),
2312 Interval::parse("1 mons", &config).unwrap()
2313 );
2314 assert_eq!(
2315 Interval::new(0, 0, 1_000_000),
2316 Interval::parse("1 ms", &config).unwrap()
2317 );
2318 assert_eq!(
2319 Interval::new(0, 0, 1_000),
2320 Interval::parse("1 us", &config).unwrap()
2321 );
2322
2323 assert_eq!(
2325 Interval::new(0, 0, 1_000),
2326 Interval::parse("1us", &config).unwrap()
2327 );
2328 assert_eq!(
2329 Interval::new(0, 0, NANOS_PER_SECOND),
2330 Interval::parse("1s", &config).unwrap()
2331 );
2332 assert_eq!(
2333 Interval::new(1, 2, 10_864_000_000_000),
2334 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2335 );
2336
2337 assert_eq!(
2338 Interval::new(
2339 -13i32,
2340 -8i32,
2341 -NANOS_PER_HOUR
2342 - NANOS_PER_MINUTE
2343 - NANOS_PER_SECOND
2344 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2345 ),
2346 Interval::parse(
2347 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2348 &config
2349 )
2350 .unwrap(),
2351 );
2352
2353 assert_eq!(
2354 Interval::parse("1h s", &config).unwrap_err().to_string(),
2355 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2356 );
2357
2358 assert_eq!(
2359 Interval::parse("1XX", &config).unwrap_err().to_string(),
2360 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2361 );
2362 }
2363
2364 #[test]
2365 fn test_duplicate_interval_type() {
2366 let config = IntervalParseConfig::new(IntervalUnit::Month);
2367
2368 let err = Interval::parse("1 month 1 second 1 second", &config)
2369 .expect_err("parsing interval should have failed");
2370 assert_eq!(
2371 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2372 format!("{err:?}")
2373 );
2374
2375 let err = Interval::parse("1 century 2 centuries", &config)
2377 .expect_err("parsing interval should have failed");
2378 assert_eq!(
2379 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2380 format!("{err:?}")
2381 );
2382 }
2383
2384 #[test]
2385 fn test_interval_amount_parsing() {
2386 let result = IntervalAmount::from_str("123").unwrap();
2388 let expected = IntervalAmount::new(123, 0);
2389
2390 assert_eq!(result, expected);
2391
2392 let result = IntervalAmount::from_str("0.3").unwrap();
2394 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2395
2396 assert_eq!(result, expected);
2397
2398 let result = IntervalAmount::from_str("-3.5").unwrap();
2400 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2401
2402 assert_eq!(result, expected);
2403
2404 let result = IntervalAmount::from_str("3.");
2406 assert!(result.is_err());
2407
2408 let result = IntervalAmount::from_str("3.-5");
2410 assert!(result.is_err());
2411 }
2412
2413 #[test]
2414 fn test_interval_precision() {
2415 let config = IntervalParseConfig::new(IntervalUnit::Month);
2416
2417 let result = Interval::parse("100000.1 days", &config).unwrap();
2418 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2419
2420 assert_eq!(result, expected);
2421 }
2422
2423 #[test]
2424 fn test_interval_addition() {
2425 let start = Interval::new(1, 2, 3);
2427 let expected = Interval::new(4921, 2, 3);
2428
2429 let result = start
2430 .add(
2431 IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2432 IntervalUnit::Century,
2433 )
2434 .unwrap();
2435
2436 assert_eq!(result, expected);
2437
2438 let start = Interval::new(1, 2, 3);
2440 let expected = Interval::new(1231, 2, 3);
2441
2442 let result = start
2443 .add(
2444 IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2445 IntervalUnit::Decade,
2446 )
2447 .unwrap();
2448
2449 assert_eq!(result, expected);
2450
2451 let start = Interval::new(1, 2, 3);
2453 let expected = Interval::new(364, 2, 3);
2454
2455 let result = start
2456 .add(
2457 IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2458 IntervalUnit::Year,
2459 )
2460 .unwrap();
2461
2462 assert_eq!(result, expected);
2463
2464 let start = Interval::new(1, 2, 3);
2466 let expected = Interval::new(2, 17, 3);
2467
2468 let result = start
2469 .add(
2470 IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2471 IntervalUnit::Month,
2472 )
2473 .unwrap();
2474
2475 assert_eq!(result, expected);
2476
2477 let start = Interval::new(1, 25, 3);
2479 let expected = Interval::new(1, 11, 3);
2480
2481 let result = start
2482 .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2483 .unwrap();
2484
2485 assert_eq!(result, expected);
2486
2487 let start = Interval::new(12, 15, 3);
2489 let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2490
2491 let result = start
2492 .add(
2493 IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2494 IntervalUnit::Day,
2495 )
2496 .unwrap();
2497
2498 assert_eq!(result, expected);
2499
2500 let start = Interval::new(1, 2, 3);
2502 let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2503
2504 let result = start
2505 .add(
2506 IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2507 IntervalUnit::Hour,
2508 )
2509 .unwrap();
2510
2511 assert_eq!(result, expected);
2512
2513 let start = Interval::new(0, 0, -3);
2515 let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2516
2517 let result = start
2518 .add(
2519 IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2520 IntervalUnit::Minute,
2521 )
2522 .unwrap();
2523
2524 assert_eq!(result, expected);
2525 }
2526
#[test]
/// Parsing a 1677 timestamp must fail, and the error text must end with
/// `ERR_NANOSECONDS_NOT_SUPPORTED`.
///
/// NOTE(review): presumably this instant precedes the earliest value
/// representable as i64 nanoseconds since the epoch — confirm against
/// `parse_timestamp`.
fn string_to_timestamp_old() {
    // `unwrap_err` panics if parsing unexpectedly succeeds.
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2533
2534 #[test]
2535 fn test_parse_decimal_with_parameter() {
2536 let tests = [
2537 ("0", 0i128),
2538 ("123.123", 123123i128),
2539 ("123.1234", 123123i128),
2540 ("123.1", 123100i128),
2541 ("123", 123000i128),
2542 ("-123.123", -123123i128),
2543 ("-123.1234", -123123i128),
2544 ("-123.1", -123100i128),
2545 ("-123", -123000i128),
2546 ("0.0000123", 0i128),
2547 ("12.", 12000i128),
2548 ("-12.", -12000i128),
2549 ("00.1", 100i128),
2550 ("-00.1", -100i128),
2551 ("12345678912345678.1234", 12345678912345678123i128),
2552 ("-12345678912345678.1234", -12345678912345678123i128),
2553 ("99999999999999999.999", 99999999999999999999i128),
2554 ("-99999999999999999.999", -99999999999999999999i128),
2555 (".123", 123i128),
2556 ("-.123", -123i128),
2557 ("123.", 123000i128),
2558 ("-123.", -123000i128),
2559 ];
2560 for (s, i) in tests {
2561 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2562 assert_eq!(i, result_128.unwrap());
2563 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2564 assert_eq!(i256::from_i128(i), result_256.unwrap());
2565 }
2566
2567 let e_notation_tests = [
2568 ("1.23e3", "1230.0", 2),
2569 ("5.6714e+2", "567.14", 4),
2570 ("5.6714e-2", "0.056714", 4),
2571 ("5.6714e-2", "0.056714", 3),
2572 ("5.6741214125e2", "567.41214125", 4),
2573 ("8.91E4", "89100.0", 2),
2574 ("3.14E+5", "314000.0", 2),
2575 ("2.718e0", "2.718", 2),
2576 ("9.999999e-1", "0.9999999", 4),
2577 ("1.23e+3", "1230", 2),
2578 ("1.234559e+3", "1234.559", 2),
2579 ("1.00E-10", "0.0000000001", 11),
2580 ("1.23e-4", "0.000123", 2),
2581 ("9.876e7", "98760000.0", 2),
2582 ("5.432E+8", "543200000.0", 10),
2583 ("1.234567e9", "1234567000.0", 2),
2584 ("1.234567e2", "123.45670000", 2),
2585 ("4749.3e-5", "0.047493", 10),
2586 ("4749.3e+5", "474930000", 10),
2587 ("4749.3e-5", "0.047493", 1),
2588 ("4749.3e+5", "474930000", 1),
2589 ("0E-8", "0", 10),
2590 ("0E+6", "0", 10),
2591 ("1E-8", "0.00000001", 10),
2592 ("12E+6", "12000000", 10),
2593 ("12E-6", "0.000012", 10),
2594 ("0.1e-6", "0.0000001", 10),
2595 ("0.1e+6", "100000", 10),
2596 ("0.12e-6", "0.00000012", 10),
2597 ("0.12e+6", "120000", 10),
2598 ("000000000001e0", "000000000001", 3),
2599 ("000001.1034567002e0", "000001.1034567002", 3),
2600 ("1.234e16", "12340000000000000", 0),
2601 ("123.4e16", "1234000000000000000", 0),
2602 ];
2603 for (e, d, scale) in e_notation_tests {
2604 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2605 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2606 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2607 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2608 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2609 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2610 }
2611 let can_not_parse_tests = [
2612 "123,123",
2613 ".",
2614 "123.123.123",
2615 "",
2616 "+",
2617 "-",
2618 "e",
2619 "1.3e+e3",
2620 "5.6714ee-2",
2621 "4.11ee-+4",
2622 "4.11e++4",
2623 "1.1e.12",
2624 "1.23e+3.",
2625 "1.23e+3.1",
2626 ];
2627 for s in can_not_parse_tests {
2628 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2629 assert_eq!(
2630 format!("Parser error: can't parse the string value {s} to decimal"),
2631 result_128.unwrap_err().to_string()
2632 );
2633 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2634 assert_eq!(
2635 format!("Parser error: can't parse the string value {s} to decimal"),
2636 result_256.unwrap_err().to_string()
2637 );
2638 }
2639 let overflow_parse_tests = [
2640 ("12345678", 3),
2641 ("1.2345678e7", 3),
2642 ("12345678.9", 3),
2643 ("1.23456789e+7", 3),
2644 ("99999999.99", 3),
2645 ("9.999999999e7", 3),
2646 ("12345678908765.123456", 3),
2647 ("123456789087651234.56e-4", 3),
2648 ("1234560000000", 0),
2649 ("1.23456e12", 0),
2650 ];
2651 for (s, scale) in overflow_parse_tests {
2652 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2653 let expected_128 = "Parser error: parse decimal overflow";
2654 let actual_128 = result_128.unwrap_err().to_string();
2655
2656 assert!(
2657 actual_128.contains(expected_128),
2658 "actual: '{actual_128}', expected: '{expected_128}'"
2659 );
2660
2661 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2662 let expected_256 = "Parser error: parse decimal overflow";
2663 let actual_256 = result_256.unwrap_err().to_string();
2664
2665 assert!(
2666 actual_256.contains(expected_256),
2667 "actual: '{actual_256}', expected: '{expected_256}'"
2668 );
2669 }
2670
2671 let edge_tests_128 = [
2672 (
2673 "99999999999999999999999999999999999999",
2674 99999999999999999999999999999999999999i128,
2675 0,
2676 ),
2677 (
2678 "999999999999999999999999999999999999.99",
2679 99999999999999999999999999999999999999i128,
2680 2,
2681 ),
2682 (
2683 "9999999999999999999999999.9999999999999",
2684 99999999999999999999999999999999999999i128,
2685 13,
2686 ),
2687 (
2688 "9999999999999999999999999",
2689 99999999999999999999999990000000000000i128,
2690 13,
2691 ),
2692 (
2693 "0.99999999999999999999999999999999999999",
2694 99999999999999999999999999999999999999i128,
2695 38,
2696 ),
2697 (
2698 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2699 0i128,
2700 15,
2701 ),
2702 (
2703 "1.016744e-320",
2704 0i128,
2705 15,
2706 ),
2707 (
2708 "-1e3",
2709 -1000000000i128,
2710 6,
2711 ),
2712 (
2713 "+1e3",
2714 1000000000i128,
2715 6,
2716 ),
2717 (
2718 "-1e31",
2719 -10000000000000000000000000000000000000i128,
2720 6,
2721 ),
2722 ];
2723 for (s, i, scale) in edge_tests_128 {
2724 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2725 assert_eq!(i, result_128.unwrap());
2726 }
2727 let edge_tests_256 = [
2728 (
2729 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2730 i256::from_string(
2731 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2732 )
2733 .unwrap(),
2734 0,
2735 ),
2736 (
2737 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2738 i256::from_string(
2739 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2740 )
2741 .unwrap(),
2742 4,
2743 ),
2744 (
2745 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2746 i256::from_string(
2747 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2748 )
2749 .unwrap(),
2750 26,
2751 ),
2752 (
2753 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2754 i256::from_string(
2755 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2756 )
2757 .unwrap(),
2758 26,
2759 ),
2760 (
2761 "99999999999999999999999999999999999999999999999999",
2762 i256::from_string(
2763 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2764 )
2765 .unwrap(),
2766 26,
2767 ),
2768 (
2769 "9.9999999999999999999999999999999999999999999999999e+49",
2770 i256::from_string(
2771 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2772 )
2773 .unwrap(),
2774 26,
2775 ),
2776 ];
2777 for (s, i, scale) in edge_tests_256 {
2778 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2779 assert_eq!(i, result.unwrap());
2780 }
2781 }
2782
#[test]
/// Empty input, and a lone sign for the numeric types, must parse to `None`
/// rather than to a value.
fn test_parse_empty() {
    // The numeric parsers are checked against the empty string first and then
    // against a bare "+".
    for input in ["", "+"] {
        assert_eq!(Int32Type::parse(input), None);
        assert_eq!(Int64Type::parse(input), None);
        assert_eq!(UInt32Type::parse(input), None);
        assert_eq!(UInt64Type::parse(input), None);
        assert_eq!(Float32Type::parse(input), None);
        assert_eq!(Float64Type::parse(input), None);
    }

    // Temporal parsers are only checked against the empty string.
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2800
#[test]
/// With a config built from `IntervalUnit::Second`, the unit-less input "1"
/// must parse to exactly one second: zero months, zero days, and
/// `NANOS_PER_SECOND` nanoseconds.
fn test_parse_interval_month_day_nano_config() {
    let seconds_config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", seconds_config).unwrap();

    assert_eq!(parsed.months, 0);
    assert_eq!(parsed.days, 0);
    assert_eq!(parsed.nanoseconds, NANOS_PER_SECOND);
}
2812}