arrow_cast/
display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Functions for printing array values as human-readable strings.
19//!
20//! This is often used for debugging or logging purposes.
21//!
//! See the [`pretty`] module for additional functions for
//! record batch pretty printing.
24//!
25//! [`pretty`]: crate::pretty
26use std::fmt::{Display, Formatter, Write};
27use std::ops::Range;
28
29use arrow_array::cast::*;
30use arrow_array::temporal_conversions::*;
31use arrow_array::timezone::Tz;
32use arrow_array::types::*;
33use arrow_array::*;
34use arrow_buffer::ArrowNativeType;
35use arrow_schema::*;
36use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc};
37use lexical_core::FormattedSize;
38
/// An optional, borrowed format string for temporal values
///
/// `None` leaves the choice of representation to the formatter.
type TimeFormat<'a> = Option<&'a str>;

/// The textual representations available for durations
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
    /// ISO 8601, e.g. `P198DT72932.972880S`
    ISO8601,
    /// A human readable form, e.g. `198 days 16 hours 34 mins 15.407810000 secs`
    Pretty,
}

/// Settings controlling how array values are rendered as text
///
/// Unless overridden, nulls are rendered as `""` and temporal types are
/// formatted according to RFC3339
///
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FormatOptions<'a> {
    /// When `true`, formatting errors are written to the output
    /// rather than being turned into a [`std::fmt::Error`]
    safe: bool,
    /// Text written in place of a null
    null: &'a str,
    /// Format string for date arrays
    date_format: TimeFormat<'a>,
    /// Format string for DateTime arrays
    datetime_format: TimeFormat<'a>,
    /// Format string for timestamp arrays
    timestamp_format: TimeFormat<'a>,
    /// Format string for timestamp with timezone arrays
    timestamp_tz_format: TimeFormat<'a>,
    /// Format string for time arrays
    time_format: TimeFormat<'a>,
    /// How durations are rendered
    duration_format: DurationFormat,
}

impl Default for FormatOptions<'_> {
    /// Same as [`FormatOptions::new`]
    fn default() -> Self {
        FormatOptions::new()
    }
}

impl<'a> FormatOptions<'a> {
    /// Creates the default set of format options
    ///
    /// Errors are displayed in the output, nulls are the empty string, no
    /// temporal format strings are set and durations use ISO 8601.
    pub const fn new() -> Self {
        FormatOptions {
            duration_format: DurationFormat::ISO8601,
            time_format: None,
            timestamp_tz_format: None,
            timestamp_format: None,
            datetime_format: None,
            date_format: None,
            null: "",
            safe: true,
        }
    }

    /// When `safe` is `true`, formatting errors are written to the output
    /// rather than being turned into a [`std::fmt::Error`]
    pub const fn with_display_error(self, safe: bool) -> Self {
        Self { safe, ..self }
    }

    /// Sets the string written in place of a null
    ///
    /// Defaults to `""`
    pub const fn with_null(mut self, null: &'a str) -> Self {
        self.null = null;
        self
    }

    /// Sets the format used for [`DataType::Date32`] columns
    pub const fn with_date_format(mut self, date_format: Option<&'a str>) -> Self {
        self.date_format = date_format;
        self
    }

    /// Sets the format used for [`DataType::Date64`] columns
    pub const fn with_datetime_format(mut self, datetime_format: Option<&'a str>) -> Self {
        self.datetime_format = datetime_format;
        self
    }

    /// Sets the format used for [`DataType::Timestamp`] columns without a timezone
    pub const fn with_timestamp_format(mut self, timestamp_format: Option<&'a str>) -> Self {
        self.timestamp_format = timestamp_format;
        self
    }

    /// Sets the format used for [`DataType::Timestamp`] columns with a timezone
    pub const fn with_timestamp_tz_format(mut self, timestamp_tz_format: Option<&'a str>) -> Self {
        self.timestamp_tz_format = timestamp_tz_format;
        self
    }

    /// Sets the format used for [`DataType::Time32`] and [`DataType::Time64`] columns
    pub const fn with_time_format(mut self, time_format: Option<&'a str>) -> Self {
        self.time_format = time_format;
        self
    }

    /// Sets the format used for duration columns
    ///
    /// Defaults to [`DurationFormat::ISO8601`]
    pub const fn with_duration_format(mut self, duration_format: DurationFormat) -> Self {
        self.duration_format = duration_format;
        self
    }
}
162
163/// Implements [`Display`] for a specific array value
164pub struct ValueFormatter<'a> {
165    idx: usize,
166    formatter: &'a ArrayFormatter<'a>,
167}
168
169impl ValueFormatter<'_> {
170    /// Writes this value to the provided [`Write`]
171    ///
172    /// Note: this ignores [`FormatOptions::with_display_error`] and
173    /// will return an error on formatting issue
174    pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> {
175        match self.formatter.format.write(self.idx, s) {
176            Ok(_) => Ok(()),
177            Err(FormatError::Arrow(e)) => Err(e),
178            Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
179        }
180    }
181
182    /// Fallibly converts this to a string
183    pub fn try_to_string(&self) -> Result<String, ArrowError> {
184        let mut s = String::new();
185        self.write(&mut s)?;
186        Ok(s)
187    }
188}
189
190impl Display for ValueFormatter<'_> {
191    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
192        match self.formatter.format.write(self.idx, f) {
193            Ok(()) => Ok(()),
194            Err(FormatError::Arrow(e)) if self.formatter.safe => {
195                write!(f, "ERROR: {e}")
196            }
197            Err(_) => Err(std::fmt::Error),
198        }
199    }
200}
201
202/// A string formatter for an [`Array`]
203///
204/// This can be used with [`std::write`] to write type-erased `dyn Array`
205///
206/// ```
207/// # use std::fmt::{Display, Formatter, Write};
208/// # use arrow_array::{Array, ArrayRef, Int32Array};
209/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
210/// # use arrow_schema::ArrowError;
211/// struct MyContainer {
212///     values: ArrayRef,
213/// }
214///
215/// impl Display for MyContainer {
216///     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
217///         let options = FormatOptions::default();
218///         let formatter = ArrayFormatter::try_new(self.values.as_ref(), &options)
219///             .map_err(|_| std::fmt::Error)?;
220///
221///         let mut iter = 0..self.values.len();
222///         if let Some(idx) = iter.next() {
223///             write!(f, "{}", formatter.value(idx))?;
224///         }
225///         for idx in iter {
226///             write!(f, ", {}", formatter.value(idx))?;
227///         }
228///         Ok(())
229///     }
230/// }
231/// ```
232///
233/// [`ValueFormatter::write`] can also be used to get a semantic error, instead of the
234/// opaque [`std::fmt::Error`]
235///
236/// ```
237/// # use std::fmt::Write;
238/// # use arrow_array::Array;
239/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
240/// # use arrow_schema::ArrowError;
241/// fn format_array(
242///     f: &mut dyn Write,
243///     array: &dyn Array,
244///     options: &FormatOptions,
245/// ) -> Result<(), ArrowError> {
246///     let formatter = ArrayFormatter::try_new(array, options)?;
247///     for i in 0..array.len() {
248///         formatter.value(i).write(f)?
249///     }
250///     Ok(())
251/// }
252/// ```
253///
254pub struct ArrayFormatter<'a> {
255    format: Box<dyn DisplayIndex + 'a>,
256    safe: bool,
257}
258
259impl<'a> ArrayFormatter<'a> {
260    /// Returns an [`ArrayFormatter`] that can be used to format `array`
261    ///
262    /// This returns an error if an array of the given data type cannot be formatted
263    pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
264        Ok(Self {
265            format: make_formatter(array, options)?,
266            safe: options.safe,
267        })
268    }
269
270    /// Returns a [`ValueFormatter`] that implements [`Display`] for
271    /// the value of the array at `idx`
272    pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
273        ValueFormatter {
274            formatter: self,
275            idx,
276        }
277    }
278}
279
/// Builds the boxed [`DisplayIndex`] used to format the values of `array`
///
/// Dispatches on the array's data type, downcasts to the concrete array type
/// and hands it to [`array_format`] together with `options`.
///
/// # Errors
///
/// Returns [`ArrowError::NotYetImplemented`] for data types without a branch
/// below, and propagates any error raised while preparing the formatter.
fn make_formatter<'a>(
    array: &'a dyn Array,
    options: &FormatOptions<'a>,
) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
    downcast_primitive_array! {
        // Primitive types are handled by the macro's own downcast
        array => array_format(array, options),
        DataType::Null => array_format(as_null_array(array), options),
        DataType::Boolean => array_format(as_boolean_array(array), options),
        DataType::Utf8 => array_format(array.as_string::<i32>(), options),
        DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
        DataType::Utf8View => array_format(array.as_string_view(), options),
        DataType::Binary => array_format(array.as_binary::<i32>(), options),
        DataType::BinaryView => array_format(array.as_binary_view(), options),
        DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
        DataType::FixedSizeBinary(_) => {
            let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
            array_format(a, options)
        }
        DataType::Dictionary(_, _) => downcast_dictionary_array! {
            array => array_format(array, options),
            _ => unreachable!()
        }
        DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
        DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
        DataType::FixedSizeList(_, _) => {
            let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
            array_format(a, options)
        }
        DataType::Struct(_) => array_format(as_struct_array(array), options),
        DataType::Map(_, _) => array_format(as_map_array(array), options),
        DataType::Union(_, _) => array_format(as_union_array(array), options),
        DataType::RunEndEncoded(_, _) => downcast_run_array! {
            array => array_format(array, options),
            _ => unreachable!()
        },
        // Anything else has no formatter yet
        d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
    }
}
318
319/// Either an [`ArrowError`] or [`std::fmt::Error`]
320enum FormatError {
321    Format(std::fmt::Error),
322    Arrow(ArrowError),
323}
324
325type FormatResult = Result<(), FormatError>;
326
327impl From<std::fmt::Error> for FormatError {
328    fn from(value: std::fmt::Error) -> Self {
329        Self::Format(value)
330    }
331}
332
333impl From<ArrowError> for FormatError {
334    fn from(value: ArrowError) -> Self {
335        Self::Arrow(value)
336    }
337}
338
/// [`Display`] but accepting an index
///
/// Implementations write the textual form of the value at `idx` to `f`.
trait DisplayIndex {
    /// Writes the value at position `idx` to `f`
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
}

/// [`DisplayIndex`] with additional state
///
/// The state is produced by [`DisplayIndexState::prepare`] from the
/// [`FormatOptions`] and passed back to every [`DisplayIndexState::write`] call.
trait DisplayIndexState<'a> {
    /// Data produced by `prepare` and consumed by `write`
    type State;

    /// Derives the formatting state from `options`, or fails with an [`ArrowError`]
    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>;

    /// Writes the value at position `idx` to `f` using the prepared `state`
    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult;
}
352
353impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T {
354    type State = ();
355
356    fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
357        Ok(())
358    }
359
360    fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
361        DisplayIndex::write(self, idx, f)
362    }
363}
364
365struct ArrayFormat<'a, F: DisplayIndexState<'a>> {
366    state: F::State,
367    array: F,
368    null: &'a str,
369}
370
371fn array_format<'a, F>(
372    array: F,
373    options: &FormatOptions<'a>,
374) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError>
375where
376    F: DisplayIndexState<'a> + Array + 'a,
377{
378    let state = array.prepare(options)?;
379    Ok(Box::new(ArrayFormat {
380        state,
381        array,
382        null: options.null,
383    }))
384}
385
386impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> {
387    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
388        if self.array.is_null(idx) {
389            if !self.null.is_empty() {
390                f.write_str(self.null)?
391            }
392            return Ok(());
393        }
394        DisplayIndexState::write(&self.array, &self.state, idx, f)
395    }
396}
397
398impl DisplayIndex for &BooleanArray {
399    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
400        write!(f, "{}", self.value(idx))?;
401        Ok(())
402    }
403}
404
405impl<'a> DisplayIndexState<'a> for &'a NullArray {
406    type State = &'a str;
407
408    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
409        Ok(options.null)
410    }
411
412    fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult {
413        f.write_str(state)?;
414        Ok(())
415    }
416}
417
/// Implements [`DisplayIndex`] for `&PrimitiveArray<T>` for each listed type,
/// formatting the value with `lexical_core` into a stack buffer
macro_rules! primitive_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                // Buffer sized by the native type's `FORMATTED_SIZE`
                let mut buffer = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
                let b = lexical_core::write(value, &mut buffer);
                // SAFETY: lexical_core produces valid UTF-8
                let s = unsafe { std::str::from_utf8_unchecked(b) };
                f.write_str(s)?;
                Ok(())
            }
        })+
    };
}

/// Implements [`DisplayIndex`] for `&PrimitiveArray<T>` for each listed
/// floating point type, formatting the value with a `ryu` buffer
macro_rules! primitive_display_float {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let mut buffer = ryu::Buffer::new();
                f.write_str(buffer.format(value))?;
                Ok(())
            }
        })+
    };
}

// Signed integers, unsigned integers, then 32/64-bit floats
primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
primitive_display_float!(Float32Type, Float64Type);
452
453impl DisplayIndex for &PrimitiveArray<Float16Type> {
454    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
455        write!(f, "{}", self.value(idx))?;
456        Ok(())
457    }
458}
459
/// Implements [`DisplayIndexState`] for `&PrimitiveArray<T>` for each listed
/// decimal type
///
/// The prepared state is the array's `(precision, scale)` pair, which is
/// passed to the type's `format_decimal` for every value.
macro_rules! decimal_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = (u8, i8);

            fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                // Read precision and scale once instead of on every write
                Ok((self.precision(), self.scale()))
            }

            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
                Ok(())
            }
        })+
    };
}

decimal_display!(Decimal128Type, Decimal256Type);
478
479fn write_timestamp(
480    f: &mut dyn Write,
481    naive: NaiveDateTime,
482    timezone: Option<Tz>,
483    format: Option<&str>,
484) -> FormatResult {
485    match timezone {
486        Some(tz) => {
487            let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
488            match format {
489                Some(s) => write!(f, "{}", date.format(s))?,
490                None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
491            }
492        }
493        None => match format {
494            Some(s) => write!(f, "{}", naive.format(s))?,
495            None => write!(f, "{naive:?}")?,
496        },
497    }
498    Ok(())
499}
500
501macro_rules! timestamp_display {
502    ($($t:ty),+) => {
503        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
504            type State = (Option<Tz>, TimeFormat<'a>);
505
506            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
507                match self.data_type() {
508                    DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)),
509                    DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)),
510                    _ => unreachable!(),
511                }
512            }
513
514            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
515                let value = self.value(idx);
516                let naive = as_datetime::<$t>(value).ok_or_else(|| {
517                    ArrowError::CastError(format!(
518                        "Failed to convert {} to datetime for {}",
519                        value,
520                        self.data_type()
521                    ))
522                })?;
523
524                write_timestamp(f, naive, s.0, s.1.clone())
525            }
526        })+
527    };
528}
529
530timestamp_display!(
531    TimestampSecondType,
532    TimestampMillisecondType,
533    TimestampMicrosecondType,
534    TimestampNanosecondType
535);
536
/// Implements [`DisplayIndexState`] for `&PrimitiveArray<$t>`
///
/// * `$convert` - function turning the raw value into a chrono value,
///   returning `None` when the value cannot be represented
/// * `$format` - name of the [`FormatOptions`] field holding the format string
/// * `$t` - the arrow primitive type
///
/// Values are written with the configured format string when one is set and
/// with their `Debug` representation otherwise. A failed conversion yields an
/// [`ArrowError::CastError`].
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                // `as _` adapts the native value to whatever integer type `$convert` takes
                let naive = $convert(value as _).ok_or_else(|| {
                    ArrowError::CastError(format!(
                        "Failed to convert {} to temporal for {}",
                        value,
                        self.data_type()
                    ))
                })?;

                match fmt {
                    Some(s) => write!(f, "{}", naive.format(s))?,
                    None => write!(f, "{naive:?}")?,
                }
                Ok(())
            }
        }
    };
}
565
566#[inline]
567fn date32_to_date(value: i32) -> Option<NaiveDate> {
568    Some(date32_to_datetime(value)?.date())
569}
570
// Date and time-of-day types: each pairs its conversion function with the
// `FormatOptions` field that holds its format string
temporal_display!(date32_to_date, date_format, Date32Type);
temporal_display!(date64_to_datetime, datetime_format, Date64Type);
temporal_display!(time32s_to_time, time_format, Time32SecondType);
temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
577
/// Implements [`DisplayIndexState`] for `&PrimitiveArray<$t>` duration arrays
///
/// * `$convert` - function turning the raw value into a displayable duration,
///   used for [`DurationFormat::ISO8601`]
/// * `$t` - the arrow duration type
/// * `$scale` - forwarded to `duration_fmt!` for [`DurationFormat::Pretty`]
///
/// The prepared state is the [`DurationFormat`] taken from the options.
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                match fmt {
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
                    DurationFormat::Pretty => duration_fmt!(f, v, $scale)?,
                }
                Ok(())
            }
        }
    };
}
598
/// Writes a duration to `$f` as `D days H hours M mins S secs`
///
/// `$v` is the raw duration value and the last argument is the number of
/// sub-second decimal digits it carries (`0` for whole seconds). The expansion
/// evaluates to the result of the final `write!`.
macro_rules! duration_fmt {
    // Whole seconds: no fractional part is printed
    ($f:ident, $v:expr, 0) => {{
        let total_secs = $v;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;
        let days = total_hours / 24;

        // `%` truncates toward zero, matching `x - (x / n) * n`
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    // Sub-second units: the fraction is zero-padded to `$scale` digits
    ($f:ident, $v:expr, $scale:tt) => {{
        let raw = $v;
        let unit = 10_i64.pow($scale);
        let total_secs = raw / unit;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;
        let days = total_hours / 24;

        let fraction = raw % unit;
        let secs = total_secs % 60;
        let mins = total_mins % 60;
        let hours = total_hours % 24;
        if fraction.is_negative() {
            // Print the sign once, in front of the seconds, and use magnitudes after it
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                fraction.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, fraction
            )
        }
    }};
}
644
// The trailing literal is the `$scale` handed to `duration_fmt!`: the number
// of sub-second decimal digits for the unit (0 for whole seconds)
duration_display!(duration_s_to_duration, DurationSecondType, 0);
duration_display!(duration_ms_to_duration, DurationMillisecondType, 3);
duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
649
650impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
651    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
652        let interval = self.value(idx) as f64;
653        let years = (interval / 12_f64).floor();
654        let month = interval - (years * 12_f64);
655
656        write!(f, "{years} years {month} mons",)?;
657        Ok(())
658    }
659}
660
661impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
662    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
663        let value = self.value(idx);
664        let mut prefix = "";
665
666        if value.days != 0 {
667            write!(f, "{prefix}{} days", value.days)?;
668            prefix = " ";
669        }
670
671        if value.milliseconds != 0 {
672            let millis_fmt = MillisecondsFormatter {
673                milliseconds: value.milliseconds,
674                prefix,
675            };
676
677            f.write_fmt(format_args!("{millis_fmt}"))?;
678        }
679
680        Ok(())
681    }
682}
683
684impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
685    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
686        let value = self.value(idx);
687        let mut prefix = "";
688
689        if value.months != 0 {
690            write!(f, "{prefix}{} mons", value.months)?;
691            prefix = " ";
692        }
693
694        if value.days != 0 {
695            write!(f, "{prefix}{} days", value.days)?;
696            prefix = " ";
697        }
698
699        if value.nanoseconds != 0 {
700            let nano_fmt = NanosecondsFormatter {
701                nanoseconds: value.nanoseconds,
702                prefix,
703            };
704            f.write_fmt(format_args!("{nano_fmt}"))?;
705        }
706
707        Ok(())
708    }
709}
710
/// Displays a nanosecond count as `H hours M mins S.NNNNNNNNN secs`
///
/// Components equal to zero are left out; `prefix` is written before the
/// first component that is emitted.
struct NanosecondsFormatter<'a> {
    nanoseconds: i64,
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const NANOS_PER_SEC: i64 = 1_000_000_000;

        let total_secs = self.nanoseconds / NANOS_PER_SEC;
        let total_mins = total_secs / 60;
        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let fraction = self.nanoseconds % NANOS_PER_SEC;

        // Starts as the caller's prefix, becomes a single space once anything is written
        let mut sep = self.prefix;

        if hours != 0 {
            write!(f, "{sep}{hours} hours")?;
            sep = " ";
        }

        if mins != 0 {
            write!(f, "{sep}{mins} mins")?;
            sep = " ";
        }

        if secs == 0 && fraction == 0 {
            return Ok(());
        }

        // One sign in front of the seconds covers both the whole and fractional part
        let sign = if secs < 0 || fraction < 0 { "-" } else { "" };
        write!(f, "{sep}{sign}{}.{:09} secs", secs.abs(), fraction.abs())
    }
}
753
/// Displays a millisecond count as `H hours M mins S.mmm secs`
///
/// Components equal to zero are left out; `prefix` is written before the
/// first component that is emitted.
struct MillisecondsFormatter<'a> {
    milliseconds: i32,
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const MILLIS_PER_SEC: i32 = 1_000;

        let total_secs = self.milliseconds / MILLIS_PER_SEC;
        let total_mins = total_secs / 60;
        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let fraction = self.milliseconds % MILLIS_PER_SEC;

        // Starts as the caller's prefix, becomes a single space once anything is written
        let mut sep = self.prefix;

        if hours != 0 {
            write!(f, "{sep}{hours} hours")?;
            sep = " ";
        }

        if mins != 0 {
            write!(f, "{sep}{mins} mins")?;
            sep = " ";
        }

        if secs == 0 && fraction == 0 {
            return Ok(());
        }

        // One sign in front of the seconds covers both the whole and fractional part
        let sign = if secs < 0 || fraction < 0 { "-" } else { "" };
        write!(f, "{sep}{sign}{}.{:03} secs", secs.abs(), fraction.abs())
    }
}
801
802impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> {
803    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
804        write!(f, "{}", self.value(idx))?;
805        Ok(())
806    }
807}
808
809impl DisplayIndex for &StringViewArray {
810    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
811        write!(f, "{}", self.value(idx))?;
812        Ok(())
813    }
814}
815
816impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> {
817    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
818        let v = self.value(idx);
819        for byte in v {
820            write!(f, "{byte:02x}")?;
821        }
822        Ok(())
823    }
824}
825
826impl DisplayIndex for &BinaryViewArray {
827    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
828        let v = self.value(idx);
829        for byte in v {
830            write!(f, "{byte:02x}")?;
831        }
832        Ok(())
833    }
834}
835
836impl DisplayIndex for &FixedSizeBinaryArray {
837    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
838        let v = self.value(idx);
839        for byte in v {
840            write!(f, "{byte:02x}")?;
841        }
842        Ok(())
843    }
844}
845
846impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> {
847    type State = Box<dyn DisplayIndex + 'a>;
848
849    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
850        make_formatter(self.values().as_ref(), options)
851    }
852
853    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
854        let value_idx = self.keys().values()[idx].as_usize();
855        s.as_ref().write(value_idx, f)
856    }
857}
858
859impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
860    type State = Box<dyn DisplayIndex + 'a>;
861
862    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
863        make_formatter(self.values().as_ref(), options)
864    }
865
866    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
867        let value_idx = self.get_physical_index(idx);
868        s.as_ref().write(value_idx, f)
869    }
870}
871
872fn write_list(
873    f: &mut dyn Write,
874    mut range: Range<usize>,
875    values: &dyn DisplayIndex,
876) -> FormatResult {
877    f.write_char('[')?;
878    if let Some(idx) = range.next() {
879        values.write(idx, f)?;
880    }
881    for idx in range {
882        write!(f, ", ")?;
883        values.write(idx, f)?;
884    }
885    f.write_char(']')?;
886    Ok(())
887}
888
889impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
890    type State = Box<dyn DisplayIndex + 'a>;
891
892    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
893        make_formatter(self.values().as_ref(), options)
894    }
895
896    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
897        let offsets = self.value_offsets();
898        let end = offsets[idx + 1].as_usize();
899        let start = offsets[idx].as_usize();
900        write_list(f, start..end, s.as_ref())
901    }
902}
903
904impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
905    type State = (usize, Box<dyn DisplayIndex + 'a>);
906
907    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
908        let values = make_formatter(self.values().as_ref(), options)?;
909        let length = self.value_length();
910        Ok((length as usize, values))
911    }
912
913    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
914        let start = idx * s.0;
915        let end = start + s.0;
916        write_list(f, start..end, s.1.as_ref())
917    }
918}
919
920/// Pairs a boxed [`DisplayIndex`] with its field name
921type FieldDisplay<'a> = (&'a str, Box<dyn DisplayIndex + 'a>);
922
923impl<'a> DisplayIndexState<'a> for &'a StructArray {
924    type State = Vec<FieldDisplay<'a>>;
925
926    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
927        let fields = match (*self).data_type() {
928            DataType::Struct(f) => f,
929            _ => unreachable!(),
930        };
931
932        self.columns()
933            .iter()
934            .zip(fields)
935            .map(|(a, f)| {
936                let format = make_formatter(a.as_ref(), options)?;
937                Ok((f.name().as_str(), format))
938            })
939            .collect()
940    }
941
942    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
943        let mut iter = s.iter();
944        f.write_char('{')?;
945        if let Some((name, display)) = iter.next() {
946            write!(f, "{name}: ")?;
947            display.as_ref().write(idx, f)?;
948        }
949        for (name, display) in iter {
950            write!(f, ", {name}: ")?;
951            display.as_ref().write(idx, f)?;
952        }
953        f.write_char('}')?;
954        Ok(())
955    }
956}
957
958impl<'a> DisplayIndexState<'a> for &'a MapArray {
959    type State = (Box<dyn DisplayIndex + 'a>, Box<dyn DisplayIndex + 'a>);
960
961    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
962        let keys = make_formatter(self.keys().as_ref(), options)?;
963        let values = make_formatter(self.values().as_ref(), options)?;
964        Ok((keys, values))
965    }
966
967    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
968        let offsets = self.value_offsets();
969        let end = offsets[idx + 1].as_usize();
970        let start = offsets[idx].as_usize();
971        let mut iter = start..end;
972
973        f.write_char('{')?;
974        if let Some(idx) = iter.next() {
975            s.0.write(idx, f)?;
976            write!(f, ": ")?;
977            s.1.write(idx, f)?;
978        }
979
980        for idx in iter {
981            write!(f, ", ")?;
982            s.0.write(idx, f)?;
983            write!(f, ": ")?;
984            s.1.write(idx, f)?;
985        }
986
987        f.write_char('}')?;
988        Ok(())
989    }
990}
991
992impl<'a> DisplayIndexState<'a> for &'a UnionArray {
993    type State = (
994        Vec<Option<(&'a str, Box<dyn DisplayIndex + 'a>)>>,
995        UnionMode,
996    );
997
998    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
999        let (fields, mode) = match (*self).data_type() {
1000            DataType::Union(fields, mode) => (fields, mode),
1001            _ => unreachable!(),
1002        };
1003
1004        let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
1005        let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
1006        for (i, field) in fields.iter() {
1007            let formatter = make_formatter(self.child(i).as_ref(), options)?;
1008            out[i as usize] = Some((field.name().as_str(), formatter))
1009        }
1010        Ok((out, *mode))
1011    }
1012
1013    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1014        let id = self.type_id(idx);
1015        let idx = match s.1 {
1016            UnionMode::Dense => self.value_offset(idx),
1017            UnionMode::Sparse => idx,
1018        };
1019        let (name, field) = s.0[id as usize].as_ref().unwrap();
1020
1021        write!(f, "{{{name}=")?;
1022        field.write(idx, f)?;
1023        f.write_char('}')?;
1024        Ok(())
1025    }
1026}
1027
1028/// Get the value at the given row in an array as a String.
1029///
1030/// Note this function is quite inefficient and is unlikely to be
1031/// suitable for converting large arrays or record batches.
1032///
1033/// Please see [`ArrayFormatter`] for a more performant interface
1034pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
1035    let options = FormatOptions::default().with_display_error(true);
1036    let formatter = ArrayFormatter::try_new(column, &options)?;
1037    Ok(formatter.value(row).to_string())
1038}
1039
1040/// Converts numeric type to a `String`
1041pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
1042    let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
1043    unsafe {
1044        // JUSTIFICATION
1045        //  Benefit
1046        //      Allows using the faster serializer lexical core and convert to string
1047        //  Soundness
1048        //      Length of buf is set as written length afterwards. lexical_core
1049        //      creates a valid string, so doesn't need to be checked.
1050        let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
1051        let len = lexical_core::write(n, slice).len();
1052        buf.set_len(len);
1053        String::from_utf8_unchecked(buf)
1054    }
1055}
1056
1057#[cfg(test)]
1058mod tests {
1059    use arrow_array::builder::StringRunBuilder;
1060
1061    use super::*;
1062
    /// Test to verify options can be constant. See #4580
    ///
    /// Built entirely inside a `const` initializer, so this item only compiles
    /// while [`FormatOptions::new`] and the `with_*` builders used here remain
    /// `const fn`.
    const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
        .with_date_format(Some("foo"))
        .with_timestamp_format(Some("404"));
1067
1068    #[test]
1069    fn test_const_options() {
1070        assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
1071    }
1072
1073    #[test]
1074    fn test_map_array_to_string() {
1075        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1076        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1077
1078        // Construct a buffer for value offsets, for the nested array:
1079        //  [[a, b, c], [d, e, f], [g, h]]
1080        let entry_offsets = [0, 3, 6, 8];
1081
1082        let map_array =
1083            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1084                .unwrap();
1085        assert_eq!(
1086            "{d: 30, e: 40, f: 50}",
1087            array_value_to_string(&map_array, 1).unwrap()
1088        );
1089    }
1090
1091    fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
1092        let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
1093        (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
1094    }
1095
1096    #[test]
1097    fn test_array_value_to_string_duration() {
1098        let iso_fmt = FormatOptions::new();
1099        let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);
1100
1101        let array = DurationNanosecondArray::from(vec![
1102            1,
1103            -1,
1104            1000,
1105            -1000,
1106            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789,
1107            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789,
1108        ]);
1109        let iso = format_array(&array, &iso_fmt);
1110        let pretty = format_array(&array, &pretty_fmt);
1111
1112        assert_eq!(iso[0], "PT0.000000001S");
1113        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
1114        assert_eq!(iso[1], "-PT0.000000001S");
1115        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
1116        assert_eq!(iso[2], "PT0.000001S");
1117        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
1118        assert_eq!(iso[3], "-PT0.000001S");
1119        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
1120        assert_eq!(iso[4], "PT3938554.123456789S");
1121        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456789 secs");
1122        assert_eq!(iso[5], "-PT3938554.123456789S");
1123        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456789 secs");
1124
1125        let array = DurationMicrosecondArray::from(vec![
1126            1,
1127            -1,
1128            1000,
1129            -1000,
1130            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456,
1131            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456,
1132        ]);
1133        let iso = format_array(&array, &iso_fmt);
1134        let pretty = format_array(&array, &pretty_fmt);
1135
1136        assert_eq!(iso[0], "PT0.000001S");
1137        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
1138        assert_eq!(iso[1], "-PT0.000001S");
1139        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
1140        assert_eq!(iso[2], "PT0.001S");
1141        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
1142        assert_eq!(iso[3], "-PT0.001S");
1143        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
1144        assert_eq!(iso[4], "PT3938554.123456S");
1145        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456 secs");
1146        assert_eq!(iso[5], "-PT3938554.123456S");
1147        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456 secs");
1148
1149        let array = DurationMillisecondArray::from(vec![
1150            1,
1151            -1,
1152            1000,
1153            -1000,
1154            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
1155            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
1156        ]);
1157        let iso = format_array(&array, &iso_fmt);
1158        let pretty = format_array(&array, &pretty_fmt);
1159
1160        assert_eq!(iso[0], "PT0.001S");
1161        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
1162        assert_eq!(iso[1], "-PT0.001S");
1163        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
1164        assert_eq!(iso[2], "PT1S");
1165        assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
1166        assert_eq!(iso[3], "-PT1S");
1167        assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
1168        assert_eq!(iso[4], "PT3938554.123S");
1169        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123 secs");
1170        assert_eq!(iso[5], "-PT3938554.123S");
1171        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123 secs");
1172
1173        let array = DurationSecondArray::from(vec![
1174            1,
1175            -1,
1176            1000,
1177            -1000,
1178            45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
1179            -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
1180        ]);
1181        let iso = format_array(&array, &iso_fmt);
1182        let pretty = format_array(&array, &pretty_fmt);
1183
1184        assert_eq!(iso[0], "PT1S");
1185        assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
1186        assert_eq!(iso[1], "-PT1S");
1187        assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
1188        assert_eq!(iso[2], "PT1000S");
1189        assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
1190        assert_eq!(iso[3], "-PT1000S");
1191        assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
1192        assert_eq!(iso[4], "PT3938554S");
1193        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34 secs");
1194        assert_eq!(iso[5], "-PT3938554S");
1195        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34 secs");
1196    }
1197
1198    #[test]
1199    fn test_null() {
1200        let array = NullArray::new(2);
1201        let options = FormatOptions::new().with_null("NULL");
1202        let formatted = format_array(&array, &options);
1203        assert_eq!(formatted, &["NULL".to_string(), "NULL".to_string()])
1204    }
1205
1206    #[test]
1207    fn test_string_run_arry_to_string() {
1208        let mut builder = StringRunBuilder::<Int32Type>::new();
1209
1210        builder.append_value("input_value");
1211        builder.append_value("input_value");
1212        builder.append_value("input_value");
1213        builder.append_value("input_value1");
1214
1215        let map_array = builder.finish();
1216        assert_eq!("input_value", array_value_to_string(&map_array, 1).unwrap());
1217        assert_eq!(
1218            "input_value1",
1219            array_value_to_string(&map_array, 3).unwrap()
1220        );
1221    }
1222}