datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod consts;
21mod struct_builder;
22
23use std::borrow::Borrow;
24use std::cmp::Ordering;
25use std::collections::{HashSet, VecDeque};
26use std::convert::Infallible;
27use std::fmt;
28use std::hash::Hash;
29use std::hash::Hasher;
30use std::iter::repeat;
31use std::mem::{size_of, size_of_val};
32use std::str::FromStr;
33use std::sync::Arc;
34
35use crate::arrow_datafusion_err;
36use crate::cast::{
37    as_decimal128_array, as_decimal256_array, as_dictionary_array,
38    as_fixed_size_binary_array, as_fixed_size_list_array,
39};
40use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
41use crate::hash_utils::create_hashes;
42use crate::utils::SingleRowListArrayBuilder;
43use arrow::array::{
44    types::{IntervalDayTime, IntervalMonthDayNano},
45    *,
46};
47use arrow::buffer::ScalarBuffer;
48use arrow::compute::kernels::{
49    cast::{cast_with_options, CastOptions},
50    numeric::*,
51};
52use arrow::datatypes::{
53    i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
54    Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
55    Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
56    IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
57    TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
58    UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
59};
60use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
61
62use crate::format::DEFAULT_CAST_OPTIONS;
63use half::f16;
64pub use struct_builder::ScalarStructBuilder;
65
66/// A dynamically typed, nullable single value.
67///
/// While an arrow [`Array`] stores one or more values of the same type in a
/// single column, a `ScalarValue` stores a single value of a single type, the
/// equivalent of one row and one column.
71///
72/// ```text
73///  ┌────────┐
74///  │ value1 │
75///  │ value2 │                  ┌────────┐
76///  │ value3 │                  │ value2 │
77///  │  ...   │                  └────────┘
78///  │ valueN │
79///  └────────┘
80///
81///    Array                     ScalarValue
82///
83/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
85/// the same type
86/// ```
87///
88/// # Performance
89///
90/// In general, performance will be better using arrow [`Array`]s rather than
91/// [`ScalarValue`], as it is far more efficient to process multiple values at
92/// once (vectorized processing).
93///
94/// # Example
95/// ```
96/// # use datafusion_common::ScalarValue;
97/// // Create single scalar value for an Int32 value
98/// let s1 = ScalarValue::Int32(Some(10));
99///
100/// // You can also create values using the From impl:
101/// let s2 = ScalarValue::from(10i32);
102/// assert_eq!(s1, s2);
103/// ```
104///
105/// # Null Handling
106///
107/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
108/// "typed" in the sense that a null value in an [`Int32Array`] is different
109/// from a null value in a [`Float64Array`], and is different from the values in
110/// a [`NullArray`].
111///
112/// ```
113/// # fn main() -> datafusion_common::Result<()> {
114/// # use std::collections::hash_set::Difference;
115/// # use datafusion_common::ScalarValue;
116/// # use arrow::datatypes::DataType;
117/// // You can create a 'null' Int32 value directly:
118/// let s1 = ScalarValue::Int32(None);
119///
120/// // You can also create a null value for a given datatype:
121/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
122/// assert_eq!(s1, s2);
123///
124/// // Note that this is DIFFERENT than a `ScalarValue::Null`
125/// let s3 = ScalarValue::Null;
126/// assert_ne!(s1, s3);
127/// # Ok(())
128/// # }
129/// ```
130///
131/// # Nested Types
132///
133/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
134/// single element array of the corresponding type.
135///
136/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
137/// ```
138/// # use std::sync::Arc;
139/// # use arrow::datatypes::{DataType, Field};
140/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
141/// // Build a struct like: {a: 1, b: "foo"}
142/// let field_a = Field::new("a", DataType::Int32, false);
143/// let field_b = Field::new("b", DataType::Utf8, false);
144///
145/// let s1 = ScalarStructBuilder::new()
146///    .with_scalar(field_a, ScalarValue::from(1i32))
147///    .with_scalar(field_b, ScalarValue::from("foo"))
148///    .build();
149/// ```
150///
151/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
152/// ```
153/// # use std::sync::Arc;
154/// # use arrow::datatypes::{DataType, Field};
155/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
156/// // Build a struct representing a NULL value
157/// let fields = vec![
158///     Field::new("a", DataType::Int32, false),
159///     Field::new("b", DataType::Utf8, false),
160/// ];
161///
162/// let s1 = ScalarStructBuilder::new_null(fields);
163/// ```
164///
165/// ## Example: Creating [`ScalarValue::Struct`] directly
166/// ```
167/// # use std::sync::Arc;
168/// # use arrow::datatypes::{DataType, Field, Fields};
169/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
170/// # use datafusion_common::ScalarValue;
171/// // Build a struct like: {a: 1, b: "foo"}
172/// // Field description
173/// let fields = Fields::from(vec![
174///   Field::new("a", DataType::Int32, false),
175///   Field::new("b", DataType::Utf8, false),
176/// ]);
177/// // one row arrays for each field
178/// let arrays: Vec<ArrayRef> = vec![
179///   Arc::new(Int32Array::from(vec![1])),
180///   Arc::new(StringArray::from(vec!["foo"])),
181/// ];
182/// // no nulls for this array
183/// let nulls = None;
184/// let arr = StructArray::new(fields, arrays, nulls);
185///
186/// // Create a ScalarValue::Struct directly
187/// let s1 = ScalarValue::Struct(Arc::new(arr));
188/// ```
189///
190///
191/// # Further Reading
192/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
193/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
194/// for the definitive reference.
#[derive(Clone)]
pub enum ScalarValue {
    /// Represents `DataType::Null` (castable to/from any other type)
    Null,
    /// True or false value
    Boolean(Option<bool>),
    /// 16-bit float
    Float16(Option<f16>),
    /// 32-bit float
    Float32(Option<f32>),
    /// 64-bit float
    Float64(Option<f64>),
    /// 128-bit decimal: the value stored as an `i128`, followed by the
    /// precision and the scale
    Decimal128(Option<i128>, u8, i8),
    /// 256-bit decimal: the value stored as an `i256`, followed by the
    /// precision and the scale
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit int
    Int8(Option<i8>),
    /// Signed 16-bit int
    Int16(Option<i16>),
    /// Signed 32-bit int
    Int32(Option<i32>),
    /// Signed 64-bit int
    Int64(Option<i64>),
    /// Unsigned 8-bit int
    UInt8(Option<u8>),
    /// Unsigned 16-bit int
    UInt16(Option<u16>),
    /// Unsigned 32-bit int
    UInt32(Option<u32>),
    /// Unsigned 64-bit int
    UInt64(Option<u64>),
    /// UTF-8 encoded string.
    Utf8(Option<String>),
    /// UTF-8 encoded string, but from view types.
    Utf8View(Option<String>),
    /// UTF-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// Binary
    Binary(Option<Vec<u8>>),
    /// Binary, but from view types.
    BinaryView(Option<Vec<u8>>),
    /// Fixed size binary
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`], held as a one-row
    /// `Arc<ListArray>`
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Represents a single element of a [`LargeListArray`], held as a one-row
    /// `Arc<LargeListArray>`
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element [`StructArray`], held as an
    /// `Arc<StructArray>`. See [`ScalarValue`] for examples of how to create
    /// instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`], held as an `Arc<MapArray>`.
    Map(Arc<MapArray>),
    /// Date stored as a signed 32-bit int: days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64-bit int: milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32-bit int: seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32-bit int: milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64-bit int: microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64-bit int: nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds)
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
301
302impl Hash for Fl<f16> {
303    fn hash<H: Hasher>(&self, state: &mut H) {
304        self.0.to_bits().hash(state);
305    }
306}
307
308// manual implementation of `PartialEq`
309impl PartialEq for ScalarValue {
310    fn eq(&self, other: &Self) -> bool {
311        use ScalarValue::*;
312        // This purposely doesn't have a catch-all "(_, _)" so that
313        // any newly added enum variant will require editing this list
314        // or else face a compile error
315        match (self, other) {
316            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
317                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
318            }
319            (Decimal128(_, _, _), _) => false,
320            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
321                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
322            }
323            (Decimal256(_, _, _), _) => false,
324            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
325            (Boolean(_), _) => false,
326            (Float32(v1), Float32(v2)) => match (v1, v2) {
327                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
328                _ => v1.eq(v2),
329            },
330            (Float16(v1), Float16(v2)) => match (v1, v2) {
331                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
332                _ => v1.eq(v2),
333            },
334            (Float32(_), _) => false,
335            (Float16(_), _) => false,
336            (Float64(v1), Float64(v2)) => match (v1, v2) {
337                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
338                _ => v1.eq(v2),
339            },
340            (Float64(_), _) => false,
341            (Int8(v1), Int8(v2)) => v1.eq(v2),
342            (Int8(_), _) => false,
343            (Int16(v1), Int16(v2)) => v1.eq(v2),
344            (Int16(_), _) => false,
345            (Int32(v1), Int32(v2)) => v1.eq(v2),
346            (Int32(_), _) => false,
347            (Int64(v1), Int64(v2)) => v1.eq(v2),
348            (Int64(_), _) => false,
349            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
350            (UInt8(_), _) => false,
351            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
352            (UInt16(_), _) => false,
353            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
354            (UInt32(_), _) => false,
355            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
356            (UInt64(_), _) => false,
357            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
358            (Utf8(_), _) => false,
359            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
360            (Utf8View(_), _) => false,
361            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
362            (LargeUtf8(_), _) => false,
363            (Binary(v1), Binary(v2)) => v1.eq(v2),
364            (Binary(_), _) => false,
365            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
366            (BinaryView(_), _) => false,
367            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
368            (FixedSizeBinary(_, _), _) => false,
369            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
370            (LargeBinary(_), _) => false,
371            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
372            (FixedSizeList(_), _) => false,
373            (List(v1), List(v2)) => v1.eq(v2),
374            (List(_), _) => false,
375            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
376            (LargeList(_), _) => false,
377            (Struct(v1), Struct(v2)) => v1.eq(v2),
378            (Struct(_), _) => false,
379            (Map(v1), Map(v2)) => v1.eq(v2),
380            (Map(_), _) => false,
381            (Date32(v1), Date32(v2)) => v1.eq(v2),
382            (Date32(_), _) => false,
383            (Date64(v1), Date64(v2)) => v1.eq(v2),
384            (Date64(_), _) => false,
385            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
386            (Time32Second(_), _) => false,
387            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
388            (Time32Millisecond(_), _) => false,
389            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
390            (Time64Microsecond(_), _) => false,
391            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
392            (Time64Nanosecond(_), _) => false,
393            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
394            (TimestampSecond(_, _), _) => false,
395            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
396            (TimestampMillisecond(_, _), _) => false,
397            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
398            (TimestampMicrosecond(_, _), _) => false,
399            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
400            (TimestampNanosecond(_, _), _) => false,
401            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
402            (DurationSecond(_), _) => false,
403            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
404            (DurationMillisecond(_), _) => false,
405            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
406            (DurationMicrosecond(_), _) => false,
407            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
408            (DurationNanosecond(_), _) => false,
409            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
410            (IntervalYearMonth(_), _) => false,
411            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
412            (IntervalDayTime(_), _) => false,
413            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
414            (IntervalMonthDayNano(_), _) => false,
415            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
416                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
417            }
418            (Union(_, _, _), _) => false,
419            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
420            (Dictionary(_, _), _) => false,
421            (Null, Null) => true,
422            (Null, _) => false,
423        }
424    }
425}
426
427// manual implementation of `PartialOrd`
428impl PartialOrd for ScalarValue {
429    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
430        use ScalarValue::*;
431        // This purposely doesn't have a catch-all "(_, _)" so that
432        // any newly added enum variant will require editing this list
433        // or else face a compile error
434        match (self, other) {
435            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
436                if p1.eq(p2) && s1.eq(s2) {
437                    v1.partial_cmp(v2)
438                } else {
439                    // Two decimal values can be compared if they have the same precision and scale.
440                    None
441                }
442            }
443            (Decimal128(_, _, _), _) => None,
444            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
445                if p1.eq(p2) && s1.eq(s2) {
446                    v1.partial_cmp(v2)
447                } else {
448                    // Two decimal values can be compared if they have the same precision and scale.
449                    None
450                }
451            }
452            (Decimal256(_, _, _), _) => None,
453            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
454            (Boolean(_), _) => None,
455            (Float32(v1), Float32(v2)) => match (v1, v2) {
456                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
457                _ => v1.partial_cmp(v2),
458            },
459            (Float16(v1), Float16(v2)) => match (v1, v2) {
460                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
461                _ => v1.partial_cmp(v2),
462            },
463            (Float32(_), _) => None,
464            (Float16(_), _) => None,
465            (Float64(v1), Float64(v2)) => match (v1, v2) {
466                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
467                _ => v1.partial_cmp(v2),
468            },
469            (Float64(_), _) => None,
470            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
471            (Int8(_), _) => None,
472            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
473            (Int16(_), _) => None,
474            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
475            (Int32(_), _) => None,
476            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
477            (Int64(_), _) => None,
478            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
479            (UInt8(_), _) => None,
480            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
481            (UInt16(_), _) => None,
482            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
483            (UInt32(_), _) => None,
484            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
485            (UInt64(_), _) => None,
486            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
487            (Utf8(_), _) => None,
488            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
489            (LargeUtf8(_), _) => None,
490            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
491            (Utf8View(_), _) => None,
492            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
493            (Binary(_), _) => None,
494            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
495            (BinaryView(_), _) => None,
496            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
497            (FixedSizeBinary(_, _), _) => None,
498            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
499            (LargeBinary(_), _) => None,
500            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
501            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
502            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
503                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
504            }
505            (LargeList(arr1), LargeList(arr2)) => {
506                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
507            }
508            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
509            (Struct(struct_arr1), Struct(struct_arr2)) => {
510                partial_cmp_struct(struct_arr1, struct_arr2)
511            }
512            (Struct(_), _) => None,
513            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
514            (Map(_), _) => None,
515            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
516            (Date32(_), _) => None,
517            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
518            (Date64(_), _) => None,
519            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
520            (Time32Second(_), _) => None,
521            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
522            (Time32Millisecond(_), _) => None,
523            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
524            (Time64Microsecond(_), _) => None,
525            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
526            (Time64Nanosecond(_), _) => None,
527            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
528            (TimestampSecond(_, _), _) => None,
529            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
530                v1.partial_cmp(v2)
531            }
532            (TimestampMillisecond(_, _), _) => None,
533            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
534                v1.partial_cmp(v2)
535            }
536            (TimestampMicrosecond(_, _), _) => None,
537            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
538                v1.partial_cmp(v2)
539            }
540            (TimestampNanosecond(_, _), _) => None,
541            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
542            (IntervalYearMonth(_), _) => None,
543            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
544            (IntervalDayTime(_), _) => None,
545            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
546            (IntervalMonthDayNano(_), _) => None,
547            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
548            (DurationSecond(_), _) => None,
549            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
550            (DurationMillisecond(_), _) => None,
551            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
552            (DurationMicrosecond(_), _) => None,
553            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
554            (DurationNanosecond(_), _) => None,
555            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
556                if t1.eq(t2) && m1.eq(m2) {
557                    v1.partial_cmp(v2)
558                } else {
559                    None
560                }
561            }
562            (Union(_, _, _), _) => None,
563            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
564                // Don't compare if the key types don't match (it is effectively a different datatype)
565                if k1 == k2 {
566                    v1.partial_cmp(v2)
567                } else {
568                    None
569                }
570            }
571            (Dictionary(_, _), _) => None,
572            (Null, Null) => Some(Ordering::Equal),
573            (Null, _) => None,
574        }
575    }
576}
577
578/// List/LargeList/FixedSizeList scalars always have a single element
579/// array. This function returns that array
580fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
581    assert_eq!(arr.len(), 1);
582    if let Some(arr) = arr.as_list_opt::<i32>() {
583        arr.value(0)
584    } else if let Some(arr) = arr.as_list_opt::<i64>() {
585        arr.value(0)
586    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
587        arr.value(0)
588    } else {
589        unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
590    }
591}
592
593/// Compares two List/LargeList/FixedSizeList scalars
594fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
595    if arr1.data_type() != arr2.data_type() {
596        return None;
597    }
598    let arr1 = first_array_for_list(arr1);
599    let arr2 = first_array_for_list(arr2);
600
601    let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?;
602    let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?;
603
604    for j in 0..lt_res.len() {
605        if lt_res.is_valid(j) && lt_res.value(j) {
606            return Some(Ordering::Less);
607        }
608        if eq_res.is_valid(j) && !eq_res.value(j) {
609            return Some(Ordering::Greater);
610        }
611    }
612
613    Some(Ordering::Equal)
614}
615
616fn partial_cmp_struct(s1: &Arc<StructArray>, s2: &Arc<StructArray>) -> Option<Ordering> {
617    if s1.len() != s2.len() {
618        return None;
619    }
620
621    if s1.data_type() != s2.data_type() {
622        return None;
623    }
624
625    for col_index in 0..s1.num_columns() {
626        let arr1 = s1.column(col_index);
627        let arr2 = s2.column(col_index);
628
629        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
630        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
631
632        for j in 0..lt_res.len() {
633            if lt_res.is_valid(j) && lt_res.value(j) {
634                return Some(Ordering::Less);
635            }
636            if eq_res.is_valid(j) && !eq_res.value(j) {
637                return Some(Ordering::Greater);
638            }
639        }
640    }
641    Some(Ordering::Equal)
642}
643
644fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
645    if m1.len() != m2.len() {
646        return None;
647    }
648
649    if m1.data_type() != m2.data_type() {
650        return None;
651    }
652
653    for col_index in 0..m1.len() {
654        let arr1 = m1.entries().column(col_index);
655        let arr2 = m2.entries().column(col_index);
656
657        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
658        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
659
660        for j in 0..lt_res.len() {
661            if lt_res.is_valid(j) && lt_res.value(j) {
662                return Some(Ordering::Less);
663            }
664            if eq_res.is_valid(j) && !eq_res.value(j) {
665                return Some(Ordering::Greater);
666            }
667        }
668    }
669    Some(Ordering::Equal)
670}
671
// Marker impl: the manual `PartialEq` for `ScalarValue` compares the float
// variants by bit pattern instead of by IEEE equality, so a float scalar
// (including NaN) is always equal to itself.
impl Eq for ScalarValue {}
673
/// Newtype wrapper around a float. The primitive float types do not implement
/// `std::hash::Hash`, so hashing goes through this wrapper instead.
struct Fl<T>(T);

/// Implements `std::hash::Hash` for `Fl<$t>`, where `$t` is a float type and
/// `$i` is the unsigned integer type of the same width: the float's raw bits
/// are fed to the hasher as bytes in native byte order.
macro_rules! hash_float_value {
    ($(($t:ty, $i:ty)),+) => {
        $(impl std::hash::Hash for Fl<$t> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let bits: $i = self.0.to_bits();
                state.write(&bits.to_ne_bytes())
            }
        })+
    };
}

hash_float_value!((f64, u64), (f32, u32));
689
690// manual implementation of `Hash`
691//
692// # Panics
693//
694// Panics if there is an error when creating hash values for rows
695impl Hash for ScalarValue {
696    fn hash<H: Hasher>(&self, state: &mut H) {
697        use ScalarValue::*;
698        match self {
699            Decimal128(v, p, s) => {
700                v.hash(state);
701                p.hash(state);
702                s.hash(state)
703            }
704            Decimal256(v, p, s) => {
705                v.hash(state);
706                p.hash(state);
707                s.hash(state)
708            }
709            Boolean(v) => v.hash(state),
710            Float16(v) => v.map(Fl).hash(state),
711            Float32(v) => v.map(Fl).hash(state),
712            Float64(v) => v.map(Fl).hash(state),
713            Int8(v) => v.hash(state),
714            Int16(v) => v.hash(state),
715            Int32(v) => v.hash(state),
716            Int64(v) => v.hash(state),
717            UInt8(v) => v.hash(state),
718            UInt16(v) => v.hash(state),
719            UInt32(v) => v.hash(state),
720            UInt64(v) => v.hash(state),
721            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
722            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
723                v.hash(state)
724            }
725            List(arr) => {
726                hash_nested_array(arr.to_owned() as ArrayRef, state);
727            }
728            LargeList(arr) => {
729                hash_nested_array(arr.to_owned() as ArrayRef, state);
730            }
731            FixedSizeList(arr) => {
732                hash_nested_array(arr.to_owned() as ArrayRef, state);
733            }
734            Struct(arr) => {
735                hash_nested_array(arr.to_owned() as ArrayRef, state);
736            }
737            Map(arr) => {
738                hash_nested_array(arr.to_owned() as ArrayRef, state);
739            }
740            Date32(v) => v.hash(state),
741            Date64(v) => v.hash(state),
742            Time32Second(v) => v.hash(state),
743            Time32Millisecond(v) => v.hash(state),
744            Time64Microsecond(v) => v.hash(state),
745            Time64Nanosecond(v) => v.hash(state),
746            TimestampSecond(v, _) => v.hash(state),
747            TimestampMillisecond(v, _) => v.hash(state),
748            TimestampMicrosecond(v, _) => v.hash(state),
749            TimestampNanosecond(v, _) => v.hash(state),
750            DurationSecond(v) => v.hash(state),
751            DurationMillisecond(v) => v.hash(state),
752            DurationMicrosecond(v) => v.hash(state),
753            DurationNanosecond(v) => v.hash(state),
754            IntervalYearMonth(v) => v.hash(state),
755            IntervalDayTime(v) => v.hash(state),
756            IntervalMonthDayNano(v) => v.hash(state),
757            Union(v, t, m) => {
758                v.hash(state);
759                t.hash(state);
760                m.hash(state);
761            }
762            Dictionary(k, v) => {
763                k.hash(state);
764                v.hash(state);
765            }
766            // stable hash for Null value
767            Null => 1.hash(state),
768        }
769    }
770}
771
772fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
773    let arrays = vec![arr.to_owned()];
774    let hashes_buffer = &mut vec![0; arr.len()];
775    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
776    let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
777    // Hash back to std::hash::Hasher
778    hashes.hash(state);
779}
780
781/// Return a reference to the values array and the index into it for a
782/// dictionary array
783///
784/// # Errors
785///
786/// Errors if the array cannot be downcasted to DictionaryArray
787#[inline]
788pub fn get_dict_value<K: ArrowDictionaryKeyType>(
789    array: &dyn Array,
790    index: usize,
791) -> Result<(&ArrayRef, Option<usize>)> {
792    let dict_array = as_dictionary_array::<K>(array)?;
793    Ok((dict_array.values(), dict_array.key(index)))
794}
795
796/// Create a dictionary array representing `value` repeated `size`
797/// times
798fn dict_from_scalar<K: ArrowDictionaryKeyType>(
799    value: &ScalarValue,
800    size: usize,
801) -> Result<ArrayRef> {
802    // values array is one element long (the value)
803    let values_array = value.to_array_of_size(1)?;
804
805    // Create a key array with `size` elements, each of 0
806    let key_array: PrimitiveArray<K> = repeat(if value.is_null() {
807        None
808    } else {
809        Some(K::default_value())
810    })
811    .take(size)
812    .collect();
813
814    // create a new DictionaryArray
815    //
816    // Note: this path could be made faster by using the ArrayData
817    // APIs and skipping validation, if it every comes up in
818    // performance traces.
819    Ok(Arc::new(
820        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
821    ))
822}
823
824/// Create a dictionary array representing all the values in values
825fn dict_from_values<K: ArrowDictionaryKeyType>(
826    values_array: ArrayRef,
827) -> Result<ArrayRef> {
828    // Create a key array with `size` elements of 0..array_len for all
829    // non-null value elements
830    let key_array: PrimitiveArray<K> = (0..values_array.len())
831        .map(|index| {
832            if values_array.is_valid(index) {
833                let native_index = K::Native::from_usize(index).ok_or_else(|| {
834                    DataFusionError::Internal(format!(
835                        "Can not create index of type {} from value {}",
836                        K::DATA_TYPE,
837                        index
838                    ))
839                })?;
840                Ok(Some(native_index))
841            } else {
842                Ok(None)
843            }
844        })
845        .collect::<Result<Vec<_>>>()?
846        .into_iter()
847        .collect();
848
849    // create a new DictionaryArray
850    //
851    // Note: this path could be made faster by using the ArrayData
852    // APIs and skipping validation, if it every comes up in
853    // performance traces.
854    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
855    Ok(Arc::new(dict_array))
856}
857
/// Downcasts `$array` to `$ARRAYTYPE` and reads slot `$index` into the
/// timezone-carrying variant `ScalarValue::$SCALAR`, attaching a clone of
/// `$TZ`. A null slot becomes `None`.
///
/// Propagates an internal error (via `?`) if the downcast fails.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{
        use std::any::type_name;
        let typed = $array.as_any().downcast_ref::<$ARRAYTYPE>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                type_name::<$ARRAYTYPE>()
            ))
        })?;
        let slot = if typed.is_null($index) {
            None
        } else {
            Some(typed.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(slot, $TZ.clone()))
    }};
}
879
/// Downcasts `$array` to `$ARRAYTYPE` and reads slot `$index` into
/// `ScalarValue::$SCALAR`. A null slot becomes `None`.
///
/// Propagates an internal error (via `?`) if the downcast fails.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
        use std::any::type_name;
        let typed = $array.as_any().downcast_ref::<$ARRAYTYPE>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                type_name::<$ARRAYTYPE>()
            ))
        })?;
        let slot = if typed.is_null($index) {
            None
        } else {
            Some(typed.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(slot))
    }};
}
900
/// Expands to an `ArrayRef` of `$SIZE` elements built from the optional
/// value `$EXPR`.
///
/// * `Some(value)`: `$ARRAY_TYPE::from_value(*value, $SIZE)`
/// * `None`: a null array of the requested data type
///
/// The first form is for data types without parameters
/// (`DataType::$DATA_TYPE`); the second is for data types that take one
/// argument (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
            Some(scalar) => {
                Arc::new($ARRAY_TYPE::from_value(*scalar, $SIZE)) as ArrayRef
            }
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
            Some(scalar) => {
                Arc::new($ARRAY_TYPE::from_value(*scalar, $SIZE)) as ArrayRef
            }
        }
    }};
}
915
/// Expands to an `ArrayRef` of `$SIZE` timestamp elements built from the
/// optional value `$EXPR`.
///
/// * `Some(value)`: `$ARRAY_TYPE::from_value(*value, $SIZE)` with the
///   timezone `$TZ` applied
/// * `None`: a null array of type `DataType::Timestamp($TIME_UNIT, $TZ)`
///
/// `$TZ` appears once in each arm, so it is evaluated only in the arm
/// that is taken.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
            Some(scalar) => {
                let filled = $ARRAY_TYPE::from_value(*scalar, $SIZE);
                Arc::new(filled.with_timezone_opt($TZ)) as ArrayRef
            }
        }
    };
}
926
/// Compares the optional value `$VALUE` with slot `$index` of `$array`
/// after downcasting the array to `$ARRAYTYPE`.
///
/// Evaluates to `Ok(true)` when both are null, or when the slot is valid
/// and equal to the contained value; `Ok(false)` otherwise. Propagates an
/// internal error (via `?`) if the downcast fails.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{
        use std::any::type_name;
        let typed = $array.as_any().downcast_ref::<$ARRAYTYPE>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                type_name::<$ARRAYTYPE>()
            ))
        })?;
        let slot_valid = typed.is_valid($index);
        let same = match $VALUE {
            None => !slot_valid,
            Some(expected) => slot_valid && &typed.value($index) == expected,
        };
        Ok::<bool, DataFusionError>(same)
    }};
}
946
947impl ScalarValue {
948    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
949    ///
950    /// # Panics
951    ///
952    /// Panics if d is not compatible with T
953    pub fn new_primitive<T: ArrowPrimitiveType>(
954        a: Option<T::Native>,
955        d: &DataType,
956    ) -> Result<Self> {
957        match a {
958            None => d.try_into(),
959            Some(v) => {
960                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
961                    .with_data_type(d.clone());
962                Self::try_from_array(&array, 0)
963            }
964        }
965    }
966
967    /// Create a decimal Scalar from value/precision and scale.
968    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
969        // make sure the precision and scale is valid
970        if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
971            return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
972        }
973        _internal_err!(
974            "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
975        )
976    }
977
978    /// Create a Null instance of ScalarValue for this datatype
979    ///
980    /// Example
981    /// ```
982    /// use datafusion_common::ScalarValue;
983    /// use arrow::datatypes::DataType;
984    ///
985    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
986    /// assert_eq!(scalar.is_null(), true);
987    /// assert_eq!(scalar.data_type(), DataType::Int32);
988    /// ```
989    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
990        Ok(match data_type {
991            DataType::Boolean => ScalarValue::Boolean(None),
992            DataType::Float16 => ScalarValue::Float16(None),
993            DataType::Float64 => ScalarValue::Float64(None),
994            DataType::Float32 => ScalarValue::Float32(None),
995            DataType::Int8 => ScalarValue::Int8(None),
996            DataType::Int16 => ScalarValue::Int16(None),
997            DataType::Int32 => ScalarValue::Int32(None),
998            DataType::Int64 => ScalarValue::Int64(None),
999            DataType::UInt8 => ScalarValue::UInt8(None),
1000            DataType::UInt16 => ScalarValue::UInt16(None),
1001            DataType::UInt32 => ScalarValue::UInt32(None),
1002            DataType::UInt64 => ScalarValue::UInt64(None),
1003            DataType::Decimal128(precision, scale) => {
1004                ScalarValue::Decimal128(None, *precision, *scale)
1005            }
1006            DataType::Decimal256(precision, scale) => {
1007                ScalarValue::Decimal256(None, *precision, *scale)
1008            }
1009            DataType::Utf8 => ScalarValue::Utf8(None),
1010            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1011            DataType::Utf8View => ScalarValue::Utf8View(None),
1012            DataType::Binary => ScalarValue::Binary(None),
1013            DataType::BinaryView => ScalarValue::BinaryView(None),
1014            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1015            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1016            DataType::Date32 => ScalarValue::Date32(None),
1017            DataType::Date64 => ScalarValue::Date64(None),
1018            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1019            DataType::Time32(TimeUnit::Millisecond) => {
1020                ScalarValue::Time32Millisecond(None)
1021            }
1022            DataType::Time64(TimeUnit::Microsecond) => {
1023                ScalarValue::Time64Microsecond(None)
1024            }
1025            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1026            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1027                ScalarValue::TimestampSecond(None, tz_opt.clone())
1028            }
1029            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1030                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1031            }
1032            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1033                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1034            }
1035            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1036                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1037            }
1038            DataType::Interval(IntervalUnit::YearMonth) => {
1039                ScalarValue::IntervalYearMonth(None)
1040            }
1041            DataType::Interval(IntervalUnit::DayTime) => {
1042                ScalarValue::IntervalDayTime(None)
1043            }
1044            DataType::Interval(IntervalUnit::MonthDayNano) => {
1045                ScalarValue::IntervalMonthDayNano(None)
1046            }
1047            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1048            DataType::Duration(TimeUnit::Millisecond) => {
1049                ScalarValue::DurationMillisecond(None)
1050            }
1051            DataType::Duration(TimeUnit::Microsecond) => {
1052                ScalarValue::DurationMicrosecond(None)
1053            }
1054            DataType::Duration(TimeUnit::Nanosecond) => {
1055                ScalarValue::DurationNanosecond(None)
1056            }
1057            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1058                index_type.clone(),
1059                Box::new(value_type.as_ref().try_into()?),
1060            ),
1061            // `ScalaValue::List` contains single element `ListArray`.
1062            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1063                GenericListArray::new_null(Arc::clone(field_ref), 1),
1064            )),
1065            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1066            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1067                GenericListArray::new_null(Arc::clone(field_ref), 1),
1068            )),
1069            // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`.
1070            DataType::FixedSizeList(field_ref, fixed_length) => {
1071                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1072                    Arc::clone(field_ref),
1073                    *fixed_length,
1074                    1,
1075                )))
1076            }
1077            DataType::Struct(fields) => ScalarValue::Struct(
1078                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1079                    .as_struct()
1080                    .to_owned()
1081                    .into(),
1082            ),
1083            DataType::Map(fields, sorted) => ScalarValue::Map(
1084                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1085                    .as_map()
1086                    .to_owned()
1087                    .into(),
1088            ),
1089            DataType::Union(fields, mode) => {
1090                ScalarValue::Union(None, fields.clone(), *mode)
1091            }
1092            DataType::Null => ScalarValue::Null,
1093            _ => {
1094                return _not_impl_err!(
1095                    "Can't create a null scalar from data_type \"{data_type:?}\""
1096                );
1097            }
1098        })
1099    }
1100
1101    /// Returns a [`ScalarValue::Utf8`] representing `val`
1102    pub fn new_utf8(val: impl Into<String>) -> Self {
1103        ScalarValue::from(val.into())
1104    }
1105
1106    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1107    pub fn new_utf8view(val: impl Into<String>) -> Self {
1108        ScalarValue::Utf8View(Some(val.into()))
1109    }
1110
1111    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1112    /// `years` years and `months` months
1113    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1114        let val = IntervalYearMonthType::make_value(years, months);
1115        ScalarValue::IntervalYearMonth(Some(val))
1116    }
1117
1118    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1119    /// `days` days and `millis` milliseconds
1120    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1121        let val = IntervalDayTimeType::make_value(days, millis);
1122        Self::IntervalDayTime(Some(val))
1123    }
1124
1125    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1126    /// `months` months and `days` days, and `nanos` nanoseconds
1127    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1128        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1129        ScalarValue::IntervalMonthDayNano(Some(val))
1130    }
1131
1132    /// Returns a [`ScalarValue`] representing
1133    /// `value` and `tz_opt` timezone
1134    pub fn new_timestamp<T: ArrowTimestampType>(
1135        value: Option<i64>,
1136        tz_opt: Option<Arc<str>>,
1137    ) -> Self {
1138        match T::UNIT {
1139            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1140            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1141            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1142            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1143        }
1144    }
1145
1146    /// Returns a [`ScalarValue`] representing PI
1147    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1148        match datatype {
1149            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1150            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1151            _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1152        }
1153    }
1154
1155    /// Returns a [`ScalarValue`] representing PI's upper bound
1156    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1157        // TODO: replace the constants with next_up/next_down when
1158        // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
1159        match datatype {
1160            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1161            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1162            _ => {
1163                _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1164            }
1165        }
1166    }
1167
1168    /// Returns a [`ScalarValue`] representing -PI's lower bound
1169    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1170        match datatype {
1171            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1172            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1173            _ => {
1174                _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1175            }
1176        }
1177    }
1178
1179    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1180    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1181        match datatype {
1182            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1183            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1184            _ => {
1185                _internal_err!(
1186                    "PI_UPPER/2 is not supported for data type: {:?}",
1187                    datatype
1188                )
1189            }
1190        }
1191    }
1192
1193    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1194    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1195        match datatype {
1196            DataType::Float32 => {
1197                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1198            }
1199            DataType::Float64 => {
1200                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1201            }
1202            _ => {
1203                _internal_err!(
1204                    "-PI/2_LOWER is not supported for data type: {:?}",
1205                    datatype
1206                )
1207            }
1208        }
1209    }
1210
1211    /// Returns a [`ScalarValue`] representing -PI
1212    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1213        match datatype {
1214            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1215            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1216            _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1217        }
1218    }
1219
1220    /// Returns a [`ScalarValue`] representing PI/2
1221    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1222        match datatype {
1223            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1224            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1225            _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1226        }
1227    }
1228
1229    /// Returns a [`ScalarValue`] representing -PI/2
1230    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1231        match datatype {
1232            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1233            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1234            _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1235        }
1236    }
1237
1238    /// Returns a [`ScalarValue`] representing infinity
1239    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1240        match datatype {
1241            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1242            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1243            _ => {
1244                _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1245            }
1246        }
1247    }
1248
1249    /// Returns a [`ScalarValue`] representing negative infinity
1250    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1251        match datatype {
1252            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1253            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1254            _ => {
1255                _internal_err!(
1256                    "Negative Infinity is not supported for data type: {:?}",
1257                    datatype
1258                )
1259            }
1260        }
1261    }
1262
1263    /// Create a zero value in the given type.
1264    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1265        Ok(match datatype {
1266            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1267            DataType::Int8 => ScalarValue::Int8(Some(0)),
1268            DataType::Int16 => ScalarValue::Int16(Some(0)),
1269            DataType::Int32 => ScalarValue::Int32(Some(0)),
1270            DataType::Int64 => ScalarValue::Int64(Some(0)),
1271            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1272            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1273            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1274            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1275            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1276            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1277            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1278            DataType::Decimal128(precision, scale) => {
1279                ScalarValue::Decimal128(Some(0), *precision, *scale)
1280            }
1281            DataType::Decimal256(precision, scale) => {
1282                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1283            }
1284            DataType::Timestamp(TimeUnit::Second, tz) => {
1285                ScalarValue::TimestampSecond(Some(0), tz.clone())
1286            }
1287            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1288                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1289            }
1290            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1291                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1292            }
1293            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1294                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1295            }
1296            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1297            DataType::Time32(TimeUnit::Millisecond) => {
1298                ScalarValue::Time32Millisecond(Some(0))
1299            }
1300            DataType::Time64(TimeUnit::Microsecond) => {
1301                ScalarValue::Time64Microsecond(Some(0))
1302            }
1303            DataType::Time64(TimeUnit::Nanosecond) => {
1304                ScalarValue::Time64Nanosecond(Some(0))
1305            }
1306            DataType::Interval(IntervalUnit::YearMonth) => {
1307                ScalarValue::IntervalYearMonth(Some(0))
1308            }
1309            DataType::Interval(IntervalUnit::DayTime) => {
1310                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1311            }
1312            DataType::Interval(IntervalUnit::MonthDayNano) => {
1313                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1314            }
1315            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1316            DataType::Duration(TimeUnit::Millisecond) => {
1317                ScalarValue::DurationMillisecond(Some(0))
1318            }
1319            DataType::Duration(TimeUnit::Microsecond) => {
1320                ScalarValue::DurationMicrosecond(Some(0))
1321            }
1322            DataType::Duration(TimeUnit::Nanosecond) => {
1323                ScalarValue::DurationNanosecond(Some(0))
1324            }
1325            DataType::Date32 => ScalarValue::Date32(Some(0)),
1326            DataType::Date64 => ScalarValue::Date64(Some(0)),
1327            _ => {
1328                return _not_impl_err!(
1329                    "Can't create a zero scalar from data_type \"{datatype:?}\""
1330                );
1331            }
1332        })
1333    }
1334
1335    /// Create an one value in the given type.
1336    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1337        Ok(match datatype {
1338            DataType::Int8 => ScalarValue::Int8(Some(1)),
1339            DataType::Int16 => ScalarValue::Int16(Some(1)),
1340            DataType::Int32 => ScalarValue::Int32(Some(1)),
1341            DataType::Int64 => ScalarValue::Int64(Some(1)),
1342            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1343            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1344            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1345            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1346            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1347            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1348            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1349            _ => {
1350                return _not_impl_err!(
1351                    "Can't create an one scalar from data_type \"{datatype:?}\""
1352                );
1353            }
1354        })
1355    }
1356
1357    /// Create a negative one value in the given type.
1358    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1359        Ok(match datatype {
1360            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1361            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1362            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1363            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1364            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1365            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1366            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1367            _ => {
1368                return _not_impl_err!(
1369                    "Can't create a negative one scalar from data_type \"{datatype:?}\""
1370                );
1371            }
1372        })
1373    }
1374
1375    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1376        Ok(match datatype {
1377            DataType::Int8 => ScalarValue::Int8(Some(10)),
1378            DataType::Int16 => ScalarValue::Int16(Some(10)),
1379            DataType::Int32 => ScalarValue::Int32(Some(10)),
1380            DataType::Int64 => ScalarValue::Int64(Some(10)),
1381            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1382            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1383            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1384            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1385            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1386            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1387            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1388            _ => {
1389                return _not_impl_err!(
1390                    "Can't create a ten scalar from data_type \"{datatype:?}\""
1391                );
1392            }
1393        })
1394    }
1395
1396    /// return the [`DataType`] of this `ScalarValue`
1397    pub fn data_type(&self) -> DataType {
1398        match self {
1399            ScalarValue::Boolean(_) => DataType::Boolean,
1400            ScalarValue::UInt8(_) => DataType::UInt8,
1401            ScalarValue::UInt16(_) => DataType::UInt16,
1402            ScalarValue::UInt32(_) => DataType::UInt32,
1403            ScalarValue::UInt64(_) => DataType::UInt64,
1404            ScalarValue::Int8(_) => DataType::Int8,
1405            ScalarValue::Int16(_) => DataType::Int16,
1406            ScalarValue::Int32(_) => DataType::Int32,
1407            ScalarValue::Int64(_) => DataType::Int64,
1408            ScalarValue::Decimal128(_, precision, scale) => {
1409                DataType::Decimal128(*precision, *scale)
1410            }
1411            ScalarValue::Decimal256(_, precision, scale) => {
1412                DataType::Decimal256(*precision, *scale)
1413            }
1414            ScalarValue::TimestampSecond(_, tz_opt) => {
1415                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1416            }
1417            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1418                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1419            }
1420            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1421                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1422            }
1423            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1424                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1425            }
1426            ScalarValue::Float16(_) => DataType::Float16,
1427            ScalarValue::Float32(_) => DataType::Float32,
1428            ScalarValue::Float64(_) => DataType::Float64,
1429            ScalarValue::Utf8(_) => DataType::Utf8,
1430            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1431            ScalarValue::Utf8View(_) => DataType::Utf8View,
1432            ScalarValue::Binary(_) => DataType::Binary,
1433            ScalarValue::BinaryView(_) => DataType::BinaryView,
1434            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1435            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1436            ScalarValue::List(arr) => arr.data_type().to_owned(),
1437            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1438            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1439            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1440            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1441            ScalarValue::Date32(_) => DataType::Date32,
1442            ScalarValue::Date64(_) => DataType::Date64,
1443            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1444            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1445            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1446            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1447            ScalarValue::IntervalYearMonth(_) => {
1448                DataType::Interval(IntervalUnit::YearMonth)
1449            }
1450            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1451            ScalarValue::IntervalMonthDayNano(_) => {
1452                DataType::Interval(IntervalUnit::MonthDayNano)
1453            }
1454            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1455            ScalarValue::DurationMillisecond(_) => {
1456                DataType::Duration(TimeUnit::Millisecond)
1457            }
1458            ScalarValue::DurationMicrosecond(_) => {
1459                DataType::Duration(TimeUnit::Microsecond)
1460            }
1461            ScalarValue::DurationNanosecond(_) => {
1462                DataType::Duration(TimeUnit::Nanosecond)
1463            }
1464            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1465            ScalarValue::Dictionary(k, v) => {
1466                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1467            }
1468            ScalarValue::Null => DataType::Null,
1469        }
1470    }
1471
1472    /// Calculate arithmetic negation for a scalar value
1473    pub fn arithmetic_negate(&self) -> Result<Self> {
1474        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1475            v: T,
1476            ctx: impl Fn() -> String,
1477        ) -> Result<T> {
1478            v.neg_checked()
1479                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1480        }
1481        match self {
1482            ScalarValue::Int8(None)
1483            | ScalarValue::Int16(None)
1484            | ScalarValue::Int32(None)
1485            | ScalarValue::Int64(None)
1486            | ScalarValue::Float16(None)
1487            | ScalarValue::Float32(None)
1488            | ScalarValue::Float64(None) => Ok(self.clone()),
1489            ScalarValue::Float16(Some(v)) => {
1490                Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1491            }
1492            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1493            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1494            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1495            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1496            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1497            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1498            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1499                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1500                    format!("In negation of IntervalYearMonth({v})")
1501                })?)),
1502            ),
1503            ScalarValue::IntervalDayTime(Some(v)) => {
1504                let (days, ms) = IntervalDayTimeType::to_parts(*v);
1505                let val = IntervalDayTimeType::make_value(
1506                    neg_checked_with_ctx(days, || {
1507                        format!("In negation of days {days} in IntervalDayTime")
1508                    })?,
1509                    neg_checked_with_ctx(ms, || {
1510                        format!("In negation of milliseconds {ms} in IntervalDayTime")
1511                    })?,
1512                );
1513                Ok(ScalarValue::IntervalDayTime(Some(val)))
1514            }
1515            ScalarValue::IntervalMonthDayNano(Some(v)) => {
1516                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1517                let val = IntervalMonthDayNanoType::make_value(
1518                    neg_checked_with_ctx(months, || {
1519                        format!("In negation of months {months} of IntervalMonthDayNano")
1520                    })?,
1521                    neg_checked_with_ctx(days, || {
1522                        format!("In negation of days {days} of IntervalMonthDayNano")
1523                    })?,
1524                    neg_checked_with_ctx(nanos, || {
1525                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1526                    })?,
1527                );
1528                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1529            }
1530            ScalarValue::Decimal128(Some(v), precision, scale) => {
1531                Ok(ScalarValue::Decimal128(
1532                    Some(neg_checked_with_ctx(*v, || {
1533                        format!("In negation of Decimal128({v}, {precision}, {scale})")
1534                    })?),
1535                    *precision,
1536                    *scale,
1537                ))
1538            }
1539            ScalarValue::Decimal256(Some(v), precision, scale) => {
1540                Ok(ScalarValue::Decimal256(
1541                    Some(neg_checked_with_ctx(*v, || {
1542                        format!("In negation of Decimal256({v}, {precision}, {scale})")
1543                    })?),
1544                    *precision,
1545                    *scale,
1546                ))
1547            }
1548            ScalarValue::TimestampSecond(Some(v), tz) => {
1549                Ok(ScalarValue::TimestampSecond(
1550                    Some(neg_checked_with_ctx(*v, || {
1551                        format!("In negation of TimestampSecond({v})")
1552                    })?),
1553                    tz.clone(),
1554                ))
1555            }
1556            ScalarValue::TimestampNanosecond(Some(v), tz) => {
1557                Ok(ScalarValue::TimestampNanosecond(
1558                    Some(neg_checked_with_ctx(*v, || {
1559                        format!("In negation of TimestampNanoSecond({v})")
1560                    })?),
1561                    tz.clone(),
1562                ))
1563            }
1564            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1565                Ok(ScalarValue::TimestampMicrosecond(
1566                    Some(neg_checked_with_ctx(*v, || {
1567                        format!("In negation of TimestampMicroSecond({v})")
1568                    })?),
1569                    tz.clone(),
1570                ))
1571            }
1572            ScalarValue::TimestampMillisecond(Some(v), tz) => {
1573                Ok(ScalarValue::TimestampMillisecond(
1574                    Some(neg_checked_with_ctx(*v, || {
1575                        format!("In negation of TimestampMilliSecond({v})")
1576                    })?),
1577                    tz.clone(),
1578                ))
1579            }
1580            value => _internal_err!(
1581                "Can not run arithmetic negative on scalar value {value:?}"
1582            ),
1583        }
1584    }
1585
1586    /// Wrapping addition of `ScalarValue`
1587    ///
1588    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1589    /// should operate on Arrays directly, using vectorized array kernels
1590    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1591        let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1592        Self::try_from_array(r.as_ref(), 0)
1593    }
1594    /// Checked addition of `ScalarValue`
1595    ///
1596    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1597    /// should operate on Arrays directly, using vectorized array kernels
1598    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1599        let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1600        Self::try_from_array(r.as_ref(), 0)
1601    }
1602
1603    /// Wrapping subtraction of `ScalarValue`
1604    ///
1605    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1606    /// should operate on Arrays directly, using vectorized array kernels
1607    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1608        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1609        Self::try_from_array(r.as_ref(), 0)
1610    }
1611
1612    /// Checked subtraction of `ScalarValue`
1613    ///
1614    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1615    /// should operate on Arrays directly, using vectorized array kernels
1616    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1617        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1618        Self::try_from_array(r.as_ref(), 0)
1619    }
1620
1621    /// Wrapping multiplication of `ScalarValue`
1622    ///
1623    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1624    /// should operate on Arrays directly, using vectorized array kernels.
1625    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1626        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1627        Self::try_from_array(r.as_ref(), 0)
1628    }
1629
1630    /// Checked multiplication of `ScalarValue`
1631    ///
1632    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1633    /// should operate on Arrays directly, using vectorized array kernels.
1634    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1635        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1636        Self::try_from_array(r.as_ref(), 0)
1637    }
1638
1639    /// Performs `lhs / rhs`
1640    ///
1641    /// Overflow or division by zero will result in an error, with exception to
1642    /// floating point numbers, which instead follow the IEEE 754 rules.
1643    ///
1644    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1645    /// should operate on Arrays directly, using vectorized array kernels.
1646    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1647        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1648        Self::try_from_array(r.as_ref(), 0)
1649    }
1650
1651    /// Performs `lhs % rhs`
1652    ///
1653    /// Overflow or division by zero will result in an error, with exception to
1654    /// floating point numbers, which instead follow the IEEE 754 rules.
1655    ///
1656    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1657    /// should operate on Arrays directly, using vectorized array kernels.
1658    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1659        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1660        Self::try_from_array(r.as_ref(), 0)
1661    }
1662
1663    pub fn is_unsigned(&self) -> bool {
1664        matches!(
1665            self,
1666            ScalarValue::UInt8(_)
1667                | ScalarValue::UInt16(_)
1668                | ScalarValue::UInt32(_)
1669                | ScalarValue::UInt64(_)
1670        )
1671    }
1672
1673    /// whether this value is null or not.
1674    pub fn is_null(&self) -> bool {
1675        match self {
1676            ScalarValue::Boolean(v) => v.is_none(),
1677            ScalarValue::Null => true,
1678            ScalarValue::Float16(v) => v.is_none(),
1679            ScalarValue::Float32(v) => v.is_none(),
1680            ScalarValue::Float64(v) => v.is_none(),
1681            ScalarValue::Decimal128(v, _, _) => v.is_none(),
1682            ScalarValue::Decimal256(v, _, _) => v.is_none(),
1683            ScalarValue::Int8(v) => v.is_none(),
1684            ScalarValue::Int16(v) => v.is_none(),
1685            ScalarValue::Int32(v) => v.is_none(),
1686            ScalarValue::Int64(v) => v.is_none(),
1687            ScalarValue::UInt8(v) => v.is_none(),
1688            ScalarValue::UInt16(v) => v.is_none(),
1689            ScalarValue::UInt32(v) => v.is_none(),
1690            ScalarValue::UInt64(v) => v.is_none(),
1691            ScalarValue::Utf8(v)
1692            | ScalarValue::Utf8View(v)
1693            | ScalarValue::LargeUtf8(v) => v.is_none(),
1694            ScalarValue::Binary(v)
1695            | ScalarValue::BinaryView(v)
1696            | ScalarValue::FixedSizeBinary(_, v)
1697            | ScalarValue::LargeBinary(v) => v.is_none(),
1698            // arr.len() should be 1 for a list scalar, but we don't seem to
1699            // enforce that anywhere, so we still check against array length.
1700            ScalarValue::List(arr) => arr.len() == arr.null_count(),
1701            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1702            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1703            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1704            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1705            ScalarValue::Date32(v) => v.is_none(),
1706            ScalarValue::Date64(v) => v.is_none(),
1707            ScalarValue::Time32Second(v) => v.is_none(),
1708            ScalarValue::Time32Millisecond(v) => v.is_none(),
1709            ScalarValue::Time64Microsecond(v) => v.is_none(),
1710            ScalarValue::Time64Nanosecond(v) => v.is_none(),
1711            ScalarValue::TimestampSecond(v, _) => v.is_none(),
1712            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1713            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1714            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1715            ScalarValue::IntervalYearMonth(v) => v.is_none(),
1716            ScalarValue::IntervalDayTime(v) => v.is_none(),
1717            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1718            ScalarValue::DurationSecond(v) => v.is_none(),
1719            ScalarValue::DurationMillisecond(v) => v.is_none(),
1720            ScalarValue::DurationMicrosecond(v) => v.is_none(),
1721            ScalarValue::DurationNanosecond(v) => v.is_none(),
1722            ScalarValue::Union(v, _, _) => match v {
1723                Some((_, s)) => s.is_null(),
1724                None => true,
1725            },
1726            ScalarValue::Dictionary(_, v) => v.is_null(),
1727        }
1728    }
1729
1730    /// Absolute distance between two numeric values (of the same type). This method will return
1731    /// None if either one of the arguments are null. It might also return None if the resulting
1732    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
1733    /// rounded to the nearest integer.
1734    ///
1735    ///
1736    /// Note: the datatype itself must support subtraction.
1737    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1738        match (self, other) {
1739            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1740            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1741            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1742            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1743            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1744            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1745            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1746            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1747            // TODO: we might want to look into supporting ceil/floor here for floats.
1748            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1749                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1750            }
1751            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1752                Some((l - r).abs().round() as _)
1753            }
1754            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1755                Some((l - r).abs().round() as _)
1756            }
1757            _ => None,
1758        }
1759    }
1760
1761    /// Converts a scalar value into an 1-row array.
1762    ///
1763    /// # Errors
1764    ///
1765    /// Errors if the ScalarValue cannot be converted into a 1-row array
1766    pub fn to_array(&self) -> Result<ArrayRef> {
1767        self.to_array_of_size(1)
1768    }
1769
1770    /// Converts a scalar into an arrow [`Scalar`] (which implements
1771    /// the [`Datum`] interface).
1772    ///
1773    /// This can be used to call arrow compute kernels such as `lt`
1774    ///
1775    /// # Errors
1776    ///
1777    /// Errors if the ScalarValue cannot be converted into a 1-row array
1778    ///
1779    /// # Example
1780    /// ```
1781    /// use datafusion_common::ScalarValue;
1782    /// use arrow::array::{BooleanArray, Int32Array};
1783    ///
1784    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
1785    /// let five = ScalarValue::Int32(Some(5));
1786    ///
1787    /// let result = arrow::compute::kernels::cmp::lt(
1788    ///   &arr,
1789    ///   &five.to_scalar().unwrap(),
1790    /// ).unwrap();
1791    ///
1792    /// let expected = BooleanArray::from(vec![
1793    ///     Some(true),
1794    ///     None,
1795    ///     Some(false)
1796    ///   ]
1797    /// );
1798    ///
1799    /// assert_eq!(&result, &expected);
1800    /// ```
1801    /// [`Datum`]: arrow::array::Datum
1802    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
1803        Ok(Scalar::new(self.to_array_of_size(1)?))
1804    }
1805
1806    /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
1807    /// corresponding to those values. For example, an iterator of
1808    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
1809    ///
1810    /// Returns an error if the iterator is empty or if the
1811    /// [`ScalarValue`]s are not all the same type
1812    ///
1813    /// # Panics
1814    ///
1815    /// Panics if `self` is a dictionary with invalid key type
1816    ///
1817    /// # Example
1818    /// ```
1819    /// use datafusion_common::ScalarValue;
1820    /// use arrow::array::{ArrayRef, BooleanArray};
1821    ///
1822    /// let scalars = vec![
1823    ///   ScalarValue::Boolean(Some(true)),
1824    ///   ScalarValue::Boolean(None),
1825    ///   ScalarValue::Boolean(Some(false)),
1826    /// ];
1827    ///
1828    /// // Build an Array from the list of ScalarValues
1829    /// let array = ScalarValue::iter_to_array(scalars.into_iter())
1830    ///   .unwrap();
1831    ///
1832    /// let expected: ArrayRef = std::sync::Arc::new(
1833    ///   BooleanArray::from(vec![
1834    ///     Some(true),
1835    ///     None,
1836    ///     Some(false)
1837    ///   ]
1838    /// ));
1839    ///
1840    /// assert_eq!(&array, &expected);
1841    /// ```
1842    pub fn iter_to_array(
1843        scalars: impl IntoIterator<Item = ScalarValue>,
1844    ) -> Result<ArrayRef> {
1845        let mut scalars = scalars.into_iter().peekable();
1846
1847        // figure out the type based on the first element
1848        let data_type = match scalars.peek() {
1849            None => {
1850                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1851            }
1852            Some(sv) => sv.data_type(),
1853        };
1854
1855        /// Creates an array of $ARRAY_TY by unpacking values of
1856        /// SCALAR_TY for primitive types
1857        macro_rules! build_array_primitive {
1858            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1859                {
1860                    let array = scalars.map(|sv| {
1861                        if let ScalarValue::$SCALAR_TY(v) = sv {
1862                            Ok(v)
1863                        } else {
1864                            _exec_err!(
1865                                "Inconsistent types in ScalarValue::iter_to_array. \
1866                                    Expected {:?}, got {:?}",
1867                                data_type, sv
1868                            )
1869                        }
1870                    })
1871                    .collect::<Result<$ARRAY_TY>>()?;
1872                    Arc::new(array)
1873                }
1874            }};
1875        }
1876
1877        macro_rules! build_array_primitive_tz {
1878            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1879                {
1880                    let array = scalars.map(|sv| {
1881                        if let ScalarValue::$SCALAR_TY(v, _) = sv {
1882                            Ok(v)
1883                        } else {
1884                            _exec_err!(
1885                                "Inconsistent types in ScalarValue::iter_to_array. \
1886                                    Expected {:?}, got {:?}",
1887                                data_type, sv
1888                            )
1889                        }
1890                    })
1891                    .collect::<Result<$ARRAY_TY>>()?;
1892                    Arc::new(array.with_timezone_opt($TZ.clone()))
1893                }
1894            }};
1895        }
1896
1897        /// Creates an array of $ARRAY_TY by unpacking values of
1898        /// SCALAR_TY for "string-like" types.
1899        macro_rules! build_array_string {
1900            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1901                {
1902                    let array = scalars.map(|sv| {
1903                        if let ScalarValue::$SCALAR_TY(v) = sv {
1904                            Ok(v)
1905                        } else {
1906                            _exec_err!(
1907                                "Inconsistent types in ScalarValue::iter_to_array. \
1908                                    Expected {:?}, got {:?}",
1909                                data_type, sv
1910                            )
1911                        }
1912                    })
1913                    .collect::<Result<$ARRAY_TY>>()?;
1914                    Arc::new(array)
1915                }
1916            }};
1917        }
1918
1919        let array: ArrayRef = match &data_type {
1920            DataType::Decimal128(precision, scale) => {
1921                let decimal_array =
1922                    ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1923                Arc::new(decimal_array)
1924            }
1925            DataType::Decimal256(precision, scale) => {
1926                let decimal_array =
1927                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1928                Arc::new(decimal_array)
1929            }
1930            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1931            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1932            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1933            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1934            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1935            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1936            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1937            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1938            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1939            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1940            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1941            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1942            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1943            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1944            DataType::Utf8 => build_array_string!(StringArray, Utf8),
1945            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1946            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1947            DataType::Binary => build_array_string!(BinaryArray, Binary),
1948            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1949            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1950            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1951            DataType::Time32(TimeUnit::Second) => {
1952                build_array_primitive!(Time32SecondArray, Time32Second)
1953            }
1954            DataType::Time32(TimeUnit::Millisecond) => {
1955                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1956            }
1957            DataType::Time64(TimeUnit::Microsecond) => {
1958                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1959            }
1960            DataType::Time64(TimeUnit::Nanosecond) => {
1961                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
1962            }
1963            DataType::Timestamp(TimeUnit::Second, tz) => {
1964                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
1965            }
1966            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1967                build_array_primitive_tz!(
1968                    TimestampMillisecondArray,
1969                    TimestampMillisecond,
1970                    tz
1971                )
1972            }
1973            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1974                build_array_primitive_tz!(
1975                    TimestampMicrosecondArray,
1976                    TimestampMicrosecond,
1977                    tz
1978                )
1979            }
1980            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1981                build_array_primitive_tz!(
1982                    TimestampNanosecondArray,
1983                    TimestampNanosecond,
1984                    tz
1985                )
1986            }
1987            DataType::Duration(TimeUnit::Second) => {
1988                build_array_primitive!(DurationSecondArray, DurationSecond)
1989            }
1990            DataType::Duration(TimeUnit::Millisecond) => {
1991                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
1992            }
1993            DataType::Duration(TimeUnit::Microsecond) => {
1994                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
1995            }
1996            DataType::Duration(TimeUnit::Nanosecond) => {
1997                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
1998            }
1999            DataType::Interval(IntervalUnit::DayTime) => {
2000                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2001            }
2002            DataType::Interval(IntervalUnit::YearMonth) => {
2003                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2004            }
2005            DataType::Interval(IntervalUnit::MonthDayNano) => {
2006                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2007            }
2008            DataType::FixedSizeList(_, _) => {
2009                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
2010                // The length of nulls here we got is 1, so we need to resize the length of nulls to
2011                // the length of non-nulls.
2012                let mut arrays =
2013                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2014                let first_non_null_data_type = arrays
2015                    .iter()
2016                    .find(|sv| !sv.is_null(0))
2017                    .map(|sv| sv.data_type().to_owned());
2018                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2019                    for array in arrays.iter_mut() {
2020                        if array.is_null(0) {
2021                            *array = Arc::new(FixedSizeListArray::new_null(
2022                                Arc::clone(&f),
2023                                l,
2024                                1,
2025                            ));
2026                        }
2027                    }
2028                }
2029                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2030                arrow::compute::concat(arrays.as_slice())?
2031            }
2032            DataType::List(_)
2033            | DataType::LargeList(_)
2034            | DataType::Map(_, _)
2035            | DataType::Struct(_)
2036            | DataType::Union(_, _) => {
2037                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2038                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2039                arrow::compute::concat(arrays.as_slice())?
2040            }
2041            DataType::Dictionary(key_type, value_type) => {
2042                // create the values array
2043                let value_scalars = scalars
2044                    .map(|scalar| match scalar {
2045                        ScalarValue::Dictionary(inner_key_type, scalar) => {
2046                            if &inner_key_type == key_type {
2047                                Ok(*scalar)
2048                            } else {
2049                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2050                            }
2051                        }
2052                        _ => {
2053                            _exec_err!(
2054                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2055                            )
2056                        }
2057                    })
2058                    .collect::<Result<Vec<_>>>()?;
2059
2060                let values = Self::iter_to_array(value_scalars)?;
2061                assert_eq!(values.data_type(), value_type.as_ref());
2062
2063                match key_type.as_ref() {
2064                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2065                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2066                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2067                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2068                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2069                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2070                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2071                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2072                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2073                }
2074            }
2075            DataType::FixedSizeBinary(size) => {
2076                let array = scalars
2077                    .map(|sv| {
2078                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
2079                            Ok(v)
2080                        } else {
2081                            _exec_err!(
2082                                "Inconsistent types in ScalarValue::iter_to_array. \
2083                                Expected {data_type:?}, got {sv:?}"
2084                            )
2085                        }
2086                    })
2087                    .collect::<Result<Vec<_>>>()?;
2088                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2089                    array.into_iter(),
2090                    *size,
2091                )?;
2092                Arc::new(array)
2093            }
            // explicitly enumerate unsupported types so newly added
            // types must be acknowledged. Time32 and Time64 types are
            // not supported if the TimeUnit is not valid (Time32 can
            // only be used with Second and Millisecond, Time64 only
            // with Microsecond and Nanosecond)
2099            DataType::Time32(TimeUnit::Microsecond)
2100            | DataType::Time32(TimeUnit::Nanosecond)
2101            | DataType::Time64(TimeUnit::Second)
2102            | DataType::Time64(TimeUnit::Millisecond)
2103            | DataType::RunEndEncoded(_, _)
2104            | DataType::ListView(_)
2105            | DataType::LargeListView(_) => {
2106                return _not_impl_err!(
2107                    "Unsupported creation of {:?} array from ScalarValue {:?}",
2108                    data_type,
2109                    scalars.peek()
2110                );
2111            }
2112        };
2113        Ok(array)
2114    }
2115
2116    fn iter_to_null_array(
2117        scalars: impl IntoIterator<Item = ScalarValue>,
2118    ) -> Result<ArrayRef> {
2119        let length = scalars.into_iter().try_fold(
2120            0usize,
2121            |r, element: ScalarValue| match element {
2122                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2123                s => {
2124                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2125                }
2126            },
2127        )?;
2128        Ok(new_null_array(&DataType::Null, length))
2129    }
2130
2131    fn iter_to_decimal_array(
2132        scalars: impl IntoIterator<Item = ScalarValue>,
2133        precision: u8,
2134        scale: i8,
2135    ) -> Result<Decimal128Array> {
2136        let array = scalars
2137            .into_iter()
2138            .map(|element: ScalarValue| match element {
2139                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2140                s => {
2141                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2142                }
2143            })
2144            .collect::<Result<Decimal128Array>>()?
2145            .with_precision_and_scale(precision, scale)?;
2146        Ok(array)
2147    }
2148
2149    fn iter_to_decimal256_array(
2150        scalars: impl IntoIterator<Item = ScalarValue>,
2151        precision: u8,
2152        scale: i8,
2153    ) -> Result<Decimal256Array> {
2154        let array = scalars
2155            .into_iter()
2156            .map(|element: ScalarValue| match element {
2157                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2158                s => {
2159                    _internal_err!(
2160                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2161                    )
2162                }
2163            })
2164            .collect::<Result<Decimal256Array>>()?
2165            .with_precision_and_scale(precision, scale)?;
2166        Ok(array)
2167    }
2168
2169    fn build_decimal_array(
2170        value: Option<i128>,
2171        precision: u8,
2172        scale: i8,
2173        size: usize,
2174    ) -> Result<Decimal128Array> {
2175        Ok(match value {
2176            Some(val) => Decimal128Array::from(vec![val; size])
2177                .with_precision_and_scale(precision, scale)?,
2178            None => {
2179                let mut builder = Decimal128Array::builder(size)
2180                    .with_precision_and_scale(precision, scale)?;
2181                builder.append_nulls(size);
2182                builder.finish()
2183            }
2184        })
2185    }
2186
2187    fn build_decimal256_array(
2188        value: Option<i256>,
2189        precision: u8,
2190        scale: i8,
2191        size: usize,
2192    ) -> Result<Decimal256Array> {
2193        Ok(repeat(value)
2194            .take(size)
2195            .collect::<Decimal256Array>()
2196            .with_precision_and_scale(precision, scale)?)
2197    }
2198
2199    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2200    /// `data_type`, to a single element [`ListArray`].
2201    ///
2202    /// Example
2203    /// ```
2204    /// use datafusion_common::ScalarValue;
2205    /// use arrow::array::{ListArray, Int32Array};
2206    /// use arrow::datatypes::{DataType, Int32Type};
2207    /// use datafusion_common::cast::as_list_array;
2208    ///
2209    /// let scalars = vec![
2210    ///    ScalarValue::Int32(Some(1)),
2211    ///    ScalarValue::Int32(None),
2212    ///    ScalarValue::Int32(Some(2))
2213    /// ];
2214    ///
2215    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2216    ///
2217    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2218    ///     vec![
2219    ///        Some(vec![Some(1), None, Some(2)])
2220    ///     ]);
2221    ///
2222    /// assert_eq!(*result, expected);
2223    /// ```
2224    pub fn new_list(
2225        values: &[ScalarValue],
2226        data_type: &DataType,
2227        nullable: bool,
2228    ) -> Arc<ListArray> {
2229        let values = if values.is_empty() {
2230            new_empty_array(data_type)
2231        } else {
2232            Self::iter_to_array(values.iter().cloned()).unwrap()
2233        };
2234        Arc::new(
2235            SingleRowListArrayBuilder::new(values)
2236                .with_nullable(nullable)
2237                .build_list_array(),
2238        )
2239    }
2240
2241    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2242    pub fn new_list_nullable(
2243        values: &[ScalarValue],
2244        data_type: &DataType,
2245    ) -> Arc<ListArray> {
2246        Self::new_list(values, data_type, true)
2247    }
2248
2249    /// Create ListArray with Null with specific data type
2250    ///
2251    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2252    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2253        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2254        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2255            &data_type, null_len,
2256        ))))
2257    }
2258
2259    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2260    /// `data_type`, to a [`ListArray`].
2261    ///
2262    /// Example
2263    /// ```
2264    /// use datafusion_common::ScalarValue;
2265    /// use arrow::array::{ListArray, Int32Array};
2266    /// use arrow::datatypes::{DataType, Int32Type};
2267    /// use datafusion_common::cast::as_list_array;
2268    ///
2269    /// let scalars = vec![
2270    ///    ScalarValue::Int32(Some(1)),
2271    ///    ScalarValue::Int32(None),
2272    ///    ScalarValue::Int32(Some(2))
2273    /// ];
2274    ///
2275    /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2276    ///
2277    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2278    ///     vec![
2279    ///        Some(vec![Some(1), None, Some(2)])
2280    ///     ]);
2281    ///
2282    /// assert_eq!(*result, expected);
2283    /// ```
2284    pub fn new_list_from_iter(
2285        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2286        data_type: &DataType,
2287        nullable: bool,
2288    ) -> Arc<ListArray> {
2289        let values = if values.len() == 0 {
2290            new_empty_array(data_type)
2291        } else {
2292            Self::iter_to_array(values).unwrap()
2293        };
2294        Arc::new(
2295            SingleRowListArrayBuilder::new(values)
2296                .with_nullable(nullable)
2297                .build_list_array(),
2298        )
2299    }
2300
2301    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2302    /// `data_type`, to a [`LargeListArray`].
2303    ///
2304    /// Example
2305    /// ```
2306    /// use datafusion_common::ScalarValue;
2307    /// use arrow::array::{LargeListArray, Int32Array};
2308    /// use arrow::datatypes::{DataType, Int32Type};
2309    /// use datafusion_common::cast::as_large_list_array;
2310    ///
2311    /// let scalars = vec![
2312    ///    ScalarValue::Int32(Some(1)),
2313    ///    ScalarValue::Int32(None),
2314    ///    ScalarValue::Int32(Some(2))
2315    /// ];
2316    ///
2317    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2318    ///
2319    /// let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(
2320    ///     vec![
2321    ///        Some(vec![Some(1), None, Some(2)])
2322    ///     ]);
2323    ///
2324    /// assert_eq!(*result, expected);
2325    /// ```
2326    pub fn new_large_list(
2327        values: &[ScalarValue],
2328        data_type: &DataType,
2329    ) -> Arc<LargeListArray> {
2330        let values = if values.is_empty() {
2331            new_empty_array(data_type)
2332        } else {
2333            Self::iter_to_array(values.iter().cloned()).unwrap()
2334        };
2335        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2336    }
2337
2338    /// Converts a scalar value into an array of `size` rows.
2339    ///
2340    /// # Errors
2341    ///
2342    /// Errors if `self` is
2343    /// - a decimal that fails be converted to a decimal array of size
2344    /// - a `FixedsizeList` that fails to be concatenated into an array of size
2345    /// - a `List` that fails to be concatenated into an array of size
2346    /// - a `Dictionary` that fails be converted to a dictionary array of size
2347    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2348        Ok(match self {
2349            ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2350                ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2351            ),
2352            ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2353                ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2354            ),
2355            ScalarValue::Boolean(e) => {
2356                Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2357            }
2358            ScalarValue::Float64(e) => {
2359                build_array_from_option!(Float64, Float64Array, e, size)
2360            }
2361            ScalarValue::Float32(e) => {
2362                build_array_from_option!(Float32, Float32Array, e, size)
2363            }
2364            ScalarValue::Float16(e) => {
2365                build_array_from_option!(Float16, Float16Array, e, size)
2366            }
2367            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2368            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2369            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2370            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2371            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2372            ScalarValue::UInt16(e) => {
2373                build_array_from_option!(UInt16, UInt16Array, e, size)
2374            }
2375            ScalarValue::UInt32(e) => {
2376                build_array_from_option!(UInt32, UInt32Array, e, size)
2377            }
2378            ScalarValue::UInt64(e) => {
2379                build_array_from_option!(UInt64, UInt64Array, e, size)
2380            }
2381            ScalarValue::TimestampSecond(e, tz_opt) => {
2382                build_timestamp_array_from_option!(
2383                    TimeUnit::Second,
2384                    tz_opt.clone(),
2385                    TimestampSecondArray,
2386                    e,
2387                    size
2388                )
2389            }
2390            ScalarValue::TimestampMillisecond(e, tz_opt) => {
2391                build_timestamp_array_from_option!(
2392                    TimeUnit::Millisecond,
2393                    tz_opt.clone(),
2394                    TimestampMillisecondArray,
2395                    e,
2396                    size
2397                )
2398            }
2399
2400            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2401                build_timestamp_array_from_option!(
2402                    TimeUnit::Microsecond,
2403                    tz_opt.clone(),
2404                    TimestampMicrosecondArray,
2405                    e,
2406                    size
2407                )
2408            }
2409            ScalarValue::TimestampNanosecond(e, tz_opt) => {
2410                build_timestamp_array_from_option!(
2411                    TimeUnit::Nanosecond,
2412                    tz_opt.clone(),
2413                    TimestampNanosecondArray,
2414                    e,
2415                    size
2416                )
2417            }
2418            ScalarValue::Utf8(e) => match e {
2419                Some(value) => {
2420                    Arc::new(StringArray::from_iter_values(repeat(value).take(size)))
2421                }
2422                None => new_null_array(&DataType::Utf8, size),
2423            },
2424            ScalarValue::Utf8View(e) => match e {
2425                Some(value) => {
2426                    Arc::new(StringViewArray::from_iter_values(repeat(value).take(size)))
2427                }
2428                None => new_null_array(&DataType::Utf8View, size),
2429            },
2430            ScalarValue::LargeUtf8(e) => match e {
2431                Some(value) => {
2432                    Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size)))
2433                }
2434                None => new_null_array(&DataType::LargeUtf8, size),
2435            },
2436            ScalarValue::Binary(e) => match e {
2437                Some(value) => Arc::new(
2438                    repeat(Some(value.as_slice()))
2439                        .take(size)
2440                        .collect::<BinaryArray>(),
2441                ),
2442                None => {
2443                    Arc::new(repeat(None::<&str>).take(size).collect::<BinaryArray>())
2444                }
2445            },
2446            ScalarValue::BinaryView(e) => match e {
2447                Some(value) => Arc::new(
2448                    repeat(Some(value.as_slice()))
2449                        .take(size)
2450                        .collect::<BinaryViewArray>(),
2451                ),
2452                None => {
2453                    Arc::new(repeat(None::<&str>).take(size).collect::<BinaryViewArray>())
2454                }
2455            },
2456            ScalarValue::FixedSizeBinary(s, e) => match e {
2457                Some(value) => Arc::new(
2458                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2459                        repeat(Some(value.as_slice())).take(size),
2460                        *s,
2461                    )
2462                    .unwrap(),
2463                ),
2464                None => Arc::new(
2465                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2466                        repeat(None::<&[u8]>).take(size),
2467                        *s,
2468                    )
2469                    .unwrap(),
2470                ),
2471            },
2472            ScalarValue::LargeBinary(e) => match e {
2473                Some(value) => Arc::new(
2474                    repeat(Some(value.as_slice()))
2475                        .take(size)
2476                        .collect::<LargeBinaryArray>(),
2477                ),
2478                None => Arc::new(
2479                    repeat(None::<&str>)
2480                        .take(size)
2481                        .collect::<LargeBinaryArray>(),
2482                ),
2483            },
2484            ScalarValue::List(arr) => {
2485                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2486            }
2487            ScalarValue::LargeList(arr) => {
2488                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2489            }
2490            ScalarValue::FixedSizeList(arr) => {
2491                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2492            }
2493            ScalarValue::Struct(arr) => {
2494                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2495            }
2496            ScalarValue::Map(arr) => {
2497                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2498            }
2499            ScalarValue::Date32(e) => {
2500                build_array_from_option!(Date32, Date32Array, e, size)
2501            }
2502            ScalarValue::Date64(e) => {
2503                build_array_from_option!(Date64, Date64Array, e, size)
2504            }
2505            ScalarValue::Time32Second(e) => {
2506                build_array_from_option!(
2507                    Time32,
2508                    TimeUnit::Second,
2509                    Time32SecondArray,
2510                    e,
2511                    size
2512                )
2513            }
2514            ScalarValue::Time32Millisecond(e) => {
2515                build_array_from_option!(
2516                    Time32,
2517                    TimeUnit::Millisecond,
2518                    Time32MillisecondArray,
2519                    e,
2520                    size
2521                )
2522            }
2523            ScalarValue::Time64Microsecond(e) => {
2524                build_array_from_option!(
2525                    Time64,
2526                    TimeUnit::Microsecond,
2527                    Time64MicrosecondArray,
2528                    e,
2529                    size
2530                )
2531            }
2532            ScalarValue::Time64Nanosecond(e) => {
2533                build_array_from_option!(
2534                    Time64,
2535                    TimeUnit::Nanosecond,
2536                    Time64NanosecondArray,
2537                    e,
2538                    size
2539                )
2540            }
2541            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2542                Interval,
2543                IntervalUnit::DayTime,
2544                IntervalDayTimeArray,
2545                e,
2546                size
2547            ),
2548            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2549                Interval,
2550                IntervalUnit::YearMonth,
2551                IntervalYearMonthArray,
2552                e,
2553                size
2554            ),
2555            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2556                Interval,
2557                IntervalUnit::MonthDayNano,
2558                IntervalMonthDayNanoArray,
2559                e,
2560                size
2561            ),
2562            ScalarValue::DurationSecond(e) => build_array_from_option!(
2563                Duration,
2564                TimeUnit::Second,
2565                DurationSecondArray,
2566                e,
2567                size
2568            ),
2569            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2570                Duration,
2571                TimeUnit::Millisecond,
2572                DurationMillisecondArray,
2573                e,
2574                size
2575            ),
2576            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2577                Duration,
2578                TimeUnit::Microsecond,
2579                DurationMicrosecondArray,
2580                e,
2581                size
2582            ),
2583            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2584                Duration,
2585                TimeUnit::Nanosecond,
2586                DurationNanosecondArray,
2587                e,
2588                size
2589            ),
2590            ScalarValue::Union(value, fields, mode) => match value {
2591                Some((v_id, value)) => {
2592                    let mut new_fields = Vec::with_capacity(fields.len());
2593                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2594                    for (f_id, field) in fields.iter() {
2595                        let ar = if f_id == *v_id {
2596                            value.to_array_of_size(size)?
2597                        } else {
2598                            let dt = field.data_type();
2599                            match mode {
2600                                UnionMode::Sparse => new_null_array(dt, size),
2601                                // In a dense union, only the child with values needs to be
2602                                // allocated
2603                                UnionMode::Dense => new_null_array(dt, 0),
2604                            }
2605                        };
2606                        let field = (**field).clone();
2607                        child_arrays.push(ar);
2608                        new_fields.push(field.clone());
2609                    }
2610                    let type_ids = repeat(*v_id).take(size);
2611                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2612                    let value_offsets = match mode {
2613                        UnionMode::Sparse => None,
2614                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
2615                    };
2616                    let ar = UnionArray::try_new(
2617                        fields.clone(),
2618                        type_ids,
2619                        value_offsets,
2620                        child_arrays,
2621                    )
2622                    .map_err(|e| DataFusionError::ArrowError(e, None))?;
2623                    Arc::new(ar)
2624                }
2625                None => {
2626                    let dt = self.data_type();
2627                    new_null_array(&dt, size)
2628                }
2629            },
2630            ScalarValue::Dictionary(key_type, v) => {
2631                // values array is one element long (the value)
2632                match key_type.as_ref() {
2633                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2634                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2635                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2636                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2637                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2638                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2639                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2640                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2641                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2642                }
2643            }
2644            ScalarValue::Null => new_null_array(&DataType::Null, size),
2645        })
2646    }
2647
2648    fn get_decimal_value_from_array(
2649        array: &dyn Array,
2650        index: usize,
2651        precision: u8,
2652        scale: i8,
2653    ) -> Result<ScalarValue> {
2654        match array.data_type() {
2655            DataType::Decimal128(_, _) => {
2656                let array = as_decimal128_array(array)?;
2657                if array.is_null(index) {
2658                    Ok(ScalarValue::Decimal128(None, precision, scale))
2659                } else {
2660                    let value = array.value(index);
2661                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2662                }
2663            }
2664            DataType::Decimal256(_, _) => {
2665                let array = as_decimal256_array(array)?;
2666                if array.is_null(index) {
2667                    Ok(ScalarValue::Decimal256(None, precision, scale))
2668                } else {
2669                    let value = array.value(index);
2670                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2671                }
2672            }
2673            _ => _internal_err!("Unsupported decimal type"),
2674        }
2675    }
2676
2677    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2678        let arrays = repeat(arr).take(size).collect::<Vec<_>>();
2679        let ret = match !arrays.is_empty() {
2680            true => arrow::compute::concat(arrays.as_slice())?,
2681            false => arr.slice(0, 0),
2682        };
2683        Ok(ret)
2684    }
2685
2686    /// Retrieve ScalarValue for each row in `array`
2687    ///
2688    /// Example 1: Array (ScalarValue::Int32)
2689    /// ```
2690    /// use datafusion_common::ScalarValue;
2691    /// use arrow::array::ListArray;
2692    /// use arrow::datatypes::{DataType, Int32Type};
2693    ///
2694    /// // Equivalent to [[1,2,3], [4,5]]
2695    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2696    ///    Some(vec![Some(1), Some(2), Some(3)]),
2697    ///    Some(vec![Some(4), Some(5)])
2698    /// ]);
2699    ///
2700    /// // Convert the array into Scalar Values for each row
2701    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2702    ///
2703    /// let expected = vec![
2704    /// vec![
2705    ///     ScalarValue::Int32(Some(1)),
2706    ///     ScalarValue::Int32(Some(2)),
2707    ///     ScalarValue::Int32(Some(3)),
2708    /// ],
2709    /// vec![
2710    ///    ScalarValue::Int32(Some(4)),
2711    ///    ScalarValue::Int32(Some(5)),
2712    /// ],
2713    /// ];
2714    ///
2715    /// assert_eq!(scalar_vec, expected);
2716    /// ```
2717    ///
2718    /// Example 2: Nested array (ScalarValue::List)
2719    /// ```
2720    /// use datafusion_common::ScalarValue;
2721    /// use arrow::array::ListArray;
2722    /// use arrow::datatypes::{DataType, Int32Type};
2723    /// use datafusion_common::utils::SingleRowListArrayBuilder;
2724    /// use std::sync::Arc;
2725    ///
2726    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2727    ///    Some(vec![Some(1), Some(2), Some(3)]),
2728    ///    Some(vec![Some(4), Some(5)])
2729    /// ]);
2730    ///
2731    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
2732    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
2733    ///
2734    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
2735    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2736    ///
2737    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2738    ///     Some(vec![Some(1), Some(2), Some(3)]),
2739    /// ]);
2740    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2741    ///     Some(vec![Some(4), Some(5)]),
2742    /// ]);
2743    ///
2744    /// let expected = vec![
2745    ///   vec![
2746    ///     ScalarValue::List(Arc::new(l1)),
2747    ///     ScalarValue::List(Arc::new(l2)),
2748    ///   ],
2749    /// ];
2750    ///
2751    /// assert_eq!(scalar_vec, expected);
2752    /// ```
2753    pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2754        let mut scalars = Vec::with_capacity(array.len());
2755
2756        for index in 0..array.len() {
2757            let nested_array = array.as_list::<i32>().value(index);
2758            let scalar_values = (0..nested_array.len())
2759                .map(|i| ScalarValue::try_from_array(&nested_array, i))
2760                .collect::<Result<Vec<_>>>()?;
2761            scalars.push(scalar_values);
2762        }
2763
2764        Ok(scalars)
2765    }
2766
2767    // TODO: Support more types after other ScalarValue is wrapped with ArrayRef
2768    /// Get raw data (inner array) inside ScalarValue
2769    pub fn raw_data(&self) -> Result<ArrayRef> {
2770        match self {
2771            ScalarValue::List(arr) => Ok(arr.to_owned()),
2772            _ => _internal_err!("ScalarValue is not a list"),
2773        }
2774    }
2775
2776    /// Converts a value in `array` at `index` into a ScalarValue
2777    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
2778        // handle NULL value
2779        if !array.is_valid(index) {
2780            return array.data_type().try_into();
2781        }
2782
2783        Ok(match array.data_type() {
2784            DataType::Null => ScalarValue::Null,
2785            DataType::Decimal128(precision, scale) => {
2786                ScalarValue::get_decimal_value_from_array(
2787                    array, index, *precision, *scale,
2788                )?
2789            }
2790            DataType::Decimal256(precision, scale) => {
2791                ScalarValue::get_decimal_value_from_array(
2792                    array, index, *precision, *scale,
2793                )?
2794            }
2795            DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)?,
2796            DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)?,
2797            DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?,
2798            DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?,
2799            DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)?,
2800            DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?,
2801            DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?,
2802            DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?,
2803            DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)?,
2804            DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)?,
2805            DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?,
2806            DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?,
2807            DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?,
2808            DataType::LargeBinary => {
2809                typed_cast!(array, index, LargeBinaryArray, LargeBinary)?
2810            }
2811            DataType::BinaryView => {
2812                typed_cast!(array, index, BinaryViewArray, BinaryView)?
2813            }
2814            DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?,
2815            DataType::LargeUtf8 => {
2816                typed_cast!(array, index, LargeStringArray, LargeUtf8)?
2817            }
2818            DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?,
2819            DataType::List(field) => {
2820                let list_array = array.as_list::<i32>();
2821                let nested_array = list_array.value(index);
2822                // Produces a single element `ListArray` with the value at `index`.
2823                SingleRowListArrayBuilder::new(nested_array)
2824                    .with_field(field)
2825                    .build_list_scalar()
2826            }
2827            DataType::LargeList(field) => {
2828                let list_array = as_large_list_array(array);
2829                let nested_array = list_array.value(index);
2830                // Produces a single element `LargeListArray` with the value at `index`.
2831                SingleRowListArrayBuilder::new(nested_array)
2832                    .with_field(field)
2833                    .build_large_list_scalar()
2834            }
2835            // TODO: There is no test for FixedSizeList now, add it later
2836            DataType::FixedSizeList(field, _) => {
2837                let list_array = as_fixed_size_list_array(array)?;
2838                let nested_array = list_array.value(index);
2839                // Produces a single element `FixedSizeListArray` with the value at `index`.
2840                let list_size = nested_array.len();
2841                SingleRowListArrayBuilder::new(nested_array)
2842                    .with_field(field)
2843                    .build_fixed_size_list_scalar(list_size)
2844            }
2845            DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?,
2846            DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?,
2847            DataType::Time32(TimeUnit::Second) => {
2848                typed_cast!(array, index, Time32SecondArray, Time32Second)?
2849            }
2850            DataType::Time32(TimeUnit::Millisecond) => {
2851                typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)?
2852            }
2853            DataType::Time64(TimeUnit::Microsecond) => {
2854                typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)?
2855            }
2856            DataType::Time64(TimeUnit::Nanosecond) => {
2857                typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)?
2858            }
2859            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
2860                array,
2861                index,
2862                TimestampSecondArray,
2863                TimestampSecond,
2864                tz_opt
2865            )?,
2866            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
2867                array,
2868                index,
2869                TimestampMillisecondArray,
2870                TimestampMillisecond,
2871                tz_opt
2872            )?,
2873            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
2874                array,
2875                index,
2876                TimestampMicrosecondArray,
2877                TimestampMicrosecond,
2878                tz_opt
2879            )?,
2880            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
2881                array,
2882                index,
2883                TimestampNanosecondArray,
2884                TimestampNanosecond,
2885                tz_opt
2886            )?,
2887            DataType::Dictionary(key_type, _) => {
2888                let (values_array, values_index) = match key_type.as_ref() {
2889                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
2890                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
2891                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
2892                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
2893                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
2894                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
2895                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
2896                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
2897                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2898                };
2899                // look up the index in the values dictionary
2900                let value = match values_index {
2901                    Some(values_index) => {
2902                        ScalarValue::try_from_array(values_array, values_index)
2903                    }
2904                    // else entry was null, so return null
2905                    None => values_array.data_type().try_into(),
2906                }?;
2907
2908                Self::Dictionary(key_type.clone(), Box::new(value))
2909            }
2910            DataType::Struct(_) => {
2911                let a = array.slice(index, 1);
2912                Self::Struct(Arc::new(a.as_struct().to_owned()))
2913            }
2914            DataType::FixedSizeBinary(_) => {
2915                let array = as_fixed_size_binary_array(array)?;
2916                let size = match array.data_type() {
2917                    DataType::FixedSizeBinary(size) => *size,
2918                    _ => unreachable!(),
2919                };
2920                ScalarValue::FixedSizeBinary(
2921                    size,
2922                    match array.is_null(index) {
2923                        true => None,
2924                        false => Some(array.value(index).into()),
2925                    },
2926                )
2927            }
2928            DataType::Interval(IntervalUnit::DayTime) => {
2929                typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)?
2930            }
2931            DataType::Interval(IntervalUnit::YearMonth) => {
2932                typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)?
2933            }
2934            DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!(
2935                array,
2936                index,
2937                IntervalMonthDayNanoArray,
2938                IntervalMonthDayNano
2939            )?,
2940
2941            DataType::Duration(TimeUnit::Second) => {
2942                typed_cast!(array, index, DurationSecondArray, DurationSecond)?
2943            }
2944            DataType::Duration(TimeUnit::Millisecond) => {
2945                typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)?
2946            }
2947            DataType::Duration(TimeUnit::Microsecond) => {
2948                typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)?
2949            }
2950            DataType::Duration(TimeUnit::Nanosecond) => {
2951                typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)?
2952            }
2953            DataType::Map(_, _) => {
2954                let a = array.slice(index, 1);
2955                Self::Map(Arc::new(a.as_map().to_owned()))
2956            }
2957            DataType::Union(fields, mode) => {
2958                let array = as_union_array(array);
2959                let ti = array.type_id(index);
2960                let index = array.value_offset(index);
2961                let value = ScalarValue::try_from_array(array.child(ti), index)?;
2962                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
2963            }
2964            other => {
2965                return _not_impl_err!(
2966                    "Can't create a scalar from array of type \"{other:?}\""
2967                );
2968            }
2969        })
2970    }
2971
2972    /// Try to parse `value` into a ScalarValue of type `target_type`
2973    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
2974        ScalarValue::from(value).cast_to(target_type)
2975    }
2976
2977    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
2978    ///
2979    /// Returns `None` if this `ScalarValue` is not a logical string type or the
2980    /// `ScalarValue` represents the `NULL` value.
2981    ///
2982    /// Note you can use [`Option::flatten`] to check for non null logical
2983    /// strings.
2984    ///
2985    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
2986    /// [`ScalarValue::Dictionary`] with a logical string value and store
2987    /// strings and can be accessed as `&str` using this method.
2988    ///
2989    /// # Example: logical strings
2990    /// ```
2991    /// # use datafusion_common::ScalarValue;
2992    /// /// non strings return None
2993    /// let scalar = ScalarValue::from(42);
2994    /// assert_eq!(scalar.try_as_str(), None);
2995    /// // Non null logical string returns Some(Some(&str))
2996    /// let scalar = ScalarValue::from("hello");
2997    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
2998    /// // Null logical string returns Some(None)
2999    /// let scalar = ScalarValue::Utf8(None);
3000    /// assert_eq!(scalar.try_as_str(), Some(None));
3001    /// ```
3002    ///
3003    /// # Example: use [`Option::flatten`] to check for non-null logical strings
3004    /// ```
3005    /// # use datafusion_common::ScalarValue;
3006    /// // Non null logical string returns Some(Some(&str))
3007    /// let scalar = ScalarValue::from("hello");
3008    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
3009    /// ```
3010    pub fn try_as_str(&self) -> Option<Option<&str>> {
3011        let v = match self {
3012            ScalarValue::Utf8(v) => v,
3013            ScalarValue::LargeUtf8(v) => v,
3014            ScalarValue::Utf8View(v) => v,
3015            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3016            _ => return None,
3017        };
3018        Some(v.as_ref().map(|v| v.as_str()))
3019    }
3020
3021    /// Try to cast this value to a ScalarValue of type `data_type`
3022    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3023        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3024    }
3025
3026    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
3027    pub fn cast_to_with_options(
3028        &self,
3029        target_type: &DataType,
3030        cast_options: &CastOptions<'static>,
3031    ) -> Result<Self> {
3032        let scalar_array = match (self, target_type) {
3033            (
3034                ScalarValue::Float64(Some(float_ts)),
3035                DataType::Timestamp(TimeUnit::Nanosecond, None),
3036            ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
3037                .to_array()?,
3038            _ => self.to_array()?,
3039        };
3040
3041        let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3042        ScalarValue::try_from_array(&cast_arr, 0)
3043    }
3044
3045    fn eq_array_decimal(
3046        array: &ArrayRef,
3047        index: usize,
3048        value: Option<&i128>,
3049        precision: u8,
3050        scale: i8,
3051    ) -> Result<bool> {
3052        let array = as_decimal128_array(array)?;
3053        if array.precision() != precision || array.scale() != scale {
3054            return Ok(false);
3055        }
3056        let is_null = array.is_null(index);
3057        if let Some(v) = value {
3058            Ok(!array.is_null(index) && array.value(index) == *v)
3059        } else {
3060            Ok(is_null)
3061        }
3062    }
3063
3064    fn eq_array_decimal256(
3065        array: &ArrayRef,
3066        index: usize,
3067        value: Option<&i256>,
3068        precision: u8,
3069        scale: i8,
3070    ) -> Result<bool> {
3071        let array = as_decimal256_array(array)?;
3072        if array.precision() != precision || array.scale() != scale {
3073            return Ok(false);
3074        }
3075        let is_null = array.is_null(index);
3076        if let Some(v) = value {
3077            Ok(!array.is_null(index) && array.value(index) == *v)
3078        } else {
3079            Ok(is_null)
3080        }
3081    }
3082
3083    /// Compares a single row of array @ index for equality with self,
3084    /// in an optimized fashion.
3085    ///
3086    /// This method implements an optimized version of:
3087    ///
3088    /// ```text
3089    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
3090    ///     arr_scalar.eq(self)
3091    /// ```
3092    ///
3093    /// *Performance note*: the arrow compute kernels should be
3094    /// preferred over this function if at all possible as they can be
3095    /// vectorized and are generally much faster.
3096    ///
3097    /// This function has a few narrow use cases such as hash table key
3098    /// comparisons where comparing a single row at a time is necessary.
3099    ///
3100    /// # Errors
3101    ///
3102    /// Errors if
3103    /// - it fails to downcast `array` to the data type of `self`
3104    /// - `self` is a `Struct`
3105    ///
3106    /// # Panics
3107    ///
3108    /// Panics if `self` is a dictionary with invalid key type
3109    #[inline]
3110    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
3111        Ok(match self {
3112            ScalarValue::Decimal128(v, precision, scale) => {
3113                ScalarValue::eq_array_decimal(
3114                    array,
3115                    index,
3116                    v.as_ref(),
3117                    *precision,
3118                    *scale,
3119                )?
3120            }
3121            ScalarValue::Decimal256(v, precision, scale) => {
3122                ScalarValue::eq_array_decimal256(
3123                    array,
3124                    index,
3125                    v.as_ref(),
3126                    *precision,
3127                    *scale,
3128                )?
3129            }
3130            ScalarValue::Boolean(val) => {
3131                eq_array_primitive!(array, index, BooleanArray, val)?
3132            }
3133            ScalarValue::Float16(val) => {
3134                eq_array_primitive!(array, index, Float16Array, val)?
3135            }
3136            ScalarValue::Float32(val) => {
3137                eq_array_primitive!(array, index, Float32Array, val)?
3138            }
3139            ScalarValue::Float64(val) => {
3140                eq_array_primitive!(array, index, Float64Array, val)?
3141            }
3142            ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?,
3143            ScalarValue::Int16(val) => {
3144                eq_array_primitive!(array, index, Int16Array, val)?
3145            }
3146            ScalarValue::Int32(val) => {
3147                eq_array_primitive!(array, index, Int32Array, val)?
3148            }
3149            ScalarValue::Int64(val) => {
3150                eq_array_primitive!(array, index, Int64Array, val)?
3151            }
3152            ScalarValue::UInt8(val) => {
3153                eq_array_primitive!(array, index, UInt8Array, val)?
3154            }
3155            ScalarValue::UInt16(val) => {
3156                eq_array_primitive!(array, index, UInt16Array, val)?
3157            }
3158            ScalarValue::UInt32(val) => {
3159                eq_array_primitive!(array, index, UInt32Array, val)?
3160            }
3161            ScalarValue::UInt64(val) => {
3162                eq_array_primitive!(array, index, UInt64Array, val)?
3163            }
3164            ScalarValue::Utf8(val) => {
3165                eq_array_primitive!(array, index, StringArray, val)?
3166            }
3167            ScalarValue::Utf8View(val) => {
3168                eq_array_primitive!(array, index, StringViewArray, val)?
3169            }
3170            ScalarValue::LargeUtf8(val) => {
3171                eq_array_primitive!(array, index, LargeStringArray, val)?
3172            }
3173            ScalarValue::Binary(val) => {
3174                eq_array_primitive!(array, index, BinaryArray, val)?
3175            }
3176            ScalarValue::BinaryView(val) => {
3177                eq_array_primitive!(array, index, BinaryViewArray, val)?
3178            }
3179            ScalarValue::FixedSizeBinary(_, val) => {
3180                eq_array_primitive!(array, index, FixedSizeBinaryArray, val)?
3181            }
3182            ScalarValue::LargeBinary(val) => {
3183                eq_array_primitive!(array, index, LargeBinaryArray, val)?
3184            }
3185            ScalarValue::List(arr) => {
3186                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3187            }
3188            ScalarValue::LargeList(arr) => {
3189                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3190            }
3191            ScalarValue::FixedSizeList(arr) => {
3192                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3193            }
3194            ScalarValue::Struct(arr) => {
3195                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3196            }
3197            ScalarValue::Map(arr) => {
3198                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3199            }
3200            ScalarValue::Date32(val) => {
3201                eq_array_primitive!(array, index, Date32Array, val)?
3202            }
3203            ScalarValue::Date64(val) => {
3204                eq_array_primitive!(array, index, Date64Array, val)?
3205            }
3206            ScalarValue::Time32Second(val) => {
3207                eq_array_primitive!(array, index, Time32SecondArray, val)?
3208            }
3209            ScalarValue::Time32Millisecond(val) => {
3210                eq_array_primitive!(array, index, Time32MillisecondArray, val)?
3211            }
3212            ScalarValue::Time64Microsecond(val) => {
3213                eq_array_primitive!(array, index, Time64MicrosecondArray, val)?
3214            }
3215            ScalarValue::Time64Nanosecond(val) => {
3216                eq_array_primitive!(array, index, Time64NanosecondArray, val)?
3217            }
3218            ScalarValue::TimestampSecond(val, _) => {
3219                eq_array_primitive!(array, index, TimestampSecondArray, val)?
3220            }
3221            ScalarValue::TimestampMillisecond(val, _) => {
3222                eq_array_primitive!(array, index, TimestampMillisecondArray, val)?
3223            }
3224            ScalarValue::TimestampMicrosecond(val, _) => {
3225                eq_array_primitive!(array, index, TimestampMicrosecondArray, val)?
3226            }
3227            ScalarValue::TimestampNanosecond(val, _) => {
3228                eq_array_primitive!(array, index, TimestampNanosecondArray, val)?
3229            }
3230            ScalarValue::IntervalYearMonth(val) => {
3231                eq_array_primitive!(array, index, IntervalYearMonthArray, val)?
3232            }
3233            ScalarValue::IntervalDayTime(val) => {
3234                eq_array_primitive!(array, index, IntervalDayTimeArray, val)?
3235            }
3236            ScalarValue::IntervalMonthDayNano(val) => {
3237                eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)?
3238            }
3239            ScalarValue::DurationSecond(val) => {
3240                eq_array_primitive!(array, index, DurationSecondArray, val)?
3241            }
3242            ScalarValue::DurationMillisecond(val) => {
3243                eq_array_primitive!(array, index, DurationMillisecondArray, val)?
3244            }
3245            ScalarValue::DurationMicrosecond(val) => {
3246                eq_array_primitive!(array, index, DurationMicrosecondArray, val)?
3247            }
3248            ScalarValue::DurationNanosecond(val) => {
3249                eq_array_primitive!(array, index, DurationNanosecondArray, val)?
3250            }
3251            ScalarValue::Union(value, _, _) => {
3252                let array = as_union_array(array);
3253                let ti = array.type_id(index);
3254                let index = array.value_offset(index);
3255                if let Some((ti_v, value)) = value {
3256                    ti_v == &ti && value.eq_array(array.child(ti), index)?
3257                } else {
3258                    array.child(ti).is_null(index)
3259                }
3260            }
3261            ScalarValue::Dictionary(key_type, v) => {
3262                let (values_array, values_index) = match key_type.as_ref() {
3263                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3264                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3265                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3266                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3267                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3268                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3269                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3270                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3271                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
3272                };
3273                // was the value in the array non null?
3274                match values_index {
3275                    Some(values_index) => v.eq_array(values_array, values_index)?,
3276                    None => v.is_null(),
3277                }
3278            }
3279            ScalarValue::Null => array.is_null(index),
3280        })
3281    }
3282
3283    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3284        let right = arr2.slice(index, 1);
3285        arr1 == &right
3286    }
3287
3288    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
3289    /// includes the allocated size (`capacity`) rather than the current length (`len`)
3290    pub fn size(&self) -> usize {
3291        size_of_val(self)
3292            + match self {
3293                ScalarValue::Null
3294                | ScalarValue::Boolean(_)
3295                | ScalarValue::Float16(_)
3296                | ScalarValue::Float32(_)
3297                | ScalarValue::Float64(_)
3298                | ScalarValue::Decimal128(_, _, _)
3299                | ScalarValue::Decimal256(_, _, _)
3300                | ScalarValue::Int8(_)
3301                | ScalarValue::Int16(_)
3302                | ScalarValue::Int32(_)
3303                | ScalarValue::Int64(_)
3304                | ScalarValue::UInt8(_)
3305                | ScalarValue::UInt16(_)
3306                | ScalarValue::UInt32(_)
3307                | ScalarValue::UInt64(_)
3308                | ScalarValue::Date32(_)
3309                | ScalarValue::Date64(_)
3310                | ScalarValue::Time32Second(_)
3311                | ScalarValue::Time32Millisecond(_)
3312                | ScalarValue::Time64Microsecond(_)
3313                | ScalarValue::Time64Nanosecond(_)
3314                | ScalarValue::IntervalYearMonth(_)
3315                | ScalarValue::IntervalDayTime(_)
3316                | ScalarValue::IntervalMonthDayNano(_)
3317                | ScalarValue::DurationSecond(_)
3318                | ScalarValue::DurationMillisecond(_)
3319                | ScalarValue::DurationMicrosecond(_)
3320                | ScalarValue::DurationNanosecond(_) => 0,
3321                ScalarValue::Utf8(s)
3322                | ScalarValue::LargeUtf8(s)
3323                | ScalarValue::Utf8View(s) => {
3324                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3325                }
3326                ScalarValue::TimestampSecond(_, s)
3327                | ScalarValue::TimestampMillisecond(_, s)
3328                | ScalarValue::TimestampMicrosecond(_, s)
3329                | ScalarValue::TimestampNanosecond(_, s) => {
3330                    s.as_ref().map(|s| s.len()).unwrap_or_default()
3331                }
3332                ScalarValue::Binary(b)
3333                | ScalarValue::FixedSizeBinary(_, b)
3334                | ScalarValue::LargeBinary(b)
3335                | ScalarValue::BinaryView(b) => {
3336                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3337                }
3338                ScalarValue::List(arr) => arr.get_array_memory_size(),
3339                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3340                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3341                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3342                ScalarValue::Map(arr) => arr.get_array_memory_size(),
3343                ScalarValue::Union(vals, fields, _mode) => {
3344                    vals.as_ref()
3345                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
3346                        .unwrap_or_default()
3347                        // `fields` is boxed, so it is NOT already included in `self`
3348                        + size_of_val(fields)
3349                        + (size_of::<Field>() * fields.len())
3350                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
3351                }
3352                ScalarValue::Dictionary(dt, sv) => {
3353                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
3354                    dt.size() + sv.size()
3355                }
3356            }
3357    }
3358
3359    /// Estimates [size](Self::size) of [`Vec`] in bytes.
3360    ///
3361    /// Includes the size of the [`Vec`] container itself.
3362    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3363        size_of_val(vec)
3364            + (size_of::<ScalarValue>() * vec.capacity())
3365            + vec
3366                .iter()
3367                .map(|sv| sv.size() - size_of_val(sv))
3368                .sum::<usize>()
3369    }
3370
3371    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
3372    ///
3373    /// Includes the size of the [`VecDeque`] container itself.
3374    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3375        size_of_val(vec_deque)
3376            + (size_of::<ScalarValue>() * vec_deque.capacity())
3377            + vec_deque
3378                .iter()
3379                .map(|sv| sv.size() - size_of_val(sv))
3380                .sum::<usize>()
3381    }
3382
3383    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
3384    ///
3385    /// Includes the size of the [`HashSet`] container itself.
3386    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3387        size_of_val(set)
3388            + (size_of::<ScalarValue>() * set.capacity())
3389            + set
3390                .iter()
3391                .map(|sv| sv.size() - size_of_val(sv))
3392                .sum::<usize>()
3393    }
3394}
3395
3396macro_rules! impl_scalar {
3397    ($ty:ty, $scalar:tt) => {
3398        impl From<$ty> for ScalarValue {
3399            fn from(value: $ty) -> Self {
3400                ScalarValue::$scalar(Some(value))
3401            }
3402        }
3403
3404        impl From<Option<$ty>> for ScalarValue {
3405            fn from(value: Option<$ty>) -> Self {
3406                ScalarValue::$scalar(value)
3407            }
3408        }
3409    };
3410}
3411
3412impl_scalar!(f64, Float64);
3413impl_scalar!(f32, Float32);
3414impl_scalar!(i8, Int8);
3415impl_scalar!(i16, Int16);
3416impl_scalar!(i32, Int32);
3417impl_scalar!(i64, Int64);
3418impl_scalar!(bool, Boolean);
3419impl_scalar!(u8, UInt8);
3420impl_scalar!(u16, UInt16);
3421impl_scalar!(u32, UInt32);
3422impl_scalar!(u64, UInt64);
3423
3424impl From<&str> for ScalarValue {
3425    fn from(value: &str) -> Self {
3426        Some(value).into()
3427    }
3428}
3429
3430impl From<Option<&str>> for ScalarValue {
3431    fn from(value: Option<&str>) -> Self {
3432        let value = value.map(|s| s.to_string());
3433        ScalarValue::Utf8(value)
3434    }
3435}
3436
3437/// Wrapper to create ScalarValue::Struct for convenience
3438impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3439    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3440        value
3441            .into_iter()
3442            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3443                builder.with_name_and_scalar(name, value)
3444            })
3445            .build()
3446            .unwrap()
3447    }
3448}
3449
3450impl FromStr for ScalarValue {
3451    type Err = Infallible;
3452
3453    fn from_str(s: &str) -> Result<Self, Self::Err> {
3454        Ok(s.into())
3455    }
3456}
3457
3458impl From<String> for ScalarValue {
3459    fn from(value: String) -> Self {
3460        ScalarValue::Utf8(Some(value))
3461    }
3462}
3463
3464macro_rules! impl_try_from {
3465    ($SCALAR:ident, $NATIVE:ident) => {
3466        impl TryFrom<ScalarValue> for $NATIVE {
3467            type Error = DataFusionError;
3468
3469            fn try_from(value: ScalarValue) -> Result<Self> {
3470                match value {
3471                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
3472                    _ => _internal_err!(
3473                        "Cannot convert {:?} to {}",
3474                        value,
3475                        std::any::type_name::<Self>()
3476                    ),
3477                }
3478            }
3479        }
3480    };
3481}
3482
3483impl_try_from!(Int8, i8);
3484impl_try_from!(Int16, i16);
3485
3486// special implementation for i32 because of Date32 and Time32
3487impl TryFrom<ScalarValue> for i32 {
3488    type Error = DataFusionError;
3489
3490    fn try_from(value: ScalarValue) -> Result<Self> {
3491        match value {
3492            ScalarValue::Int32(Some(inner_value))
3493            | ScalarValue::Date32(Some(inner_value))
3494            | ScalarValue::Time32Second(Some(inner_value))
3495            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3496            _ => _internal_err!(
3497                "Cannot convert {:?} to {}",
3498                value,
3499                std::any::type_name::<Self>()
3500            ),
3501        }
3502    }
3503}
3504
3505// special implementation for i64 because of Date64, Time64 and Timestamp
3506impl TryFrom<ScalarValue> for i64 {
3507    type Error = DataFusionError;
3508
3509    fn try_from(value: ScalarValue) -> Result<Self> {
3510        match value {
3511            ScalarValue::Int64(Some(inner_value))
3512            | ScalarValue::Date64(Some(inner_value))
3513            | ScalarValue::Time64Microsecond(Some(inner_value))
3514            | ScalarValue::Time64Nanosecond(Some(inner_value))
3515            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3516            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3517            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3518            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3519            _ => _internal_err!(
3520                "Cannot convert {:?} to {}",
3521                value,
3522                std::any::type_name::<Self>()
3523            ),
3524        }
3525    }
3526}
3527
3528// special implementation for i128 because of Decimal128
3529impl TryFrom<ScalarValue> for i128 {
3530    type Error = DataFusionError;
3531
3532    fn try_from(value: ScalarValue) -> Result<Self> {
3533        match value {
3534            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3535            _ => _internal_err!(
3536                "Cannot convert {:?} to {}",
3537                value,
3538                std::any::type_name::<Self>()
3539            ),
3540        }
3541    }
3542}
3543
3544// special implementation for i256 because of Decimal128
3545impl TryFrom<ScalarValue> for i256 {
3546    type Error = DataFusionError;
3547
3548    fn try_from(value: ScalarValue) -> Result<Self> {
3549        match value {
3550            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3551            _ => _internal_err!(
3552                "Cannot convert {:?} to {}",
3553                value,
3554                std::any::type_name::<Self>()
3555            ),
3556        }
3557    }
3558}
3559
3560impl_try_from!(UInt8, u8);
3561impl_try_from!(UInt16, u16);
3562impl_try_from!(UInt32, u32);
3563impl_try_from!(UInt64, u64);
3564impl_try_from!(Float32, f32);
3565impl_try_from!(Float64, f64);
3566impl_try_from!(Boolean, bool);
3567
3568impl TryFrom<DataType> for ScalarValue {
3569    type Error = DataFusionError;
3570
3571    /// Create a Null instance of ScalarValue for this datatype
3572    fn try_from(datatype: DataType) -> Result<Self> {
3573        (&datatype).try_into()
3574    }
3575}
3576
3577impl TryFrom<&DataType> for ScalarValue {
3578    type Error = DataFusionError;
3579
3580    /// Create a Null instance of ScalarValue for this datatype
3581    fn try_from(data_type: &DataType) -> Result<Self> {
3582        Self::try_new_null(data_type)
3583    }
3584}
3585
// Writes the `Display` form of the value inside `$EXPR` (an `Option` or a
// reference to one) to the formatter `$F`, or the literal `NULL` when it is `None`.
// Evaluates to the `fmt::Result` of the write.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
3594
3595// Implement Display trait for ScalarValue
3596//
3597// # Panics
3598//
3599// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
3600impl fmt::Display for ScalarValue {
3601    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3602        match self {
3603            ScalarValue::Decimal128(v, p, s) => {
3604                write!(f, "{v:?},{p:?},{s:?}")?;
3605            }
3606            ScalarValue::Decimal256(v, p, s) => {
3607                write!(f, "{v:?},{p:?},{s:?}")?;
3608            }
3609            ScalarValue::Boolean(e) => format_option!(f, e)?,
3610            ScalarValue::Float16(e) => format_option!(f, e)?,
3611            ScalarValue::Float32(e) => format_option!(f, e)?,
3612            ScalarValue::Float64(e) => format_option!(f, e)?,
3613            ScalarValue::Int8(e) => format_option!(f, e)?,
3614            ScalarValue::Int16(e) => format_option!(f, e)?,
3615            ScalarValue::Int32(e) => format_option!(f, e)?,
3616            ScalarValue::Int64(e) => format_option!(f, e)?,
3617            ScalarValue::UInt8(e) => format_option!(f, e)?,
3618            ScalarValue::UInt16(e) => format_option!(f, e)?,
3619            ScalarValue::UInt32(e) => format_option!(f, e)?,
3620            ScalarValue::UInt64(e) => format_option!(f, e)?,
3621            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3622            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3623            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3624            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3625            ScalarValue::Utf8(e)
3626            | ScalarValue::LargeUtf8(e)
3627            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3628            ScalarValue::Binary(e)
3629            | ScalarValue::FixedSizeBinary(_, e)
3630            | ScalarValue::LargeBinary(e)
3631            | ScalarValue::BinaryView(e) => match e {
3632                Some(bytes) => {
3633                    // print up to first 10 bytes, with trailing ... if needed
3634                    for b in bytes.iter().take(10) {
3635                        write!(f, "{b:02X}")?;
3636                    }
3637                    if bytes.len() > 10 {
3638                        write!(f, "...")?;
3639                    }
3640                }
3641                None => write!(f, "NULL")?,
3642            },
3643            ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3644            ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3645            ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3646            ScalarValue::Date32(e) => {
3647                format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
3648            }
3649            ScalarValue::Date64(e) => {
3650                format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))?
3651            }
3652            ScalarValue::Time32Second(e) => format_option!(f, e)?,
3653            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3654            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3655            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3656            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3657            ScalarValue::IntervalMonthDayNano(e) => {
3658                format_option!(f, e.map(|v| format!("{v:?}")))?
3659            }
3660            ScalarValue::IntervalDayTime(e) => {
3661                format_option!(f, e.map(|v| format!("{v:?}")))?;
3662            }
3663            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3664            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3665            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3666            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3667            ScalarValue::Struct(struct_arr) => {
3668                // ScalarValue Struct should always have a single element
3669                assert_eq!(struct_arr.len(), 1);
3670
3671                if struct_arr.null_count() == struct_arr.len() {
3672                    write!(f, "NULL")?;
3673                    return Ok(());
3674                }
3675
3676                let columns = struct_arr.columns();
3677                let fields = struct_arr.fields();
3678                let nulls = struct_arr.nulls();
3679
3680                write!(
3681                    f,
3682                    "{{{}}}",
3683                    columns
3684                        .iter()
3685                        .zip(fields.iter())
3686                        .map(|(column, field)| {
3687                            if nulls.is_some_and(|b| b.is_null(0)) {
3688                                format!("{}:NULL", field.name())
3689                            } else if let DataType::Struct(_) = field.data_type() {
3690                                let sv = ScalarValue::Struct(Arc::new(
3691                                    column.as_struct().to_owned(),
3692                                ));
3693                                format!("{}:{sv}", field.name())
3694                            } else {
3695                                let sv = array_value_to_string(column, 0).unwrap();
3696                                format!("{}:{sv}", field.name())
3697                            }
3698                        })
3699                        .collect::<Vec<_>>()
3700                        .join(",")
3701                )?
3702            }
3703            ScalarValue::Map(map_arr) => {
3704                if map_arr.null_count() == map_arr.len() {
3705                    write!(f, "NULL")?;
3706                    return Ok(());
3707                }
3708
3709                write!(
3710                    f,
3711                    "[{}]",
3712                    map_arr
3713                        .iter()
3714                        .map(|struct_array| {
3715                            if let Some(arr) = struct_array {
3716                                let mut buffer = VecDeque::new();
3717                                for i in 0..arr.len() {
3718                                    let key =
3719                                        array_value_to_string(arr.column(0), i).unwrap();
3720                                    let value =
3721                                        array_value_to_string(arr.column(1), i).unwrap();
3722                                    buffer.push_back(format!("{}:{}", key, value));
3723                                }
3724                                format!(
3725                                    "{{{}}}",
3726                                    buffer
3727                                        .into_iter()
3728                                        .collect::<Vec<_>>()
3729                                        .join(",")
3730                                        .as_str()
3731                                )
3732                            } else {
3733                                "NULL".to_string()
3734                            }
3735                        })
3736                        .collect::<Vec<_>>()
3737                        .join(",")
3738                )?
3739            }
3740            ScalarValue::Union(val, _fields, _mode) => match val {
3741                Some((id, val)) => write!(f, "{}:{}", id, val)?,
3742                None => write!(f, "NULL")?,
3743            },
3744            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3745            ScalarValue::Null => write!(f, "NULL")?,
3746        };
3747        Ok(())
3748    }
3749}
3750
3751fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3752    // ScalarValue List, LargeList, FixedSizeList should always have a single element
3753    assert_eq!(arr.len(), 1);
3754    let options = FormatOptions::default().with_display_error(true);
3755    let formatter =
3756        ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3757    let value_formatter = formatter.value(0);
3758    write!(f, "{value_formatter}")
3759}
3760
/// Writes `data` as comma-separated decimal byte values: `[1, 2, 3]` ==> `"1,2,3"`.
///
/// An empty slice writes nothing. No leading or trailing separator is emitted.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    let Some((first, rest)) = data.split_first() else {
        return Ok(());
    };
    write!(f, "{first}")?;
    // every following byte is preceded by its separator
    rest.iter().try_for_each(|byte| write!(f, ",{byte}"))
}
3772
3773impl fmt::Debug for ScalarValue {
3774    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3775        match self {
3776            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
3777            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
3778            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
3779            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
3780            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
3781            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
3782            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
3783            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
3784            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
3785            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
3786            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
3787            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
3788            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
3789            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
3790            ScalarValue::TimestampSecond(_, tz_opt) => {
3791                write!(f, "TimestampSecond({self}, {tz_opt:?})")
3792            }
3793            ScalarValue::TimestampMillisecond(_, tz_opt) => {
3794                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
3795            }
3796            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
3797                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
3798            }
3799            ScalarValue::TimestampNanosecond(_, tz_opt) => {
3800                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
3801            }
3802            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
3803            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
3804            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
3805            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
3806            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
3807            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
3808            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
3809            ScalarValue::Binary(Some(b)) => {
3810                write!(f, "Binary(\"")?;
3811                fmt_binary(b.as_slice(), f)?;
3812                write!(f, "\")")
3813            }
3814            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
3815            ScalarValue::BinaryView(Some(b)) => {
3816                write!(f, "BinaryView(\"")?;
3817                fmt_binary(b.as_slice(), f)?;
3818                write!(f, "\")")
3819            }
3820            ScalarValue::FixedSizeBinary(size, None) => {
3821                write!(f, "FixedSizeBinary({size}, {self})")
3822            }
3823            ScalarValue::FixedSizeBinary(size, Some(b)) => {
3824                write!(f, "FixedSizeBinary({size}, \"")?;
3825                fmt_binary(b.as_slice(), f)?;
3826                write!(f, "\")")
3827            }
3828            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
3829            ScalarValue::LargeBinary(Some(b)) => {
3830                write!(f, "LargeBinary(\"")?;
3831                fmt_binary(b.as_slice(), f)?;
3832                write!(f, "\")")
3833            }
3834            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
3835            ScalarValue::List(_) => write!(f, "List({self})"),
3836            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
3837            ScalarValue::Struct(struct_arr) => {
3838                // ScalarValue Struct should always have a single element
3839                assert_eq!(struct_arr.len(), 1);
3840
3841                let columns = struct_arr.columns();
3842                let fields = struct_arr.fields();
3843
3844                write!(
3845                    f,
3846                    "Struct({{{}}})",
3847                    columns
3848                        .iter()
3849                        .zip(fields.iter())
3850                        .map(|(column, field)| {
3851                            let sv = array_value_to_string(column, 0).unwrap();
3852                            let name = field.name();
3853                            format!("{name}:{sv}")
3854                        })
3855                        .collect::<Vec<_>>()
3856                        .join(",")
3857                )
3858            }
3859            ScalarValue::Map(map_arr) => {
3860                write!(
3861                    f,
3862                    "Map([{}])",
3863                    map_arr
3864                        .iter()
3865                        .map(|struct_array| {
3866                            if let Some(arr) = struct_array {
3867                                let buffer: Vec<String> = (0..arr.len())
3868                                    .map(|i| {
3869                                        let key = array_value_to_string(arr.column(0), i)
3870                                            .unwrap();
3871                                        let value =
3872                                            array_value_to_string(arr.column(1), i)
3873                                                .unwrap();
3874                                        format!("{key:?}:{value:?}")
3875                                    })
3876                                    .collect();
3877                                format!("{{{}}}", buffer.join(","))
3878                            } else {
3879                                "NULL".to_string()
3880                            }
3881                        })
3882                        .collect::<Vec<_>>()
3883                        .join(",")
3884                )
3885            }
3886            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
3887            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
3888            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
3889            ScalarValue::Time32Millisecond(_) => {
3890                write!(f, "Time32Millisecond(\"{self}\")")
3891            }
3892            ScalarValue::Time64Microsecond(_) => {
3893                write!(f, "Time64Microsecond(\"{self}\")")
3894            }
3895            ScalarValue::Time64Nanosecond(_) => {
3896                write!(f, "Time64Nanosecond(\"{self}\")")
3897            }
3898            ScalarValue::IntervalDayTime(_) => {
3899                write!(f, "IntervalDayTime(\"{self}\")")
3900            }
3901            ScalarValue::IntervalYearMonth(_) => {
3902                write!(f, "IntervalYearMonth(\"{self}\")")
3903            }
3904            ScalarValue::IntervalMonthDayNano(_) => {
3905                write!(f, "IntervalMonthDayNano(\"{self}\")")
3906            }
3907            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
3908            ScalarValue::DurationMillisecond(_) => {
3909                write!(f, "DurationMillisecond(\"{self}\")")
3910            }
3911            ScalarValue::DurationMicrosecond(_) => {
3912                write!(f, "DurationMicrosecond(\"{self}\")")
3913            }
3914            ScalarValue::DurationNanosecond(_) => {
3915                write!(f, "DurationNanosecond(\"{self}\")")
3916            }
3917            ScalarValue::Union(val, _fields, _mode) => match val {
3918                Some((id, val)) => write!(f, "Union {}:{}", id, val),
3919                None => write!(f, "Union(NULL)"),
3920            },
3921            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
3922            ScalarValue::Null => write!(f, "NULL"),
3923        }
3924    }
3925}
3926
3927/// Trait used to map a NativeType to a ScalarValue
3928pub trait ScalarType<T: ArrowNativeType> {
3929    /// returns a scalar from an optional T
3930    fn scalar(r: Option<T>) -> ScalarValue;
3931}
3932
/// Wraps an optional `f32` in [`ScalarValue::Float32`].
impl ScalarType<f32> for Float32Type {
    fn scalar(r: Option<f32>) -> ScalarValue {
        ScalarValue::Float32(r)
    }
}
3938
3939impl ScalarType<i64> for TimestampSecondType {
3940    fn scalar(r: Option<i64>) -> ScalarValue {
3941        ScalarValue::TimestampSecond(r, None)
3942    }
3943}
3944
3945impl ScalarType<i64> for TimestampMillisecondType {
3946    fn scalar(r: Option<i64>) -> ScalarValue {
3947        ScalarValue::TimestampMillisecond(r, None)
3948    }
3949}
3950
3951impl ScalarType<i64> for TimestampMicrosecondType {
3952    fn scalar(r: Option<i64>) -> ScalarValue {
3953        ScalarValue::TimestampMicrosecond(r, None)
3954    }
3955}
3956
3957impl ScalarType<i64> for TimestampNanosecondType {
3958    fn scalar(r: Option<i64>) -> ScalarValue {
3959        ScalarValue::TimestampNanosecond(r, None)
3960    }
3961}
3962
/// Wraps an optional `i32` in [`ScalarValue::Date32`].
impl ScalarType<i32> for Date32Type {
    fn scalar(r: Option<i32>) -> ScalarValue {
        ScalarValue::Date32(r)
    }
}
3968
3969#[cfg(test)]
3970mod tests {
3971
3972    use super::*;
3973    use crate::cast::{
3974        as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
3975    };
3976
3977    use crate::assert_batches_eq;
3978    use arrow::array::{types::Float64Type, NullBufferBuilder};
3979    use arrow::buffer::{Buffer, OffsetBuffer};
3980    use arrow::compute::{is_null, kernels};
3981    use arrow::datatypes::Fields;
3982    use arrow::error::ArrowError;
3983    use arrow::util::pretty::pretty_format_columns;
3984    use chrono::NaiveDate;
3985    use rand::Rng;
3986
3987    #[test]
3988    fn test_scalar_value_from_for_map() {
3989        let string_builder = StringBuilder::new();
3990        let int_builder = Int32Builder::with_capacity(4);
3991        let mut builder = MapBuilder::new(None, string_builder, int_builder);
3992        builder.keys().append_value("joe");
3993        builder.values().append_value(1);
3994        builder.append(true).unwrap();
3995
3996        builder.keys().append_value("blogs");
3997        builder.values().append_value(2);
3998        builder.keys().append_value("foo");
3999        builder.values().append_value(4);
4000        builder.append(true).unwrap();
4001        builder.append(true).unwrap();
4002        builder.append(false).unwrap();
4003
4004        let expected = builder.finish();
4005
4006        let sv = ScalarValue::Map(Arc::new(expected.clone()));
4007        let map_arr = sv.to_array().unwrap();
4008        let actual = as_map_array(&map_arr).unwrap();
4009        assert_eq!(actual, &expected);
4010    }
4011
4012    #[test]
4013    fn test_scalar_value_from_for_struct() {
4014        let boolean = Arc::new(BooleanArray::from(vec![false]));
4015        let int = Arc::new(Int32Array::from(vec![42]));
4016
4017        let expected = StructArray::from(vec![
4018            (
4019                Arc::new(Field::new("b", DataType::Boolean, false)),
4020                Arc::clone(&boolean) as ArrayRef,
4021            ),
4022            (
4023                Arc::new(Field::new("c", DataType::Int32, false)),
4024                Arc::clone(&int) as ArrayRef,
4025            ),
4026        ]);
4027
4028        let sv = ScalarStructBuilder::new()
4029            .with_array(Field::new("b", DataType::Boolean, false), boolean)
4030            .with_array(Field::new("c", DataType::Int32, false), int)
4031            .build()
4032            .unwrap();
4033
4034        let struct_arr = sv.to_array().unwrap();
4035        let actual = as_struct_array(&struct_arr).unwrap();
4036        assert_eq!(actual, &expected);
4037    }
4038
4039    #[test]
4040    #[should_panic(
4041        expected = "Error building ScalarValue::Struct. Expected array with exactly one element, found array with 4 elements"
4042    )]
4043    fn test_scalar_value_from_for_struct_should_panic() {
4044        let _ = ScalarStructBuilder::new()
4045            .with_array(
4046                Field::new("bool", DataType::Boolean, false),
4047                Arc::new(BooleanArray::from(vec![false, true, false, false])),
4048            )
4049            .with_array(
4050                Field::new("i32", DataType::Int32, false),
4051                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
4052            )
4053            .build()
4054            .unwrap();
4055    }
4056
4057    #[test]
4058    fn test_to_array_of_size_for_nested() {
4059        // Struct
4060        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
4061        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4062
4063        let struct_array = StructArray::from(vec![
4064            (
4065                Arc::new(Field::new("b", DataType::Boolean, false)),
4066                Arc::clone(&boolean) as ArrayRef,
4067            ),
4068            (
4069                Arc::new(Field::new("c", DataType::Int32, false)),
4070                Arc::clone(&int) as ArrayRef,
4071            ),
4072        ]);
4073        let sv = ScalarValue::Struct(Arc::new(struct_array));
4074        let actual_arr = sv.to_array_of_size(2).unwrap();
4075
4076        let boolean = Arc::new(BooleanArray::from(vec![
4077            false, false, true, true, false, false, true, true,
4078        ]));
4079        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
4080
4081        let struct_array = StructArray::from(vec![
4082            (
4083                Arc::new(Field::new("b", DataType::Boolean, false)),
4084                Arc::clone(&boolean) as ArrayRef,
4085            ),
4086            (
4087                Arc::new(Field::new("c", DataType::Int32, false)),
4088                Arc::clone(&int) as ArrayRef,
4089            ),
4090        ]);
4091
4092        let actual = as_struct_array(&actual_arr).unwrap();
4093        assert_eq!(actual, &struct_array);
4094
4095        // List
4096        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
4097            Some(1),
4098            None,
4099            Some(2),
4100        ])]);
4101
4102        let sv = ScalarValue::List(Arc::new(arr));
4103        let actual_arr = sv
4104            .to_array_of_size(2)
4105            .expect("Failed to convert to array of size");
4106        let actual_list_arr = actual_arr.as_list::<i32>();
4107
4108        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4109            Some(vec![Some(1), None, Some(2)]),
4110            Some(vec![Some(1), None, Some(2)]),
4111        ]);
4112
4113        assert_eq!(&arr, actual_list_arr);
4114    }
4115
4116    #[test]
4117    fn test_to_array_of_size_for_fsl() {
4118        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
4119        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4120        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
4121        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
4122        let actual_arr = sv
4123            .to_array_of_size(2)
4124            .expect("Failed to convert to array of size");
4125
4126        let expected_values =
4127            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
4128        let expected_arr =
4129            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
4130
4131        assert_eq!(
4132            &expected_arr,
4133            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
4134        );
4135
4136        let empty_array = sv
4137            .to_array_of_size(0)
4138            .expect("Failed to convert to empty array");
4139
4140        assert_eq!(empty_array.len(), 0);
4141    }
4142
4143    #[test]
4144    fn test_list_to_array_string() {
4145        let scalars = vec![
4146            ScalarValue::from("rust"),
4147            ScalarValue::from("arrow"),
4148            ScalarValue::from("data-fusion"),
4149        ];
4150
4151        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
4152
4153        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
4154        assert_eq!(*result, expected);
4155    }
4156
4157    fn single_row_list_array(items: Vec<&str>) -> ListArray {
4158        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
4159            .build_list_array()
4160    }
4161
4162    fn build_list<O: OffsetSizeTrait>(
4163        values: Vec<Option<Vec<Option<i64>>>>,
4164    ) -> Vec<ScalarValue> {
4165        values
4166            .into_iter()
4167            .map(|v| {
4168                let arr = if v.is_some() {
4169                    Arc::new(
4170                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
4171                            vec![v],
4172                        ),
4173                    )
4174                } else if O::IS_LARGE {
4175                    new_null_array(
4176                        &DataType::LargeList(Arc::new(Field::new_list_field(
4177                            DataType::Int64,
4178                            true,
4179                        ))),
4180                        1,
4181                    )
4182                } else {
4183                    new_null_array(
4184                        &DataType::List(Arc::new(Field::new_list_field(
4185                            DataType::Int64,
4186                            true,
4187                        ))),
4188                        1,
4189                    )
4190                };
4191
4192                if O::IS_LARGE {
4193                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
4194                } else {
4195                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
4196                }
4197            })
4198            .collect()
4199    }
4200
4201    #[test]
4202    fn test_iter_to_array_fixed_size_list() {
4203        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4204        let f1 = Arc::new(FixedSizeListArray::new(
4205            Arc::clone(&field),
4206            3,
4207            Arc::new(Int32Array::from(vec![1, 2, 3])),
4208            None,
4209        ));
4210        let f2 = Arc::new(FixedSizeListArray::new(
4211            Arc::clone(&field),
4212            3,
4213            Arc::new(Int32Array::from(vec![4, 5, 6])),
4214            None,
4215        ));
4216        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
4217
4218        let scalars = vec![
4219            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
4220            ScalarValue::FixedSizeList(f1),
4221            ScalarValue::FixedSizeList(f2),
4222            ScalarValue::FixedSizeList(f_nulls),
4223        ];
4224
4225        let array = ScalarValue::iter_to_array(scalars).unwrap();
4226
4227        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
4228            vec![
4229                None,
4230                Some(vec![Some(1), Some(2), Some(3)]),
4231                Some(vec![Some(4), Some(5), Some(6)]),
4232                None,
4233            ],
4234            3,
4235        );
4236        assert_eq!(array.as_ref(), &expected);
4237    }
4238
4239    #[test]
4240    fn test_iter_to_array_struct() {
4241        let s1 = StructArray::from(vec![
4242            (
4243                Arc::new(Field::new("A", DataType::Boolean, false)),
4244                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4245            ),
4246            (
4247                Arc::new(Field::new("B", DataType::Int32, false)),
4248                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4249            ),
4250        ]);
4251
4252        let s2 = StructArray::from(vec![
4253            (
4254                Arc::new(Field::new("A", DataType::Boolean, false)),
4255                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4256            ),
4257            (
4258                Arc::new(Field::new("B", DataType::Int32, false)),
4259                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4260            ),
4261        ]);
4262
4263        let scalars = vec![
4264            ScalarValue::Struct(Arc::new(s1)),
4265            ScalarValue::Struct(Arc::new(s2)),
4266        ];
4267
4268        let array = ScalarValue::iter_to_array(scalars).unwrap();
4269
4270        let expected = StructArray::from(vec![
4271            (
4272                Arc::new(Field::new("A", DataType::Boolean, false)),
4273                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
4274            ),
4275            (
4276                Arc::new(Field::new("B", DataType::Int32, false)),
4277                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
4278            ),
4279        ]);
4280        assert_eq!(array.as_ref(), &expected);
4281    }
4282
4283    #[test]
4284    fn test_iter_to_array_struct_with_nulls() {
4285        // non-null
4286        let s1 = StructArray::from((
4287            vec![
4288                (
4289                    Arc::new(Field::new("A", DataType::Int32, false)),
4290                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
4291                ),
4292                (
4293                    Arc::new(Field::new("B", DataType::Int64, false)),
4294                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
4295                ),
4296            ],
4297            // Present the null mask, 1 is non-null, 0 is null
4298            Buffer::from(&[1]),
4299        ));
4300
4301        // null
4302        let s2 = StructArray::from((
4303            vec![
4304                (
4305                    Arc::new(Field::new("A", DataType::Int32, false)),
4306                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
4307                ),
4308                (
4309                    Arc::new(Field::new("B", DataType::Int64, false)),
4310                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
4311                ),
4312            ],
4313            Buffer::from(&[0]),
4314        ));
4315
4316        let scalars = vec![
4317            ScalarValue::Struct(Arc::new(s1)),
4318            ScalarValue::Struct(Arc::new(s2)),
4319        ];
4320
4321        let array = ScalarValue::iter_to_array(scalars).unwrap();
4322        let struct_array = array.as_struct();
4323        assert!(struct_array.is_valid(0));
4324        assert!(struct_array.is_null(1));
4325    }
4326
4327    #[test]
4328    fn iter_to_array_primitive_test() {
4329        // List[[1,2,3]], List[null], List[[4,5]]
4330        let scalars = build_list::<i32>(vec![
4331            Some(vec![Some(1), Some(2), Some(3)]),
4332            None,
4333            Some(vec![Some(4), Some(5)]),
4334        ]);
4335
4336        let array = ScalarValue::iter_to_array(scalars).unwrap();
4337        let list_array = as_list_array(&array);
4338        // List[[1,2,3], null, [4,5]]
4339        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4340            Some(vec![Some(1), Some(2), Some(3)]),
4341            None,
4342            Some(vec![Some(4), Some(5)]),
4343        ]);
4344        assert_eq!(list_array, &expected);
4345
4346        let scalars = build_list::<i64>(vec![
4347            Some(vec![Some(1), Some(2), Some(3)]),
4348            None,
4349            Some(vec![Some(4), Some(5)]),
4350        ]);
4351
4352        let array = ScalarValue::iter_to_array(scalars).unwrap();
4353        let list_array = as_large_list_array(&array);
4354        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4355            Some(vec![Some(1), Some(2), Some(3)]),
4356            None,
4357            Some(vec![Some(4), Some(5)]),
4358        ]);
4359        assert_eq!(list_array, &expected);
4360    }
4361
4362    #[test]
4363    fn iter_to_array_string_test() {
4364        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
4365        let arr2 = single_row_list_array(vec!["rust", "world"]);
4366
4367        let scalars = vec![
4368            ScalarValue::List(Arc::new(arr1)),
4369            ScalarValue::List(Arc::new(arr2)),
4370        ];
4371
4372        let array = ScalarValue::iter_to_array(scalars).unwrap();
4373        let result = array.as_list::<i32>();
4374
4375        // build expected array
4376        let string_builder = StringBuilder::with_capacity(5, 25);
4377        let mut list_of_string_builder = ListBuilder::new(string_builder);
4378
4379        list_of_string_builder.values().append_value("foo");
4380        list_of_string_builder.values().append_value("bar");
4381        list_of_string_builder.values().append_value("baz");
4382        list_of_string_builder.append(true);
4383
4384        list_of_string_builder.values().append_value("rust");
4385        list_of_string_builder.values().append_value("world");
4386        list_of_string_builder.append(true);
4387        let expected = list_of_string_builder.finish();
4388
4389        assert_eq!(result, &expected);
4390    }
4391
4392    #[test]
4393    fn test_list_scalar_eq_to_array() {
4394        let list_array: ArrayRef =
4395            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4396                Some(vec![Some(0), Some(1), Some(2)]),
4397                None,
4398                Some(vec![None, Some(5)]),
4399            ]));
4400
4401        let fsl_array: ArrayRef =
4402            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4403                Some(vec![Some(0), Some(1), Some(2)]),
4404                None,
4405                Some(vec![Some(3), None, Some(5)]),
4406            ]));
4407
4408        for arr in [list_array, fsl_array] {
4409            for i in 0..arr.len() {
4410                let scalar =
4411                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
4412                assert!(scalar.eq_array(&arr, i).unwrap());
4413            }
4414        }
4415    }
4416
4417    #[test]
4418    fn scalar_add_trait_test() -> Result<()> {
4419        let float_value = ScalarValue::Float64(Some(123.));
4420        let float_value_2 = ScalarValue::Float64(Some(123.));
4421        assert_eq!(
4422            (float_value.add(&float_value_2))?,
4423            ScalarValue::Float64(Some(246.))
4424        );
4425        assert_eq!(
4426            (float_value.add(float_value_2))?,
4427            ScalarValue::Float64(Some(246.))
4428        );
4429        Ok(())
4430    }
4431
4432    #[test]
4433    fn scalar_sub_trait_test() -> Result<()> {
4434        let float_value = ScalarValue::Float64(Some(123.));
4435        let float_value_2 = ScalarValue::Float64(Some(123.));
4436        assert_eq!(
4437            float_value.sub(&float_value_2)?,
4438            ScalarValue::Float64(Some(0.))
4439        );
4440        assert_eq!(
4441            float_value.sub(float_value_2)?,
4442            ScalarValue::Float64(Some(0.))
4443        );
4444        Ok(())
4445    }
4446
4447    #[test]
4448    fn scalar_sub_trait_int32_test() -> Result<()> {
4449        let int_value = ScalarValue::Int32(Some(42));
4450        let int_value_2 = ScalarValue::Int32(Some(100));
4451        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
4452        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
4453        Ok(())
4454    }
4455
4456    #[test]
4457    fn scalar_sub_trait_int32_overflow_test() {
4458        let int_value = ScalarValue::Int32(Some(i32::MAX));
4459        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
4460        let err = int_value
4461            .sub_checked(&int_value_2)
4462            .unwrap_err()
4463            .strip_backtrace();
4464        assert_eq!(
4465            err,
4466            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
4467        )
4468    }
4469
4470    #[test]
4471    fn scalar_sub_trait_int64_test() -> Result<()> {
4472        let int_value = ScalarValue::Int64(Some(42));
4473        let int_value_2 = ScalarValue::Int64(Some(100));
4474        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
4475        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
4476        Ok(())
4477    }
4478
4479    #[test]
4480    fn scalar_sub_trait_int64_overflow_test() {
4481        let int_value = ScalarValue::Int64(Some(i64::MAX));
4482        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
4483        let err = int_value
4484            .sub_checked(&int_value_2)
4485            .unwrap_err()
4486            .strip_backtrace();
4487        assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
4488    }
4489
4490    #[test]
4491    fn scalar_add_overflow_test() -> Result<()> {
4492        check_scalar_add_overflow::<Int8Type>(
4493            ScalarValue::Int8(Some(i8::MAX)),
4494            ScalarValue::Int8(Some(i8::MAX)),
4495        );
4496        check_scalar_add_overflow::<UInt8Type>(
4497            ScalarValue::UInt8(Some(u8::MAX)),
4498            ScalarValue::UInt8(Some(u8::MAX)),
4499        );
4500        check_scalar_add_overflow::<Int16Type>(
4501            ScalarValue::Int16(Some(i16::MAX)),
4502            ScalarValue::Int16(Some(i16::MAX)),
4503        );
4504        check_scalar_add_overflow::<UInt16Type>(
4505            ScalarValue::UInt16(Some(u16::MAX)),
4506            ScalarValue::UInt16(Some(u16::MAX)),
4507        );
4508        check_scalar_add_overflow::<Int32Type>(
4509            ScalarValue::Int32(Some(i32::MAX)),
4510            ScalarValue::Int32(Some(i32::MAX)),
4511        );
4512        check_scalar_add_overflow::<UInt32Type>(
4513            ScalarValue::UInt32(Some(u32::MAX)),
4514            ScalarValue::UInt32(Some(u32::MAX)),
4515        );
4516        check_scalar_add_overflow::<Int64Type>(
4517            ScalarValue::Int64(Some(i64::MAX)),
4518            ScalarValue::Int64(Some(i64::MAX)),
4519        );
4520        check_scalar_add_overflow::<UInt64Type>(
4521            ScalarValue::UInt64(Some(u64::MAX)),
4522            ScalarValue::UInt64(Some(u64::MAX)),
4523        );
4524
4525        Ok(())
4526    }
4527
4528    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
4529    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4530    where
4531        T: ArrowNumericType,
4532    {
4533        let scalar_result = left.add_checked(&right);
4534
4535        let left_array = left.to_array().expect("Failed to convert to array");
4536        let right_array = right.to_array().expect("Failed to convert to array");
4537        let arrow_left_array = left_array.as_primitive::<T>();
4538        let arrow_right_array = right_array.as_primitive::<T>();
4539        let arrow_result = add(arrow_left_array, arrow_right_array);
4540
4541        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4542    }
4543
4544    #[test]
4545    fn test_interval_add_timestamp() -> Result<()> {
4546        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
4547            months: 1,
4548            days: 2,
4549            nanoseconds: 3,
4550        }));
4551        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4552        let result = interval.add(&timestamp)?;
4553        let expect = timestamp.add(&interval)?;
4554        assert_eq!(result, expect);
4555
4556        let interval = ScalarValue::IntervalYearMonth(Some(123));
4557        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4558        let result = interval.add(&timestamp)?;
4559        let expect = timestamp.add(&interval)?;
4560        assert_eq!(result, expect);
4561
4562        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
4563            days: 1,
4564            milliseconds: 23,
4565        }));
4566        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4567        let result = interval.add(&timestamp)?;
4568        let expect = timestamp.add(&interval)?;
4569        assert_eq!(result, expect);
4570        Ok(())
4571    }
4572
4573    #[test]
4574    fn scalar_decimal_test() -> Result<()> {
4575        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
4576        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
4577        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
4578        assert_eq!(123_i128, try_into_value);
4579        assert!(!decimal_value.is_null());
4580        let neg_decimal_value = decimal_value.arithmetic_negate()?;
4581        match neg_decimal_value {
4582            ScalarValue::Decimal128(v, _, _) => {
4583                assert_eq!(-123, v.unwrap());
4584            }
4585            _ => {
4586                unreachable!();
4587            }
4588        }
4589
4590        // decimal scalar to array
4591        let array = decimal_value
4592            .to_array()
4593            .expect("Failed to convert to array");
4594        let array = as_decimal128_array(&array)?;
4595        assert_eq!(1, array.len());
4596        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4597        assert_eq!(123i128, array.value(0));
4598
4599        // decimal scalar to array with size
4600        let array = decimal_value
4601            .to_array_of_size(10)
4602            .expect("Failed to convert to array of size");
4603        let array_decimal = as_decimal128_array(&array)?;
4604        assert_eq!(10, array.len());
4605        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4606        assert_eq!(123i128, array_decimal.value(0));
4607        assert_eq!(123i128, array_decimal.value(9));
4608        // test eq array
4609        assert!(decimal_value
4610            .eq_array(&array, 1)
4611            .expect("Failed to compare arrays"));
4612        assert!(decimal_value
4613            .eq_array(&array, 5)
4614            .expect("Failed to compare arrays"));
4615        // test try from array
4616        assert_eq!(
4617            decimal_value,
4618            ScalarValue::try_from_array(&array, 5).unwrap()
4619        );
4620
4621        assert_eq!(
4622            decimal_value,
4623            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
4624        );
4625
4626        // test compare
4627        let left = ScalarValue::Decimal128(Some(123), 10, 2);
4628        let right = ScalarValue::Decimal128(Some(124), 10, 2);
4629        assert!(!left.eq(&right));
4630        let result = left < right;
4631        assert!(result);
4632        let result = left <= right;
4633        assert!(result);
4634        let right = ScalarValue::Decimal128(Some(124), 10, 3);
4635        // make sure that two decimals with diff datatype can't be compared.
4636        let result = left.partial_cmp(&right);
4637        assert_eq!(None, result);
4638
4639        let decimal_vec = vec![
4640            ScalarValue::Decimal128(Some(1), 10, 2),
4641            ScalarValue::Decimal128(Some(2), 10, 2),
4642            ScalarValue::Decimal128(Some(3), 10, 2),
4643        ];
4644        // convert the vec to decimal array and check the result
4645        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4646        assert_eq!(3, array.len());
4647        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4648
4649        let decimal_vec = vec![
4650            ScalarValue::Decimal128(Some(1), 10, 2),
4651            ScalarValue::Decimal128(Some(2), 10, 2),
4652            ScalarValue::Decimal128(Some(3), 10, 2),
4653            ScalarValue::Decimal128(None, 10, 2),
4654        ];
4655        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4656        assert_eq!(4, array.len());
4657        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4658
4659        assert!(ScalarValue::try_new_decimal128(1, 10, 2)
4660            .unwrap()
4661            .eq_array(&array, 0)
4662            .expect("Failed to compare arrays"));
4663        assert!(ScalarValue::try_new_decimal128(2, 10, 2)
4664            .unwrap()
4665            .eq_array(&array, 1)
4666            .expect("Failed to compare arrays"));
4667        assert!(ScalarValue::try_new_decimal128(3, 10, 2)
4668            .unwrap()
4669            .eq_array(&array, 2)
4670            .expect("Failed to compare arrays"));
4671        assert_eq!(
4672            ScalarValue::Decimal128(None, 10, 2),
4673            ScalarValue::try_from_array(&array, 3).unwrap()
4674        );
4675
4676        Ok(())
4677    }
4678
4679    #[test]
4680    fn test_list_partial_cmp() {
4681        let a =
4682            ScalarValue::List(Arc::new(
4683                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4684                    Some(1),
4685                    Some(2),
4686                    Some(3),
4687                ])]),
4688            ));
4689        let b =
4690            ScalarValue::List(Arc::new(
4691                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4692                    Some(1),
4693                    Some(2),
4694                    Some(3),
4695                ])]),
4696            ));
4697        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
4698
4699        let a =
4700            ScalarValue::List(Arc::new(
4701                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4702                    Some(10),
4703                    Some(2),
4704                    Some(3),
4705                ])]),
4706            ));
4707        let b =
4708            ScalarValue::List(Arc::new(
4709                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4710                    Some(1),
4711                    Some(2),
4712                    Some(30),
4713                ])]),
4714            ));
4715        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4716
4717        let a =
4718            ScalarValue::List(Arc::new(
4719                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4720                    Some(10),
4721                    Some(2),
4722                    Some(3),
4723                ])]),
4724            ));
4725        let b =
4726            ScalarValue::List(Arc::new(
4727                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4728                    Some(10),
4729                    Some(2),
4730                    Some(30),
4731                ])]),
4732            ));
4733        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
4734    }
4735
4736    #[test]
4737    fn scalar_value_to_array_u64() -> Result<()> {
4738        let value = ScalarValue::UInt64(Some(13u64));
4739        let array = value.to_array().expect("Failed to convert to array");
4740        let array = as_uint64_array(&array)?;
4741        assert_eq!(array.len(), 1);
4742        assert!(!array.is_null(0));
4743        assert_eq!(array.value(0), 13);
4744
4745        let value = ScalarValue::UInt64(None);
4746        let array = value.to_array().expect("Failed to convert to array");
4747        let array = as_uint64_array(&array)?;
4748        assert_eq!(array.len(), 1);
4749        assert!(array.is_null(0));
4750        Ok(())
4751    }
4752
4753    #[test]
4754    fn scalar_value_to_array_u32() -> Result<()> {
4755        let value = ScalarValue::UInt32(Some(13u32));
4756        let array = value.to_array().expect("Failed to convert to array");
4757        let array = as_uint32_array(&array)?;
4758        assert_eq!(array.len(), 1);
4759        assert!(!array.is_null(0));
4760        assert_eq!(array.value(0), 13);
4761
4762        let value = ScalarValue::UInt32(None);
4763        let array = value.to_array().expect("Failed to convert to array");
4764        let array = as_uint32_array(&array)?;
4765        assert_eq!(array.len(), 1);
4766        assert!(array.is_null(0));
4767        Ok(())
4768    }
4769
4770    #[test]
4771    fn scalar_list_null_to_array() {
4772        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
4773
4774        assert_eq!(list_array.len(), 1);
4775        assert_eq!(list_array.values().len(), 0);
4776    }
4777
4778    #[test]
4779    fn scalar_large_list_null_to_array() {
4780        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
4781
4782        assert_eq!(list_array.len(), 1);
4783        assert_eq!(list_array.values().len(), 0);
4784    }
4785
4786    #[test]
4787    fn scalar_list_to_array() -> Result<()> {
4788        let values = vec![
4789            ScalarValue::UInt64(Some(100)),
4790            ScalarValue::UInt64(None),
4791            ScalarValue::UInt64(Some(101)),
4792        ];
4793        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
4794        assert_eq!(list_array.len(), 1);
4795        assert_eq!(list_array.values().len(), 3);
4796
4797        let prim_array_ref = list_array.value(0);
4798        let prim_array = as_uint64_array(&prim_array_ref)?;
4799        assert_eq!(prim_array.len(), 3);
4800        assert_eq!(prim_array.value(0), 100);
4801        assert!(prim_array.is_null(1));
4802        assert_eq!(prim_array.value(2), 101);
4803        Ok(())
4804    }
4805
4806    #[test]
4807    fn scalar_large_list_to_array() -> Result<()> {
4808        let values = vec![
4809            ScalarValue::UInt64(Some(100)),
4810            ScalarValue::UInt64(None),
4811            ScalarValue::UInt64(Some(101)),
4812        ];
4813        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
4814        assert_eq!(list_array.len(), 1);
4815        assert_eq!(list_array.values().len(), 3);
4816
4817        let prim_array_ref = list_array.value(0);
4818        let prim_array = as_uint64_array(&prim_array_ref)?;
4819        assert_eq!(prim_array.len(), 3);
4820        assert_eq!(prim_array.value(0), 100);
4821        assert!(prim_array.is_null(1));
4822        assert_eq!(prim_array.value(2), 101);
4823        Ok(())
4824    }
4825
4826    /// Creates array directly and via ScalarValue and ensures they are the same
4827    macro_rules! check_scalar_iter {
4828        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
4829            let scalars: Vec<_> =
4830                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
4831
4832            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
4833
4834            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
4835
4836            assert_eq!(&array, &expected);
4837        }};
4838    }
4839
4840    /// Creates array directly and via ScalarValue and ensures they are the same
4841    /// but for variants that carry a timezone field.
4842    macro_rules! check_scalar_iter_tz {
4843        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
4844            let scalars: Vec<_> = $INPUT
4845                .iter()
4846                .map(|v| ScalarValue::$SCALAR_T(*v, None))
4847                .collect();
4848
4849            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
4850
4851            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
4852
4853            assert_eq!(&array, &expected);
4854        }};
4855    }
4856
4857    /// Creates array directly and via ScalarValue and ensures they
4858    /// are the same, for string  arrays
4859    macro_rules! check_scalar_iter_string {
4860        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
4861            let scalars: Vec<_> = $INPUT
4862                .iter()
4863                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
4864                .collect();
4865
4866            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
4867
4868            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
4869
4870            assert_eq!(&array, &expected);
4871        }};
4872    }
4873
4874    /// Creates array directly and via ScalarValue and ensures they
4875    /// are the same, for binary arrays
4876    macro_rules! check_scalar_iter_binary {
4877        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
4878            let scalars: Vec<_> = $INPUT
4879                .iter()
4880                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
4881                .collect();
4882
4883            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
4884
4885            let expected: $ARRAYTYPE =
4886                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
4887
4888            let expected: ArrayRef = Arc::new(expected);
4889
4890            assert_eq!(&array, &expected);
4891        }};
4892    }
4893
4894    #[test]
4895    // despite clippy claiming they are useless, the code doesn't compile otherwise.
4896    #[allow(clippy::useless_vec)]
4897    fn scalar_iter_to_array_boolean() {
4898        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
4899        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
4900        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
4901
4902        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
4903        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
4904        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
4905        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
4906
4907        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
4908        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
4909        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
4910        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
4911
4912        check_scalar_iter_tz!(
4913            TimestampSecond,
4914            TimestampSecondArray,
4915            vec![Some(1), None, Some(3)]
4916        );
4917        check_scalar_iter_tz!(
4918            TimestampMillisecond,
4919            TimestampMillisecondArray,
4920            vec![Some(1), None, Some(3)]
4921        );
4922        check_scalar_iter_tz!(
4923            TimestampMicrosecond,
4924            TimestampMicrosecondArray,
4925            vec![Some(1), None, Some(3)]
4926        );
4927        check_scalar_iter_tz!(
4928            TimestampNanosecond,
4929            TimestampNanosecondArray,
4930            vec![Some(1), None, Some(3)]
4931        );
4932
4933        check_scalar_iter_string!(
4934            Utf8,
4935            StringArray,
4936            vec![Some("foo"), None, Some("bar")]
4937        );
4938        check_scalar_iter_string!(
4939            LargeUtf8,
4940            LargeStringArray,
4941            vec![Some("foo"), None, Some("bar")]
4942        );
4943        check_scalar_iter_binary!(
4944            Binary,
4945            BinaryArray,
4946            vec![Some(b"foo"), None, Some(b"bar")]
4947        );
4948        check_scalar_iter_binary!(
4949            LargeBinary,
4950            LargeBinaryArray,
4951            vec![Some(b"foo"), None, Some(b"bar")]
4952        );
4953    }
4954
4955    #[test]
4956    fn scalar_iter_to_array_empty() {
4957        let scalars = vec![] as Vec<ScalarValue>;
4958
4959        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
4960        assert!(
4961            result
4962                .to_string()
4963                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
4964            "{}",
4965            result
4966        );
4967    }
4968
4969    #[test]
4970    fn scalar_iter_to_dictionary() {
4971        fn make_val(v: Option<String>) -> ScalarValue {
4972            let key_type = DataType::Int32;
4973            let value = ScalarValue::Utf8(v);
4974            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
4975        }
4976
4977        let scalars = [
4978            make_val(Some("Foo".into())),
4979            make_val(None),
4980            make_val(Some("Bar".into())),
4981        ];
4982
4983        let array = ScalarValue::iter_to_array(scalars).unwrap();
4984        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
4985        let values_array = as_string_array(array.values()).unwrap();
4986
4987        let values = array
4988            .keys_iter()
4989            .map(|k| {
4990                k.map(|k| {
4991                    assert!(values_array.is_valid(k));
4992                    values_array.value(k)
4993                })
4994            })
4995            .collect::<Vec<_>>();
4996
4997        let expected = vec![Some("Foo"), None, Some("Bar")];
4998        assert_eq!(values, expected);
4999    }
5000
5001    #[test]
5002    fn scalar_iter_to_array_mismatched_types() {
5003        use ScalarValue::*;
5004        // If the scalar values are not all the correct type, error here
5005        let scalars = [Boolean(Some(true)), Int32(Some(5))];
5006
5007        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
5008        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
5009                "{}", result);
5010    }
5011
5012    #[test]
5013    fn scalar_try_from_array_null() {
5014        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
5015        let array: ArrayRef = Arc::new(array);
5016
5017        assert_eq!(
5018            ScalarValue::Int64(Some(33)),
5019            ScalarValue::try_from_array(&array, 0).unwrap()
5020        );
5021        assert_eq!(
5022            ScalarValue::Int64(None),
5023            ScalarValue::try_from_array(&array, 1).unwrap()
5024        );
5025    }
5026
5027    #[test]
5028    fn scalar_try_from_array_list_array_null() {
5029        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5030            Some(vec![Some(1), Some(2)]),
5031            None,
5032        ]);
5033
5034        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
5035        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
5036
5037        let data_type =
5038            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5039
5040        assert_eq!(non_null_list_scalar.data_type(), data_type);
5041        assert_eq!(null_list_scalar.data_type(), data_type);
5042    }
5043
5044    #[test]
5045    fn scalar_try_from_list_datatypes() {
5046        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
5047
5048        // Test for List
5049        let data_type = &DataType::List(Arc::clone(&inner_field));
5050        let scalar: ScalarValue = data_type.try_into().unwrap();
5051        let expected = ScalarValue::List(
5052            new_null_array(data_type, 1)
5053                .as_list::<i32>()
5054                .to_owned()
5055                .into(),
5056        );
5057        assert_eq!(expected, scalar);
5058        assert!(expected.is_null());
5059
5060        // Test for LargeList
5061        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
5062        let scalar: ScalarValue = data_type.try_into().unwrap();
5063        let expected = ScalarValue::LargeList(
5064            new_null_array(data_type, 1)
5065                .as_list::<i64>()
5066                .to_owned()
5067                .into(),
5068        );
5069        assert_eq!(expected, scalar);
5070        assert!(expected.is_null());
5071
5072        // Test for FixedSizeList(5)
5073        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
5074        let scalar: ScalarValue = data_type.try_into().unwrap();
5075        let expected = ScalarValue::FixedSizeList(
5076            new_null_array(data_type, 1)
5077                .as_fixed_size_list()
5078                .to_owned()
5079                .into(),
5080        );
5081        assert_eq!(expected, scalar);
5082        assert!(expected.is_null());
5083    }
5084
5085    #[test]
5086    fn scalar_try_from_list_of_list() {
5087        let data_type = DataType::List(Arc::new(Field::new_list_field(
5088            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5089            true,
5090        )));
5091        let data_type = &data_type;
5092        let scalar: ScalarValue = data_type.try_into().unwrap();
5093
5094        let expected = ScalarValue::List(
5095            new_null_array(
5096                &DataType::List(Arc::new(Field::new_list_field(
5097                    DataType::List(Arc::new(Field::new_list_field(
5098                        DataType::Int32,
5099                        true,
5100                    ))),
5101                    true,
5102                ))),
5103                1,
5104            )
5105            .as_list::<i32>()
5106            .to_owned()
5107            .into(),
5108        );
5109
5110        assert_eq!(expected, scalar)
5111    }
5112
5113    #[test]
5114    fn scalar_try_from_not_equal_list_nested_list() {
5115        let list_data_type =
5116            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5117        let data_type = &list_data_type;
5118        let list_scalar: ScalarValue = data_type.try_into().unwrap();
5119
5120        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
5121            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5122            true,
5123        )));
5124        let data_type = &nested_list_data_type;
5125        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
5126
5127        assert_ne!(list_scalar, nested_list_scalar);
5128    }
5129
5130    #[test]
5131    fn scalar_try_from_dict_datatype() {
5132        let data_type =
5133            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
5134        let data_type = &data_type;
5135        let expected = ScalarValue::Dictionary(
5136            Box::new(DataType::Int8),
5137            Box::new(ScalarValue::Utf8(None)),
5138        );
5139        assert_eq!(expected, data_type.try_into().unwrap())
5140    }
5141
5142    #[test]
5143    fn size_of_scalar() {
5144        // Since ScalarValues are used in a non trivial number of places,
5145        // making it larger means significant more memory consumption
5146        // per distinct value.
5147        //
5148        // Thus this test ensures that no code change makes ScalarValue larger
5149        //
5150        // The alignment requirements differ across architectures and
5151        // thus the size of the enum appears to as well
5152
5153        // The value may also change depending on rust version
5154        assert_eq!(size_of::<ScalarValue>(), 64);
5155    }
5156
5157    #[test]
5158    fn memory_size() {
5159        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
5160        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
5161        let sv_size = sv.size();
5162
5163        let mut v = Vec::with_capacity(10);
5164        // do NOT clone `sv` here because this may shrink the vector capacity
5165        v.push(sv);
5166        assert_eq!(v.capacity(), 10);
5167        assert_eq!(
5168            ScalarValue::size_of_vec(&v),
5169            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
5170        );
5171
5172        let mut s = HashSet::with_capacity(0);
5173        // do NOT clone `sv` here because this may shrink the vector capacity
5174        s.insert(v.pop().unwrap());
5175        // hashsets may easily grow during insert, so capacity is dynamic
5176        let s_capacity = s.capacity();
5177        assert_eq!(
5178            ScalarValue::size_of_hashset(&s),
5179            size_of::<HashSet<ScalarValue>>()
5180                + ((s_capacity - 1) * size_of::<ScalarValue>())
5181                + sv_size,
5182        );
5183    }
5184
5185    #[test]
5186    fn scalar_eq_array() {
5187        // Validate that eq_array has the same semantics as ScalarValue::eq
5188        macro_rules! make_typed_vec {
5189            ($INPUT:expr, $TYPE:ident) => {{
5190                $INPUT
5191                    .iter()
5192                    .map(|v| v.map(|v| v as $TYPE))
5193                    .collect::<Vec<_>>()
5194            }};
5195        }
5196
5197        let bool_vals = [Some(true), None, Some(false)];
5198        let f32_vals = [Some(-1.0), None, Some(1.0)];
5199        let f64_vals = make_typed_vec!(f32_vals, f64);
5200
5201        let i8_vals = [Some(-1), None, Some(1)];
5202        let i16_vals = make_typed_vec!(i8_vals, i16);
5203        let i32_vals = make_typed_vec!(i8_vals, i32);
5204        let i64_vals = make_typed_vec!(i8_vals, i64);
5205
5206        let u8_vals = [Some(0), None, Some(1)];
5207        let u16_vals = make_typed_vec!(u8_vals, u16);
5208        let u32_vals = make_typed_vec!(u8_vals, u32);
5209        let u64_vals = make_typed_vec!(u8_vals, u64);
5210
5211        let str_vals = [Some("foo"), None, Some("bar")];
5212
5213        let interval_dt_vals = [
5214            Some(IntervalDayTime::MINUS_ONE),
5215            None,
5216            Some(IntervalDayTime::ONE),
5217        ];
5218        let interval_mdn_vals = [
5219            Some(IntervalMonthDayNano::MINUS_ONE),
5220            None,
5221            Some(IntervalMonthDayNano::ONE),
5222        ];
5223
5224        /// Test each value in `scalar` with the corresponding element
5225        /// at `array`. Assumes each element is unique (aka not equal
5226        /// with all other indexes)
5227        #[derive(Debug)]
5228        struct TestCase {
5229            array: ArrayRef,
5230            scalars: Vec<ScalarValue>,
5231        }
5232
5233        /// Create a test case for casing the input to the specified array type
5234        macro_rules! make_test_case {
5235            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5236                TestCase {
5237                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5238                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
5239                }
5240            }};
5241
5242            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
5243                let tz = $TZ;
5244                TestCase {
5245                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5246                    scalars: $INPUT
5247                        .iter()
5248                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
5249                        .collect(),
5250                }
5251            }};
5252        }
5253
5254        macro_rules! make_str_test_case {
5255            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5256                TestCase {
5257                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5258                    scalars: $INPUT
5259                        .iter()
5260                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
5261                        .collect(),
5262                }
5263            }};
5264        }
5265
5266        macro_rules! make_binary_test_case {
5267            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5268                TestCase {
5269                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5270                    scalars: $INPUT
5271                        .iter()
5272                        .map(|v| {
5273                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
5274                        })
5275                        .collect(),
5276                }
5277            }};
5278        }
5279
5280        /// create a test case for DictionaryArray<$INDEX_TY>
5281        macro_rules! make_str_dict_test_case {
5282            ($INPUT:expr, $INDEX_TY:ident) => {{
5283                TestCase {
5284                    array: Arc::new(
5285                        $INPUT
5286                            .iter()
5287                            .cloned()
5288                            .collect::<DictionaryArray<$INDEX_TY>>(),
5289                    ),
5290                    scalars: $INPUT
5291                        .iter()
5292                        .map(|v| {
5293                            ScalarValue::Dictionary(
5294                                Box::new($INDEX_TY::DATA_TYPE),
5295                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
5296                            )
5297                        })
5298                        .collect(),
5299                }
5300            }};
5301        }
5302
5303        let cases = vec![
5304            make_test_case!(bool_vals, BooleanArray, Boolean),
5305            make_test_case!(f32_vals, Float32Array, Float32),
5306            make_test_case!(f64_vals, Float64Array, Float64),
5307            make_test_case!(i8_vals, Int8Array, Int8),
5308            make_test_case!(i16_vals, Int16Array, Int16),
5309            make_test_case!(i32_vals, Int32Array, Int32),
5310            make_test_case!(i64_vals, Int64Array, Int64),
5311            make_test_case!(u8_vals, UInt8Array, UInt8),
5312            make_test_case!(u16_vals, UInt16Array, UInt16),
5313            make_test_case!(u32_vals, UInt32Array, UInt32),
5314            make_test_case!(u64_vals, UInt64Array, UInt64),
5315            make_str_test_case!(str_vals, StringArray, Utf8),
5316            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
5317            make_binary_test_case!(str_vals, BinaryArray, Binary),
5318            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
5319            make_test_case!(i32_vals, Date32Array, Date32),
5320            make_test_case!(i64_vals, Date64Array, Date64),
5321            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
5322            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
5323            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
5324            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
5325            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
5326            make_test_case!(
5327                i64_vals,
5328                TimestampSecondArray,
5329                TimestampSecond,
5330                Some("UTC".into())
5331            ),
5332            make_test_case!(
5333                i64_vals,
5334                TimestampMillisecondArray,
5335                TimestampMillisecond,
5336                None
5337            ),
5338            make_test_case!(
5339                i64_vals,
5340                TimestampMillisecondArray,
5341                TimestampMillisecond,
5342                Some("UTC".into())
5343            ),
5344            make_test_case!(
5345                i64_vals,
5346                TimestampMicrosecondArray,
5347                TimestampMicrosecond,
5348                None
5349            ),
5350            make_test_case!(
5351                i64_vals,
5352                TimestampMicrosecondArray,
5353                TimestampMicrosecond,
5354                Some("UTC".into())
5355            ),
5356            make_test_case!(
5357                i64_vals,
5358                TimestampNanosecondArray,
5359                TimestampNanosecond,
5360                None
5361            ),
5362            make_test_case!(
5363                i64_vals,
5364                TimestampNanosecondArray,
5365                TimestampNanosecond,
5366                Some("UTC".into())
5367            ),
5368            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
5369            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
5370            make_test_case!(
5371                interval_mdn_vals,
5372                IntervalMonthDayNanoArray,
5373                IntervalMonthDayNano
5374            ),
5375            make_str_dict_test_case!(str_vals, Int8Type),
5376            make_str_dict_test_case!(str_vals, Int16Type),
5377            make_str_dict_test_case!(str_vals, Int32Type),
5378            make_str_dict_test_case!(str_vals, Int64Type),
5379            make_str_dict_test_case!(str_vals, UInt8Type),
5380            make_str_dict_test_case!(str_vals, UInt16Type),
5381            make_str_dict_test_case!(str_vals, UInt32Type),
5382            make_str_dict_test_case!(str_vals, UInt64Type),
5383        ];
5384
5385        for case in cases {
5386            println!("**** Test Case *****");
5387            let TestCase { array, scalars } = case;
5388            println!("Input array type: {}", array.data_type());
5389            println!("Input scalars: {scalars:#?}");
5390            assert_eq!(array.len(), scalars.len());
5391
5392            for (index, scalar) in scalars.into_iter().enumerate() {
5393                assert!(
5394                    scalar
5395                        .eq_array(&array, index)
5396                        .expect("Failed to compare arrays"),
5397                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
5398                );
5399
5400                // test that all other elements are *not* equal
5401                for other_index in 0..array.len() {
5402                    if index != other_index {
5403                        assert!(
5404                            !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
5405                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
5406                        );
5407                    }
5408                }
5409            }
5410        }
5411    }
5412
5413    #[test]
5414    fn scalar_partial_ordering() {
5415        use ScalarValue::*;
5416
5417        assert_eq!(
5418            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
5419            Some(Ordering::Greater)
5420        );
5421        assert_eq!(
5422            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
5423            Some(Ordering::Less)
5424        );
5425        assert_eq!(
5426            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
5427            Some(Ordering::Equal)
5428        );
5429        // For different data type, `partial_cmp` returns None.
5430        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
5431        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
5432
5433        assert_eq!(
5434            ScalarValue::from(vec![
5435                ("A", ScalarValue::from(1.0)),
5436                ("B", ScalarValue::from("Z")),
5437            ])
5438            .partial_cmp(&ScalarValue::from(vec![
5439                ("A", ScalarValue::from(2.0)),
5440                ("B", ScalarValue::from("A")),
5441            ])),
5442            Some(Ordering::Less)
5443        );
5444
5445        // For different struct fields, `partial_cmp` returns None.
5446        assert_eq!(
5447            ScalarValue::from(vec![
5448                ("A", ScalarValue::from(1.0)),
5449                ("B", ScalarValue::from("Z")),
5450            ])
5451            .partial_cmp(&ScalarValue::from(vec![
5452                ("a", ScalarValue::from(2.0)),
5453                ("b", ScalarValue::from("A")),
5454            ])),
5455            None
5456        );
5457    }
5458
5459    #[test]
5460    fn test_scalar_value_from_string() {
5461        let scalar = ScalarValue::from("foo");
5462        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5463        let scalar = ScalarValue::from("foo".to_string());
5464        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5465        let scalar = ScalarValue::from_str("foo").unwrap();
5466        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5467    }
5468
5469    #[test]
5470    fn test_scalar_struct() {
5471        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5472        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5473        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5474
5475        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5476        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5477        let field_d = Arc::new(Field::new(
5478            "D",
5479            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5480            false,
5481        ));
5482
5483        let struct_array = StructArray::from(vec![
5484            (
5485                Arc::clone(&field_e),
5486                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5487            ),
5488            (
5489                Arc::clone(&field_f),
5490                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5491            ),
5492        ]);
5493
5494        let struct_array = StructArray::from(vec![
5495            (
5496                Arc::clone(&field_a),
5497                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5498            ),
5499            (
5500                Arc::clone(&field_b),
5501                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5502            ),
5503            (
5504                Arc::clone(&field_c),
5505                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5506            ),
5507            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5508        ]);
5509        let scalar = ScalarValue::Struct(Arc::new(struct_array));
5510
5511        let array = scalar
5512            .to_array_of_size(2)
5513            .expect("Failed to convert to array of size");
5514
5515        let expected = Arc::new(StructArray::from(vec![
5516            (
5517                Arc::clone(&field_a),
5518                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5519            ),
5520            (
5521                Arc::clone(&field_b),
5522                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5523            ),
5524            (
5525                Arc::clone(&field_c),
5526                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5527            ),
5528            (
5529                Arc::clone(&field_d),
5530                Arc::new(StructArray::from(vec![
5531                    (
5532                        Arc::clone(&field_e),
5533                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5534                    ),
5535                    (
5536                        Arc::clone(&field_f),
5537                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5538                    ),
5539                ])) as ArrayRef,
5540            ),
5541        ])) as ArrayRef;
5542
5543        assert_eq!(&array, &expected);
5544
5545        // Construct from second element of ArrayRef
5546        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5547        assert_eq!(constructed, scalar);
5548
5549        // None version
5550        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5551        assert!(none_scalar.is_null());
5552        assert_eq!(
5553            format!("{none_scalar:?}"),
5554            String::from("Struct({A:,B:,C:,D:})")
5555        );
5556
5557        // Construct with convenience From<Vec<(&str, ScalarValue)>>
5558        let constructed = ScalarValue::from(vec![
5559            ("A", ScalarValue::from(23)),
5560            ("B", ScalarValue::from(false)),
5561            ("C", ScalarValue::from("Hello")),
5562            (
5563                "D",
5564                ScalarValue::from(vec![
5565                    ("e", ScalarValue::from(2i16)),
5566                    ("f", ScalarValue::from(3i64)),
5567                ]),
5568            ),
5569        ]);
5570        assert_eq!(constructed, scalar);
5571
5572        // Build Array from Vec of structs
5573        let scalars = vec![
5574            ScalarValue::from(vec![
5575                ("A", ScalarValue::from(23)),
5576                ("B", ScalarValue::from(false)),
5577                ("C", ScalarValue::from("Hello")),
5578                (
5579                    "D",
5580                    ScalarValue::from(vec![
5581                        ("e", ScalarValue::from(2i16)),
5582                        ("f", ScalarValue::from(3i64)),
5583                    ]),
5584                ),
5585            ]),
5586            ScalarValue::from(vec![
5587                ("A", ScalarValue::from(7)),
5588                ("B", ScalarValue::from(true)),
5589                ("C", ScalarValue::from("World")),
5590                (
5591                    "D",
5592                    ScalarValue::from(vec![
5593                        ("e", ScalarValue::from(4i16)),
5594                        ("f", ScalarValue::from(5i64)),
5595                    ]),
5596                ),
5597            ]),
5598            ScalarValue::from(vec![
5599                ("A", ScalarValue::from(-1000)),
5600                ("B", ScalarValue::from(true)),
5601                ("C", ScalarValue::from("!!!!!")),
5602                (
5603                    "D",
5604                    ScalarValue::from(vec![
5605                        ("e", ScalarValue::from(6i16)),
5606                        ("f", ScalarValue::from(7i64)),
5607                    ]),
5608                ),
5609            ]),
5610        ];
5611        let array = ScalarValue::iter_to_array(scalars).unwrap();
5612
5613        let expected = Arc::new(StructArray::from(vec![
5614            (
5615                Arc::clone(&field_a),
5616                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5617            ),
5618            (
5619                Arc::clone(&field_b),
5620                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5621            ),
5622            (
5623                Arc::clone(&field_c),
5624                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5625            ),
5626            (
5627                Arc::clone(&field_d),
5628                Arc::new(StructArray::from(vec![
5629                    (
5630                        Arc::clone(&field_e),
5631                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5632                    ),
5633                    (
5634                        Arc::clone(&field_f),
5635                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5636                    ),
5637                ])) as ArrayRef,
5638            ),
5639        ])) as ArrayRef;
5640
5641        assert_eq!(&array, &expected);
5642    }
5643
5644    #[test]
5645    fn round_trip() {
5646        // Each array type should be able to round tripped through a scalar
5647        let cases: Vec<ArrayRef> = vec![
5648            // int
5649            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
5650            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
5651            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
5652            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
5653            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
5654            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
5655            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
5656            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
5657            // bool
5658            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
5659            // float
5660            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
5661            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
5662            // string array
5663            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
5664            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
5665            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
5666            // string dictionary
5667            {
5668                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
5669                builder.append("foo").unwrap();
5670                builder.append_null();
5671                builder.append("bar").unwrap();
5672                Arc::new(builder.finish())
5673            },
5674            // binary array
5675            Arc::new(BinaryArray::from_iter(vec![
5676                Some(b"foo"),
5677                None,
5678                Some(b"bar"),
5679            ])),
5680            Arc::new(LargeBinaryArray::from_iter(vec![
5681                Some(b"foo"),
5682                None,
5683                Some(b"bar"),
5684            ])),
5685            Arc::new(BinaryViewArray::from_iter(vec![
5686                Some(b"foo"),
5687                None,
5688                Some(b"bar"),
5689            ])),
5690            // timestamp
5691            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
5692            Arc::new(TimestampMillisecondArray::from(vec![
5693                Some(1),
5694                None,
5695                Some(3),
5696            ])),
5697            Arc::new(TimestampMicrosecondArray::from(vec![
5698                Some(1),
5699                None,
5700                Some(3),
5701            ])),
5702            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
5703            // timestamp with timezone
5704            Arc::new(
5705                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
5706                    .with_timezone_opt(Some("UTC")),
5707            ),
5708            Arc::new(
5709                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
5710                    .with_timezone_opt(Some("UTC")),
5711            ),
5712            Arc::new(
5713                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
5714                    .with_timezone_opt(Some("UTC")),
5715            ),
5716            Arc::new(
5717                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
5718                    .with_timezone_opt(Some("UTC")),
5719            ),
5720            // date
5721            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
5722            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
5723            // time
5724            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
5725            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
5726            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
5727            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
5728            // null array
5729            Arc::new(NullArray::new(3)),
5730            // dense union
5731            {
5732                let mut builder = UnionBuilder::new_dense();
5733                builder.append::<Int32Type>("a", 1).unwrap();
5734                builder.append::<Float64Type>("b", 3.4).unwrap();
5735                Arc::new(builder.build().unwrap())
5736            },
5737            // sparse union
5738            {
5739                let mut builder = UnionBuilder::new_sparse();
5740                builder.append::<Int32Type>("a", 1).unwrap();
5741                builder.append::<Float64Type>("b", 3.4).unwrap();
5742                Arc::new(builder.build().unwrap())
5743            },
5744            // list array
5745            {
5746                let values_builder = StringBuilder::new();
5747                let mut builder = ListBuilder::new(values_builder);
5748                // [A, B]
5749                builder.values().append_value("A");
5750                builder.values().append_value("B");
5751                builder.append(true);
5752                // [ ] (empty list)
5753                builder.append(true);
5754                // Null
5755                builder.values().append_value("?"); // irrelevant
5756                builder.append(false);
5757                Arc::new(builder.finish())
5758            },
5759            // large list array
5760            {
5761                let values_builder = StringBuilder::new();
5762                let mut builder = LargeListBuilder::new(values_builder);
5763                // [A, B]
5764                builder.values().append_value("A");
5765                builder.values().append_value("B");
5766                builder.append(true);
5767                // [ ] (empty list)
5768                builder.append(true);
5769                // Null
5770                builder.append(false);
5771                Arc::new(builder.finish())
5772            },
5773            // fixed size list array
5774            {
5775                let values_builder = Int32Builder::new();
5776                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
5777
5778                //  [[0, 1, 2], null, [3, null, 5]
5779                builder.values().append_value(0);
5780                builder.values().append_value(1);
5781                builder.values().append_value(2);
5782                builder.append(true);
5783                builder.values().append_null();
5784                builder.values().append_null();
5785                builder.values().append_null();
5786                builder.append(false);
5787                builder.values().append_value(3);
5788                builder.values().append_null();
5789                builder.values().append_value(5);
5790                builder.append(true);
5791                Arc::new(builder.finish())
5792            },
5793            // map
5794            {
5795                let string_builder = StringBuilder::new();
5796                let int_builder = Int32Builder::with_capacity(4);
5797
5798                let mut builder = MapBuilder::new(None, string_builder, int_builder);
5799                // {"joe": 1}
5800                builder.keys().append_value("joe");
5801                builder.values().append_value(1);
5802                builder.append(true).unwrap();
5803                // {}
5804                builder.append(true).unwrap();
5805                // null
5806                builder.append(false).unwrap();
5807
5808                Arc::new(builder.finish())
5809            },
5810        ];
5811
5812        for arr in cases {
5813            round_trip_through_scalar(arr);
5814        }
5815    }
5816
5817    /// for each row in `arr`:
5818    /// 1. convert to a `ScalarValue`
5819    /// 2. Convert `ScalarValue` back to an `ArrayRef`
5820    /// 3. Compare the original array (sliced) and new array for equality
5821    fn round_trip_through_scalar(arr: ArrayRef) {
5822        for i in 0..arr.len() {
5823            // convert Scalar --> Array
5824            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
5825            let array = scalar.to_array_of_size(1).unwrap();
5826            assert_eq!(array.len(), 1);
5827            assert_eq!(array.data_type(), arr.data_type());
5828            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
5829        }
5830    }
5831
5832    #[test]
5833    fn test_scalar_union_sparse() {
5834        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
5835        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
5836        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
5837        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
5838
5839        let mut values_a = vec![None; 6];
5840        values_a[0] = Some(42);
5841        let mut values_b = vec![None; 6];
5842        values_b[1] = Some(true);
5843        let mut values_c = vec![None; 6];
5844        values_c[2] = Some("foo");
5845        let children: Vec<ArrayRef> = vec![
5846            Arc::new(Int32Array::from(values_a)),
5847            Arc::new(BooleanArray::from(values_b)),
5848            Arc::new(StringArray::from(values_c)),
5849        ];
5850
5851        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
5852        let array: ArrayRef = Arc::new(
5853            UnionArray::try_new(fields.clone(), type_ids, None, children)
5854                .expect("UnionArray"),
5855        );
5856
5857        let expected = [
5858            (0, ScalarValue::from(42)),
5859            (1, ScalarValue::from(true)),
5860            (2, ScalarValue::from("foo")),
5861            (0, ScalarValue::Int32(None)),
5862            (1, ScalarValue::Boolean(None)),
5863            (2, ScalarValue::Utf8(None)),
5864        ];
5865
5866        for (i, (ti, value)) in expected.into_iter().enumerate() {
5867            let is_null = value.is_null();
5868            let value = Some((ti, Box::new(value)));
5869            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
5870            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
5871
5872            assert_eq!(
5873                actual, expected,
5874                "[{i}] {actual} was not equal to {expected}"
5875            );
5876
5877            assert!(
5878                expected.eq_array(&array, i).expect("eq_array"),
5879                "[{i}] {expected}.eq_array was false"
5880            );
5881
5882            if is_null {
5883                assert!(actual.is_null(), "[{i}] {actual} was not null")
5884            }
5885        }
5886    }
5887
5888    #[test]
5889    fn test_scalar_union_dense() {
5890        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
5891        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
5892        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
5893        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
5894        let children: Vec<ArrayRef> = vec![
5895            Arc::new(Int32Array::from(vec![Some(42), None])),
5896            Arc::new(BooleanArray::from(vec![Some(true), None])),
5897            Arc::new(StringArray::from(vec![Some("foo"), None])),
5898        ];
5899
5900        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
5901        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
5902        let array: ArrayRef = Arc::new(
5903            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
5904                .expect("UnionArray"),
5905        );
5906
5907        let expected = [
5908            (0, ScalarValue::from(42)),
5909            (1, ScalarValue::from(true)),
5910            (2, ScalarValue::from("foo")),
5911            (0, ScalarValue::Int32(None)),
5912            (1, ScalarValue::Boolean(None)),
5913            (2, ScalarValue::Utf8(None)),
5914        ];
5915
5916        for (i, (ti, value)) in expected.into_iter().enumerate() {
5917            let is_null = value.is_null();
5918            let value = Some((ti, Box::new(value)));
5919            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
5920            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
5921
5922            assert_eq!(
5923                actual, expected,
5924                "[{i}] {actual} was not equal to {expected}"
5925            );
5926
5927            assert!(
5928                expected.eq_array(&array, i).expect("eq_array"),
5929                "[{i}] {expected}.eq_array was false"
5930            );
5931
5932            if is_null {
5933                assert!(actual.is_null(), "[{i}] {actual} was not null")
5934            }
5935        }
5936    }
5937
5938    #[test]
5939    fn test_lists_in_struct() {
5940        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
5941        let field_primitive_list = Arc::new(Field::new(
5942            "primitive_list",
5943            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5944            false,
5945        ));
5946
5947        // Define primitive list scalars
5948        let l0 =
5949            ScalarValue::List(Arc::new(
5950                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5951                    Some(1),
5952                    Some(2),
5953                    Some(3),
5954                ])]),
5955            ));
5956        let l1 =
5957            ScalarValue::List(Arc::new(
5958                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5959                    Some(4),
5960                    Some(5),
5961                ])]),
5962            ));
5963        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
5964            Int32Type,
5965            _,
5966            _,
5967        >(vec![Some(vec![Some(6)])])));
5968
5969        // Define struct scalars
5970        let s0 = ScalarValue::from(vec![
5971            ("A", ScalarValue::from("First")),
5972            ("primitive_list", l0),
5973        ]);
5974
5975        let s1 = ScalarValue::from(vec![
5976            ("A", ScalarValue::from("Second")),
5977            ("primitive_list", l1),
5978        ]);
5979
5980        let s2 = ScalarValue::from(vec![
5981            ("A", ScalarValue::from("Third")),
5982            ("primitive_list", l2),
5983        ]);
5984
5985        // iter_to_array for struct scalars
5986        let array =
5987            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
5988
5989        let array = as_struct_array(&array).unwrap();
5990        let expected = StructArray::from(vec![
5991            (
5992                Arc::clone(&field_a),
5993                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
5994            ),
5995            (
5996                Arc::clone(&field_primitive_list),
5997                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5998                    Some(vec![Some(1), Some(2), Some(3)]),
5999                    Some(vec![Some(4), Some(5)]),
6000                    Some(vec![Some(6)]),
6001                ])),
6002            ),
6003        ]);
6004
6005        assert_eq!(array, &expected);
6006
6007        // Define list-of-structs scalars
6008
6009        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
6010        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
6011
6012        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
6013        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
6014
6015        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
6016        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
6017
6018        // iter_to_array for list-of-struct
6019        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
6020        let array = array.as_list::<i32>();
6021
6022        // Construct expected array with array builders
6023        let field_a_builder = StringBuilder::with_capacity(4, 1024);
6024        let primitive_value_builder = Int32Array::builder(8);
6025        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
6026
6027        let element_builder = StructBuilder::new(
6028            vec![field_a, field_primitive_list],
6029            vec![
6030                Box::new(field_a_builder),
6031                Box::new(field_primitive_list_builder),
6032            ],
6033        );
6034
6035        let mut list_builder = ListBuilder::new(element_builder);
6036
6037        list_builder
6038            .values()
6039            .field_builder::<StringBuilder>(0)
6040            .unwrap()
6041            .append_value("First");
6042        list_builder
6043            .values()
6044            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6045            .unwrap()
6046            .values()
6047            .append_value(1);
6048        list_builder
6049            .values()
6050            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6051            .unwrap()
6052            .values()
6053            .append_value(2);
6054        list_builder
6055            .values()
6056            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6057            .unwrap()
6058            .values()
6059            .append_value(3);
6060        list_builder
6061            .values()
6062            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6063            .unwrap()
6064            .append(true);
6065        list_builder.values().append(true);
6066
6067        list_builder
6068            .values()
6069            .field_builder::<StringBuilder>(0)
6070            .unwrap()
6071            .append_value("Second");
6072        list_builder
6073            .values()
6074            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6075            .unwrap()
6076            .values()
6077            .append_value(4);
6078        list_builder
6079            .values()
6080            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6081            .unwrap()
6082            .values()
6083            .append_value(5);
6084        list_builder
6085            .values()
6086            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6087            .unwrap()
6088            .append(true);
6089        list_builder.values().append(true);
6090        list_builder.append(true);
6091
6092        list_builder
6093            .values()
6094            .field_builder::<StringBuilder>(0)
6095            .unwrap()
6096            .append_value("Third");
6097        list_builder
6098            .values()
6099            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6100            .unwrap()
6101            .values()
6102            .append_value(6);
6103        list_builder
6104            .values()
6105            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6106            .unwrap()
6107            .append(true);
6108        list_builder.values().append(true);
6109        list_builder.append(true);
6110
6111        list_builder
6112            .values()
6113            .field_builder::<StringBuilder>(0)
6114            .unwrap()
6115            .append_value("Second");
6116        list_builder
6117            .values()
6118            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6119            .unwrap()
6120            .values()
6121            .append_value(4);
6122        list_builder
6123            .values()
6124            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6125            .unwrap()
6126            .values()
6127            .append_value(5);
6128        list_builder
6129            .values()
6130            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6131            .unwrap()
6132            .append(true);
6133        list_builder.values().append(true);
6134        list_builder.append(true);
6135
6136        let expected = list_builder.finish();
6137
6138        assert_eq!(array, &expected);
6139    }
6140
6141    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
6142        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
6143        ListArray::new(
6144            Arc::new(Field::new_list_field(
6145                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6146                true,
6147            )),
6148            OffsetBuffer::<i32>::from_lengths([1]),
6149            Arc::new(a1),
6150            None,
6151        )
6152    }
6153
6154    #[test]
6155    fn test_nested_lists() {
6156        // Define inner list scalars
6157        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
6158        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
6159        let arr3 = build_2d_list(vec![Some(6)]);
6160
6161        let array = ScalarValue::iter_to_array(vec![
6162            ScalarValue::List(Arc::new(arr1)),
6163            ScalarValue::List(Arc::new(arr2)),
6164            ScalarValue::List(Arc::new(arr3)),
6165        ])
6166        .unwrap();
6167        let array = array.as_list::<i32>();
6168
6169        // Construct expected array with array builders
6170        let inner_builder = Int32Array::builder(6);
6171        let middle_builder = ListBuilder::new(inner_builder);
6172        let mut outer_builder = ListBuilder::new(middle_builder);
6173
6174        outer_builder.values().values().append_value(1);
6175        outer_builder.values().values().append_value(2);
6176        outer_builder.values().values().append_value(3);
6177        outer_builder.values().append(true);
6178        outer_builder.append(true);
6179
6180        outer_builder.values().values().append_value(4);
6181        outer_builder.values().values().append_value(5);
6182        outer_builder.values().append(true);
6183        outer_builder.append(true);
6184
6185        outer_builder.values().values().append_value(6);
6186        outer_builder.values().append(true);
6187        outer_builder.append(true);
6188
6189        let expected = outer_builder.finish();
6190
6191        assert_eq!(array, &expected);
6192    }
6193
6194    #[test]
6195    fn scalar_timestamp_ns_utc_timezone() {
6196        let scalar = ScalarValue::TimestampNanosecond(
6197            Some(1599566400000000000),
6198            Some("UTC".into()),
6199        );
6200
6201        assert_eq!(
6202            scalar.data_type(),
6203            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6204        );
6205
6206        let array = scalar.to_array().expect("Failed to convert to array");
6207        assert_eq!(array.len(), 1);
6208        assert_eq!(
6209            array.data_type(),
6210            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6211        );
6212
6213        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
6214        assert_eq!(
6215            new_scalar.data_type(),
6216            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6217        );
6218    }
6219
6220    #[test]
6221    fn cast_round_trip() {
6222        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
6223        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
6224
6225        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
6226
6227        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
6228
6229        check_scalar_cast(
6230            ScalarValue::from("foo"),
6231            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6232        );
6233
6234        check_scalar_cast(
6235            ScalarValue::Utf8(None),
6236            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6237        );
6238
6239        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
6240        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
6241        check_scalar_cast(
6242            ScalarValue::from("larger than 12 bytes string"),
6243            DataType::Utf8View,
6244        );
6245        check_scalar_cast(
6246            {
6247                let element_field =
6248                    Arc::new(Field::new("element", DataType::Int32, true));
6249
6250                let mut builder =
6251                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
6252                builder.append_value([Some(1)]);
6253                builder.append(true);
6254
6255                ScalarValue::List(Arc::new(builder.finish()))
6256            },
6257            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
6258        );
6259        check_scalar_cast(
6260            {
6261                let element_field =
6262                    Arc::new(Field::new("element", DataType::Int32, true));
6263
6264                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
6265                    .with_field(element_field);
6266                builder.values().append_value(1);
6267                builder.append(true);
6268
6269                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
6270            },
6271            DataType::FixedSizeList(
6272                Arc::new(Field::new("element", DataType::Int64, true)),
6273                1,
6274            ),
6275        );
6276        check_scalar_cast(
6277            {
6278                let element_field =
6279                    Arc::new(Field::new("element", DataType::Int32, true));
6280
6281                let mut builder =
6282                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
6283                builder.append_value([Some(1)]);
6284                builder.append(true);
6285
6286                ScalarValue::LargeList(Arc::new(builder.finish()))
6287            },
6288            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
6289        );
6290    }
6291
6292    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
6293    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
6294        // convert from scalar --> Array to call cast
6295        let scalar_array = scalar.to_array().expect("Failed to convert to array");
6296        // cast the actual value
6297        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
6298
6299        // turn it back to a scalar
6300        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
6301        assert_eq!(cast_scalar.data_type(), desired_type);
6302
6303        // Some time later the "cast" scalar is turned back into an array:
6304        let array = cast_scalar
6305            .to_array_of_size(10)
6306            .expect("Failed to convert to array of size");
6307
6308        // The datatype should be "Dictionary" but is actually Utf8!!!
6309        assert_eq!(array.data_type(), &desired_type)
6310    }
6311
6312    #[test]
6313    fn test_scalar_negative() -> Result<()> {
6314        // positive test
6315        let value = ScalarValue::Int32(Some(12));
6316        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
6317        let value = ScalarValue::Int32(None);
6318        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
6319
6320        // negative test
6321        let value = ScalarValue::UInt8(Some(12));
6322        assert!(value.arithmetic_negate().is_err());
6323        let value = ScalarValue::Boolean(None);
6324        assert!(value.arithmetic_negate().is_err());
6325        Ok(())
6326    }
6327
6328    #[test]
6329    #[allow(arithmetic_overflow)] // we want to test them
6330    fn test_scalar_negative_overflows() -> Result<()> {
6331        macro_rules! test_overflow_on_value {
6332            ($($val:expr),* $(,)?) => {$(
6333                {
6334                    let value: ScalarValue = $val;
6335                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
6336                    let root_err = err.find_root();
6337                    match  root_err{
6338                        DataFusionError::ArrowError(
6339                            ArrowError::ArithmeticOverflow(_),
6340                            _,
6341                        ) => {}
6342                        _ => return Err(err),
6343                    };
6344                }
6345            )*};
6346        }
6347        test_overflow_on_value!(
6348            // the integers
6349            i8::MIN.into(),
6350            i16::MIN.into(),
6351            i32::MIN.into(),
6352            i64::MIN.into(),
6353            // for decimals, only value needs to be tested
6354            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
6355            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
6356            // interval, check all possible values
6357            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
6358            ScalarValue::new_interval_dt(i32::MIN, 999),
6359            ScalarValue::new_interval_dt(1, i32::MIN),
6360            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
6361            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
6362            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
6363            // tz doesn't matter when negating
6364            ScalarValue::TimestampSecond(Some(i64::MIN), None),
6365            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
6366            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
6367            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
6368        );
6369
6370        let float_cases = [
6371            (
6372                ScalarValue::Float16(Some(f16::MIN)),
6373                ScalarValue::Float16(Some(f16::MAX)),
6374            ),
6375            (
6376                ScalarValue::Float16(Some(f16::MAX)),
6377                ScalarValue::Float16(Some(f16::MIN)),
6378            ),
6379            (f32::MIN.into(), f32::MAX.into()),
6380            (f32::MAX.into(), f32::MIN.into()),
6381            (f64::MIN.into(), f64::MAX.into()),
6382            (f64::MAX.into(), f64::MIN.into()),
6383        ];
6384        // skip float 16 because they aren't supported
6385        for (test, expected) in float_cases.into_iter().skip(2) {
6386            assert_eq!(test.arithmetic_negate()?, expected);
6387        }
6388        Ok(())
6389    }
6390
6391    #[test]
6392    fn f16_test_overflow() {
6393        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
6394        let cases = [
6395            (
6396                ScalarValue::Float16(Some(f16::MIN)),
6397                ScalarValue::Float16(Some(f16::MAX)),
6398            ),
6399            (
6400                ScalarValue::Float16(Some(f16::MAX)),
6401                ScalarValue::Float16(Some(f16::MIN)),
6402            ),
6403        ];
6404
6405        for (test, expected) in cases {
6406            assert_eq!(test.arithmetic_negate().unwrap(), expected);
6407        }
6408    }
6409
6410    macro_rules! expect_operation_error {
6411        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
6412            #[test]
6413            fn $TEST_NAME() {
6414                let lhs = ScalarValue::UInt64(Some(12));
6415                let rhs = ScalarValue::Int32(Some(-3));
6416                match lhs.$FUNCTION(&rhs) {
6417                    Ok(_result) => {
6418                        panic!(
6419                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
6420                            lhs, rhs
6421                        );
6422                    }
6423                    Err(e) => {
6424                        let error_message = e.to_string();
6425                        assert!(
6426                            error_message.contains($EXPECTED_ERROR),
6427                            "Expected error '{}' not found in actual error '{}'",
6428                            $EXPECTED_ERROR,
6429                            error_message
6430                        );
6431                    }
6432                }
6433            }
6434        };
6435    }
6436
6437    expect_operation_error!(
6438        expect_add_error,
6439        add,
6440        "Invalid arithmetic operation: UInt64 + Int32"
6441    );
6442    expect_operation_error!(
6443        expect_sub_error,
6444        sub,
6445        "Invalid arithmetic operation: UInt64 - Int32"
6446    );
6447
6448    macro_rules! decimal_op_test_cases {
6449    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
6450            $(
6451
6452                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
6453                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
6454                let result = left.$OPERATION(&right).unwrap();
6455                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
6456
6457            )+
6458        };
6459    }
6460
6461    #[test]
6462    fn decimal_operations() {
6463        decimal_op_test_cases!(
6464            add,
6465            [
6466                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
6467                // test sum decimal with diff scale
6468                [
6469                    Some(123),
6470                    10,
6471                    3,
6472                    Some(124),
6473                    10,
6474                    2,
6475                    Some(123 + 124 * 10_i128.pow(1)),
6476                    12,
6477                    3
6478                ],
6479                // diff precision and scale for decimal data type
6480                [
6481                    Some(123),
6482                    10,
6483                    2,
6484                    Some(124),
6485                    11,
6486                    3,
6487                    Some(123 * 10_i128.pow(3 - 2) + 124),
6488                    12,
6489                    3
6490                ]
6491            ]
6492        );
6493    }
6494
6495    #[test]
6496    fn decimal_operations_with_nulls() {
6497        decimal_op_test_cases!(
6498            add,
6499            [
6500                // Case: (None, Some, 0)
6501                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
6502                // Case: (Some, None, 0)
6503                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
6504                // Case: (Some, None, _) + Side=False
6505                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
6506                // Case: (None, Some, _) + Side=False
6507                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
6508                // Case: (Some, None, _) + Side=True
6509                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
6510                // Case: (None, Some, _) + Side=True
6511                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
6512            ]
6513        );
6514    }
6515
6516    #[test]
6517    fn test_scalar_distance() {
6518        let cases = [
6519            // scalar (lhs), scalar (rhs), expected distance
6520            // ---------------------------------------------
6521            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
6522            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
6523            (
6524                ScalarValue::Int16(Some(-5)),
6525                ScalarValue::Int16(Some(5)),
6526                10,
6527            ),
6528            (
6529                ScalarValue::Int16(Some(5)),
6530                ScalarValue::Int16(Some(-5)),
6531                10,
6532            ),
6533            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
6534            (
6535                ScalarValue::Int32(Some(-5)),
6536                ScalarValue::Int32(Some(-10)),
6537                5,
6538            ),
6539            (
6540                ScalarValue::Int64(Some(-10)),
6541                ScalarValue::Int64(Some(-5)),
6542                5,
6543            ),
6544            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
6545            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
6546            (
6547                ScalarValue::UInt16(Some(5)),
6548                ScalarValue::UInt16(Some(10)),
6549                5,
6550            ),
6551            (
6552                ScalarValue::UInt32(Some(10)),
6553                ScalarValue::UInt32(Some(5)),
6554                5,
6555            ),
6556            (
6557                ScalarValue::UInt64(Some(5)),
6558                ScalarValue::UInt64(Some(10)),
6559                5,
6560            ),
6561            (
6562                ScalarValue::Float16(Some(f16::from_f32(1.1))),
6563                ScalarValue::Float16(Some(f16::from_f32(1.9))),
6564                1,
6565            ),
6566            (
6567                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6568                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
6569                4,
6570            ),
6571            (
6572                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6573                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
6574                4,
6575            ),
6576            (
6577                ScalarValue::Float32(Some(1.0)),
6578                ScalarValue::Float32(Some(2.0)),
6579                1,
6580            ),
6581            (
6582                ScalarValue::Float32(Some(2.0)),
6583                ScalarValue::Float32(Some(1.0)),
6584                1,
6585            ),
6586            (
6587                ScalarValue::Float64(Some(0.0)),
6588                ScalarValue::Float64(Some(0.0)),
6589                0,
6590            ),
6591            (
6592                ScalarValue::Float64(Some(-5.0)),
6593                ScalarValue::Float64(Some(-10.0)),
6594                5,
6595            ),
6596            (
6597                ScalarValue::Float64(Some(-10.0)),
6598                ScalarValue::Float64(Some(-5.0)),
6599                5,
6600            ),
6601            // Floats are currently special cased to f64/f32 and the result is rounded
6602            // rather than ceiled/floored. In the future we might want to take a mode
6603            // which specified the rounding behavior.
6604            (
6605                ScalarValue::Float32(Some(1.2)),
6606                ScalarValue::Float32(Some(1.3)),
6607                0,
6608            ),
6609            (
6610                ScalarValue::Float32(Some(1.1)),
6611                ScalarValue::Float32(Some(1.9)),
6612                1,
6613            ),
6614            (
6615                ScalarValue::Float64(Some(-5.3)),
6616                ScalarValue::Float64(Some(-9.2)),
6617                4,
6618            ),
6619            (
6620                ScalarValue::Float64(Some(-5.3)),
6621                ScalarValue::Float64(Some(-9.7)),
6622                4,
6623            ),
6624            (
6625                ScalarValue::Float64(Some(-5.3)),
6626                ScalarValue::Float64(Some(-9.9)),
6627                5,
6628            ),
6629        ];
6630        for (lhs, rhs, expected) in cases.iter() {
6631            let distance = lhs.distance(rhs).unwrap();
6632            assert_eq!(distance, *expected);
6633        }
6634    }
6635
6636    #[test]
6637    fn test_scalar_distance_invalid() {
6638        let cases = [
6639            // scalar (lhs), scalar (rhs)
6640            // --------------------------
6641            // Same type but with nulls
6642            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
6643            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
6644            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
6645            // Different type
6646            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
6647            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
6648            (
6649                ScalarValue::Float16(Some(f16::from_f32(1.0))),
6650                ScalarValue::Float32(Some(1.0)),
6651            ),
6652            (
6653                ScalarValue::Float16(Some(f16::from_f32(1.0))),
6654                ScalarValue::Int32(Some(1)),
6655            ),
6656            (
6657                ScalarValue::Float64(Some(1.1)),
6658                ScalarValue::Float32(Some(2.2)),
6659            ),
6660            (
6661                ScalarValue::UInt64(Some(777)),
6662                ScalarValue::Int32(Some(111)),
6663            ),
6664            // Different types with nulls
6665            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
6666            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
6667            // Unsupported types
6668            (ScalarValue::from("foo"), ScalarValue::from("bar")),
6669            (
6670                ScalarValue::Boolean(Some(true)),
6671                ScalarValue::Boolean(Some(false)),
6672            ),
6673            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
6674            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
6675            (
6676                ScalarValue::Decimal128(Some(123), 5, 5),
6677                ScalarValue::Decimal128(Some(120), 5, 5),
6678            ),
6679        ];
6680        for (lhs, rhs) in cases {
6681            let distance = lhs.distance(&rhs);
6682            assert!(distance.is_none());
6683        }
6684    }
6685
6686    #[test]
6687    fn test_scalar_interval_negate() {
6688        let cases = [
6689            (
6690                ScalarValue::new_interval_ym(1, 12),
6691                ScalarValue::new_interval_ym(-1, -12),
6692            ),
6693            (
6694                ScalarValue::new_interval_dt(1, 999),
6695                ScalarValue::new_interval_dt(-1, -999),
6696            ),
6697            (
6698                ScalarValue::new_interval_mdn(12, 15, 123_456),
6699                ScalarValue::new_interval_mdn(-12, -15, -123_456),
6700            ),
6701        ];
6702        for (expr, expected) in cases.iter() {
6703            let result = expr.arithmetic_negate().unwrap();
6704            assert_eq!(*expected, result, "-expr:{expr:?}");
6705        }
6706    }
6707
6708    #[test]
6709    fn test_scalar_interval_add() {
6710        let cases = [
6711            (
6712                ScalarValue::new_interval_ym(1, 12),
6713                ScalarValue::new_interval_ym(1, 12),
6714                ScalarValue::new_interval_ym(2, 24),
6715            ),
6716            (
6717                ScalarValue::new_interval_dt(1, 999),
6718                ScalarValue::new_interval_dt(1, 999),
6719                ScalarValue::new_interval_dt(2, 1998),
6720            ),
6721            (
6722                ScalarValue::new_interval_mdn(12, 15, 123_456),
6723                ScalarValue::new_interval_mdn(12, 15, 123_456),
6724                ScalarValue::new_interval_mdn(24, 30, 246_912),
6725            ),
6726        ];
6727        for (lhs, rhs, expected) in cases.iter() {
6728            let result = lhs.add(rhs).unwrap();
6729            let result_commute = rhs.add(lhs).unwrap();
6730            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
6731            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
6732        }
6733    }
6734
6735    #[test]
6736    fn test_scalar_interval_sub() {
6737        let cases = [
6738            (
6739                ScalarValue::new_interval_ym(1, 12),
6740                ScalarValue::new_interval_ym(1, 12),
6741                ScalarValue::new_interval_ym(0, 0),
6742            ),
6743            (
6744                ScalarValue::new_interval_dt(1, 999),
6745                ScalarValue::new_interval_dt(1, 999),
6746                ScalarValue::new_interval_dt(0, 0),
6747            ),
6748            (
6749                ScalarValue::new_interval_mdn(12, 15, 123_456),
6750                ScalarValue::new_interval_mdn(12, 15, 123_456),
6751                ScalarValue::new_interval_mdn(0, 0, 0),
6752            ),
6753        ];
6754        for (lhs, rhs, expected) in cases.iter() {
6755            let result = lhs.sub(rhs).unwrap();
6756            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
6757        }
6758    }
6759
6760    #[test]
6761    fn timestamp_op_random_tests() {
6762        // timestamp1 + (or -) interval = timestamp2
6763        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
6764        let sample_size = 1000;
6765        let timestamps1 = get_random_timestamps(sample_size);
6766        let intervals = get_random_intervals(sample_size);
6767        // ts(sec) + interval(ns) = ts(sec); however,
6768        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
6769        // timestamps are more precise than intervals in tests.
6770        for (idx, ts1) in timestamps1.iter().enumerate() {
6771            if idx % 2 == 0 {
6772                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
6773                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
6774                assert_eq!(ts1, &back);
6775            } else {
6776                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
6777                let back = timestamp2.add(intervals[idx].clone()).unwrap();
6778                assert_eq!(ts1, &back);
6779            };
6780        }
6781    }
6782
6783    #[test]
6784    fn test_struct_nulls() {
6785        let fields_b = Fields::from(vec![
6786            Field::new("ba", DataType::UInt64, true),
6787            Field::new("bb", DataType::UInt64, true),
6788        ]);
6789        let fields = Fields::from(vec![
6790            Field::new("a", DataType::UInt64, true),
6791            Field::new("b", DataType::Struct(fields_b.clone()), true),
6792        ]);
6793
6794        let struct_value = vec![
6795            (
6796                Arc::clone(&fields[0]),
6797                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
6798            ),
6799            (
6800                Arc::clone(&fields[1]),
6801                Arc::new(StructArray::from(vec![
6802                    (
6803                        Arc::clone(&fields_b[0]),
6804                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
6805                    ),
6806                    (
6807                        Arc::clone(&fields_b[1]),
6808                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
6809                    ),
6810                ])) as ArrayRef,
6811            ),
6812        ];
6813
6814        let struct_value_with_nulls = vec![
6815            (
6816                Arc::clone(&fields[0]),
6817                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
6818            ),
6819            (
6820                Arc::clone(&fields[1]),
6821                Arc::new(StructArray::from((
6822                    vec![
6823                        (
6824                            Arc::clone(&fields_b[0]),
6825                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
6826                        ),
6827                        (
6828                            Arc::clone(&fields_b[1]),
6829                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
6830                        ),
6831                    ],
6832                    Buffer::from(&[0]),
6833                ))) as ArrayRef,
6834            ),
6835        ];
6836
6837        let scalars = vec![
6838            // all null
6839            ScalarValue::Struct(Arc::new(StructArray::from((
6840                struct_value.clone(),
6841                Buffer::from(&[0]),
6842            )))),
6843            // field 1 valid, field 2 null
6844            ScalarValue::Struct(Arc::new(StructArray::from((
6845                struct_value_with_nulls.clone(),
6846                Buffer::from(&[1]),
6847            )))),
6848            // all valid
6849            ScalarValue::Struct(Arc::new(StructArray::from((
6850                struct_value.clone(),
6851                Buffer::from(&[1]),
6852            )))),
6853        ];
6854
6855        let check_array = |array| {
6856            let is_null = is_null(&array).unwrap();
6857            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
6858
6859            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
6860            let formatted = formatted.split('\n').collect::<Vec<_>>();
6861            let expected = vec![
6862                "+---------------------------+",
6863                "| col                       |",
6864                "+---------------------------+",
6865                "|                           |",
6866                "| {a: 1, b: }               |",
6867                "| {a: 1, b: {ba: 2, bb: 3}} |",
6868                "+---------------------------+",
6869            ];
6870            assert_eq!(
6871                formatted, expected,
6872                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
6873            );
6874        };
6875
6876        // test `ScalarValue::iter_to_array`
6877        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
6878        check_array(array);
6879
6880        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
6881        let arrays = scalars
6882            .iter()
6883            .map(ScalarValue::to_array)
6884            .collect::<Result<Vec<_>>>()
6885            .expect("Failed to convert to array");
6886        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
6887        let array = arrow::compute::concat(&arrays).unwrap();
6888        check_array(array);
6889    }
6890
6891    #[test]
6892    fn test_struct_display() {
6893        let field_a = Field::new("a", DataType::Int32, true);
6894        let field_b = Field::new("b", DataType::Utf8, true);
6895
6896        let s = ScalarStructBuilder::new()
6897            .with_scalar(field_a, ScalarValue::from(1i32))
6898            .with_scalar(field_b, ScalarValue::Utf8(None))
6899            .build()
6900            .unwrap();
6901
6902        assert_eq!(s.to_string(), "{a:1,b:}");
6903        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
6904
6905        let ScalarValue::Struct(arr) = s else {
6906            panic!("Expected struct");
6907        };
6908
6909        //verify compared to arrow display
6910        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
6911        let expected = [
6912            "+-------------+",
6913            "| s           |",
6914            "+-------------+",
6915            "| {a: 1, b: } |",
6916            "+-------------+",
6917        ];
6918        assert_batches_eq!(&expected, &[batch]);
6919    }
6920
6921    #[test]
6922    fn test_null_bug() {
6923        let field_a = Field::new("a", DataType::Int32, true);
6924        let field_b = Field::new("b", DataType::Int32, true);
6925        let fields = Fields::from(vec![field_a, field_b]);
6926
6927        let array_a = Arc::new(Int32Array::from_iter_values([1]));
6928        let array_b = Arc::new(Int32Array::from_iter_values([2]));
6929        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
6930
6931        let mut not_nulls = NullBufferBuilder::new(1);
6932
6933        not_nulls.append_non_null();
6934
6935        let ar = StructArray::new(fields, arrays, not_nulls.finish());
6936        let s = ScalarValue::Struct(Arc::new(ar));
6937
6938        assert_eq!(s.to_string(), "{a:1,b:2}");
6939        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
6940
6941        let ScalarValue::Struct(arr) = s else {
6942            panic!("Expected struct");
6943        };
6944
6945        //verify compared to arrow display
6946        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
6947        let expected = [
6948            "+--------------+",
6949            "| s            |",
6950            "+--------------+",
6951            "| {a: 1, b: 2} |",
6952            "+--------------+",
6953        ];
6954        assert_batches_eq!(&expected, &[batch]);
6955    }
6956
6957    #[test]
6958    fn test_struct_display_null() {
6959        let fields = vec![Field::new("a", DataType::Int32, false)];
6960        let s = ScalarStructBuilder::new_null(fields);
6961        assert_eq!(s.to_string(), "NULL");
6962
6963        let ScalarValue::Struct(arr) = s else {
6964            panic!("Expected struct");
6965        };
6966
6967        //verify compared to arrow display
6968        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
6969
6970        #[rustfmt::skip]
6971            let expected = [
6972            "+---+",
6973            "| s |",
6974            "+---+",
6975            "|   |",
6976            "+---+",
6977        ];
6978        assert_batches_eq!(&expected, &[batch]);
6979    }
6980
6981    #[test]
6982    fn test_map_display_and_debug() {
6983        let string_builder = StringBuilder::new();
6984        let int_builder = Int32Builder::with_capacity(4);
6985        let mut builder = MapBuilder::new(None, string_builder, int_builder);
6986        builder.keys().append_value("joe");
6987        builder.values().append_value(1);
6988        builder.append(true).unwrap();
6989
6990        builder.keys().append_value("blogs");
6991        builder.values().append_value(2);
6992        builder.keys().append_value("foo");
6993        builder.values().append_value(4);
6994        builder.append(true).unwrap();
6995        builder.append(true).unwrap();
6996        builder.append(false).unwrap();
6997
6998        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
6999
7000        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
7001        assert_eq!(
7002            format!("{map_value:?}"),
7003            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
7004        );
7005
7006        let ScalarValue::Map(arr) = map_value else {
7007            panic!("Expected map");
7008        };
7009
7010        //verify compared to arrow display
7011        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
7012        let expected = [
7013            "+--------------------+",
7014            "| m                  |",
7015            "+--------------------+",
7016            "| {joe: 1}           |",
7017            "| {blogs: 2, foo: 4} |",
7018            "| {}                 |",
7019            "|                    |",
7020            "+--------------------+",
7021        ];
7022        assert_batches_eq!(&expected, &[batch]);
7023    }
7024
7025    #[test]
7026    fn test_binary_display() {
7027        let no_binary_value = ScalarValue::Binary(None);
7028        assert_eq!(format!("{no_binary_value}"), "NULL");
7029        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7030        assert_eq!(format!("{single_binary_value}"), "2A");
7031        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7032        assert_eq!(format!("{small_binary_value}"), "010203");
7033        let large_binary_value =
7034            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7035        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7036
7037        let no_binary_value = ScalarValue::BinaryView(None);
7038        assert_eq!(format!("{no_binary_value}"), "NULL");
7039        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7040        assert_eq!(format!("{small_binary_value}"), "010203");
7041        let large_binary_value =
7042            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7043        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7044
7045        let no_binary_value = ScalarValue::LargeBinary(None);
7046        assert_eq!(format!("{no_binary_value}"), "NULL");
7047        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7048        assert_eq!(format!("{small_binary_value}"), "010203");
7049        let large_binary_value =
7050            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7051        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7052
7053        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7054        assert_eq!(format!("{no_binary_value}"), "NULL");
7055        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7056        assert_eq!(format!("{small_binary_value}"), "010203");
7057        let large_binary_value = ScalarValue::FixedSizeBinary(
7058            11,
7059            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7060        );
7061        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7062    }
7063
7064    #[test]
7065    fn test_binary_debug() {
7066        let no_binary_value = ScalarValue::Binary(None);
7067        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
7068        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7069        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
7070        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7071        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
7072        let large_binary_value =
7073            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7074        assert_eq!(
7075            format!("{large_binary_value:?}"),
7076            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7077        );
7078
7079        let no_binary_value = ScalarValue::BinaryView(None);
7080        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
7081        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7082        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
7083        let large_binary_value =
7084            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7085        assert_eq!(
7086            format!("{large_binary_value:?}"),
7087            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
7088        );
7089
7090        let no_binary_value = ScalarValue::LargeBinary(None);
7091        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
7092        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7093        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
7094        let large_binary_value =
7095            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7096        assert_eq!(
7097            format!("{large_binary_value:?}"),
7098            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7099        );
7100
7101        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7102        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
7103        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7104        assert_eq!(
7105            format!("{small_binary_value:?}"),
7106            "FixedSizeBinary(3, \"1,2,3\")"
7107        );
7108        let large_binary_value = ScalarValue::FixedSizeBinary(
7109            11,
7110            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7111        );
7112        assert_eq!(
7113            format!("{large_binary_value:?}"),
7114            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
7115        );
7116    }
7117
7118    #[test]
7119    fn test_build_timestamp_millisecond_list() {
7120        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
7121        let arr = ScalarValue::new_list_nullable(
7122            &values,
7123            &DataType::Timestamp(TimeUnit::Millisecond, None),
7124        );
7125        assert_eq!(1, arr.len());
7126    }
7127
7128    #[test]
7129    fn test_newlist_timestamp_zone() {
7130        let s: &'static str = "UTC";
7131        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
7132        let arr = ScalarValue::new_list_nullable(
7133            &values,
7134            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7135        );
7136        assert_eq!(1, arr.len());
7137        assert_eq!(
7138            arr.data_type(),
7139            &DataType::List(Arc::new(Field::new_list_field(
7140                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7141                true,
7142            )))
7143        );
7144    }
7145
7146    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
7147        let vector_size = sample_size;
7148        let mut timestamp = vec![];
7149        let mut rng = rand::thread_rng();
7150        for i in 0..vector_size {
7151            let year = rng.gen_range(1995..=2050);
7152            let month = rng.gen_range(1..=12);
7153            let day = rng.gen_range(1..=28); // to exclude invalid dates
7154            let hour = rng.gen_range(0..=23);
7155            let minute = rng.gen_range(0..=59);
7156            let second = rng.gen_range(0..=59);
7157            if i % 4 == 0 {
7158                timestamp.push(ScalarValue::TimestampSecond(
7159                    Some(
7160                        NaiveDate::from_ymd_opt(year, month, day)
7161                            .unwrap()
7162                            .and_hms_opt(hour, minute, second)
7163                            .unwrap()
7164                            .and_utc()
7165                            .timestamp(),
7166                    ),
7167                    None,
7168                ))
7169            } else if i % 4 == 1 {
7170                let millisec = rng.gen_range(0..=999);
7171                timestamp.push(ScalarValue::TimestampMillisecond(
7172                    Some(
7173                        NaiveDate::from_ymd_opt(year, month, day)
7174                            .unwrap()
7175                            .and_hms_milli_opt(hour, minute, second, millisec)
7176                            .unwrap()
7177                            .and_utc()
7178                            .timestamp_millis(),
7179                    ),
7180                    None,
7181                ))
7182            } else if i % 4 == 2 {
7183                let microsec = rng.gen_range(0..=999_999);
7184                timestamp.push(ScalarValue::TimestampMicrosecond(
7185                    Some(
7186                        NaiveDate::from_ymd_opt(year, month, day)
7187                            .unwrap()
7188                            .and_hms_micro_opt(hour, minute, second, microsec)
7189                            .unwrap()
7190                            .and_utc()
7191                            .timestamp_micros(),
7192                    ),
7193                    None,
7194                ))
7195            } else if i % 4 == 3 {
7196                let nanosec = rng.gen_range(0..=999_999_999);
7197                timestamp.push(ScalarValue::TimestampNanosecond(
7198                    Some(
7199                        NaiveDate::from_ymd_opt(year, month, day)
7200                            .unwrap()
7201                            .and_hms_nano_opt(hour, minute, second, nanosec)
7202                            .unwrap()
7203                            .and_utc()
7204                            .timestamp_nanos_opt()
7205                            .unwrap(),
7206                    ),
7207                    None,
7208                ))
7209            }
7210        }
7211        timestamp
7212    }
7213
7214    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
7215        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
7216        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
7217
7218        let vector_size = sample_size;
7219        let mut intervals = vec![];
7220        let mut rng = rand::thread_rng();
7221        const SECS_IN_ONE_DAY: i32 = 86_400;
7222        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
7223        for i in 0..vector_size {
7224            if i % 4 == 0 {
7225                let days = rng.gen_range(0..5000);
7226                // to not break second precision
7227                let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000;
7228                intervals.push(ScalarValue::new_interval_dt(days, millis));
7229            } else if i % 4 == 1 {
7230                let days = rng.gen_range(0..5000);
7231                let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32));
7232                intervals.push(ScalarValue::new_interval_dt(days, millisec));
7233            } else if i % 4 == 2 {
7234                let days = rng.gen_range(0..5000);
7235                // to not break microsec precision
7236                let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000;
7237                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7238            } else {
7239                let days = rng.gen_range(0..5000);
7240                let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY);
7241                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7242            }
7243        }
7244        intervals
7245    }
7246
7247    fn union_fields() -> UnionFields {
7248        [
7249            (0, Arc::new(Field::new("A", DataType::Int32, true))),
7250            (1, Arc::new(Field::new("B", DataType::Float64, true))),
7251        ]
7252        .into_iter()
7253        .collect()
7254    }
7255
7256    #[test]
7257    fn sparse_scalar_union_is_null() {
7258        let sparse_scalar = ScalarValue::Union(
7259            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7260            union_fields(),
7261            UnionMode::Sparse,
7262        );
7263        assert!(sparse_scalar.is_null());
7264    }
7265
7266    #[test]
7267    fn dense_scalar_union_is_null() {
7268        let dense_scalar = ScalarValue::Union(
7269            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7270            union_fields(),
7271            UnionMode::Dense,
7272        );
7273        assert!(dense_scalar.is_null());
7274    }
7275
7276    #[test]
7277    fn null_dictionary_scalar_produces_null_dictionary_array() {
7278        let dictionary_scalar = ScalarValue::Dictionary(
7279            Box::new(DataType::Int32),
7280            Box::new(ScalarValue::Null),
7281        );
7282        assert!(dictionary_scalar.is_null());
7283        let dictionary_array = dictionary_scalar.to_array().unwrap();
7284        assert!(dictionary_array.is_null(0));
7285    }
7286
7287    #[test]
7288    fn test_scalar_value_try_new_null() {
7289        let scalars = vec![
7290            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
7291            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
7292            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
7293            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
7294            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
7295            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
7296            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
7297            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
7298            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
7299            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
7300            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
7301            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
7302            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
7303            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
7304            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
7305            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
7306            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
7307            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
7308            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
7309            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
7310            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
7311            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
7312            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
7313            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
7314            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
7315            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
7316            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
7317            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
7318                .unwrap(),
7319            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
7320                .unwrap(),
7321            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
7322                .unwrap(),
7323            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
7324                .unwrap(),
7325            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
7326                .unwrap(),
7327            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
7328                .unwrap(),
7329            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
7330                .unwrap(),
7331            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
7332            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
7333                .unwrap(),
7334            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
7335            ScalarValue::try_new_null(&DataType::Null).unwrap(),
7336        ];
7337        assert!(scalars.iter().all(|s| s.is_null()));
7338
7339        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
7340        let map_field_ref = Arc::new(Field::new(
7341            "foo",
7342            DataType::Struct(Fields::from(vec![
7343                Field::new("bar", DataType::Utf8, true),
7344                Field::new("baz", DataType::Int32, true),
7345            ])),
7346            true,
7347        ));
7348        let scalars = vec![
7349            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
7350            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
7351                .unwrap(),
7352            ScalarValue::try_new_null(&DataType::FixedSizeList(
7353                Arc::clone(&field_ref),
7354                42,
7355            ))
7356            .unwrap(),
7357            ScalarValue::try_new_null(&DataType::Struct(
7358                vec![Arc::clone(&field_ref)].into(),
7359            ))
7360            .unwrap(),
7361            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
7362            ScalarValue::try_new_null(&DataType::Union(
7363                UnionFields::new(vec![42], vec![field_ref]),
7364                UnionMode::Dense,
7365            ))
7366            .unwrap(),
7367        ];
7368        assert!(scalars.iter().all(|s| s.is_null()));
7369    }
7370}