arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181// partition indices into valid and null indices
182fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183    match array.null_count() {
184        // faster path
185        0 => ((0..(array.len() as u32)).collect(), vec![]),
186        _ => {
187            let indices = 0..(array.len() as u32);
188            indices.partition(|index| array.is_valid(*index as usize))
189        }
190    }
191}
192
193/// Whether `sort_to_indices` can sort an array of given data type.
194fn can_sort_to_indices(data_type: &DataType) -> bool {
195    data_type.is_primitive()
196        || matches!(
197            data_type,
198            DataType::Boolean
199                | DataType::Utf8
200                | DataType::LargeUtf8
201                | DataType::Utf8View
202                | DataType::Binary
203                | DataType::LargeBinary
204                | DataType::BinaryView
205                | DataType::FixedSizeBinary(_)
206        )
207        || match data_type {
208            DataType::List(f) if can_rank(f.data_type()) => true,
209            DataType::LargeList(f) if can_rank(f.data_type()) => true,
210            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213            _ => false,
214        }
215}
216
217/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
218/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
219pub fn sort_to_indices(
220    array: &dyn Array,
221    options: Option<SortOptions>,
222    limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224    let options = options.unwrap_or_default();
225
226    let (v, n) = partition_validity(array);
227
228    Ok(downcast_primitive_array! {
229        array => sort_primitive(array, v, n, options, limit),
230        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241        DataType::Dictionary(_, _) => downcast_dictionary_array!{
242            array => sort_dictionary(array, v, n, options, limit)?,
243            _ => unreachable!()
244        }
245        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249            dt => {
250                return Err(ArrowError::ComputeError(format!(
251                    "Invalid run end data type: {dt}"
252                )))
253            }
254        },
255        t => {
256            return Err(ArrowError::ComputeError(format!(
257                "Sort not supported for data type {t:?}"
258            )));
259        }
260    })
261}
262
263fn sort_boolean(
264    values: &BooleanArray,
265    value_indices: Vec<u32>,
266    null_indices: Vec<u32>,
267    options: SortOptions,
268    limit: Option<usize>,
269) -> UInt32Array {
270    let mut valids = value_indices
271        .into_iter()
272        .map(|index| (index, values.value(index as usize)))
273        .collect::<Vec<(u32, bool)>>();
274    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278    values: &PrimitiveArray<T>,
279    value_indices: Vec<u32>,
280    nulls: Vec<u32>,
281    options: SortOptions,
282    limit: Option<usize>,
283) -> UInt32Array {
284    let mut valids = value_indices
285        .into_iter()
286        .map(|index| (index, values.value(index as usize)))
287        .collect::<Vec<(u32, T::Native)>>();
288    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292    values: &GenericByteArray<T>,
293    value_indices: Vec<u32>,
294    nulls: Vec<u32>,
295    options: SortOptions,
296    limit: Option<usize>,
297) -> UInt32Array {
298    let mut valids = value_indices
299        .into_iter()
300        .map(|index| (index, values.value(index as usize).as_ref()))
301        .collect::<Vec<(u32, &[u8])>>();
302
303    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307    values: &GenericByteViewArray<T>,
308    value_indices: Vec<u32>,
309    nulls: Vec<u32>,
310    options: SortOptions,
311    limit: Option<usize>,
312) -> UInt32Array {
313    let mut valids = value_indices
314        .into_iter()
315        .map(|index| (index, values.value(index as usize).as_ref()))
316        .collect::<Vec<(u32, &[u8])>>();
317    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
318}
319
320fn sort_fixed_size_binary(
321    values: &FixedSizeBinaryArray,
322    value_indices: Vec<u32>,
323    nulls: Vec<u32>,
324    options: SortOptions,
325    limit: Option<usize>,
326) -> UInt32Array {
327    let mut valids = value_indices
328        .iter()
329        .copied()
330        .map(|index| (index, values.value(index as usize)))
331        .collect::<Vec<(u32, &[u8])>>();
332    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
333}
334
335fn sort_dictionary<K: ArrowDictionaryKeyType>(
336    dict: &DictionaryArray<K>,
337    value_indices: Vec<u32>,
338    null_indices: Vec<u32>,
339    options: SortOptions,
340    limit: Option<usize>,
341) -> Result<UInt32Array, ArrowError> {
342    let keys: &PrimitiveArray<K> = dict.keys();
343    let rank = child_rank(dict.values().as_ref(), options)?;
344
345    // create tuples that are used for sorting
346    let mut valids = value_indices
347        .into_iter()
348        .map(|index| {
349            let key: K::Native = keys.value(index as usize);
350            (index, rank[key.as_usize()])
351        })
352        .collect::<Vec<(u32, u32)>>();
353
354    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
355}
356
357fn sort_list<O: OffsetSizeTrait>(
358    array: &GenericListArray<O>,
359    value_indices: Vec<u32>,
360    null_indices: Vec<u32>,
361    options: SortOptions,
362    limit: Option<usize>,
363) -> Result<UInt32Array, ArrowError> {
364    let rank = child_rank(array.values().as_ref(), options)?;
365    let offsets = array.value_offsets();
366    let mut valids = value_indices
367        .into_iter()
368        .map(|index| {
369            let end = offsets[index as usize + 1].as_usize();
370            let start = offsets[index as usize].as_usize();
371            (index, &rank[start..end])
372        })
373        .collect::<Vec<(u32, &[u32])>>();
374    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
375}
376
377fn sort_fixed_size_list(
378    array: &FixedSizeListArray,
379    value_indices: Vec<u32>,
380    null_indices: Vec<u32>,
381    options: SortOptions,
382    limit: Option<usize>,
383) -> Result<UInt32Array, ArrowError> {
384    let rank = child_rank(array.values().as_ref(), options)?;
385    let size = array.value_length() as usize;
386    let mut valids = value_indices
387        .into_iter()
388        .map(|index| {
389            let start = index as usize * size;
390            (index, &rank[start..start + size])
391        })
392        .collect::<Vec<(u32, &[u32])>>();
393    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
394}
395
396#[inline(never)]
397fn sort_impl<T: Copy>(
398    options: SortOptions,
399    valids: &mut [(u32, T)],
400    nulls: &[u32],
401    limit: Option<usize>,
402    mut cmp: impl FnMut(T, T) -> Ordering,
403) -> Vec<u32> {
404    let v_limit = match (limit, options.nulls_first) {
405        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
406        _ => valids.len(),
407    };
408
409    match options.descending {
410        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
411        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
412    }
413
414    let len = valids.len() + nulls.len();
415    let limit = limit.unwrap_or(len).min(len);
416    let mut out = Vec::with_capacity(len);
417    match options.nulls_first {
418        true => {
419            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
420            let remaining = limit - out.len();
421            out.extend(valids.iter().map(|x| x.0).take(remaining));
422        }
423        false => {
424            out.extend(valids.iter().map(|x| x.0).take(limit));
425            let remaining = limit - out.len();
426            out.extend_from_slice(&nulls[..remaining])
427        }
428    }
429    out
430}
431
432/// Computes the rank for a set of child values
433fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
434    // If parent sort order is descending we need to invert the value of nulls_first so that
435    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
436    let value_options = Some(SortOptions {
437        descending: false,
438        nulls_first: options.nulls_first != options.descending,
439    });
440    rank(values, value_options)
441}
442
443// Sort run array and return sorted run array.
444// The output RunArray will be encoded at the same level as input run array.
445// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
446// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
447// and not RunArray { run_ends = [4,8], values = [1,2] }
448fn sort_run(
449    values: &dyn Array,
450    options: Option<SortOptions>,
451    limit: Option<usize>,
452) -> Result<ArrayRef, ArrowError> {
453    match values.data_type() {
454        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
455            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
456            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
457            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
458            dt => unreachable!("Not valid run ends data type {dt}"),
459        },
460        dt => Err(ArrowError::InvalidArgumentError(format!(
461            "Input is not a run encoded array. Input data type {dt}"
462        ))),
463    }
464}
465
466fn sort_run_downcasted<R: RunEndIndexType>(
467    values: &dyn Array,
468    options: Option<SortOptions>,
469    limit: Option<usize>,
470) -> Result<ArrayRef, ArrowError> {
471    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
472
473    // Determine the length of output run array.
474    let output_len = if let Some(limit) = limit {
475        limit.min(run_array.len())
476    } else {
477        run_array.len()
478    };
479
480    let run_ends = run_array.run_ends();
481
482    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
483    let mut new_run_end: usize = 0;
484    let mut new_physical_len: usize = 0;
485
486    let consume_runs = |run_length, _| {
487        new_run_end += run_length;
488        new_physical_len += 1;
489        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
490    };
491
492    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
493
494    let new_run_ends = unsafe {
495        // Safety:
496        // The function builds a valid run_ends array and hence need not be validated.
497        ArrayDataBuilder::new(R::DATA_TYPE)
498            .len(new_physical_len)
499            .add_buffer(new_run_ends_builder.finish())
500            .build_unchecked()
501    };
502
503    // slice the sorted value indices based on limit.
504    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
505        .slice(0, new_run_ends.len())
506        .into_data()
507        .into();
508
509    let new_values = take(&run_values, &new_values_indices, None)?;
510
511    // Build sorted run array
512    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
513        .len(new_run_end)
514        .add_child_data(new_run_ends)
515        .add_child_data(new_values.into_data());
516    let array_data: RunArray<R> = unsafe {
517        // Safety:
518        //  This function builds a valid run array and hence can skip validation.
519        builder.build_unchecked().into()
520    };
521    Ok(Arc::new(array_data))
522}
523
524// Sort to indices for run encoded array.
525// This function will be slow for run array as it decodes the physical indices to
526// logical indices and to get the run array back, the logical indices has to be
527// encoded back to run array.
528fn sort_run_to_indices<R: RunEndIndexType>(
529    values: &dyn Array,
530    options: SortOptions,
531    limit: Option<usize>,
532) -> UInt32Array {
533    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
534    let output_len = if let Some(limit) = limit {
535        limit.min(run_array.len())
536    } else {
537        run_array.len()
538    };
539    let mut result: Vec<u32> = Vec::with_capacity(output_len);
540
541    //Add all logical indices belonging to a physical index to the output
542    let consume_runs = |run_length, logical_start| {
543        result.extend(logical_start as u32..(logical_start + run_length) as u32);
544    };
545    sort_run_inner(run_array, Some(options), output_len, consume_runs);
546
547    UInt32Array::from(result)
548}
549
550fn sort_run_inner<R: RunEndIndexType, F>(
551    run_array: &RunArray<R>,
552    options: Option<SortOptions>,
553    output_len: usize,
554    mut consume_runs: F,
555) -> (PrimitiveArray<UInt32Type>, ArrayRef)
556where
557    F: FnMut(usize, usize),
558{
559    // slice the run_array.values based on offset and length.
560    let start_physical_index = run_array.get_start_physical_index();
561    let end_physical_index = run_array.get_end_physical_index();
562    let physical_len = end_physical_index - start_physical_index + 1;
563    let run_values = run_array.values().slice(start_physical_index, physical_len);
564
565    // All the values have to be sorted irrespective of input limit.
566    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
567
568    let mut remaining_len = output_len;
569
570    let run_ends = run_array.run_ends().values();
571
572    assert_eq!(
573        0,
574        values_indices.null_count(),
575        "The output of sort_to_indices should not have null values. Its values is {}",
576        values_indices.null_count()
577    );
578
579    // Calculate `run length` of sorted value indices.
580    // Find the `logical index` at which the run starts.
581    // Call the consumer using the run length and starting logical index.
582    for physical_index in values_indices.values() {
583        // As the values were sliced with offset = start_physical_index, it has to be added back
584        // before accessing `RunArray::run_ends`
585        let physical_index = *physical_index as usize + start_physical_index;
586
587        // calculate the run length and logical index of sorted values
588        let (run_length, logical_index_start) = unsafe {
589            // Safety:
590            // The index will be within bounds as its in bounds of start_physical_index
591            // and len, both of which are within bounds of run_array
592            if physical_index == start_physical_index {
593                (
594                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
595                    0,
596                )
597            } else if physical_index == end_physical_index {
598                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
599                (
600                    run_array.offset() + run_array.len() - prev_run_end,
601                    prev_run_end - run_array.offset(),
602                )
603            } else {
604                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
605                (
606                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
607                    prev_run_end - run_array.offset(),
608                )
609            }
610        };
611        let new_run_length = run_length.min(remaining_len);
612        consume_runs(new_run_length, logical_index_start);
613        remaining_len -= new_run_length;
614
615        if remaining_len == 0 {
616            break;
617        }
618    }
619
620    if remaining_len > 0 {
621        panic!("Remaining length should be zero its values is {remaining_len}")
622    }
623    (values_indices, run_values)
624}
625
626/// One column to be used in lexicographical sort
627#[derive(Clone, Debug)]
628pub struct SortColumn {
629    /// The column to sort
630    pub values: ArrayRef,
631    /// Sort options for this column
632    pub options: Option<SortOptions>,
633}
634
635/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
636///
637/// Performs a stable lexicographical sort on values and indices.
638///
639/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
640/// `lexsort_to_indices` or `take`.
641///
642/// Example:
643///
644/// ```
645/// # use std::convert::From;
646/// # use std::sync::Arc;
647/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
648/// # use arrow_array::types::Int64Type;
649/// # use arrow_array::cast::AsArray;
650/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
651///
652/// let sorted_columns = lexsort(&vec![
653///     SortColumn {
654///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
655///             None,
656///             Some(-2),
657///             Some(89),
658///             Some(-64),
659///             Some(101),
660///         ])) as ArrayRef,
661///         options: None,
662///     },
663///     SortColumn {
664///         values: Arc::new(StringArray::from(vec![
665///             Some("hello"),
666///             Some("world"),
667///             Some(","),
668///             Some("foobar"),
669///             Some("!"),
670///         ])) as ArrayRef,
671///         options: Some(SortOptions {
672///             descending: true,
673///             nulls_first: false,
674///         }),
675///     },
676/// ], None).unwrap();
677///
678/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
679/// assert!(sorted_columns[0].is_null(0));
680/// ```
681///
682/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
683/// may be significantly faster
684///
685pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
686    let indices = lexsort_to_indices(columns, limit)?;
687    columns
688        .iter()
689        .map(|c| take(c.values.as_ref(), &indices, None))
690        .collect()
691}
692
693/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
694/// (`UInt32Array`) of indices.
695///
696/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
697/// may be significantly faster
698pub fn lexsort_to_indices(
699    columns: &[SortColumn],
700    limit: Option<usize>,
701) -> Result<UInt32Array, ArrowError> {
702    if columns.is_empty() {
703        return Err(ArrowError::InvalidArgumentError(
704            "Sort requires at least one column".to_string(),
705        ));
706    }
707    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
708        // fallback to non-lexical sort
709        let column = &columns[0];
710        return sort_to_indices(&column.values, column.options, limit);
711    }
712
713    let row_count = columns[0].values.len();
714    if columns.iter().any(|item| item.values.len() != row_count) {
715        return Err(ArrowError::ComputeError(
716            "lexical sort columns have different row counts".to_string(),
717        ));
718    };
719
720    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
721    let mut len = value_indices.len();
722
723    if let Some(limit) = limit {
724        len = limit.min(len);
725    }
726
727    let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
728    // uint32 can be sorted unstably
729    sort_unstable_by(&mut value_indices, len, |a, b| {
730        lexicographical_comparator.compare(*a, *b)
731    });
732
733    Ok(UInt32Array::from_iter_values(
734        value_indices.iter().take(len).map(|i| *i as u32),
735    ))
736}
737
738/// It's unstable_sort, may not preserve the order of equal elements
739pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
740where
741    F: FnMut(&T, &T) -> Ordering,
742{
743    if let Some(n) = limit.checked_sub(1) {
744        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
745        before.sort_unstable_by(is_less);
746    }
747}
748
749/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
750/// at given two indices. The lifetime is the same at the data wrapped.
751pub struct LexicographicalComparator {
752    compare_items: Vec<DynComparator>,
753}
754
755impl LexicographicalComparator {
756    /// lexicographically compare values at the wrapped columns with given indices.
757    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
758        for comparator in &self.compare_items {
759            match comparator(a_idx, b_idx) {
760                Ordering::Equal => continue,
761                r => return r,
762            }
763        }
764        Ordering::Equal
765    }
766
767    /// Create a new lex comparator that will wrap the given sort columns and give comparison
768    /// results with two indices.
769    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
770        let compare_items = columns
771            .iter()
772            .map(|c| {
773                make_comparator(
774                    c.values.as_ref(),
775                    c.values.as_ref(),
776                    c.options.unwrap_or_default(),
777                )
778            })
779            .collect::<Result<Vec<_>, ArrowError>>()?;
780        Ok(LexicographicalComparator { compare_items })
781    }
782}
783
784#[cfg(test)]
785mod tests {
786    use super::*;
787    use arrow_array::builder::{
788        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
789        PrimitiveRunBuilder,
790    };
791    use arrow_buffer::{i256, NullBuffer};
792    use arrow_schema::Field;
793    use half::f16;
794    use rand::rngs::StdRng;
795    use rand::seq::SliceRandom;
796    use rand::{Rng, RngCore, SeedableRng};
797
798    fn create_decimal_array<T: DecimalType>(
799        data: Vec<Option<usize>>,
800        precision: u8,
801        scale: i8,
802    ) -> PrimitiveArray<T> {
803        data.into_iter()
804            .map(|x| x.and_then(T::Native::from_usize))
805            .collect::<PrimitiveArray<T>>()
806            .with_precision_and_scale(precision, scale)
807            .unwrap()
808    }
809
810    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
811        data.into_iter()
812            .collect::<Decimal256Array>()
813            .with_precision_and_scale(53, 6)
814            .unwrap()
815    }
816
817    fn test_sort_to_indices_decimal_array<T: DecimalType>(
818        data: Vec<Option<usize>>,
819        options: Option<SortOptions>,
820        limit: Option<usize>,
821        expected_data: Vec<u32>,
822        precision: u8,
823        scale: i8,
824    ) {
825        let output = create_decimal_array::<T>(data, precision, scale);
826        let expected = UInt32Array::from(expected_data);
827        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
828        assert_eq!(output, expected)
829    }
830
831    fn test_sort_to_indices_decimal256_array(
832        data: Vec<Option<i256>>,
833        options: Option<SortOptions>,
834        limit: Option<usize>,
835        expected_data: Vec<u32>,
836    ) {
837        let output = create_decimal256_array(data);
838        let expected = UInt32Array::from(expected_data);
839        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
840        assert_eq!(output, expected)
841    }
842
843    fn test_sort_decimal_array<T: DecimalType>(
844        data: Vec<Option<usize>>,
845        options: Option<SortOptions>,
846        limit: Option<usize>,
847        expected_data: Vec<Option<usize>>,
848        p: u8,
849        s: i8,
850    ) {
851        let output = create_decimal_array::<T>(data, p, s);
852        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
853        let output = match limit {
854            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
855            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
856        };
857        assert_eq!(&output, &expected)
858    }
859
860    fn test_sort_decimal256_array(
861        data: Vec<Option<i256>>,
862        options: Option<SortOptions>,
863        limit: Option<usize>,
864        expected_data: Vec<Option<i256>>,
865    ) {
866        let output = create_decimal256_array(data);
867        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
868        let output = match limit {
869            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
870            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
871        };
872        assert_eq!(&output, &expected)
873    }
874
875    fn test_sort_to_indices_boolean_arrays(
876        data: Vec<Option<bool>>,
877        options: Option<SortOptions>,
878        limit: Option<usize>,
879        expected_data: Vec<u32>,
880    ) {
881        let output = BooleanArray::from(data);
882        let expected = UInt32Array::from(expected_data);
883        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
884        assert_eq!(output, expected)
885    }
886
887    fn test_sort_to_indices_primitive_arrays<T>(
888        data: Vec<Option<T::Native>>,
889        options: Option<SortOptions>,
890        limit: Option<usize>,
891        expected_data: Vec<u32>,
892    ) where
893        T: ArrowPrimitiveType,
894        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
895    {
896        let output = PrimitiveArray::<T>::from(data);
897        let expected = UInt32Array::from(expected_data);
898        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
899        assert_eq!(output, expected)
900    }
901
902    fn test_sort_primitive_arrays<T>(
903        data: Vec<Option<T::Native>>,
904        options: Option<SortOptions>,
905        limit: Option<usize>,
906        expected_data: Vec<Option<T::Native>>,
907    ) where
908        T: ArrowPrimitiveType,
909        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
910    {
911        let output = PrimitiveArray::<T>::from(data);
912        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
913        let output = match limit {
914            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
915            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
916        };
917        assert_eq!(&output, &expected)
918    }
919
920    fn test_sort_to_indices_string_arrays(
921        data: Vec<Option<&str>>,
922        options: Option<SortOptions>,
923        limit: Option<usize>,
924        expected_data: Vec<u32>,
925    ) {
926        let output = StringArray::from(data);
927        let expected = UInt32Array::from(expected_data);
928        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
929        assert_eq!(output, expected)
930    }
931
932    /// Tests both Utf8 and LargeUtf8
933    fn test_sort_string_arrays(
934        data: Vec<Option<&str>>,
935        options: Option<SortOptions>,
936        limit: Option<usize>,
937        expected_data: Vec<Option<&str>>,
938    ) {
939        let output = StringArray::from(data.clone());
940        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
941        let output = match limit {
942            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
943            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
944        };
945        assert_eq!(&output, &expected);
946
947        let output = LargeStringArray::from(data.clone());
948        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
949        let output = match limit {
950            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
951            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
952        };
953        assert_eq!(&output, &expected);
954
955        let output = StringViewArray::from(data);
956        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
957        let output = match limit {
958            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
959            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
960        };
961        assert_eq!(&output, &expected);
962    }
963
964    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
965        data: Vec<Option<&str>>,
966        options: Option<SortOptions>,
967        limit: Option<usize>,
968        expected_data: Vec<Option<&str>>,
969    ) {
970        let array = data.into_iter().collect::<DictionaryArray<T>>();
971        let array_values = array.values().clone();
972        let dict = array_values
973            .as_any()
974            .downcast_ref::<StringArray>()
975            .expect("Unable to get dictionary values");
976
977        let sorted = match limit {
978            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
979            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
980        };
981        let sorted = sorted
982            .as_any()
983            .downcast_ref::<DictionaryArray<T>>()
984            .unwrap();
985        let sorted_values = sorted.values();
986        let sorted_dict = sorted_values
987            .as_any()
988            .downcast_ref::<StringArray>()
989            .expect("Unable to get dictionary values");
990        let sorted_keys = sorted.keys();
991
992        assert_eq!(sorted_dict, dict);
993
994        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
995            if sorted.is_valid(i) {
996                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
997            } else {
998                None
999            }
1000        }));
1001        let expected = StringArray::from(expected_data);
1002
1003        assert_eq!(sorted_strings, expected)
1004    }
1005
1006    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1007        keys: PrimitiveArray<K>,
1008        values: PrimitiveArray<T>,
1009        options: Option<SortOptions>,
1010        limit: Option<usize>,
1011        expected_data: Vec<Option<T::Native>>,
1012    ) where
1013        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1014    {
1015        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1016        let array_values = array.values().clone();
1017        let dict = array_values.as_primitive::<T>();
1018
1019        let sorted = match limit {
1020            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1021            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1022        };
1023        let sorted = sorted
1024            .as_any()
1025            .downcast_ref::<DictionaryArray<K>>()
1026            .unwrap();
1027        let sorted_values = sorted.values();
1028        let sorted_dict = sorted_values
1029            .as_any()
1030            .downcast_ref::<PrimitiveArray<T>>()
1031            .expect("Unable to get dictionary values");
1032        let sorted_keys = sorted.keys();
1033
1034        assert_eq!(sorted_dict, dict);
1035
1036        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1037            (0..sorted.len())
1038                .map(|i| {
1039                    let key = sorted_keys.value(i).as_usize();
1040                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1041                        Some(sorted_dict.value(key))
1042                    } else {
1043                        None
1044                    }
1045                })
1046                .collect::<Vec<Option<T::Native>>>(),
1047        );
1048        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1049
1050        assert_eq!(sorted_values, expected)
1051    }
1052
1053    fn test_sort_list_arrays<T>(
1054        data: Vec<Option<Vec<Option<T::Native>>>>,
1055        options: Option<SortOptions>,
1056        limit: Option<usize>,
1057        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1058        fixed_length: Option<i32>,
1059    ) where
1060        T: ArrowPrimitiveType,
1061        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1062    {
1063        // for FixedSizedList
1064        if let Some(length) = fixed_length {
1065            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1066                data.clone(),
1067                length,
1068            ));
1069            let sorted = match limit {
1070                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1071                _ => sort(&(input as ArrayRef), options).unwrap(),
1072            };
1073            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1074                expected_data.clone(),
1075                length,
1076            )) as ArrayRef;
1077
1078            assert_eq!(&sorted, &expected);
1079        }
1080
1081        // for List
1082        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1083        let sorted = match limit {
1084            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1085            _ => sort(&(input as ArrayRef), options).unwrap(),
1086        };
1087        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1088            expected_data.clone(),
1089        )) as ArrayRef;
1090
1091        assert_eq!(&sorted, &expected);
1092
1093        // for LargeList
1094        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1095        let sorted = match limit {
1096            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1097            _ => sort(&(input as ArrayRef), options).unwrap(),
1098        };
1099        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1100            expected_data,
1101        )) as ArrayRef;
1102
1103        assert_eq!(&sorted, &expected);
1104    }
1105
1106    fn test_lex_sort_arrays(
1107        input: Vec<SortColumn>,
1108        expected_output: Vec<ArrayRef>,
1109        limit: Option<usize>,
1110    ) {
1111        let sorted = lexsort(&input, limit).unwrap();
1112
1113        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1114            assert_eq!(result, expected);
1115        }
1116    }
1117
1118    /// slice all arrays in expected_output to offset/length
1119    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1120        expected_output
1121            .into_iter()
1122            .map(|array| array.slice(offset, length))
1123            .collect()
1124    }
1125
1126    fn test_sort_binary_arrays(
1127        data: Vec<Option<Vec<u8>>>,
1128        options: Option<SortOptions>,
1129        limit: Option<usize>,
1130        expected_data: Vec<Option<Vec<u8>>>,
1131        fixed_length: Option<i32>,
1132    ) {
1133        // Fixed size binary array
1134        if let Some(length) = fixed_length {
1135            let input = Arc::new(
1136                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1137                    .unwrap(),
1138            );
1139            let sorted = match limit {
1140                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1141                None => sort(&(input as ArrayRef), options).unwrap(),
1142            };
1143            let expected = Arc::new(
1144                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1145                    expected_data.iter().cloned(),
1146                    length,
1147                )
1148                .unwrap(),
1149            ) as ArrayRef;
1150
1151            assert_eq!(&sorted, &expected);
1152        }
1153
1154        // Generic size binary array
1155        fn make_generic_binary_array<S: OffsetSizeTrait>(
1156            data: &[Option<Vec<u8>>],
1157        ) -> Arc<GenericBinaryArray<S>> {
1158            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1159                data.iter()
1160                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1161                    .collect(),
1162            ))
1163        }
1164
1165        // BinaryArray
1166        let input = make_generic_binary_array::<i32>(&data);
1167        let sorted = match limit {
1168            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1169            None => sort(&(input as ArrayRef), options).unwrap(),
1170        };
1171        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1172        assert_eq!(&sorted, &expected);
1173
1174        // LargeBinaryArray
1175        let input = make_generic_binary_array::<i64>(&data);
1176        let sorted = match limit {
1177            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1178            None => sort(&(input as ArrayRef), options).unwrap(),
1179        };
1180        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1181        assert_eq!(&sorted, &expected);
1182    }
1183
1184    #[test]
1185    fn test_sort_to_indices_primitives() {
1186        test_sort_to_indices_primitive_arrays::<Int8Type>(
1187            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1188            None,
1189            None,
1190            vec![0, 5, 3, 1, 4, 2],
1191        );
1192        test_sort_to_indices_primitive_arrays::<Int16Type>(
1193            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1194            None,
1195            None,
1196            vec![0, 5, 3, 1, 4, 2],
1197        );
1198        test_sort_to_indices_primitive_arrays::<Int32Type>(
1199            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1200            None,
1201            None,
1202            vec![0, 5, 3, 1, 4, 2],
1203        );
1204        test_sort_to_indices_primitive_arrays::<Int64Type>(
1205            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1206            None,
1207            None,
1208            vec![0, 5, 3, 1, 4, 2],
1209        );
1210        test_sort_to_indices_primitive_arrays::<Float16Type>(
1211            vec![
1212                None,
1213                Some(f16::from_f32(-0.05)),
1214                Some(f16::from_f32(2.225)),
1215                Some(f16::from_f32(-1.01)),
1216                Some(f16::from_f32(-0.05)),
1217                None,
1218            ],
1219            None,
1220            None,
1221            vec![0, 5, 3, 1, 4, 2],
1222        );
1223        test_sort_to_indices_primitive_arrays::<Float32Type>(
1224            vec![
1225                None,
1226                Some(-0.05),
1227                Some(2.225),
1228                Some(-1.01),
1229                Some(-0.05),
1230                None,
1231            ],
1232            None,
1233            None,
1234            vec![0, 5, 3, 1, 4, 2],
1235        );
1236        test_sort_to_indices_primitive_arrays::<Float64Type>(
1237            vec![
1238                None,
1239                Some(-0.05),
1240                Some(2.225),
1241                Some(-1.01),
1242                Some(-0.05),
1243                None,
1244            ],
1245            None,
1246            None,
1247            vec![0, 5, 3, 1, 4, 2],
1248        );
1249
1250        // descending
1251        test_sort_to_indices_primitive_arrays::<Int8Type>(
1252            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1253            Some(SortOptions {
1254                descending: true,
1255                nulls_first: false,
1256            }),
1257            None,
1258            vec![2, 1, 4, 3, 0, 5],
1259        );
1260
1261        test_sort_to_indices_primitive_arrays::<Int16Type>(
1262            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1263            Some(SortOptions {
1264                descending: true,
1265                nulls_first: false,
1266            }),
1267            None,
1268            vec![2, 1, 4, 3, 0, 5],
1269        );
1270
1271        test_sort_to_indices_primitive_arrays::<Int32Type>(
1272            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1273            Some(SortOptions {
1274                descending: true,
1275                nulls_first: false,
1276            }),
1277            None,
1278            vec![2, 1, 4, 3, 0, 5],
1279        );
1280
1281        test_sort_to_indices_primitive_arrays::<Int64Type>(
1282            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1283            Some(SortOptions {
1284                descending: true,
1285                nulls_first: false,
1286            }),
1287            None,
1288            vec![2, 1, 4, 3, 0, 5],
1289        );
1290
1291        test_sort_to_indices_primitive_arrays::<Float16Type>(
1292            vec![
1293                None,
1294                Some(f16::from_f32(0.005)),
1295                Some(f16::from_f32(20.22)),
1296                Some(f16::from_f32(-10.3)),
1297                Some(f16::from_f32(0.005)),
1298                None,
1299            ],
1300            Some(SortOptions {
1301                descending: true,
1302                nulls_first: false,
1303            }),
1304            None,
1305            vec![2, 1, 4, 3, 0, 5],
1306        );
1307
1308        test_sort_to_indices_primitive_arrays::<Float32Type>(
1309            vec![
1310                None,
1311                Some(0.005),
1312                Some(20.22),
1313                Some(-10.3),
1314                Some(0.005),
1315                None,
1316            ],
1317            Some(SortOptions {
1318                descending: true,
1319                nulls_first: false,
1320            }),
1321            None,
1322            vec![2, 1, 4, 3, 0, 5],
1323        );
1324
1325        test_sort_to_indices_primitive_arrays::<Float64Type>(
1326            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1327            Some(SortOptions {
1328                descending: true,
1329                nulls_first: false,
1330            }),
1331            None,
1332            vec![2, 1, 4, 3, 0, 5],
1333        );
1334
1335        // descending, nulls first
1336        test_sort_to_indices_primitive_arrays::<Int8Type>(
1337            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1338            Some(SortOptions {
1339                descending: true,
1340                nulls_first: true,
1341            }),
1342            None,
1343            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1344        );
1345
1346        test_sort_to_indices_primitive_arrays::<Int16Type>(
1347            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1348            Some(SortOptions {
1349                descending: true,
1350                nulls_first: true,
1351            }),
1352            None,
1353            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1354        );
1355
1356        test_sort_to_indices_primitive_arrays::<Int32Type>(
1357            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1358            Some(SortOptions {
1359                descending: true,
1360                nulls_first: true,
1361            }),
1362            None,
1363            vec![0, 5, 2, 1, 4, 3],
1364        );
1365
1366        test_sort_to_indices_primitive_arrays::<Int64Type>(
1367            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1368            Some(SortOptions {
1369                descending: true,
1370                nulls_first: true,
1371            }),
1372            None,
1373            vec![0, 5, 2, 1, 4, 3],
1374        );
1375
1376        test_sort_to_indices_primitive_arrays::<Float16Type>(
1377            vec![
1378                None,
1379                Some(f16::from_f32(0.1)),
1380                Some(f16::from_f32(0.2)),
1381                Some(f16::from_f32(-1.3)),
1382                Some(f16::from_f32(0.01)),
1383                None,
1384            ],
1385            Some(SortOptions {
1386                descending: true,
1387                nulls_first: true,
1388            }),
1389            None,
1390            vec![0, 5, 2, 1, 4, 3],
1391        );
1392
1393        test_sort_to_indices_primitive_arrays::<Float32Type>(
1394            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1395            Some(SortOptions {
1396                descending: true,
1397                nulls_first: true,
1398            }),
1399            None,
1400            vec![0, 5, 2, 1, 4, 3],
1401        );
1402
1403        test_sort_to_indices_primitive_arrays::<Float64Type>(
1404            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1405            Some(SortOptions {
1406                descending: true,
1407                nulls_first: true,
1408            }),
1409            None,
1410            vec![0, 5, 2, 1, 4, 3],
1411        );
1412
1413        // valid values less than limit with extra nulls
1414        test_sort_to_indices_primitive_arrays::<Float64Type>(
1415            vec![Some(2.0), None, None, Some(1.0)],
1416            Some(SortOptions {
1417                descending: false,
1418                nulls_first: false,
1419            }),
1420            Some(3),
1421            vec![3, 0, 1],
1422        );
1423
1424        test_sort_to_indices_primitive_arrays::<Float64Type>(
1425            vec![Some(2.0), None, None, Some(1.0)],
1426            Some(SortOptions {
1427                descending: false,
1428                nulls_first: true,
1429            }),
1430            Some(3),
1431            vec![1, 2, 3],
1432        );
1433
1434        // more nulls than limit
1435        test_sort_to_indices_primitive_arrays::<Float64Type>(
1436            vec![Some(1.0), None, None, None],
1437            Some(SortOptions {
1438                descending: false,
1439                nulls_first: true,
1440            }),
1441            Some(2),
1442            vec![1, 2],
1443        );
1444
1445        test_sort_to_indices_primitive_arrays::<Float64Type>(
1446            vec![Some(1.0), None, None, None],
1447            Some(SortOptions {
1448                descending: false,
1449                nulls_first: false,
1450            }),
1451            Some(2),
1452            vec![0, 1],
1453        );
1454    }
1455
1456    #[test]
1457    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1458        test_sort_to_indices_primitive_arrays::<Int32Type>(
1459            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1460            Some(SortOptions {
1461                descending: false,
1462                nulls_first: false,
1463            }),
1464            Some(2),
1465            vec![4, 6],
1466        );
1467    }
1468
1469    #[test]
1470    fn test_sort_boolean() {
1471        // boolean
1472        test_sort_to_indices_boolean_arrays(
1473            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1474            None,
1475            None,
1476            vec![0, 5, 1, 4, 2, 3],
1477        );
1478
1479        // boolean, descending
1480        test_sort_to_indices_boolean_arrays(
1481            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1482            Some(SortOptions {
1483                descending: true,
1484                nulls_first: false,
1485            }),
1486            None,
1487            vec![2, 3, 1, 4, 0, 5],
1488        );
1489
1490        // boolean, descending, nulls first
1491        test_sort_to_indices_boolean_arrays(
1492            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1493            Some(SortOptions {
1494                descending: true,
1495                nulls_first: true,
1496            }),
1497            None,
1498            vec![0, 5, 2, 3, 1, 4],
1499        );
1500
1501        // boolean, descending, nulls first, limit
1502        test_sort_to_indices_boolean_arrays(
1503            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1504            Some(SortOptions {
1505                descending: true,
1506                nulls_first: true,
1507            }),
1508            Some(3),
1509            vec![0, 5, 2],
1510        );
1511
1512        // valid values less than limit with extra nulls
1513        test_sort_to_indices_boolean_arrays(
1514            vec![Some(true), None, None, Some(false)],
1515            Some(SortOptions {
1516                descending: false,
1517                nulls_first: false,
1518            }),
1519            Some(3),
1520            vec![3, 0, 1],
1521        );
1522
1523        test_sort_to_indices_boolean_arrays(
1524            vec![Some(true), None, None, Some(false)],
1525            Some(SortOptions {
1526                descending: false,
1527                nulls_first: true,
1528            }),
1529            Some(3),
1530            vec![1, 2, 3],
1531        );
1532
1533        // more nulls than limit
1534        test_sort_to_indices_boolean_arrays(
1535            vec![Some(true), None, None, None],
1536            Some(SortOptions {
1537                descending: false,
1538                nulls_first: true,
1539            }),
1540            Some(2),
1541            vec![1, 2],
1542        );
1543
1544        test_sort_to_indices_boolean_arrays(
1545            vec![Some(true), None, None, None],
1546            Some(SortOptions {
1547                descending: false,
1548                nulls_first: false,
1549            }),
1550            Some(2),
1551            vec![0, 1],
1552        );
1553    }
1554
1555    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1556    ///
1557    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1558    ///
1559    fn test_every_config_sort_boolean_list_arrays(
1560        data: Vec<Option<Vec<Option<bool>>>>,
1561        options: Option<SortOptions>,
1562        expected_data: Vec<Option<Vec<Option<bool>>>>,
1563    ) {
1564        let first_length = data
1565            .iter()
1566            .find_map(|x| x.as_ref().map(|x| x.len()))
1567            .unwrap_or(0);
1568        let first_non_match_length = data
1569            .iter()
1570            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1571            .position(|x| x != first_length);
1572
1573        assert_eq!(
1574            first_non_match_length, None,
1575            "All list items should have the same length {first_length}, input data is invalid"
1576        );
1577
1578        let first_non_match_length = expected_data
1579            .iter()
1580            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1581            .position(|x| x != first_length);
1582
1583        assert_eq!(
1584            first_non_match_length, None,
1585            "All list items should have the same length {first_length}, expected data is invalid"
1586        );
1587
1588        let limit = expected_data.len().saturating_div(2);
1589
1590        for &with_limit in &[false, true] {
1591            let (limit, expected_data) = if with_limit {
1592                (
1593                    Some(limit),
1594                    expected_data.iter().take(limit).cloned().collect(),
1595                )
1596            } else {
1597                (None, expected_data.clone())
1598            };
1599
1600            for &fixed_length in &[None, Some(first_length as i32)] {
1601                test_sort_boolean_list_arrays(
1602                    data.clone(),
1603                    options,
1604                    limit,
1605                    expected_data.clone(),
1606                    fixed_length,
1607                );
1608            }
1609        }
1610    }
1611
1612    fn test_sort_boolean_list_arrays(
1613        data: Vec<Option<Vec<Option<bool>>>>,
1614        options: Option<SortOptions>,
1615        limit: Option<usize>,
1616        expected_data: Vec<Option<Vec<Option<bool>>>>,
1617        fixed_length: Option<i32>,
1618    ) {
1619        fn build_fixed_boolean_list_array(
1620            data: Vec<Option<Vec<Option<bool>>>>,
1621            fixed_length: i32,
1622        ) -> ArrayRef {
1623            let mut builder = FixedSizeListBuilder::new(
1624                BooleanBuilder::with_capacity(fixed_length as usize),
1625                fixed_length,
1626            );
1627            for sublist in data {
1628                match sublist {
1629                    Some(sublist) => {
1630                        builder.values().extend(sublist);
1631                        builder.append(true);
1632                    }
1633                    None => {
1634                        builder
1635                            .values()
1636                            .extend(std::iter::repeat(None).take(fixed_length as usize));
1637                        builder.append(false);
1638                    }
1639                }
1640            }
1641            Arc::new(builder.finish()) as ArrayRef
1642        }
1643
1644        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1645            data: Vec<Option<Vec<Option<bool>>>>,
1646        ) -> ArrayRef {
1647            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1648            builder.extend(data);
1649            Arc::new(builder.finish()) as ArrayRef
1650        }
1651
1652        // for FixedSizedList
1653        if let Some(length) = fixed_length {
1654            let input = build_fixed_boolean_list_array(data.clone(), length);
1655            let sorted = match limit {
1656                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1657                _ => sort(&(input as ArrayRef), options).unwrap(),
1658            };
1659            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1660
1661            assert_eq!(&sorted, &expected);
1662        }
1663
1664        // for List
1665        let input = build_generic_boolean_list_array::<i32>(data.clone());
1666        let sorted = match limit {
1667            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1668            _ => sort(&(input as ArrayRef), options).unwrap(),
1669        };
1670        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1671
1672        assert_eq!(&sorted, &expected);
1673
1674        // for LargeList
1675        let input = build_generic_boolean_list_array::<i64>(data.clone());
1676        let sorted = match limit {
1677            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1678            _ => sort(&(input as ArrayRef), options).unwrap(),
1679        };
1680        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1681
1682        assert_eq!(&sorted, &expected);
1683    }
1684
1685    #[test]
1686    fn test_sort_list_of_booleans() {
1687        // These are all the possible combinations of boolean values
1688        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
1689        #[rustfmt::skip]
1690        let mut cases = vec![
1691            Some(vec![Some(true),  Some(true),  Some(true)]),
1692            Some(vec![Some(true),  Some(true),  Some(false)]),
1693            Some(vec![Some(true),  Some(true),  None]),
1694
1695            Some(vec![Some(true),  Some(false), Some(true)]),
1696            Some(vec![Some(true),  Some(false), Some(false)]),
1697            Some(vec![Some(true),  Some(false), None]),
1698
1699            Some(vec![Some(true),  None,        Some(true)]),
1700            Some(vec![Some(true),  None,        Some(false)]),
1701            Some(vec![Some(true),  None,        None]),
1702
1703            Some(vec![Some(false), Some(true),  Some(true)]),
1704            Some(vec![Some(false), Some(true),  Some(false)]),
1705            Some(vec![Some(false), Some(true),  None]),
1706
1707            Some(vec![Some(false), Some(false), Some(true)]),
1708            Some(vec![Some(false), Some(false), Some(false)]),
1709            Some(vec![Some(false), Some(false), None]),
1710
1711            Some(vec![Some(false), None,        Some(true)]),
1712            Some(vec![Some(false), None,        Some(false)]),
1713            Some(vec![Some(false), None,        None]),
1714
1715            Some(vec![None,        Some(true),  Some(true)]),
1716            Some(vec![None,        Some(true),  Some(false)]),
1717            Some(vec![None,        Some(true),  None]),
1718
1719            Some(vec![None,        Some(false), Some(true)]),
1720            Some(vec![None,        Some(false), Some(false)]),
1721            Some(vec![None,        Some(false), None]),
1722
1723            Some(vec![None,        None,        Some(true)]),
1724            Some(vec![None,        None,        Some(false)]),
1725            Some(vec![None,        None,        None]),
1726            None,
1727        ];
1728
1729        cases.shuffle(&mut StdRng::seed_from_u64(42));
1730
1731        // The order is false, true, null
1732        #[rustfmt::skip]
1733        let expected_descending_false_nulls_first_false = vec![
1734            Some(vec![Some(false), Some(false), Some(false)]),
1735            Some(vec![Some(false), Some(false), Some(true)]),
1736            Some(vec![Some(false), Some(false), None]),
1737
1738            Some(vec![Some(false), Some(true),  Some(false)]),
1739            Some(vec![Some(false), Some(true),  Some(true)]),
1740            Some(vec![Some(false), Some(true),  None]),
1741
1742            Some(vec![Some(false), None,        Some(false)]),
1743            Some(vec![Some(false), None,        Some(true)]),
1744            Some(vec![Some(false), None,        None]),
1745
1746            Some(vec![Some(true),  Some(false), Some(false)]),
1747            Some(vec![Some(true),  Some(false), Some(true)]),
1748            Some(vec![Some(true),  Some(false), None]),
1749
1750            Some(vec![Some(true),  Some(true),  Some(false)]),
1751            Some(vec![Some(true),  Some(true),  Some(true)]),
1752            Some(vec![Some(true),  Some(true),  None]),
1753
1754            Some(vec![Some(true),  None,        Some(false)]),
1755            Some(vec![Some(true),  None,        Some(true)]),
1756            Some(vec![Some(true),  None,        None]),
1757
1758            Some(vec![None,        Some(false), Some(false)]),
1759            Some(vec![None,        Some(false), Some(true)]),
1760            Some(vec![None,        Some(false), None]),
1761
1762            Some(vec![None,        Some(true),  Some(false)]),
1763            Some(vec![None,        Some(true),  Some(true)]),
1764            Some(vec![None,        Some(true),  None]),
1765
1766            Some(vec![None,        None,        Some(false)]),
1767            Some(vec![None,        None,        Some(true)]),
1768            Some(vec![None,        None,        None]),
1769            None,
1770        ];
1771        test_every_config_sort_boolean_list_arrays(
1772            cases.clone(),
1773            Some(SortOptions {
1774                descending: false,
1775                nulls_first: false,
1776            }),
1777            expected_descending_false_nulls_first_false,
1778        );
1779
1780        // The order is null, false, true
1781        #[rustfmt::skip]
1782        let expected_descending_false_nulls_first_true = vec![
1783            None,
1784
1785            Some(vec![None,        None,        None]),
1786            Some(vec![None,        None,        Some(false)]),
1787            Some(vec![None,        None,        Some(true)]),
1788
1789            Some(vec![None,        Some(false), None]),
1790            Some(vec![None,        Some(false), Some(false)]),
1791            Some(vec![None,        Some(false), Some(true)]),
1792
1793            Some(vec![None,        Some(true),  None]),
1794            Some(vec![None,        Some(true),  Some(false)]),
1795            Some(vec![None,        Some(true),  Some(true)]),
1796
1797            Some(vec![Some(false), None,        None]),
1798            Some(vec![Some(false), None,        Some(false)]),
1799            Some(vec![Some(false), None,        Some(true)]),
1800
1801            Some(vec![Some(false), Some(false), None]),
1802            Some(vec![Some(false), Some(false), Some(false)]),
1803            Some(vec![Some(false), Some(false), Some(true)]),
1804
1805            Some(vec![Some(false), Some(true),  None]),
1806            Some(vec![Some(false), Some(true),  Some(false)]),
1807            Some(vec![Some(false), Some(true),  Some(true)]),
1808
1809            Some(vec![Some(true),  None,        None]),
1810            Some(vec![Some(true),  None,        Some(false)]),
1811            Some(vec![Some(true),  None,        Some(true)]),
1812
1813            Some(vec![Some(true),  Some(false), None]),
1814            Some(vec![Some(true),  Some(false), Some(false)]),
1815            Some(vec![Some(true),  Some(false), Some(true)]),
1816
1817            Some(vec![Some(true),  Some(true),  None]),
1818            Some(vec![Some(true),  Some(true),  Some(false)]),
1819            Some(vec![Some(true),  Some(true),  Some(true)]),
1820        ];
1821
1822        test_every_config_sort_boolean_list_arrays(
1823            cases.clone(),
1824            Some(SortOptions {
1825                descending: false,
1826                nulls_first: true,
1827            }),
1828            expected_descending_false_nulls_first_true,
1829        );
1830
1831        // The order is true, false, null
1832        #[rustfmt::skip]
1833        let expected_descending_true_nulls_first_false = vec![
1834            Some(vec![Some(true),  Some(true),  Some(true)]),
1835            Some(vec![Some(true),  Some(true),  Some(false)]),
1836            Some(vec![Some(true),  Some(true),  None]),
1837
1838            Some(vec![Some(true),  Some(false), Some(true)]),
1839            Some(vec![Some(true),  Some(false), Some(false)]),
1840            Some(vec![Some(true),  Some(false), None]),
1841
1842            Some(vec![Some(true),  None,        Some(true)]),
1843            Some(vec![Some(true),  None,        Some(false)]),
1844            Some(vec![Some(true),  None,        None]),
1845
1846            Some(vec![Some(false), Some(true),  Some(true)]),
1847            Some(vec![Some(false), Some(true),  Some(false)]),
1848            Some(vec![Some(false), Some(true),  None]),
1849
1850            Some(vec![Some(false), Some(false), Some(true)]),
1851            Some(vec![Some(false), Some(false), Some(false)]),
1852            Some(vec![Some(false), Some(false), None]),
1853
1854            Some(vec![Some(false), None,        Some(true)]),
1855            Some(vec![Some(false), None,        Some(false)]),
1856            Some(vec![Some(false), None,        None]),
1857
1858            Some(vec![None,        Some(true),  Some(true)]),
1859            Some(vec![None,        Some(true),  Some(false)]),
1860            Some(vec![None,        Some(true),  None]),
1861
1862            Some(vec![None,        Some(false), Some(true)]),
1863            Some(vec![None,        Some(false), Some(false)]),
1864            Some(vec![None,        Some(false), None]),
1865
1866            Some(vec![None,        None,        Some(true)]),
1867            Some(vec![None,        None,        Some(false)]),
1868            Some(vec![None,        None,        None]),
1869
1870            None,
1871        ];
1872        test_every_config_sort_boolean_list_arrays(
1873            cases.clone(),
1874            Some(SortOptions {
1875                descending: true,
1876                nulls_first: false,
1877            }),
1878            expected_descending_true_nulls_first_false,
1879        );
1880
1881        // The order is null, true, false
1882        #[rustfmt::skip]
1883        let expected_descending_true_nulls_first_true = vec![
1884            None,
1885
1886            Some(vec![None,        None,        None]),
1887            Some(vec![None,        None,        Some(true)]),
1888            Some(vec![None,        None,        Some(false)]),
1889
1890            Some(vec![None,        Some(true),  None]),
1891            Some(vec![None,        Some(true),  Some(true)]),
1892            Some(vec![None,        Some(true),  Some(false)]),
1893
1894            Some(vec![None,        Some(false), None]),
1895            Some(vec![None,        Some(false), Some(true)]),
1896            Some(vec![None,        Some(false), Some(false)]),
1897
1898            Some(vec![Some(true),  None,        None]),
1899            Some(vec![Some(true),  None,        Some(true)]),
1900            Some(vec![Some(true),  None,        Some(false)]),
1901
1902            Some(vec![Some(true),  Some(true),  None]),
1903            Some(vec![Some(true),  Some(true),  Some(true)]),
1904            Some(vec![Some(true),  Some(true),  Some(false)]),
1905
1906            Some(vec![Some(true),  Some(false), None]),
1907            Some(vec![Some(true),  Some(false), Some(true)]),
1908            Some(vec![Some(true),  Some(false), Some(false)]),
1909
1910            Some(vec![Some(false), None,        None]),
1911            Some(vec![Some(false), None,        Some(true)]),
1912            Some(vec![Some(false), None,        Some(false)]),
1913
1914            Some(vec![Some(false), Some(true),  None]),
1915            Some(vec![Some(false), Some(true),  Some(true)]),
1916            Some(vec![Some(false), Some(true),  Some(false)]),
1917
1918            Some(vec![Some(false), Some(false), None]),
1919            Some(vec![Some(false), Some(false), Some(true)]),
1920            Some(vec![Some(false), Some(false), Some(false)]),
1921        ];
1922        // Testing with limit false and fixed_length None
1923        test_every_config_sort_boolean_list_arrays(
1924            cases.clone(),
1925            Some(SortOptions {
1926                descending: true,
1927                nulls_first: true,
1928            }),
1929            expected_descending_true_nulls_first_true,
1930        );
1931    }
1932
1933    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
1934        // decimal default
1935        test_sort_to_indices_decimal_array::<T>(
1936            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1937            None,
1938            None,
1939            vec![0, 6, 4, 2, 3, 5, 1],
1940            precision,
1941            scale,
1942        );
1943        // decimal descending
1944        test_sort_to_indices_decimal_array::<T>(
1945            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1946            Some(SortOptions {
1947                descending: true,
1948                nulls_first: false,
1949            }),
1950            None,
1951            vec![1, 5, 3, 2, 4, 0, 6],
1952            precision,
1953            scale,
1954        );
1955        // decimal null_first and descending
1956        test_sort_to_indices_decimal_array::<T>(
1957            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1958            Some(SortOptions {
1959                descending: true,
1960                nulls_first: true,
1961            }),
1962            None,
1963            vec![0, 6, 1, 5, 3, 2, 4],
1964            precision,
1965            scale,
1966        );
1967        // decimal null_first
1968        test_sort_to_indices_decimal_array::<T>(
1969            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1970            Some(SortOptions {
1971                descending: false,
1972                nulls_first: true,
1973            }),
1974            None,
1975            vec![0, 6, 4, 2, 3, 5, 1],
1976            precision,
1977            scale,
1978        );
1979        // limit
1980        test_sort_to_indices_decimal_array::<T>(
1981            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1982            None,
1983            Some(3),
1984            vec![0, 6, 4],
1985            precision,
1986            scale,
1987        );
1988        // limit descending
1989        test_sort_to_indices_decimal_array::<T>(
1990            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1991            Some(SortOptions {
1992                descending: true,
1993                nulls_first: false,
1994            }),
1995            Some(3),
1996            vec![1, 5, 3],
1997            precision,
1998            scale,
1999        );
2000        // limit descending null_first
2001        test_sort_to_indices_decimal_array::<T>(
2002            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2003            Some(SortOptions {
2004                descending: true,
2005                nulls_first: true,
2006            }),
2007            Some(3),
2008            vec![0, 6, 1],
2009            precision,
2010            scale,
2011        );
2012        // limit null_first
2013        test_sort_to_indices_decimal_array::<T>(
2014            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2015            Some(SortOptions {
2016                descending: false,
2017                nulls_first: true,
2018            }),
2019            Some(3),
2020            vec![0, 6, 4],
2021            precision,
2022            scale,
2023        );
2024    }
2025
2026    #[test]
2027    fn test_sort_indices_decimal128() {
2028        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2029    }
2030
2031    #[test]
2032    fn test_sort_indices_decimal256() {
2033        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2034    }
2035
2036    #[test]
2037    fn test_sort_indices_decimal256_max_min() {
2038        let data = vec![
2039            None,
2040            Some(i256::MIN),
2041            Some(i256::from_i128(1)),
2042            Some(i256::MAX),
2043            Some(i256::from_i128(-1)),
2044        ];
2045        test_sort_to_indices_decimal256_array(
2046            data.clone(),
2047            Some(SortOptions {
2048                descending: false,
2049                nulls_first: true,
2050            }),
2051            None,
2052            vec![0, 1, 4, 2, 3],
2053        );
2054
2055        test_sort_to_indices_decimal256_array(
2056            data.clone(),
2057            Some(SortOptions {
2058                descending: true,
2059                nulls_first: true,
2060            }),
2061            None,
2062            vec![0, 3, 2, 4, 1],
2063        );
2064
2065        test_sort_to_indices_decimal256_array(
2066            data.clone(),
2067            Some(SortOptions {
2068                descending: false,
2069                nulls_first: true,
2070            }),
2071            Some(4),
2072            vec![0, 1, 4, 2],
2073        );
2074
2075        test_sort_to_indices_decimal256_array(
2076            data.clone(),
2077            Some(SortOptions {
2078                descending: true,
2079                nulls_first: true,
2080            }),
2081            Some(4),
2082            vec![0, 3, 2, 4],
2083        );
2084    }
2085
2086    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2087        // decimal default
2088        test_sort_decimal_array::<T>(
2089            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2090            None,
2091            None,
2092            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2093            precision,
2094            scale,
2095        );
2096        // decimal descending
2097        test_sort_decimal_array::<T>(
2098            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2099            Some(SortOptions {
2100                descending: true,
2101                nulls_first: false,
2102            }),
2103            None,
2104            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2105            precision,
2106            scale,
2107        );
2108        // decimal null_first and descending
2109        test_sort_decimal_array::<T>(
2110            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2111            Some(SortOptions {
2112                descending: true,
2113                nulls_first: true,
2114            }),
2115            None,
2116            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2117            precision,
2118            scale,
2119        );
2120        // decimal null_first
2121        test_sort_decimal_array::<T>(
2122            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2123            Some(SortOptions {
2124                descending: false,
2125                nulls_first: true,
2126            }),
2127            None,
2128            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2129            precision,
2130            scale,
2131        );
2132        // limit
2133        test_sort_decimal_array::<T>(
2134            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2135            None,
2136            Some(3),
2137            vec![None, None, Some(1)],
2138            precision,
2139            scale,
2140        );
2141        // limit descending
2142        test_sort_decimal_array::<T>(
2143            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2144            Some(SortOptions {
2145                descending: true,
2146                nulls_first: false,
2147            }),
2148            Some(3),
2149            vec![Some(5), Some(4), Some(3)],
2150            precision,
2151            scale,
2152        );
2153        // limit descending null_first
2154        test_sort_decimal_array::<T>(
2155            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2156            Some(SortOptions {
2157                descending: true,
2158                nulls_first: true,
2159            }),
2160            Some(3),
2161            vec![None, None, Some(5)],
2162            precision,
2163            scale,
2164        );
2165        // limit null_first
2166        test_sort_decimal_array::<T>(
2167            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2168            Some(SortOptions {
2169                descending: false,
2170                nulls_first: true,
2171            }),
2172            Some(3),
2173            vec![None, None, Some(1)],
2174            precision,
2175            scale,
2176        );
2177    }
2178
2179    #[test]
2180    fn test_sort_decimal128() {
2181        test_sort_decimal::<Decimal128Type>(23, 6);
2182    }
2183
2184    #[test]
2185    fn test_sort_decimal256() {
2186        test_sort_decimal::<Decimal256Type>(53, 6);
2187    }
2188
2189    #[test]
2190    fn test_sort_decimal256_max_min() {
2191        test_sort_decimal256_array(
2192            vec![
2193                None,
2194                Some(i256::MIN),
2195                Some(i256::from_i128(1)),
2196                Some(i256::MAX),
2197                Some(i256::from_i128(-1)),
2198                None,
2199            ],
2200            Some(SortOptions {
2201                descending: false,
2202                nulls_first: true,
2203            }),
2204            None,
2205            vec![
2206                None,
2207                None,
2208                Some(i256::MIN),
2209                Some(i256::from_i128(-1)),
2210                Some(i256::from_i128(1)),
2211                Some(i256::MAX),
2212            ],
2213        );
2214
2215        test_sort_decimal256_array(
2216            vec![
2217                None,
2218                Some(i256::MIN),
2219                Some(i256::from_i128(1)),
2220                Some(i256::MAX),
2221                Some(i256::from_i128(-1)),
2222                None,
2223            ],
2224            Some(SortOptions {
2225                descending: true,
2226                nulls_first: true,
2227            }),
2228            None,
2229            vec![
2230                None,
2231                None,
2232                Some(i256::MAX),
2233                Some(i256::from_i128(1)),
2234                Some(i256::from_i128(-1)),
2235                Some(i256::MIN),
2236            ],
2237        );
2238
2239        test_sort_decimal256_array(
2240            vec![
2241                None,
2242                Some(i256::MIN),
2243                Some(i256::from_i128(1)),
2244                Some(i256::MAX),
2245                Some(i256::from_i128(-1)),
2246                None,
2247            ],
2248            Some(SortOptions {
2249                descending: false,
2250                nulls_first: true,
2251            }),
2252            Some(4),
2253            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2254        );
2255
2256        test_sort_decimal256_array(
2257            vec![
2258                None,
2259                Some(i256::MIN),
2260                Some(i256::from_i128(1)),
2261                Some(i256::MAX),
2262                Some(i256::from_i128(-1)),
2263                None,
2264            ],
2265            Some(SortOptions {
2266                descending: true,
2267                nulls_first: true,
2268            }),
2269            Some(4),
2270            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2271        );
2272    }
2273
2274    #[test]
2275    fn test_sort_primitives() {
2276        // default case
2277        test_sort_primitive_arrays::<UInt8Type>(
2278            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2279            None,
2280            None,
2281            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2282        );
2283        test_sort_primitive_arrays::<UInt16Type>(
2284            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2285            None,
2286            None,
2287            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2288        );
2289        test_sort_primitive_arrays::<UInt32Type>(
2290            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2291            None,
2292            None,
2293            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2294        );
2295        test_sort_primitive_arrays::<UInt64Type>(
2296            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2297            None,
2298            None,
2299            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2300        );
2301
2302        // descending
2303        test_sort_primitive_arrays::<Int8Type>(
2304            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2305            Some(SortOptions {
2306                descending: true,
2307                nulls_first: false,
2308            }),
2309            None,
2310            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2311        );
2312        test_sort_primitive_arrays::<Int16Type>(
2313            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2314            Some(SortOptions {
2315                descending: true,
2316                nulls_first: false,
2317            }),
2318            None,
2319            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2320        );
2321        test_sort_primitive_arrays::<Int32Type>(
2322            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2323            Some(SortOptions {
2324                descending: true,
2325                nulls_first: false,
2326            }),
2327            None,
2328            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2329        );
2330        test_sort_primitive_arrays::<Int16Type>(
2331            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2332            Some(SortOptions {
2333                descending: true,
2334                nulls_first: false,
2335            }),
2336            None,
2337            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2338        );
2339
2340        // descending, nulls first
2341        test_sort_primitive_arrays::<Int8Type>(
2342            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2343            Some(SortOptions {
2344                descending: true,
2345                nulls_first: true,
2346            }),
2347            None,
2348            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2349        );
2350        test_sort_primitive_arrays::<Int16Type>(
2351            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2352            Some(SortOptions {
2353                descending: true,
2354                nulls_first: true,
2355            }),
2356            None,
2357            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2358        );
2359        test_sort_primitive_arrays::<Int32Type>(
2360            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2361            Some(SortOptions {
2362                descending: true,
2363                nulls_first: true,
2364            }),
2365            None,
2366            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2367        );
2368        test_sort_primitive_arrays::<Int64Type>(
2369            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2370            Some(SortOptions {
2371                descending: true,
2372                nulls_first: true,
2373            }),
2374            None,
2375            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2376        );
2377
2378        test_sort_primitive_arrays::<Int64Type>(
2379            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2380            Some(SortOptions {
2381                descending: true,
2382                nulls_first: true,
2383            }),
2384            Some(3),
2385            vec![None, None, Some(2)],
2386        );
2387
2388        test_sort_primitive_arrays::<Float16Type>(
2389            vec![
2390                None,
2391                Some(f16::from_f32(0.0)),
2392                Some(f16::from_f32(2.0)),
2393                Some(f16::from_f32(-1.0)),
2394                Some(f16::from_f32(0.0)),
2395                None,
2396            ],
2397            Some(SortOptions {
2398                descending: true,
2399                nulls_first: true,
2400            }),
2401            None,
2402            vec![
2403                None,
2404                None,
2405                Some(f16::from_f32(2.0)),
2406                Some(f16::from_f32(0.0)),
2407                Some(f16::from_f32(0.0)),
2408                Some(f16::from_f32(-1.0)),
2409            ],
2410        );
2411
2412        test_sort_primitive_arrays::<Float32Type>(
2413            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2414            Some(SortOptions {
2415                descending: true,
2416                nulls_first: true,
2417            }),
2418            None,
2419            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2420        );
2421        test_sort_primitive_arrays::<Float64Type>(
2422            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2423            Some(SortOptions {
2424                descending: true,
2425                nulls_first: true,
2426            }),
2427            None,
2428            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2429        );
2430        test_sort_primitive_arrays::<Float64Type>(
2431            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2432            Some(SortOptions {
2433                descending: true,
2434                nulls_first: true,
2435            }),
2436            None,
2437            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2438        );
2439
2440        // int8 nulls first
2441        test_sort_primitive_arrays::<Int8Type>(
2442            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2443            Some(SortOptions {
2444                descending: false,
2445                nulls_first: true,
2446            }),
2447            None,
2448            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2449        );
2450        test_sort_primitive_arrays::<Int16Type>(
2451            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2452            Some(SortOptions {
2453                descending: false,
2454                nulls_first: true,
2455            }),
2456            None,
2457            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2458        );
2459        test_sort_primitive_arrays::<Int32Type>(
2460            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2461            Some(SortOptions {
2462                descending: false,
2463                nulls_first: true,
2464            }),
2465            None,
2466            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2467        );
2468        test_sort_primitive_arrays::<Int64Type>(
2469            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2470            Some(SortOptions {
2471                descending: false,
2472                nulls_first: true,
2473            }),
2474            None,
2475            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2476        );
2477        test_sort_primitive_arrays::<Float16Type>(
2478            vec![
2479                None,
2480                Some(f16::from_f32(0.0)),
2481                Some(f16::from_f32(2.0)),
2482                Some(f16::from_f32(-1.0)),
2483                Some(f16::from_f32(0.0)),
2484                None,
2485            ],
2486            Some(SortOptions {
2487                descending: false,
2488                nulls_first: true,
2489            }),
2490            None,
2491            vec![
2492                None,
2493                None,
2494                Some(f16::from_f32(-1.0)),
2495                Some(f16::from_f32(0.0)),
2496                Some(f16::from_f32(0.0)),
2497                Some(f16::from_f32(2.0)),
2498            ],
2499        );
2500        test_sort_primitive_arrays::<Float32Type>(
2501            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2502            Some(SortOptions {
2503                descending: false,
2504                nulls_first: true,
2505            }),
2506            None,
2507            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2508        );
2509        test_sort_primitive_arrays::<Float64Type>(
2510            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2511            Some(SortOptions {
2512                descending: false,
2513                nulls_first: true,
2514            }),
2515            None,
2516            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2517        );
2518        test_sort_primitive_arrays::<Float64Type>(
2519            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2520            Some(SortOptions {
2521                descending: false,
2522                nulls_first: true,
2523            }),
2524            None,
2525            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2526        );
2527
2528        // limit
2529        test_sort_primitive_arrays::<Float64Type>(
2530            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2531            Some(SortOptions {
2532                descending: false,
2533                nulls_first: true,
2534            }),
2535            Some(2),
2536            vec![Some(1.0), Some(f64::NAN)],
2537        );
2538
2539        // limit with actual value
2540        test_sort_primitive_arrays::<Float64Type>(
2541            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2542            Some(SortOptions {
2543                descending: false,
2544                nulls_first: true,
2545            }),
2546            Some(3),
2547            vec![Some(1.0), Some(2.0), Some(3.0)],
2548        );
2549
2550        // valid values less than limit with extra nulls
2551        test_sort_primitive_arrays::<Float64Type>(
2552            vec![Some(2.0), None, None, Some(1.0)],
2553            Some(SortOptions {
2554                descending: false,
2555                nulls_first: false,
2556            }),
2557            Some(3),
2558            vec![Some(1.0), Some(2.0), None],
2559        );
2560
2561        test_sort_primitive_arrays::<Float64Type>(
2562            vec![Some(2.0), None, None, Some(1.0)],
2563            Some(SortOptions {
2564                descending: false,
2565                nulls_first: true,
2566            }),
2567            Some(3),
2568            vec![None, None, Some(1.0)],
2569        );
2570
2571        // more nulls than limit
2572        test_sort_primitive_arrays::<Float64Type>(
2573            vec![Some(2.0), None, None, None],
2574            Some(SortOptions {
2575                descending: false,
2576                nulls_first: true,
2577            }),
2578            Some(2),
2579            vec![None, None],
2580        );
2581
2582        test_sort_primitive_arrays::<Float64Type>(
2583            vec![Some(2.0), None, None, None],
2584            Some(SortOptions {
2585                descending: false,
2586                nulls_first: false,
2587            }),
2588            Some(2),
2589            vec![Some(2.0), None],
2590        );
2591    }
2592
2593    #[test]
2594    fn test_sort_to_indices_strings() {
2595        test_sort_to_indices_string_arrays(
2596            vec![
2597                None,
2598                Some("bad"),
2599                Some("sad"),
2600                None,
2601                Some("glad"),
2602                Some("-ad"),
2603            ],
2604            None,
2605            None,
2606            vec![0, 3, 5, 1, 4, 2],
2607        );
2608
2609        test_sort_to_indices_string_arrays(
2610            vec![
2611                None,
2612                Some("bad"),
2613                Some("sad"),
2614                None,
2615                Some("glad"),
2616                Some("-ad"),
2617            ],
2618            Some(SortOptions {
2619                descending: true,
2620                nulls_first: false,
2621            }),
2622            None,
2623            vec![2, 4, 1, 5, 0, 3],
2624        );
2625
2626        test_sort_to_indices_string_arrays(
2627            vec![
2628                None,
2629                Some("bad"),
2630                Some("sad"),
2631                None,
2632                Some("glad"),
2633                Some("-ad"),
2634            ],
2635            Some(SortOptions {
2636                descending: false,
2637                nulls_first: true,
2638            }),
2639            None,
2640            vec![0, 3, 5, 1, 4, 2],
2641        );
2642
2643        test_sort_to_indices_string_arrays(
2644            vec![
2645                None,
2646                Some("bad"),
2647                Some("sad"),
2648                None,
2649                Some("glad"),
2650                Some("-ad"),
2651            ],
2652            Some(SortOptions {
2653                descending: true,
2654                nulls_first: true,
2655            }),
2656            None,
2657            vec![0, 3, 2, 4, 1, 5],
2658        );
2659
2660        test_sort_to_indices_string_arrays(
2661            vec![
2662                None,
2663                Some("bad"),
2664                Some("sad"),
2665                None,
2666                Some("glad"),
2667                Some("-ad"),
2668            ],
2669            Some(SortOptions {
2670                descending: true,
2671                nulls_first: true,
2672            }),
2673            Some(3),
2674            vec![0, 3, 2],
2675        );
2676
2677        // valid values less than limit with extra nulls
2678        test_sort_to_indices_string_arrays(
2679            vec![Some("def"), None, None, Some("abc")],
2680            Some(SortOptions {
2681                descending: false,
2682                nulls_first: false,
2683            }),
2684            Some(3),
2685            vec![3, 0, 1],
2686        );
2687
2688        test_sort_to_indices_string_arrays(
2689            vec![Some("def"), None, None, Some("abc")],
2690            Some(SortOptions {
2691                descending: false,
2692                nulls_first: true,
2693            }),
2694            Some(3),
2695            vec![1, 2, 3],
2696        );
2697
2698        // more nulls than limit
2699        test_sort_to_indices_string_arrays(
2700            vec![Some("def"), None, None, None],
2701            Some(SortOptions {
2702                descending: false,
2703                nulls_first: true,
2704            }),
2705            Some(2),
2706            vec![1, 2],
2707        );
2708
2709        test_sort_to_indices_string_arrays(
2710            vec![Some("def"), None, None, None],
2711            Some(SortOptions {
2712                descending: false,
2713                nulls_first: false,
2714            }),
2715            Some(2),
2716            vec![0, 1],
2717        );
2718    }
2719
2720    #[test]
2721    fn test_sort_strings() {
2722        test_sort_string_arrays(
2723            vec![
2724                None,
2725                Some("bad"),
2726                Some("sad"),
2727                Some("long string longer than 12 bytes"),
2728                None,
2729                Some("glad"),
2730                Some("lang string longer than 12 bytes"),
2731                Some("-ad"),
2732            ],
2733            None,
2734            None,
2735            vec![
2736                None,
2737                None,
2738                Some("-ad"),
2739                Some("bad"),
2740                Some("glad"),
2741                Some("lang string longer than 12 bytes"),
2742                Some("long string longer than 12 bytes"),
2743                Some("sad"),
2744            ],
2745        );
2746
2747        test_sort_string_arrays(
2748            vec![
2749                None,
2750                Some("bad"),
2751                Some("sad"),
2752                Some("long string longer than 12 bytes"),
2753                None,
2754                Some("glad"),
2755                Some("lang string longer than 12 bytes"),
2756                Some("-ad"),
2757            ],
2758            Some(SortOptions {
2759                descending: true,
2760                nulls_first: false,
2761            }),
2762            None,
2763            vec![
2764                Some("sad"),
2765                Some("long string longer than 12 bytes"),
2766                Some("lang string longer than 12 bytes"),
2767                Some("glad"),
2768                Some("bad"),
2769                Some("-ad"),
2770                None,
2771                None,
2772            ],
2773        );
2774
2775        test_sort_string_arrays(
2776            vec![
2777                None,
2778                Some("bad"),
2779                Some("long string longer than 12 bytes"),
2780                Some("sad"),
2781                None,
2782                Some("glad"),
2783                Some("lang string longer than 12 bytes"),
2784                Some("-ad"),
2785            ],
2786            Some(SortOptions {
2787                descending: false,
2788                nulls_first: true,
2789            }),
2790            None,
2791            vec![
2792                None,
2793                None,
2794                Some("-ad"),
2795                Some("bad"),
2796                Some("glad"),
2797                Some("lang string longer than 12 bytes"),
2798                Some("long string longer than 12 bytes"),
2799                Some("sad"),
2800            ],
2801        );
2802
2803        test_sort_string_arrays(
2804            vec![
2805                None,
2806                Some("bad"),
2807                Some("long string longer than 12 bytes"),
2808                Some("sad"),
2809                None,
2810                Some("glad"),
2811                Some("lang string longer than 12 bytes"),
2812                Some("-ad"),
2813            ],
2814            Some(SortOptions {
2815                descending: true,
2816                nulls_first: true,
2817            }),
2818            None,
2819            vec![
2820                None,
2821                None,
2822                Some("sad"),
2823                Some("long string longer than 12 bytes"),
2824                Some("lang string longer than 12 bytes"),
2825                Some("glad"),
2826                Some("bad"),
2827                Some("-ad"),
2828            ],
2829        );
2830
2831        test_sort_string_arrays(
2832            vec![
2833                None,
2834                Some("bad"),
2835                Some("long string longer than 12 bytes"),
2836                Some("sad"),
2837                None,
2838                Some("glad"),
2839                Some("lang string longer than 12 bytes"),
2840                Some("-ad"),
2841            ],
2842            Some(SortOptions {
2843                descending: true,
2844                nulls_first: true,
2845            }),
2846            Some(3),
2847            vec![None, None, Some("sad")],
2848        );
2849
2850        // valid values less than limit with extra nulls
2851        test_sort_string_arrays(
2852            vec![
2853                Some("def long string longer than 12"),
2854                None,
2855                None,
2856                Some("abc"),
2857            ],
2858            Some(SortOptions {
2859                descending: false,
2860                nulls_first: false,
2861            }),
2862            Some(3),
2863            vec![Some("abc"), Some("def long string longer than 12"), None],
2864        );
2865
2866        test_sort_string_arrays(
2867            vec![
2868                Some("def long string longer than 12"),
2869                None,
2870                None,
2871                Some("abc"),
2872            ],
2873            Some(SortOptions {
2874                descending: false,
2875                nulls_first: true,
2876            }),
2877            Some(3),
2878            vec![None, None, Some("abc")],
2879        );
2880
2881        // more nulls than limit
2882        test_sort_string_arrays(
2883            vec![Some("def long string longer than 12"), None, None, None],
2884            Some(SortOptions {
2885                descending: false,
2886                nulls_first: true,
2887            }),
2888            Some(2),
2889            vec![None, None],
2890        );
2891
2892        test_sort_string_arrays(
2893            vec![Some("def long string longer than 12"), None, None, None],
2894            Some(SortOptions {
2895                descending: false,
2896                nulls_first: false,
2897            }),
2898            Some(2),
2899            vec![Some("def long string longer than 12"), None],
2900        );
2901    }
2902
2903    #[test]
2904    fn test_sort_run_to_run() {
2905        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
2906    }
2907
2908    #[test]
2909    fn test_sort_run_to_indices() {
2910        test_sort_run_inner(|array, sort_options, limit| {
2911            let indices = sort_to_indices(array, sort_options, limit).unwrap();
2912            take(array, &indices, None)
2913        });
2914    }
2915
2916    fn test_sort_run_inner<F>(sort_fn: F)
2917    where
2918        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2919    {
2920        // Create an input array for testing
2921        let total_len = 80;
2922        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
2923        let repeats: Vec<usize> = vec![1, 3, 2, 4];
2924        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
2925        for ix in 0_usize..32 {
2926            let repeat: usize = repeats[ix % repeats.len()];
2927            let val: Option<i32> = vals[ix % vals.len()];
2928            input_array.resize(input_array.len() + repeat, val);
2929        }
2930
2931        // create run array using input_array
2932        // Encode the input_array to run array
2933        let mut builder =
2934            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
2935        builder.extend(input_array.iter().copied());
2936        let run_array = builder.finish();
2937
2938        // slice lengths that are tested
2939        let slice_lens = [
2940            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
2941        ];
2942        for slice_len in slice_lens {
2943            test_sort_run_inner2(
2944                input_array.as_slice(),
2945                &run_array,
2946                0,
2947                slice_len,
2948                None,
2949                &sort_fn,
2950            );
2951            test_sort_run_inner2(
2952                input_array.as_slice(),
2953                &run_array,
2954                total_len - slice_len,
2955                slice_len,
2956                None,
2957                &sort_fn,
2958            );
2959            // Test with non zero limit
2960            if slice_len > 1 {
2961                test_sort_run_inner2(
2962                    input_array.as_slice(),
2963                    &run_array,
2964                    0,
2965                    slice_len,
2966                    Some(slice_len / 2),
2967                    &sort_fn,
2968                );
2969                test_sort_run_inner2(
2970                    input_array.as_slice(),
2971                    &run_array,
2972                    total_len - slice_len,
2973                    slice_len,
2974                    Some(slice_len / 2),
2975                    &sort_fn,
2976                );
2977            }
2978        }
2979    }
2980
2981    fn test_sort_run_inner2<F>(
2982        input_array: &[Option<i32>],
2983        run_array: &RunArray<Int16Type>,
2984        offset: usize,
2985        length: usize,
2986        limit: Option<usize>,
2987        sort_fn: &F,
2988    ) where
2989        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2990    {
2991        // Run the sort and build actual result
2992        let sliced_array = run_array.slice(offset, length);
2993        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
2994        let sorted_run_array = sorted_sliced_array
2995            .as_any()
2996            .downcast_ref::<RunArray<Int16Type>>()
2997            .unwrap();
2998        let typed_run_array = sorted_run_array
2999            .downcast::<PrimitiveArray<Int32Type>>()
3000            .unwrap();
3001        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3002
3003        // build expected result.
3004        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3005        sliced_input.sort();
3006        let expected = if let Some(limit) = limit {
3007            sliced_input.iter().take(limit).copied().collect()
3008        } else {
3009            sliced_input
3010        };
3011
3012        assert_eq!(expected, actual)
3013    }
3014
3015    #[test]
3016    fn test_sort_string_dicts() {
3017        test_sort_string_dict_arrays::<Int8Type>(
3018            vec![
3019                None,
3020                Some("bad"),
3021                Some("sad"),
3022                None,
3023                Some("glad"),
3024                Some("-ad"),
3025            ],
3026            None,
3027            None,
3028            vec![
3029                None,
3030                None,
3031                Some("-ad"),
3032                Some("bad"),
3033                Some("glad"),
3034                Some("sad"),
3035            ],
3036        );
3037
3038        test_sort_string_dict_arrays::<Int16Type>(
3039            vec![
3040                None,
3041                Some("bad"),
3042                Some("sad"),
3043                None,
3044                Some("glad"),
3045                Some("-ad"),
3046            ],
3047            Some(SortOptions {
3048                descending: true,
3049                nulls_first: false,
3050            }),
3051            None,
3052            vec![
3053                Some("sad"),
3054                Some("glad"),
3055                Some("bad"),
3056                Some("-ad"),
3057                None,
3058                None,
3059            ],
3060        );
3061
3062        test_sort_string_dict_arrays::<Int32Type>(
3063            vec![
3064                None,
3065                Some("bad"),
3066                Some("sad"),
3067                None,
3068                Some("glad"),
3069                Some("-ad"),
3070            ],
3071            Some(SortOptions {
3072                descending: false,
3073                nulls_first: true,
3074            }),
3075            None,
3076            vec![
3077                None,
3078                None,
3079                Some("-ad"),
3080                Some("bad"),
3081                Some("glad"),
3082                Some("sad"),
3083            ],
3084        );
3085
3086        test_sort_string_dict_arrays::<Int16Type>(
3087            vec![
3088                None,
3089                Some("bad"),
3090                Some("sad"),
3091                None,
3092                Some("glad"),
3093                Some("-ad"),
3094            ],
3095            Some(SortOptions {
3096                descending: true,
3097                nulls_first: true,
3098            }),
3099            None,
3100            vec![
3101                None,
3102                None,
3103                Some("sad"),
3104                Some("glad"),
3105                Some("bad"),
3106                Some("-ad"),
3107            ],
3108        );
3109
3110        test_sort_string_dict_arrays::<Int16Type>(
3111            vec![
3112                None,
3113                Some("bad"),
3114                Some("sad"),
3115                None,
3116                Some("glad"),
3117                Some("-ad"),
3118            ],
3119            Some(SortOptions {
3120                descending: true,
3121                nulls_first: true,
3122            }),
3123            Some(3),
3124            vec![None, None, Some("sad")],
3125        );
3126
3127        // valid values less than limit with extra nulls
3128        test_sort_string_dict_arrays::<Int16Type>(
3129            vec![Some("def"), None, None, Some("abc")],
3130            Some(SortOptions {
3131                descending: false,
3132                nulls_first: false,
3133            }),
3134            Some(3),
3135            vec![Some("abc"), Some("def"), None],
3136        );
3137
3138        test_sort_string_dict_arrays::<Int16Type>(
3139            vec![Some("def"), None, None, Some("abc")],
3140            Some(SortOptions {
3141                descending: false,
3142                nulls_first: true,
3143            }),
3144            Some(3),
3145            vec![None, None, Some("abc")],
3146        );
3147
3148        // more nulls than limit
3149        test_sort_string_dict_arrays::<Int16Type>(
3150            vec![Some("def"), None, None, None],
3151            Some(SortOptions {
3152                descending: false,
3153                nulls_first: true,
3154            }),
3155            Some(2),
3156            vec![None, None],
3157        );
3158
3159        test_sort_string_dict_arrays::<Int16Type>(
3160            vec![Some("def"), None, None, None],
3161            Some(SortOptions {
3162                descending: false,
3163                nulls_first: false,
3164            }),
3165            Some(2),
3166            vec![Some("def"), None],
3167        );
3168    }
3169
3170    #[test]
3171    fn test_sort_list() {
3172        test_sort_list_arrays::<Int8Type>(
3173            vec![
3174                Some(vec![Some(1)]),
3175                Some(vec![Some(4)]),
3176                Some(vec![Some(2)]),
3177                Some(vec![Some(3)]),
3178            ],
3179            Some(SortOptions {
3180                descending: false,
3181                nulls_first: false,
3182            }),
3183            None,
3184            vec![
3185                Some(vec![Some(1)]),
3186                Some(vec![Some(2)]),
3187                Some(vec![Some(3)]),
3188                Some(vec![Some(4)]),
3189            ],
3190            Some(1),
3191        );
3192
3193        test_sort_list_arrays::<Float16Type>(
3194            vec![
3195                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3196                Some(vec![
3197                    Some(f16::from_f32(4.0)),
3198                    Some(f16::from_f32(3.0)),
3199                    Some(f16::from_f32(2.0)),
3200                    Some(f16::from_f32(1.0)),
3201                ]),
3202                Some(vec![
3203                    Some(f16::from_f32(2.0)),
3204                    Some(f16::from_f32(3.0)),
3205                    Some(f16::from_f32(4.0)),
3206                ]),
3207                Some(vec![
3208                    Some(f16::from_f32(3.0)),
3209                    Some(f16::from_f32(3.0)),
3210                    Some(f16::from_f32(3.0)),
3211                    Some(f16::from_f32(3.0)),
3212                ]),
3213                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3214            ],
3215            Some(SortOptions {
3216                descending: false,
3217                nulls_first: false,
3218            }),
3219            None,
3220            vec![
3221                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3222                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3223                Some(vec![
3224                    Some(f16::from_f32(2.0)),
3225                    Some(f16::from_f32(3.0)),
3226                    Some(f16::from_f32(4.0)),
3227                ]),
3228                Some(vec![
3229                    Some(f16::from_f32(3.0)),
3230                    Some(f16::from_f32(3.0)),
3231                    Some(f16::from_f32(3.0)),
3232                    Some(f16::from_f32(3.0)),
3233                ]),
3234                Some(vec![
3235                    Some(f16::from_f32(4.0)),
3236                    Some(f16::from_f32(3.0)),
3237                    Some(f16::from_f32(2.0)),
3238                    Some(f16::from_f32(1.0)),
3239                ]),
3240            ],
3241            None,
3242        );
3243
3244        test_sort_list_arrays::<Float32Type>(
3245            vec![
3246                Some(vec![Some(1.0), Some(0.0)]),
3247                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3248                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3249                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3250                Some(vec![Some(1.0), Some(1.0)]),
3251            ],
3252            Some(SortOptions {
3253                descending: false,
3254                nulls_first: false,
3255            }),
3256            None,
3257            vec![
3258                Some(vec![Some(1.0), Some(0.0)]),
3259                Some(vec![Some(1.0), Some(1.0)]),
3260                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3261                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3262                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3263            ],
3264            None,
3265        );
3266
3267        test_sort_list_arrays::<Float64Type>(
3268            vec![
3269                Some(vec![Some(1.0), Some(0.0)]),
3270                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3271                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3272                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3273                Some(vec![Some(1.0), Some(1.0)]),
3274            ],
3275            Some(SortOptions {
3276                descending: false,
3277                nulls_first: false,
3278            }),
3279            None,
3280            vec![
3281                Some(vec![Some(1.0), Some(0.0)]),
3282                Some(vec![Some(1.0), Some(1.0)]),
3283                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3284                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3285                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3286            ],
3287            None,
3288        );
3289
3290        test_sort_list_arrays::<Int32Type>(
3291            vec![
3292                Some(vec![Some(1), Some(0)]),
3293                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3294                Some(vec![Some(2), Some(3), Some(4)]),
3295                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3296                Some(vec![Some(1), Some(1)]),
3297            ],
3298            Some(SortOptions {
3299                descending: false,
3300                nulls_first: false,
3301            }),
3302            None,
3303            vec![
3304                Some(vec![Some(1), Some(0)]),
3305                Some(vec![Some(1), Some(1)]),
3306                Some(vec![Some(2), Some(3), Some(4)]),
3307                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3308                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3309            ],
3310            None,
3311        );
3312
3313        test_sort_list_arrays::<Int32Type>(
3314            vec![
3315                None,
3316                Some(vec![Some(4), None, Some(2)]),
3317                Some(vec![Some(2), Some(3), Some(4)]),
3318                None,
3319                Some(vec![Some(3), Some(3), None]),
3320            ],
3321            Some(SortOptions {
3322                descending: false,
3323                nulls_first: false,
3324            }),
3325            None,
3326            vec![
3327                Some(vec![Some(2), Some(3), Some(4)]),
3328                Some(vec![Some(3), Some(3), None]),
3329                Some(vec![Some(4), None, Some(2)]),
3330                None,
3331                None,
3332            ],
3333            Some(3),
3334        );
3335
3336        test_sort_list_arrays::<Int32Type>(
3337            vec![
3338                Some(vec![Some(1), Some(0)]),
3339                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3340                Some(vec![Some(2), Some(3), Some(4)]),
3341                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3342                Some(vec![Some(1), Some(1)]),
3343            ],
3344            Some(SortOptions {
3345                descending: false,
3346                nulls_first: false,
3347            }),
3348            Some(2),
3349            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3350            None,
3351        );
3352
3353        // valid values less than limit with extra nulls
3354        test_sort_list_arrays::<Int32Type>(
3355            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3356            Some(SortOptions {
3357                descending: false,
3358                nulls_first: false,
3359            }),
3360            Some(3),
3361            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3362            None,
3363        );
3364
3365        test_sort_list_arrays::<Int32Type>(
3366            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3367            Some(SortOptions {
3368                descending: false,
3369                nulls_first: true,
3370            }),
3371            Some(3),
3372            vec![None, None, Some(vec![Some(1)])],
3373            None,
3374        );
3375
3376        // more nulls than limit
3377        test_sort_list_arrays::<Int32Type>(
3378            vec![Some(vec![Some(1)]), None, None, None],
3379            Some(SortOptions {
3380                descending: false,
3381                nulls_first: true,
3382            }),
3383            Some(2),
3384            vec![None, None],
3385            None,
3386        );
3387
3388        test_sort_list_arrays::<Int32Type>(
3389            vec![Some(vec![Some(1)]), None, None, None],
3390            Some(SortOptions {
3391                descending: false,
3392                nulls_first: false,
3393            }),
3394            Some(2),
3395            vec![Some(vec![Some(1)]), None],
3396            None,
3397        );
3398    }
3399
3400    #[test]
3401    fn test_sort_binary() {
3402        test_sort_binary_arrays(
3403            vec![
3404                Some(vec![0, 0, 0]),
3405                Some(vec![0, 0, 5]),
3406                Some(vec![0, 0, 3]),
3407                Some(vec![0, 0, 7]),
3408                Some(vec![0, 0, 1]),
3409            ],
3410            Some(SortOptions {
3411                descending: false,
3412                nulls_first: false,
3413            }),
3414            None,
3415            vec![
3416                Some(vec![0, 0, 0]),
3417                Some(vec![0, 0, 1]),
3418                Some(vec![0, 0, 3]),
3419                Some(vec![0, 0, 5]),
3420                Some(vec![0, 0, 7]),
3421            ],
3422            Some(3),
3423        );
3424
3425        // with nulls
3426        test_sort_binary_arrays(
3427            vec![
3428                Some(vec![0, 0, 0]),
3429                None,
3430                Some(vec![0, 0, 3]),
3431                Some(vec![0, 0, 7]),
3432                Some(vec![0, 0, 1]),
3433                None,
3434            ],
3435            Some(SortOptions {
3436                descending: false,
3437                nulls_first: false,
3438            }),
3439            None,
3440            vec![
3441                Some(vec![0, 0, 0]),
3442                Some(vec![0, 0, 1]),
3443                Some(vec![0, 0, 3]),
3444                Some(vec![0, 0, 7]),
3445                None,
3446                None,
3447            ],
3448            Some(3),
3449        );
3450
3451        test_sort_binary_arrays(
3452            vec![
3453                Some(vec![3, 5, 7]),
3454                None,
3455                Some(vec![1, 7, 1]),
3456                Some(vec![2, 7, 3]),
3457                None,
3458                Some(vec![1, 4, 3]),
3459            ],
3460            Some(SortOptions {
3461                descending: false,
3462                nulls_first: false,
3463            }),
3464            None,
3465            vec![
3466                Some(vec![1, 4, 3]),
3467                Some(vec![1, 7, 1]),
3468                Some(vec![2, 7, 3]),
3469                Some(vec![3, 5, 7]),
3470                None,
3471                None,
3472            ],
3473            Some(3),
3474        );
3475
3476        // descending
3477        test_sort_binary_arrays(
3478            vec![
3479                Some(vec![0, 0, 0]),
3480                None,
3481                Some(vec![0, 0, 3]),
3482                Some(vec![0, 0, 7]),
3483                Some(vec![0, 0, 1]),
3484                None,
3485            ],
3486            Some(SortOptions {
3487                descending: true,
3488                nulls_first: false,
3489            }),
3490            None,
3491            vec![
3492                Some(vec![0, 0, 7]),
3493                Some(vec![0, 0, 3]),
3494                Some(vec![0, 0, 1]),
3495                Some(vec![0, 0, 0]),
3496                None,
3497                None,
3498            ],
3499            Some(3),
3500        );
3501
3502        // nulls first
3503        test_sort_binary_arrays(
3504            vec![
3505                Some(vec![0, 0, 0]),
3506                None,
3507                Some(vec![0, 0, 3]),
3508                Some(vec![0, 0, 7]),
3509                Some(vec![0, 0, 1]),
3510                None,
3511            ],
3512            Some(SortOptions {
3513                descending: false,
3514                nulls_first: true,
3515            }),
3516            None,
3517            vec![
3518                None,
3519                None,
3520                Some(vec![0, 0, 0]),
3521                Some(vec![0, 0, 1]),
3522                Some(vec![0, 0, 3]),
3523                Some(vec![0, 0, 7]),
3524            ],
3525            Some(3),
3526        );
3527
3528        // limit
3529        test_sort_binary_arrays(
3530            vec![
3531                Some(vec![0, 0, 0]),
3532                None,
3533                Some(vec![0, 0, 3]),
3534                Some(vec![0, 0, 7]),
3535                Some(vec![0, 0, 1]),
3536                None,
3537            ],
3538            Some(SortOptions {
3539                descending: false,
3540                nulls_first: true,
3541            }),
3542            Some(4),
3543            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3544            Some(3),
3545        );
3546
3547        // var length
3548        test_sort_binary_arrays(
3549            vec![
3550                Some(b"Hello".to_vec()),
3551                None,
3552                Some(b"from".to_vec()),
3553                Some(b"Apache".to_vec()),
3554                Some(b"Arrow-rs".to_vec()),
3555                None,
3556            ],
3557            Some(SortOptions {
3558                descending: false,
3559                nulls_first: false,
3560            }),
3561            None,
3562            vec![
3563                Some(b"Apache".to_vec()),
3564                Some(b"Arrow-rs".to_vec()),
3565                Some(b"Hello".to_vec()),
3566                Some(b"from".to_vec()),
3567                None,
3568                None,
3569            ],
3570            None,
3571        );
3572
3573        // limit
3574        test_sort_binary_arrays(
3575            vec![
3576                Some(b"Hello".to_vec()),
3577                None,
3578                Some(b"from".to_vec()),
3579                Some(b"Apache".to_vec()),
3580                Some(b"Arrow-rs".to_vec()),
3581                None,
3582            ],
3583            Some(SortOptions {
3584                descending: false,
3585                nulls_first: true,
3586            }),
3587            Some(4),
3588            vec![
3589                None,
3590                None,
3591                Some(b"Apache".to_vec()),
3592                Some(b"Arrow-rs".to_vec()),
3593            ],
3594            None,
3595        );
3596    }
3597
3598    #[test]
3599    fn test_lex_sort_single_column() {
3600        let input = vec![SortColumn {
3601            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3602                Some(17),
3603                Some(2),
3604                Some(-1),
3605                Some(0),
3606            ])) as ArrayRef,
3607            options: None,
3608        }];
3609        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3610            Some(-1),
3611            Some(0),
3612            Some(2),
3613            Some(17),
3614        ])) as ArrayRef];
3615        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3616        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3617
3618        // Explicitly test a limit on the sort as a demonstration
3619        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3620            Some(-1),
3621            Some(0),
3622            Some(2),
3623        ])) as ArrayRef];
3624        test_lex_sort_arrays(input, expected, Some(3));
3625    }
3626
3627    #[test]
3628    fn test_lex_sort_unaligned_rows() {
3629        let input = vec![
3630            SortColumn {
3631                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3632                    as ArrayRef,
3633                options: None,
3634            },
3635            SortColumn {
3636                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3637                options: None,
3638            },
3639        ];
3640        assert!(
3641            lexsort(&input, None).is_err(),
3642            "lexsort should reject columns with different row counts"
3643        );
3644    }
3645
3646    #[test]
3647    fn test_lex_sort_mixed_types() {
3648        let input = vec![
3649            SortColumn {
3650                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3651                    Some(0),
3652                    Some(2),
3653                    Some(-1),
3654                    Some(0),
3655                ])) as ArrayRef,
3656                options: None,
3657            },
3658            SortColumn {
3659                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3660                    Some(101),
3661                    Some(8),
3662                    Some(7),
3663                    Some(102),
3664                ])) as ArrayRef,
3665                options: None,
3666            },
3667            SortColumn {
3668                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3669                    Some(-1),
3670                    Some(-2),
3671                    Some(-3),
3672                    Some(-4),
3673                ])) as ArrayRef,
3674                options: None,
3675            },
3676        ];
3677        let expected = vec![
3678            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3679                Some(-1),
3680                Some(0),
3681                Some(0),
3682                Some(2),
3683            ])) as ArrayRef,
3684            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3685                Some(7),
3686                Some(101),
3687                Some(102),
3688                Some(8),
3689            ])) as ArrayRef,
3690            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3691                Some(-3),
3692                Some(-1),
3693                Some(-4),
3694                Some(-2),
3695            ])) as ArrayRef,
3696        ];
3697        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3698        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3699
3700        // test mix of string and in64 with option
3701        let input = vec![
3702            SortColumn {
3703                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3704                    Some(0),
3705                    Some(2),
3706                    Some(-1),
3707                    Some(0),
3708                ])) as ArrayRef,
3709                options: Some(SortOptions {
3710                    descending: true,
3711                    nulls_first: true,
3712                }),
3713            },
3714            SortColumn {
3715                values: Arc::new(StringArray::from(vec![
3716                    Some("foo"),
3717                    Some("9"),
3718                    Some("7"),
3719                    Some("bar"),
3720                ])) as ArrayRef,
3721                options: Some(SortOptions {
3722                    descending: true,
3723                    nulls_first: true,
3724                }),
3725            },
3726        ];
3727        let expected = vec![
3728            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3729                Some(2),
3730                Some(0),
3731                Some(0),
3732                Some(-1),
3733            ])) as ArrayRef,
3734            Arc::new(StringArray::from(vec![
3735                Some("9"),
3736                Some("foo"),
3737                Some("bar"),
3738                Some("7"),
3739            ])) as ArrayRef,
3740        ];
3741        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3742        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3743
3744        // test sort with nulls first
3745        let input = vec![
3746            SortColumn {
3747                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3748                    None,
3749                    Some(-1),
3750                    Some(2),
3751                    None,
3752                ])) as ArrayRef,
3753                options: Some(SortOptions {
3754                    descending: true,
3755                    nulls_first: true,
3756                }),
3757            },
3758            SortColumn {
3759                values: Arc::new(StringArray::from(vec![
3760                    Some("foo"),
3761                    Some("world"),
3762                    Some("hello"),
3763                    None,
3764                ])) as ArrayRef,
3765                options: Some(SortOptions {
3766                    descending: true,
3767                    nulls_first: true,
3768                }),
3769            },
3770        ];
3771        let expected = vec![
3772            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3773                None,
3774                None,
3775                Some(2),
3776                Some(-1),
3777            ])) as ArrayRef,
3778            Arc::new(StringArray::from(vec![
3779                None,
3780                Some("foo"),
3781                Some("hello"),
3782                Some("world"),
3783            ])) as ArrayRef,
3784        ];
3785        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3786        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3787
3788        // test sort with nulls last
3789        let input = vec![
3790            SortColumn {
3791                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3792                    None,
3793                    Some(-1),
3794                    Some(2),
3795                    None,
3796                ])) as ArrayRef,
3797                options: Some(SortOptions {
3798                    descending: true,
3799                    nulls_first: false,
3800                }),
3801            },
3802            SortColumn {
3803                values: Arc::new(StringArray::from(vec![
3804                    Some("foo"),
3805                    Some("world"),
3806                    Some("hello"),
3807                    None,
3808                ])) as ArrayRef,
3809                options: Some(SortOptions {
3810                    descending: true,
3811                    nulls_first: false,
3812                }),
3813            },
3814        ];
3815        let expected = vec![
3816            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3817                Some(2),
3818                Some(-1),
3819                None,
3820                None,
3821            ])) as ArrayRef,
3822            Arc::new(StringArray::from(vec![
3823                Some("hello"),
3824                Some("world"),
3825                Some("foo"),
3826                None,
3827            ])) as ArrayRef,
3828        ];
3829        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3830        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3831
3832        // test sort with opposite options
3833        let input = vec![
3834            SortColumn {
3835                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3836                    None,
3837                    Some(-1),
3838                    Some(2),
3839                    Some(-1),
3840                    None,
3841                ])) as ArrayRef,
3842                options: Some(SortOptions {
3843                    descending: false,
3844                    nulls_first: false,
3845                }),
3846            },
3847            SortColumn {
3848                values: Arc::new(StringArray::from(vec![
3849                    Some("foo"),
3850                    Some("bar"),
3851                    Some("world"),
3852                    Some("hello"),
3853                    None,
3854                ])) as ArrayRef,
3855                options: Some(SortOptions {
3856                    descending: true,
3857                    nulls_first: true,
3858                }),
3859            },
3860        ];
3861        let expected = vec![
3862            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3863                Some(-1),
3864                Some(-1),
3865                Some(2),
3866                None,
3867                None,
3868            ])) as ArrayRef,
3869            Arc::new(StringArray::from(vec![
3870                Some("hello"),
3871                Some("bar"),
3872                Some("world"),
3873                None,
3874                Some("foo"),
3875            ])) as ArrayRef,
3876        ];
3877        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3878        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3879
3880        // Limiting by more rows than present is ok
3881        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
3882
3883        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
3884
3885        // case1
3886        let primitive_array_data = vec![
3887            Some(2),
3888            Some(3),
3889            Some(2),
3890            Some(0),
3891            None,
3892            Some(2),
3893            Some(1),
3894            Some(2),
3895        ];
3896        let list_array_data = vec![
3897            None,
3898            Some(vec![Some(4)]),
3899            Some(vec![Some(3)]),
3900            Some(vec![Some(1)]),
3901            Some(vec![Some(5)]),
3902            Some(vec![Some(0)]),
3903            Some(vec![Some(2)]),
3904            Some(vec![None]),
3905        ];
3906
3907        let expected_primitive_array_data = vec![
3908            None,
3909            Some(0),
3910            Some(1),
3911            Some(2),
3912            Some(2),
3913            Some(2),
3914            Some(2),
3915            Some(3),
3916        ];
3917        let expected_list_array_data = vec![
3918            Some(vec![Some(5)]),
3919            Some(vec![Some(1)]),
3920            Some(vec![Some(2)]),
3921            None, // <-
3922            Some(vec![None]),
3923            Some(vec![Some(0)]),
3924            Some(vec![Some(3)]), // <-
3925            Some(vec![Some(4)]),
3926        ];
3927        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3928            primitive_array_data.clone(),
3929            list_array_data.clone(),
3930            expected_primitive_array_data.clone(),
3931            expected_list_array_data,
3932            None,
3933            None,
3934        );
3935
3936        // case2
3937        let primitive_array_options = SortOptions {
3938            descending: false,
3939            nulls_first: true,
3940        };
3941        let list_array_options = SortOptions {
3942            descending: false,
3943            nulls_first: false, // has been modified
3944        };
3945        let expected_list_array_data = vec![
3946            Some(vec![Some(5)]),
3947            Some(vec![Some(1)]),
3948            Some(vec![Some(2)]),
3949            Some(vec![Some(0)]), // <-
3950            Some(vec![Some(3)]),
3951            Some(vec![None]),
3952            None, // <-
3953            Some(vec![Some(4)]),
3954        ];
3955        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3956            primitive_array_data.clone(),
3957            list_array_data.clone(),
3958            expected_primitive_array_data.clone(),
3959            expected_list_array_data,
3960            Some(primitive_array_options),
3961            Some(list_array_options),
3962        );
3963
3964        // case3
3965        let primitive_array_options = SortOptions {
3966            descending: false,
3967            nulls_first: true,
3968        };
3969        let list_array_options = SortOptions {
3970            descending: true, // has been modified
3971            nulls_first: true,
3972        };
3973        let expected_list_array_data = vec![
3974            Some(vec![Some(5)]),
3975            Some(vec![Some(1)]),
3976            Some(vec![Some(2)]),
3977            None, // <-
3978            Some(vec![None]),
3979            Some(vec![Some(3)]),
3980            Some(vec![Some(0)]), // <-
3981            Some(vec![Some(4)]),
3982        ];
3983        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3984            primitive_array_data.clone(),
3985            list_array_data.clone(),
3986            expected_primitive_array_data,
3987            expected_list_array_data,
3988            Some(primitive_array_options),
3989            Some(list_array_options),
3990        );
3991
3992        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
3993
3994        let list_array_data = vec![
3995            Some(vec![Some(2), Some(1)]), // 0
3996            None,                         // 10
3997            Some(vec![Some(3)]),          // 1
3998            Some(vec![Some(2), Some(0)]), // 2
3999            Some(vec![None, Some(2)]),    // 3
4000            Some(vec![Some(0)]),          // none
4001            None,                         // 11
4002            Some(vec![Some(2), None]),    // 4
4003            Some(vec![None]),             // 5
4004            Some(vec![Some(2), Some(1)]), // 6
4005        ];
4006        let primitive_array_data = vec![
4007            Some(0),
4008            Some(10),
4009            Some(1),
4010            Some(2),
4011            Some(3),
4012            None,
4013            Some(11),
4014            Some(4),
4015            Some(5),
4016            Some(6),
4017        ];
4018        let expected_list_array_data = vec![
4019            None,
4020            None,
4021            Some(vec![None]),
4022            Some(vec![None, Some(2)]),
4023            Some(vec![Some(0)]),
4024            Some(vec![Some(2), None]),
4025            Some(vec![Some(2), Some(0)]),
4026            Some(vec![Some(2), Some(1)]),
4027            Some(vec![Some(2), Some(1)]),
4028            Some(vec![Some(3)]),
4029        ];
4030        let expected_primitive_array_data = vec![
4031            Some(10),
4032            Some(11),
4033            Some(5),
4034            Some(3),
4035            None,
4036            Some(4),
4037            Some(2),
4038            Some(0),
4039            Some(6),
4040            Some(1),
4041        ];
4042        test_lex_sort_mixed_types_with_list::<Int32Type>(
4043            list_array_data.clone(),
4044            primitive_array_data.clone(),
4045            expected_list_array_data,
4046            expected_primitive_array_data,
4047            None,
4048            None,
4049        );
4050    }
4051
4052    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4053        primitive_array_data: Vec<Option<T::Native>>,
4054        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4055        expected_primitive_array_data: Vec<Option<T::Native>>,
4056        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4057        primitive_array_options: Option<SortOptions>,
4058        list_array_options: Option<SortOptions>,
4059    ) where
4060        T: ArrowPrimitiveType,
4061        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4062    {
4063        let input = vec![
4064            SortColumn {
4065                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4066                    as ArrayRef,
4067                options: primitive_array_options,
4068            },
4069            SortColumn {
4070                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4071                    list_array_data.clone(),
4072                    1,
4073                )) as ArrayRef,
4074                options: list_array_options,
4075            },
4076        ];
4077
4078        let expected = vec![
4079            Arc::new(PrimitiveArray::<T>::from(
4080                expected_primitive_array_data.clone(),
4081            )) as ArrayRef,
4082            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4083                expected_list_array_data.clone(),
4084                1,
4085            )) as ArrayRef,
4086        ];
4087
4088        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4089        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4090    }
4091
4092    fn test_lex_sort_mixed_types_with_list<T>(
4093        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4094        primitive_array_data: Vec<Option<T::Native>>,
4095        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4096        expected_primitive_array_data: Vec<Option<T::Native>>,
4097        list_array_options: Option<SortOptions>,
4098        primitive_array_options: Option<SortOptions>,
4099    ) where
4100        T: ArrowPrimitiveType,
4101        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4102    {
4103        macro_rules! run_test {
4104            ($ARRAY_TYPE:ident) => {
4105                let input = vec![
4106                    SortColumn {
4107                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4108                            list_array_data.clone(),
4109                        )) as ArrayRef,
4110                        options: list_array_options.clone(),
4111                    },
4112                    SortColumn {
4113                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4114                            as ArrayRef,
4115                        options: primitive_array_options.clone(),
4116                    },
4117                ];
4118
4119                let expected = vec![
4120                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4121                        expected_list_array_data.clone(),
4122                    )) as ArrayRef,
4123                    Arc::new(PrimitiveArray::<T>::from(
4124                        expected_primitive_array_data.clone(),
4125                    )) as ArrayRef,
4126                ];
4127
4128                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4129                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4130            };
4131        }
4132        run_test!(ListArray);
4133        run_test!(LargeListArray);
4134    }
4135
4136    #[test]
4137    fn test_partial_sort() {
4138        let mut before: Vec<&str> = vec![
4139            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4140        ];
4141        let mut d = before.clone();
4142        d.sort_unstable();
4143
4144        for last in 0..before.len() {
4145            partial_sort(&mut before, last, |a, b| a.cmp(b));
4146            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4147        }
4148    }
4149
4150    #[test]
4151    fn test_partial_rand_sort() {
4152        let size = 1000u32;
4153        let mut rng = StdRng::seed_from_u64(42);
4154        let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect();
4155        let mut d = before.clone();
4156        let last = (rng.next_u32() % size) as usize;
4157        d.sort_unstable();
4158
4159        partial_sort(&mut before, last, |a, b| a.cmp(b));
4160        assert_eq!(&d[0..last], &before[0..last]);
4161    }
4162
4163    #[test]
4164    fn test_sort_int8_dicts() {
4165        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4166        let values = Int8Array::from(vec![1, 3, 5]);
4167        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4168            keys,
4169            values,
4170            None,
4171            None,
4172            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4173        );
4174
4175        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4176        let values = Int8Array::from(vec![1, 3, 5]);
4177        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4178            keys,
4179            values,
4180            Some(SortOptions {
4181                descending: true,
4182                nulls_first: false,
4183            }),
4184            None,
4185            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4186        );
4187
4188        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4189        let values = Int8Array::from(vec![1, 3, 5]);
4190        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4191            keys,
4192            values,
4193            Some(SortOptions {
4194                descending: false,
4195                nulls_first: false,
4196            }),
4197            None,
4198            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4199        );
4200
4201        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4202        let values = Int8Array::from(vec![1, 3, 5]);
4203        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4204            keys,
4205            values,
4206            Some(SortOptions {
4207                descending: true,
4208                nulls_first: true,
4209            }),
4210            Some(3),
4211            vec![None, None, Some(5)],
4212        );
4213
4214        // Values have `None`.
4215        let keys = Int8Array::from(vec![
4216            Some(1_i8),
4217            None,
4218            Some(3),
4219            None,
4220            Some(2),
4221            Some(3),
4222            Some(0),
4223        ]);
4224        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4225        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4226            keys,
4227            values,
4228            None,
4229            None,
4230            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4231        );
4232
4233        let keys = Int8Array::from(vec![
4234            Some(1_i8),
4235            None,
4236            Some(3),
4237            None,
4238            Some(2),
4239            Some(3),
4240            Some(0),
4241        ]);
4242        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4243        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4244            keys,
4245            values,
4246            Some(SortOptions {
4247                descending: false,
4248                nulls_first: false,
4249            }),
4250            None,
4251            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4252        );
4253
4254        let keys = Int8Array::from(vec![
4255            Some(1_i8),
4256            None,
4257            Some(3),
4258            None,
4259            Some(2),
4260            Some(3),
4261            Some(0),
4262        ]);
4263        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4264        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4265            keys,
4266            values,
4267            Some(SortOptions {
4268                descending: true,
4269                nulls_first: false,
4270            }),
4271            None,
4272            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4273        );
4274
4275        let keys = Int8Array::from(vec![
4276            Some(1_i8),
4277            None,
4278            Some(3),
4279            None,
4280            Some(2),
4281            Some(3),
4282            Some(0),
4283        ]);
4284        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4285        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4286            keys,
4287            values,
4288            Some(SortOptions {
4289                descending: true,
4290                nulls_first: true,
4291            }),
4292            None,
4293            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4294        );
4295    }
4296
4297    #[test]
4298    fn test_sort_f32_dicts() {
4299        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4300        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4301        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4302            keys,
4303            values,
4304            None,
4305            None,
4306            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4307        );
4308
4309        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4310        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4311        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4312            keys,
4313            values,
4314            Some(SortOptions {
4315                descending: true,
4316                nulls_first: false,
4317            }),
4318            None,
4319            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4320        );
4321
4322        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4323        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4324        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4325            keys,
4326            values,
4327            Some(SortOptions {
4328                descending: false,
4329                nulls_first: false,
4330            }),
4331            None,
4332            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4333        );
4334
4335        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4336        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4337        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4338            keys,
4339            values,
4340            Some(SortOptions {
4341                descending: true,
4342                nulls_first: true,
4343            }),
4344            Some(3),
4345            vec![None, None, Some(5.1)],
4346        );
4347
4348        // Values have `None`.
4349        let keys = Int8Array::from(vec![
4350            Some(1_i8),
4351            None,
4352            Some(3),
4353            None,
4354            Some(2),
4355            Some(3),
4356            Some(0),
4357        ]);
4358        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4359        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4360            keys,
4361            values,
4362            None,
4363            None,
4364            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4365        );
4366
4367        let keys = Int8Array::from(vec![
4368            Some(1_i8),
4369            None,
4370            Some(3),
4371            None,
4372            Some(2),
4373            Some(3),
4374            Some(0),
4375        ]);
4376        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4377        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4378            keys,
4379            values,
4380            Some(SortOptions {
4381                descending: false,
4382                nulls_first: false,
4383            }),
4384            None,
4385            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4386        );
4387
4388        let keys = Int8Array::from(vec![
4389            Some(1_i8),
4390            None,
4391            Some(3),
4392            None,
4393            Some(2),
4394            Some(3),
4395            Some(0),
4396        ]);
4397        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4398        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4399            keys,
4400            values,
4401            Some(SortOptions {
4402                descending: true,
4403                nulls_first: false,
4404            }),
4405            None,
4406            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4407        );
4408
4409        let keys = Int8Array::from(vec![
4410            Some(1_i8),
4411            None,
4412            Some(3),
4413            None,
4414            Some(2),
4415            Some(3),
4416            Some(0),
4417        ]);
4418        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4419        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4420            keys,
4421            values,
4422            Some(SortOptions {
4423                descending: true,
4424                nulls_first: true,
4425            }),
4426            None,
4427            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4428        );
4429    }
4430
4431    #[test]
4432    fn test_lexicographic_comparator_null_dict_values() {
4433        let values = Int32Array::new(
4434            vec![1, 2, 3, 4].into(),
4435            Some(NullBuffer::from(vec![true, false, false, true])),
4436        );
4437        let keys = Int32Array::new(
4438            vec![0, 1, 53, 3].into(),
4439            Some(NullBuffer::from(vec![true, true, false, true])),
4440        );
4441        // [1, NULL, NULL, 4]
4442        let dict = DictionaryArray::new(keys, Arc::new(values));
4443
4444        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4445            values: Arc::new(dict),
4446            options: None,
4447        }])
4448        .unwrap();
4449        // 1.cmp(NULL)
4450        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4451        // NULL.cmp(NULL)
4452        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4453        // NULL.cmp(4)
4454        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4455    }
4456
4457    #[test]
4458    fn sort_list_equal() {
4459        let a = {
4460            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4461            for value in [[1, 5], [0, 3], [1, 3]] {
4462                builder.values().append_slice(&value);
4463                builder.append(true);
4464            }
4465            builder.finish()
4466        };
4467
4468        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4469        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4470
4471        let a = {
4472            let mut builder = ListBuilder::new(Int64Builder::new());
4473            for value in [[1, 5], [0, 3], [1, 3]] {
4474                builder.values().append_slice(&value);
4475                builder.append(true);
4476            }
4477            builder.finish()
4478        };
4479
4480        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4481        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4482    }
4483
4484    #[test]
4485    fn sort_struct_fallback_to_lexsort() {
4486        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4487        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4488
4489        let struct_array = StructArray::from(vec![
4490            (
4491                Arc::new(Field::new("b", DataType::Float32, false)),
4492                float.clone() as ArrayRef,
4493            ),
4494            (
4495                Arc::new(Field::new("c", DataType::Int32, false)),
4496                int.clone() as ArrayRef,
4497            ),
4498        ]);
4499
4500        assert!(!can_sort_to_indices(struct_array.data_type()));
4501        assert!(sort_to_indices(&struct_array, None, None)
4502            .err()
4503            .unwrap()
4504            .to_string()
4505            .contains("Sort not supported for data type"));
4506
4507        let sort_columns = vec![SortColumn {
4508            values: Arc::new(struct_array.clone()) as ArrayRef,
4509            options: None,
4510        }];
4511        let sorted = lexsort(&sort_columns, None).unwrap();
4512
4513        let expected_struct_array = Arc::new(StructArray::from(vec![
4514            (
4515                Arc::new(Field::new("b", DataType::Float32, false)),
4516                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4517            ),
4518            (
4519                Arc::new(Field::new("c", DataType::Int32, false)),
4520                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4521            ),
4522        ])) as ArrayRef;
4523
4524        assert_eq!(&sorted[0], &expected_struct_array);
4525    }
4526}