1use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183 match array.null_count() {
184 0 => ((0..(array.len() as u32)).collect(), vec![]),
186 _ => {
187 let indices = 0..(array.len() as u32);
188 indices.partition(|index| array.is_valid(*index as usize))
189 }
190 }
191}
192
193fn can_sort_to_indices(data_type: &DataType) -> bool {
195 data_type.is_primitive()
196 || matches!(
197 data_type,
198 DataType::Boolean
199 | DataType::Utf8
200 | DataType::LargeUtf8
201 | DataType::Utf8View
202 | DataType::Binary
203 | DataType::LargeBinary
204 | DataType::BinaryView
205 | DataType::FixedSizeBinary(_)
206 )
207 || match data_type {
208 DataType::List(f) if can_rank(f.data_type()) => true,
209 DataType::LargeList(f) if can_rank(f.data_type()) => true,
210 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213 _ => false,
214 }
215}
216
217pub fn sort_to_indices(
220 array: &dyn Array,
221 options: Option<SortOptions>,
222 limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224 let options = options.unwrap_or_default();
225
226 let (v, n) = partition_validity(array);
227
228 Ok(downcast_primitive_array! {
229 array => sort_primitive(array, v, n, options, limit),
230 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241 DataType::Dictionary(_, _) => downcast_dictionary_array!{
242 array => sort_dictionary(array, v, n, options, limit)?,
243 _ => unreachable!()
244 }
245 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249 dt => {
250 return Err(ArrowError::ComputeError(format!(
251 "Invalid run end data type: {dt}"
252 )))
253 }
254 },
255 t => {
256 return Err(ArrowError::ComputeError(format!(
257 "Sort not supported for data type {t:?}"
258 )));
259 }
260 })
261}
262
263fn sort_boolean(
264 values: &BooleanArray,
265 value_indices: Vec<u32>,
266 null_indices: Vec<u32>,
267 options: SortOptions,
268 limit: Option<usize>,
269) -> UInt32Array {
270 let mut valids = value_indices
271 .into_iter()
272 .map(|index| (index, values.value(index as usize)))
273 .collect::<Vec<(u32, bool)>>();
274 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278 values: &PrimitiveArray<T>,
279 value_indices: Vec<u32>,
280 nulls: Vec<u32>,
281 options: SortOptions,
282 limit: Option<usize>,
283) -> UInt32Array {
284 let mut valids = value_indices
285 .into_iter()
286 .map(|index| (index, values.value(index as usize)))
287 .collect::<Vec<(u32, T::Native)>>();
288 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292 values: &GenericByteArray<T>,
293 value_indices: Vec<u32>,
294 nulls: Vec<u32>,
295 options: SortOptions,
296 limit: Option<usize>,
297) -> UInt32Array {
298 let mut valids = value_indices
299 .into_iter()
300 .map(|index| (index, values.value(index as usize).as_ref()))
301 .collect::<Vec<(u32, &[u8])>>();
302
303 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307 values: &GenericByteViewArray<T>,
308 value_indices: Vec<u32>,
309 nulls: Vec<u32>,
310 options: SortOptions,
311 limit: Option<usize>,
312) -> UInt32Array {
313 let mut valids = value_indices
314 .into_iter()
315 .map(|index| (index, values.value(index as usize).as_ref()))
316 .collect::<Vec<(u32, &[u8])>>();
317 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
318}
319
320fn sort_fixed_size_binary(
321 values: &FixedSizeBinaryArray,
322 value_indices: Vec<u32>,
323 nulls: Vec<u32>,
324 options: SortOptions,
325 limit: Option<usize>,
326) -> UInt32Array {
327 let mut valids = value_indices
328 .iter()
329 .copied()
330 .map(|index| (index, values.value(index as usize)))
331 .collect::<Vec<(u32, &[u8])>>();
332 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
333}
334
335fn sort_dictionary<K: ArrowDictionaryKeyType>(
336 dict: &DictionaryArray<K>,
337 value_indices: Vec<u32>,
338 null_indices: Vec<u32>,
339 options: SortOptions,
340 limit: Option<usize>,
341) -> Result<UInt32Array, ArrowError> {
342 let keys: &PrimitiveArray<K> = dict.keys();
343 let rank = child_rank(dict.values().as_ref(), options)?;
344
345 let mut valids = value_indices
347 .into_iter()
348 .map(|index| {
349 let key: K::Native = keys.value(index as usize);
350 (index, rank[key.as_usize()])
351 })
352 .collect::<Vec<(u32, u32)>>();
353
354 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
355}
356
357fn sort_list<O: OffsetSizeTrait>(
358 array: &GenericListArray<O>,
359 value_indices: Vec<u32>,
360 null_indices: Vec<u32>,
361 options: SortOptions,
362 limit: Option<usize>,
363) -> Result<UInt32Array, ArrowError> {
364 let rank = child_rank(array.values().as_ref(), options)?;
365 let offsets = array.value_offsets();
366 let mut valids = value_indices
367 .into_iter()
368 .map(|index| {
369 let end = offsets[index as usize + 1].as_usize();
370 let start = offsets[index as usize].as_usize();
371 (index, &rank[start..end])
372 })
373 .collect::<Vec<(u32, &[u32])>>();
374 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
375}
376
377fn sort_fixed_size_list(
378 array: &FixedSizeListArray,
379 value_indices: Vec<u32>,
380 null_indices: Vec<u32>,
381 options: SortOptions,
382 limit: Option<usize>,
383) -> Result<UInt32Array, ArrowError> {
384 let rank = child_rank(array.values().as_ref(), options)?;
385 let size = array.value_length() as usize;
386 let mut valids = value_indices
387 .into_iter()
388 .map(|index| {
389 let start = index as usize * size;
390 (index, &rank[start..start + size])
391 })
392 .collect::<Vec<(u32, &[u32])>>();
393 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
394}
395
396#[inline(never)]
397fn sort_impl<T: Copy>(
398 options: SortOptions,
399 valids: &mut [(u32, T)],
400 nulls: &[u32],
401 limit: Option<usize>,
402 mut cmp: impl FnMut(T, T) -> Ordering,
403) -> Vec<u32> {
404 let v_limit = match (limit, options.nulls_first) {
405 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
406 _ => valids.len(),
407 };
408
409 match options.descending {
410 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
411 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
412 }
413
414 let len = valids.len() + nulls.len();
415 let limit = limit.unwrap_or(len).min(len);
416 let mut out = Vec::with_capacity(len);
417 match options.nulls_first {
418 true => {
419 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
420 let remaining = limit - out.len();
421 out.extend(valids.iter().map(|x| x.0).take(remaining));
422 }
423 false => {
424 out.extend(valids.iter().map(|x| x.0).take(limit));
425 let remaining = limit - out.len();
426 out.extend_from_slice(&nulls[..remaining])
427 }
428 }
429 out
430}
431
432fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
434 let value_options = Some(SortOptions {
437 descending: false,
438 nulls_first: options.nulls_first != options.descending,
439 });
440 rank(values, value_options)
441}
442
443fn sort_run(
449 values: &dyn Array,
450 options: Option<SortOptions>,
451 limit: Option<usize>,
452) -> Result<ArrayRef, ArrowError> {
453 match values.data_type() {
454 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
455 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
456 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
457 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
458 dt => unreachable!("Not valid run ends data type {dt}"),
459 },
460 dt => Err(ArrowError::InvalidArgumentError(format!(
461 "Input is not a run encoded array. Input data type {dt}"
462 ))),
463 }
464}
465
466fn sort_run_downcasted<R: RunEndIndexType>(
467 values: &dyn Array,
468 options: Option<SortOptions>,
469 limit: Option<usize>,
470) -> Result<ArrayRef, ArrowError> {
471 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
472
473 let output_len = if let Some(limit) = limit {
475 limit.min(run_array.len())
476 } else {
477 run_array.len()
478 };
479
480 let run_ends = run_array.run_ends();
481
482 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
483 let mut new_run_end: usize = 0;
484 let mut new_physical_len: usize = 0;
485
486 let consume_runs = |run_length, _| {
487 new_run_end += run_length;
488 new_physical_len += 1;
489 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
490 };
491
492 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
493
494 let new_run_ends = unsafe {
495 ArrayDataBuilder::new(R::DATA_TYPE)
498 .len(new_physical_len)
499 .add_buffer(new_run_ends_builder.finish())
500 .build_unchecked()
501 };
502
503 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
505 .slice(0, new_run_ends.len())
506 .into_data()
507 .into();
508
509 let new_values = take(&run_values, &new_values_indices, None)?;
510
511 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
513 .len(new_run_end)
514 .add_child_data(new_run_ends)
515 .add_child_data(new_values.into_data());
516 let array_data: RunArray<R> = unsafe {
517 builder.build_unchecked().into()
520 };
521 Ok(Arc::new(array_data))
522}
523
524fn sort_run_to_indices<R: RunEndIndexType>(
529 values: &dyn Array,
530 options: SortOptions,
531 limit: Option<usize>,
532) -> UInt32Array {
533 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
534 let output_len = if let Some(limit) = limit {
535 limit.min(run_array.len())
536 } else {
537 run_array.len()
538 };
539 let mut result: Vec<u32> = Vec::with_capacity(output_len);
540
541 let consume_runs = |run_length, logical_start| {
543 result.extend(logical_start as u32..(logical_start + run_length) as u32);
544 };
545 sort_run_inner(run_array, Some(options), output_len, consume_runs);
546
547 UInt32Array::from(result)
548}
549
550fn sort_run_inner<R: RunEndIndexType, F>(
551 run_array: &RunArray<R>,
552 options: Option<SortOptions>,
553 output_len: usize,
554 mut consume_runs: F,
555) -> (PrimitiveArray<UInt32Type>, ArrayRef)
556where
557 F: FnMut(usize, usize),
558{
559 let start_physical_index = run_array.get_start_physical_index();
561 let end_physical_index = run_array.get_end_physical_index();
562 let physical_len = end_physical_index - start_physical_index + 1;
563 let run_values = run_array.values().slice(start_physical_index, physical_len);
564
565 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
567
568 let mut remaining_len = output_len;
569
570 let run_ends = run_array.run_ends().values();
571
572 assert_eq!(
573 0,
574 values_indices.null_count(),
575 "The output of sort_to_indices should not have null values. Its values is {}",
576 values_indices.null_count()
577 );
578
579 for physical_index in values_indices.values() {
583 let physical_index = *physical_index as usize + start_physical_index;
586
587 let (run_length, logical_index_start) = unsafe {
589 if physical_index == start_physical_index {
593 (
594 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
595 0,
596 )
597 } else if physical_index == end_physical_index {
598 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
599 (
600 run_array.offset() + run_array.len() - prev_run_end,
601 prev_run_end - run_array.offset(),
602 )
603 } else {
604 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
605 (
606 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
607 prev_run_end - run_array.offset(),
608 )
609 }
610 };
611 let new_run_length = run_length.min(remaining_len);
612 consume_runs(new_run_length, logical_index_start);
613 remaining_len -= new_run_length;
614
615 if remaining_len == 0 {
616 break;
617 }
618 }
619
620 if remaining_len > 0 {
621 panic!("Remaining length should be zero its values is {remaining_len}")
622 }
623 (values_indices, run_values)
624}
625
626#[derive(Clone, Debug)]
628pub struct SortColumn {
629 pub values: ArrayRef,
631 pub options: Option<SortOptions>,
633}
634
635pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
686 let indices = lexsort_to_indices(columns, limit)?;
687 columns
688 .iter()
689 .map(|c| take(c.values.as_ref(), &indices, None))
690 .collect()
691}
692
693pub fn lexsort_to_indices(
699 columns: &[SortColumn],
700 limit: Option<usize>,
701) -> Result<UInt32Array, ArrowError> {
702 if columns.is_empty() {
703 return Err(ArrowError::InvalidArgumentError(
704 "Sort requires at least one column".to_string(),
705 ));
706 }
707 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
708 let column = &columns[0];
710 return sort_to_indices(&column.values, column.options, limit);
711 }
712
713 let row_count = columns[0].values.len();
714 if columns.iter().any(|item| item.values.len() != row_count) {
715 return Err(ArrowError::ComputeError(
716 "lexical sort columns have different row counts".to_string(),
717 ));
718 };
719
720 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
721 let mut len = value_indices.len();
722
723 if let Some(limit) = limit {
724 len = limit.min(len);
725 }
726
727 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
728 sort_unstable_by(&mut value_indices, len, |a, b| {
730 lexicographical_comparator.compare(*a, *b)
731 });
732
733 Ok(UInt32Array::from_iter_values(
734 value_indices.iter().take(len).map(|i| *i as u32),
735 ))
736}
737
738pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
740where
741 F: FnMut(&T, &T) -> Ordering,
742{
743 if let Some(n) = limit.checked_sub(1) {
744 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
745 before.sort_unstable_by(is_less);
746 }
747}
748
749pub struct LexicographicalComparator {
752 compare_items: Vec<DynComparator>,
753}
754
755impl LexicographicalComparator {
756 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
758 for comparator in &self.compare_items {
759 match comparator(a_idx, b_idx) {
760 Ordering::Equal => continue,
761 r => return r,
762 }
763 }
764 Ordering::Equal
765 }
766
767 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
770 let compare_items = columns
771 .iter()
772 .map(|c| {
773 make_comparator(
774 c.values.as_ref(),
775 c.values.as_ref(),
776 c.options.unwrap_or_default(),
777 )
778 })
779 .collect::<Result<Vec<_>, ArrowError>>()?;
780 Ok(LexicographicalComparator { compare_items })
781 }
782}
783
784#[cfg(test)]
785mod tests {
786 use super::*;
787 use arrow_array::builder::{
788 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
789 PrimitiveRunBuilder,
790 };
791 use arrow_buffer::{i256, NullBuffer};
792 use arrow_schema::Field;
793 use half::f16;
794 use rand::rngs::StdRng;
795 use rand::seq::SliceRandom;
796 use rand::{Rng, RngCore, SeedableRng};
797
798 fn create_decimal_array<T: DecimalType>(
799 data: Vec<Option<usize>>,
800 precision: u8,
801 scale: i8,
802 ) -> PrimitiveArray<T> {
803 data.into_iter()
804 .map(|x| x.and_then(T::Native::from_usize))
805 .collect::<PrimitiveArray<T>>()
806 .with_precision_and_scale(precision, scale)
807 .unwrap()
808 }
809
810 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
811 data.into_iter()
812 .collect::<Decimal256Array>()
813 .with_precision_and_scale(53, 6)
814 .unwrap()
815 }
816
817 fn test_sort_to_indices_decimal_array<T: DecimalType>(
818 data: Vec<Option<usize>>,
819 options: Option<SortOptions>,
820 limit: Option<usize>,
821 expected_data: Vec<u32>,
822 precision: u8,
823 scale: i8,
824 ) {
825 let output = create_decimal_array::<T>(data, precision, scale);
826 let expected = UInt32Array::from(expected_data);
827 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
828 assert_eq!(output, expected)
829 }
830
831 fn test_sort_to_indices_decimal256_array(
832 data: Vec<Option<i256>>,
833 options: Option<SortOptions>,
834 limit: Option<usize>,
835 expected_data: Vec<u32>,
836 ) {
837 let output = create_decimal256_array(data);
838 let expected = UInt32Array::from(expected_data);
839 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
840 assert_eq!(output, expected)
841 }
842
843 fn test_sort_decimal_array<T: DecimalType>(
844 data: Vec<Option<usize>>,
845 options: Option<SortOptions>,
846 limit: Option<usize>,
847 expected_data: Vec<Option<usize>>,
848 p: u8,
849 s: i8,
850 ) {
851 let output = create_decimal_array::<T>(data, p, s);
852 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
853 let output = match limit {
854 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
855 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
856 };
857 assert_eq!(&output, &expected)
858 }
859
860 fn test_sort_decimal256_array(
861 data: Vec<Option<i256>>,
862 options: Option<SortOptions>,
863 limit: Option<usize>,
864 expected_data: Vec<Option<i256>>,
865 ) {
866 let output = create_decimal256_array(data);
867 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
868 let output = match limit {
869 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
870 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
871 };
872 assert_eq!(&output, &expected)
873 }
874
875 fn test_sort_to_indices_boolean_arrays(
876 data: Vec<Option<bool>>,
877 options: Option<SortOptions>,
878 limit: Option<usize>,
879 expected_data: Vec<u32>,
880 ) {
881 let output = BooleanArray::from(data);
882 let expected = UInt32Array::from(expected_data);
883 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
884 assert_eq!(output, expected)
885 }
886
887 fn test_sort_to_indices_primitive_arrays<T>(
888 data: Vec<Option<T::Native>>,
889 options: Option<SortOptions>,
890 limit: Option<usize>,
891 expected_data: Vec<u32>,
892 ) where
893 T: ArrowPrimitiveType,
894 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
895 {
896 let output = PrimitiveArray::<T>::from(data);
897 let expected = UInt32Array::from(expected_data);
898 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
899 assert_eq!(output, expected)
900 }
901
902 fn test_sort_primitive_arrays<T>(
903 data: Vec<Option<T::Native>>,
904 options: Option<SortOptions>,
905 limit: Option<usize>,
906 expected_data: Vec<Option<T::Native>>,
907 ) where
908 T: ArrowPrimitiveType,
909 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
910 {
911 let output = PrimitiveArray::<T>::from(data);
912 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
913 let output = match limit {
914 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
915 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
916 };
917 assert_eq!(&output, &expected)
918 }
919
920 fn test_sort_to_indices_string_arrays(
921 data: Vec<Option<&str>>,
922 options: Option<SortOptions>,
923 limit: Option<usize>,
924 expected_data: Vec<u32>,
925 ) {
926 let output = StringArray::from(data);
927 let expected = UInt32Array::from(expected_data);
928 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
929 assert_eq!(output, expected)
930 }
931
932 fn test_sort_string_arrays(
934 data: Vec<Option<&str>>,
935 options: Option<SortOptions>,
936 limit: Option<usize>,
937 expected_data: Vec<Option<&str>>,
938 ) {
939 let output = StringArray::from(data.clone());
940 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
941 let output = match limit {
942 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
943 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
944 };
945 assert_eq!(&output, &expected);
946
947 let output = LargeStringArray::from(data.clone());
948 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
949 let output = match limit {
950 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
951 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
952 };
953 assert_eq!(&output, &expected);
954
955 let output = StringViewArray::from(data);
956 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
957 let output = match limit {
958 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
959 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
960 };
961 assert_eq!(&output, &expected);
962 }
963
964 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
965 data: Vec<Option<&str>>,
966 options: Option<SortOptions>,
967 limit: Option<usize>,
968 expected_data: Vec<Option<&str>>,
969 ) {
970 let array = data.into_iter().collect::<DictionaryArray<T>>();
971 let array_values = array.values().clone();
972 let dict = array_values
973 .as_any()
974 .downcast_ref::<StringArray>()
975 .expect("Unable to get dictionary values");
976
977 let sorted = match limit {
978 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
979 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
980 };
981 let sorted = sorted
982 .as_any()
983 .downcast_ref::<DictionaryArray<T>>()
984 .unwrap();
985 let sorted_values = sorted.values();
986 let sorted_dict = sorted_values
987 .as_any()
988 .downcast_ref::<StringArray>()
989 .expect("Unable to get dictionary values");
990 let sorted_keys = sorted.keys();
991
992 assert_eq!(sorted_dict, dict);
993
994 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
995 if sorted.is_valid(i) {
996 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
997 } else {
998 None
999 }
1000 }));
1001 let expected = StringArray::from(expected_data);
1002
1003 assert_eq!(sorted_strings, expected)
1004 }
1005
1006 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1007 keys: PrimitiveArray<K>,
1008 values: PrimitiveArray<T>,
1009 options: Option<SortOptions>,
1010 limit: Option<usize>,
1011 expected_data: Vec<Option<T::Native>>,
1012 ) where
1013 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1014 {
1015 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1016 let array_values = array.values().clone();
1017 let dict = array_values.as_primitive::<T>();
1018
1019 let sorted = match limit {
1020 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1021 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1022 };
1023 let sorted = sorted
1024 .as_any()
1025 .downcast_ref::<DictionaryArray<K>>()
1026 .unwrap();
1027 let sorted_values = sorted.values();
1028 let sorted_dict = sorted_values
1029 .as_any()
1030 .downcast_ref::<PrimitiveArray<T>>()
1031 .expect("Unable to get dictionary values");
1032 let sorted_keys = sorted.keys();
1033
1034 assert_eq!(sorted_dict, dict);
1035
1036 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1037 (0..sorted.len())
1038 .map(|i| {
1039 let key = sorted_keys.value(i).as_usize();
1040 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1041 Some(sorted_dict.value(key))
1042 } else {
1043 None
1044 }
1045 })
1046 .collect::<Vec<Option<T::Native>>>(),
1047 );
1048 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1049
1050 assert_eq!(sorted_values, expected)
1051 }
1052
1053 fn test_sort_list_arrays<T>(
1054 data: Vec<Option<Vec<Option<T::Native>>>>,
1055 options: Option<SortOptions>,
1056 limit: Option<usize>,
1057 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1058 fixed_length: Option<i32>,
1059 ) where
1060 T: ArrowPrimitiveType,
1061 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1062 {
1063 if let Some(length) = fixed_length {
1065 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1066 data.clone(),
1067 length,
1068 ));
1069 let sorted = match limit {
1070 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1071 _ => sort(&(input as ArrayRef), options).unwrap(),
1072 };
1073 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1074 expected_data.clone(),
1075 length,
1076 )) as ArrayRef;
1077
1078 assert_eq!(&sorted, &expected);
1079 }
1080
1081 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1083 let sorted = match limit {
1084 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1085 _ => sort(&(input as ArrayRef), options).unwrap(),
1086 };
1087 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1088 expected_data.clone(),
1089 )) as ArrayRef;
1090
1091 assert_eq!(&sorted, &expected);
1092
1093 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1095 let sorted = match limit {
1096 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1097 _ => sort(&(input as ArrayRef), options).unwrap(),
1098 };
1099 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1100 expected_data,
1101 )) as ArrayRef;
1102
1103 assert_eq!(&sorted, &expected);
1104 }
1105
1106 fn test_lex_sort_arrays(
1107 input: Vec<SortColumn>,
1108 expected_output: Vec<ArrayRef>,
1109 limit: Option<usize>,
1110 ) {
1111 let sorted = lexsort(&input, limit).unwrap();
1112
1113 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1114 assert_eq!(result, expected);
1115 }
1116 }
1117
1118 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1120 expected_output
1121 .into_iter()
1122 .map(|array| array.slice(offset, length))
1123 .collect()
1124 }
1125
1126 fn test_sort_binary_arrays(
1127 data: Vec<Option<Vec<u8>>>,
1128 options: Option<SortOptions>,
1129 limit: Option<usize>,
1130 expected_data: Vec<Option<Vec<u8>>>,
1131 fixed_length: Option<i32>,
1132 ) {
1133 if let Some(length) = fixed_length {
1135 let input = Arc::new(
1136 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1137 .unwrap(),
1138 );
1139 let sorted = match limit {
1140 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1141 None => sort(&(input as ArrayRef), options).unwrap(),
1142 };
1143 let expected = Arc::new(
1144 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1145 expected_data.iter().cloned(),
1146 length,
1147 )
1148 .unwrap(),
1149 ) as ArrayRef;
1150
1151 assert_eq!(&sorted, &expected);
1152 }
1153
1154 fn make_generic_binary_array<S: OffsetSizeTrait>(
1156 data: &[Option<Vec<u8>>],
1157 ) -> Arc<GenericBinaryArray<S>> {
1158 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1159 data.iter()
1160 .map(|binary| binary.as_ref().map(Vec::as_slice))
1161 .collect(),
1162 ))
1163 }
1164
1165 let input = make_generic_binary_array::<i32>(&data);
1167 let sorted = match limit {
1168 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1169 None => sort(&(input as ArrayRef), options).unwrap(),
1170 };
1171 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1172 assert_eq!(&sorted, &expected);
1173
1174 let input = make_generic_binary_array::<i64>(&data);
1176 let sorted = match limit {
1177 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1178 None => sort(&(input as ArrayRef), options).unwrap(),
1179 };
1180 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1181 assert_eq!(&sorted, &expected);
1182 }
1183
1184 #[test]
1185 fn test_sort_to_indices_primitives() {
1186 test_sort_to_indices_primitive_arrays::<Int8Type>(
1187 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1188 None,
1189 None,
1190 vec![0, 5, 3, 1, 4, 2],
1191 );
1192 test_sort_to_indices_primitive_arrays::<Int16Type>(
1193 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1194 None,
1195 None,
1196 vec![0, 5, 3, 1, 4, 2],
1197 );
1198 test_sort_to_indices_primitive_arrays::<Int32Type>(
1199 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1200 None,
1201 None,
1202 vec![0, 5, 3, 1, 4, 2],
1203 );
1204 test_sort_to_indices_primitive_arrays::<Int64Type>(
1205 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1206 None,
1207 None,
1208 vec![0, 5, 3, 1, 4, 2],
1209 );
1210 test_sort_to_indices_primitive_arrays::<Float16Type>(
1211 vec![
1212 None,
1213 Some(f16::from_f32(-0.05)),
1214 Some(f16::from_f32(2.225)),
1215 Some(f16::from_f32(-1.01)),
1216 Some(f16::from_f32(-0.05)),
1217 None,
1218 ],
1219 None,
1220 None,
1221 vec![0, 5, 3, 1, 4, 2],
1222 );
1223 test_sort_to_indices_primitive_arrays::<Float32Type>(
1224 vec![
1225 None,
1226 Some(-0.05),
1227 Some(2.225),
1228 Some(-1.01),
1229 Some(-0.05),
1230 None,
1231 ],
1232 None,
1233 None,
1234 vec![0, 5, 3, 1, 4, 2],
1235 );
1236 test_sort_to_indices_primitive_arrays::<Float64Type>(
1237 vec![
1238 None,
1239 Some(-0.05),
1240 Some(2.225),
1241 Some(-1.01),
1242 Some(-0.05),
1243 None,
1244 ],
1245 None,
1246 None,
1247 vec![0, 5, 3, 1, 4, 2],
1248 );
1249
1250 test_sort_to_indices_primitive_arrays::<Int8Type>(
1252 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1253 Some(SortOptions {
1254 descending: true,
1255 nulls_first: false,
1256 }),
1257 None,
1258 vec![2, 1, 4, 3, 0, 5],
1259 );
1260
1261 test_sort_to_indices_primitive_arrays::<Int16Type>(
1262 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1263 Some(SortOptions {
1264 descending: true,
1265 nulls_first: false,
1266 }),
1267 None,
1268 vec![2, 1, 4, 3, 0, 5],
1269 );
1270
1271 test_sort_to_indices_primitive_arrays::<Int32Type>(
1272 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1273 Some(SortOptions {
1274 descending: true,
1275 nulls_first: false,
1276 }),
1277 None,
1278 vec![2, 1, 4, 3, 0, 5],
1279 );
1280
1281 test_sort_to_indices_primitive_arrays::<Int64Type>(
1282 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1283 Some(SortOptions {
1284 descending: true,
1285 nulls_first: false,
1286 }),
1287 None,
1288 vec![2, 1, 4, 3, 0, 5],
1289 );
1290
1291 test_sort_to_indices_primitive_arrays::<Float16Type>(
1292 vec![
1293 None,
1294 Some(f16::from_f32(0.005)),
1295 Some(f16::from_f32(20.22)),
1296 Some(f16::from_f32(-10.3)),
1297 Some(f16::from_f32(0.005)),
1298 None,
1299 ],
1300 Some(SortOptions {
1301 descending: true,
1302 nulls_first: false,
1303 }),
1304 None,
1305 vec![2, 1, 4, 3, 0, 5],
1306 );
1307
1308 test_sort_to_indices_primitive_arrays::<Float32Type>(
1309 vec![
1310 None,
1311 Some(0.005),
1312 Some(20.22),
1313 Some(-10.3),
1314 Some(0.005),
1315 None,
1316 ],
1317 Some(SortOptions {
1318 descending: true,
1319 nulls_first: false,
1320 }),
1321 None,
1322 vec![2, 1, 4, 3, 0, 5],
1323 );
1324
1325 test_sort_to_indices_primitive_arrays::<Float64Type>(
1326 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1327 Some(SortOptions {
1328 descending: true,
1329 nulls_first: false,
1330 }),
1331 None,
1332 vec![2, 1, 4, 3, 0, 5],
1333 );
1334
1335 test_sort_to_indices_primitive_arrays::<Int8Type>(
1337 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1338 Some(SortOptions {
1339 descending: true,
1340 nulls_first: true,
1341 }),
1342 None,
1343 vec![0, 5, 2, 1, 4, 3], );
1345
1346 test_sort_to_indices_primitive_arrays::<Int16Type>(
1347 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1348 Some(SortOptions {
1349 descending: true,
1350 nulls_first: true,
1351 }),
1352 None,
1353 vec![0, 5, 2, 1, 4, 3], );
1355
1356 test_sort_to_indices_primitive_arrays::<Int32Type>(
1357 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1358 Some(SortOptions {
1359 descending: true,
1360 nulls_first: true,
1361 }),
1362 None,
1363 vec![0, 5, 2, 1, 4, 3],
1364 );
1365
1366 test_sort_to_indices_primitive_arrays::<Int64Type>(
1367 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1368 Some(SortOptions {
1369 descending: true,
1370 nulls_first: true,
1371 }),
1372 None,
1373 vec![0, 5, 2, 1, 4, 3],
1374 );
1375
1376 test_sort_to_indices_primitive_arrays::<Float16Type>(
1377 vec![
1378 None,
1379 Some(f16::from_f32(0.1)),
1380 Some(f16::from_f32(0.2)),
1381 Some(f16::from_f32(-1.3)),
1382 Some(f16::from_f32(0.01)),
1383 None,
1384 ],
1385 Some(SortOptions {
1386 descending: true,
1387 nulls_first: true,
1388 }),
1389 None,
1390 vec![0, 5, 2, 1, 4, 3],
1391 );
1392
1393 test_sort_to_indices_primitive_arrays::<Float32Type>(
1394 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1395 Some(SortOptions {
1396 descending: true,
1397 nulls_first: true,
1398 }),
1399 None,
1400 vec![0, 5, 2, 1, 4, 3],
1401 );
1402
1403 test_sort_to_indices_primitive_arrays::<Float64Type>(
1404 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1405 Some(SortOptions {
1406 descending: true,
1407 nulls_first: true,
1408 }),
1409 None,
1410 vec![0, 5, 2, 1, 4, 3],
1411 );
1412
1413 test_sort_to_indices_primitive_arrays::<Float64Type>(
1415 vec![Some(2.0), None, None, Some(1.0)],
1416 Some(SortOptions {
1417 descending: false,
1418 nulls_first: false,
1419 }),
1420 Some(3),
1421 vec![3, 0, 1],
1422 );
1423
1424 test_sort_to_indices_primitive_arrays::<Float64Type>(
1425 vec![Some(2.0), None, None, Some(1.0)],
1426 Some(SortOptions {
1427 descending: false,
1428 nulls_first: true,
1429 }),
1430 Some(3),
1431 vec![1, 2, 3],
1432 );
1433
1434 test_sort_to_indices_primitive_arrays::<Float64Type>(
1436 vec![Some(1.0), None, None, None],
1437 Some(SortOptions {
1438 descending: false,
1439 nulls_first: true,
1440 }),
1441 Some(2),
1442 vec![1, 2],
1443 );
1444
1445 test_sort_to_indices_primitive_arrays::<Float64Type>(
1446 vec![Some(1.0), None, None, None],
1447 Some(SortOptions {
1448 descending: false,
1449 nulls_first: false,
1450 }),
1451 Some(2),
1452 vec![0, 1],
1453 );
1454 }
1455
1456 #[test]
1457 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1458 test_sort_to_indices_primitive_arrays::<Int32Type>(
1459 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1460 Some(SortOptions {
1461 descending: false,
1462 nulls_first: false,
1463 }),
1464 Some(2),
1465 vec![4, 6],
1466 );
1467 }
1468
1469 #[test]
1470 fn test_sort_boolean() {
1471 test_sort_to_indices_boolean_arrays(
1473 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1474 None,
1475 None,
1476 vec![0, 5, 1, 4, 2, 3],
1477 );
1478
1479 test_sort_to_indices_boolean_arrays(
1481 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1482 Some(SortOptions {
1483 descending: true,
1484 nulls_first: false,
1485 }),
1486 None,
1487 vec![2, 3, 1, 4, 0, 5],
1488 );
1489
1490 test_sort_to_indices_boolean_arrays(
1492 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1493 Some(SortOptions {
1494 descending: true,
1495 nulls_first: true,
1496 }),
1497 None,
1498 vec![0, 5, 2, 3, 1, 4],
1499 );
1500
1501 test_sort_to_indices_boolean_arrays(
1503 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1504 Some(SortOptions {
1505 descending: true,
1506 nulls_first: true,
1507 }),
1508 Some(3),
1509 vec![0, 5, 2],
1510 );
1511
1512 test_sort_to_indices_boolean_arrays(
1514 vec![Some(true), None, None, Some(false)],
1515 Some(SortOptions {
1516 descending: false,
1517 nulls_first: false,
1518 }),
1519 Some(3),
1520 vec![3, 0, 1],
1521 );
1522
1523 test_sort_to_indices_boolean_arrays(
1524 vec![Some(true), None, None, Some(false)],
1525 Some(SortOptions {
1526 descending: false,
1527 nulls_first: true,
1528 }),
1529 Some(3),
1530 vec![1, 2, 3],
1531 );
1532
1533 test_sort_to_indices_boolean_arrays(
1535 vec![Some(true), None, None, None],
1536 Some(SortOptions {
1537 descending: false,
1538 nulls_first: true,
1539 }),
1540 Some(2),
1541 vec![1, 2],
1542 );
1543
1544 test_sort_to_indices_boolean_arrays(
1545 vec![Some(true), None, None, None],
1546 Some(SortOptions {
1547 descending: false,
1548 nulls_first: false,
1549 }),
1550 Some(2),
1551 vec![0, 1],
1552 );
1553 }
1554
1555 fn test_every_config_sort_boolean_list_arrays(
1560 data: Vec<Option<Vec<Option<bool>>>>,
1561 options: Option<SortOptions>,
1562 expected_data: Vec<Option<Vec<Option<bool>>>>,
1563 ) {
1564 let first_length = data
1565 .iter()
1566 .find_map(|x| x.as_ref().map(|x| x.len()))
1567 .unwrap_or(0);
1568 let first_non_match_length = data
1569 .iter()
1570 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1571 .position(|x| x != first_length);
1572
1573 assert_eq!(
1574 first_non_match_length, None,
1575 "All list items should have the same length {first_length}, input data is invalid"
1576 );
1577
1578 let first_non_match_length = expected_data
1579 .iter()
1580 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1581 .position(|x| x != first_length);
1582
1583 assert_eq!(
1584 first_non_match_length, None,
1585 "All list items should have the same length {first_length}, expected data is invalid"
1586 );
1587
1588 let limit = expected_data.len().saturating_div(2);
1589
1590 for &with_limit in &[false, true] {
1591 let (limit, expected_data) = if with_limit {
1592 (
1593 Some(limit),
1594 expected_data.iter().take(limit).cloned().collect(),
1595 )
1596 } else {
1597 (None, expected_data.clone())
1598 };
1599
1600 for &fixed_length in &[None, Some(first_length as i32)] {
1601 test_sort_boolean_list_arrays(
1602 data.clone(),
1603 options,
1604 limit,
1605 expected_data.clone(),
1606 fixed_length,
1607 );
1608 }
1609 }
1610 }
1611
1612 fn test_sort_boolean_list_arrays(
1613 data: Vec<Option<Vec<Option<bool>>>>,
1614 options: Option<SortOptions>,
1615 limit: Option<usize>,
1616 expected_data: Vec<Option<Vec<Option<bool>>>>,
1617 fixed_length: Option<i32>,
1618 ) {
1619 fn build_fixed_boolean_list_array(
1620 data: Vec<Option<Vec<Option<bool>>>>,
1621 fixed_length: i32,
1622 ) -> ArrayRef {
1623 let mut builder = FixedSizeListBuilder::new(
1624 BooleanBuilder::with_capacity(fixed_length as usize),
1625 fixed_length,
1626 );
1627 for sublist in data {
1628 match sublist {
1629 Some(sublist) => {
1630 builder.values().extend(sublist);
1631 builder.append(true);
1632 }
1633 None => {
1634 builder
1635 .values()
1636 .extend(std::iter::repeat(None).take(fixed_length as usize));
1637 builder.append(false);
1638 }
1639 }
1640 }
1641 Arc::new(builder.finish()) as ArrayRef
1642 }
1643
1644 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1645 data: Vec<Option<Vec<Option<bool>>>>,
1646 ) -> ArrayRef {
1647 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1648 builder.extend(data);
1649 Arc::new(builder.finish()) as ArrayRef
1650 }
1651
1652 if let Some(length) = fixed_length {
1654 let input = build_fixed_boolean_list_array(data.clone(), length);
1655 let sorted = match limit {
1656 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1657 _ => sort(&(input as ArrayRef), options).unwrap(),
1658 };
1659 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1660
1661 assert_eq!(&sorted, &expected);
1662 }
1663
1664 let input = build_generic_boolean_list_array::<i32>(data.clone());
1666 let sorted = match limit {
1667 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1668 _ => sort(&(input as ArrayRef), options).unwrap(),
1669 };
1670 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1671
1672 assert_eq!(&sorted, &expected);
1673
1674 let input = build_generic_boolean_list_array::<i64>(data.clone());
1676 let sorted = match limit {
1677 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1678 _ => sort(&(input as ArrayRef), options).unwrap(),
1679 };
1680 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1681
1682 assert_eq!(&sorted, &expected);
1683 }
1684
1685 #[test]
1686 fn test_sort_list_of_booleans() {
1687 #[rustfmt::skip]
1690 let mut cases = vec![
1691 Some(vec![Some(true), Some(true), Some(true)]),
1692 Some(vec![Some(true), Some(true), Some(false)]),
1693 Some(vec![Some(true), Some(true), None]),
1694
1695 Some(vec![Some(true), Some(false), Some(true)]),
1696 Some(vec![Some(true), Some(false), Some(false)]),
1697 Some(vec![Some(true), Some(false), None]),
1698
1699 Some(vec![Some(true), None, Some(true)]),
1700 Some(vec![Some(true), None, Some(false)]),
1701 Some(vec![Some(true), None, None]),
1702
1703 Some(vec![Some(false), Some(true), Some(true)]),
1704 Some(vec![Some(false), Some(true), Some(false)]),
1705 Some(vec![Some(false), Some(true), None]),
1706
1707 Some(vec![Some(false), Some(false), Some(true)]),
1708 Some(vec![Some(false), Some(false), Some(false)]),
1709 Some(vec![Some(false), Some(false), None]),
1710
1711 Some(vec![Some(false), None, Some(true)]),
1712 Some(vec![Some(false), None, Some(false)]),
1713 Some(vec![Some(false), None, None]),
1714
1715 Some(vec![None, Some(true), Some(true)]),
1716 Some(vec![None, Some(true), Some(false)]),
1717 Some(vec![None, Some(true), None]),
1718
1719 Some(vec![None, Some(false), Some(true)]),
1720 Some(vec![None, Some(false), Some(false)]),
1721 Some(vec![None, Some(false), None]),
1722
1723 Some(vec![None, None, Some(true)]),
1724 Some(vec![None, None, Some(false)]),
1725 Some(vec![None, None, None]),
1726 None,
1727 ];
1728
1729 cases.shuffle(&mut StdRng::seed_from_u64(42));
1730
1731 #[rustfmt::skip]
1733 let expected_descending_false_nulls_first_false = vec![
1734 Some(vec![Some(false), Some(false), Some(false)]),
1735 Some(vec![Some(false), Some(false), Some(true)]),
1736 Some(vec![Some(false), Some(false), None]),
1737
1738 Some(vec![Some(false), Some(true), Some(false)]),
1739 Some(vec![Some(false), Some(true), Some(true)]),
1740 Some(vec![Some(false), Some(true), None]),
1741
1742 Some(vec![Some(false), None, Some(false)]),
1743 Some(vec![Some(false), None, Some(true)]),
1744 Some(vec![Some(false), None, None]),
1745
1746 Some(vec![Some(true), Some(false), Some(false)]),
1747 Some(vec![Some(true), Some(false), Some(true)]),
1748 Some(vec![Some(true), Some(false), None]),
1749
1750 Some(vec![Some(true), Some(true), Some(false)]),
1751 Some(vec![Some(true), Some(true), Some(true)]),
1752 Some(vec![Some(true), Some(true), None]),
1753
1754 Some(vec![Some(true), None, Some(false)]),
1755 Some(vec![Some(true), None, Some(true)]),
1756 Some(vec![Some(true), None, None]),
1757
1758 Some(vec![None, Some(false), Some(false)]),
1759 Some(vec![None, Some(false), Some(true)]),
1760 Some(vec![None, Some(false), None]),
1761
1762 Some(vec![None, Some(true), Some(false)]),
1763 Some(vec![None, Some(true), Some(true)]),
1764 Some(vec![None, Some(true), None]),
1765
1766 Some(vec![None, None, Some(false)]),
1767 Some(vec![None, None, Some(true)]),
1768 Some(vec![None, None, None]),
1769 None,
1770 ];
1771 test_every_config_sort_boolean_list_arrays(
1772 cases.clone(),
1773 Some(SortOptions {
1774 descending: false,
1775 nulls_first: false,
1776 }),
1777 expected_descending_false_nulls_first_false,
1778 );
1779
1780 #[rustfmt::skip]
1782 let expected_descending_false_nulls_first_true = vec![
1783 None,
1784
1785 Some(vec![None, None, None]),
1786 Some(vec![None, None, Some(false)]),
1787 Some(vec![None, None, Some(true)]),
1788
1789 Some(vec![None, Some(false), None]),
1790 Some(vec![None, Some(false), Some(false)]),
1791 Some(vec![None, Some(false), Some(true)]),
1792
1793 Some(vec![None, Some(true), None]),
1794 Some(vec![None, Some(true), Some(false)]),
1795 Some(vec![None, Some(true), Some(true)]),
1796
1797 Some(vec![Some(false), None, None]),
1798 Some(vec![Some(false), None, Some(false)]),
1799 Some(vec![Some(false), None, Some(true)]),
1800
1801 Some(vec![Some(false), Some(false), None]),
1802 Some(vec![Some(false), Some(false), Some(false)]),
1803 Some(vec![Some(false), Some(false), Some(true)]),
1804
1805 Some(vec![Some(false), Some(true), None]),
1806 Some(vec![Some(false), Some(true), Some(false)]),
1807 Some(vec![Some(false), Some(true), Some(true)]),
1808
1809 Some(vec![Some(true), None, None]),
1810 Some(vec![Some(true), None, Some(false)]),
1811 Some(vec![Some(true), None, Some(true)]),
1812
1813 Some(vec![Some(true), Some(false), None]),
1814 Some(vec![Some(true), Some(false), Some(false)]),
1815 Some(vec![Some(true), Some(false), Some(true)]),
1816
1817 Some(vec![Some(true), Some(true), None]),
1818 Some(vec![Some(true), Some(true), Some(false)]),
1819 Some(vec![Some(true), Some(true), Some(true)]),
1820 ];
1821
1822 test_every_config_sort_boolean_list_arrays(
1823 cases.clone(),
1824 Some(SortOptions {
1825 descending: false,
1826 nulls_first: true,
1827 }),
1828 expected_descending_false_nulls_first_true,
1829 );
1830
1831 #[rustfmt::skip]
1833 let expected_descending_true_nulls_first_false = vec![
1834 Some(vec![Some(true), Some(true), Some(true)]),
1835 Some(vec![Some(true), Some(true), Some(false)]),
1836 Some(vec![Some(true), Some(true), None]),
1837
1838 Some(vec![Some(true), Some(false), Some(true)]),
1839 Some(vec![Some(true), Some(false), Some(false)]),
1840 Some(vec![Some(true), Some(false), None]),
1841
1842 Some(vec![Some(true), None, Some(true)]),
1843 Some(vec![Some(true), None, Some(false)]),
1844 Some(vec![Some(true), None, None]),
1845
1846 Some(vec![Some(false), Some(true), Some(true)]),
1847 Some(vec![Some(false), Some(true), Some(false)]),
1848 Some(vec![Some(false), Some(true), None]),
1849
1850 Some(vec![Some(false), Some(false), Some(true)]),
1851 Some(vec![Some(false), Some(false), Some(false)]),
1852 Some(vec![Some(false), Some(false), None]),
1853
1854 Some(vec![Some(false), None, Some(true)]),
1855 Some(vec![Some(false), None, Some(false)]),
1856 Some(vec![Some(false), None, None]),
1857
1858 Some(vec![None, Some(true), Some(true)]),
1859 Some(vec![None, Some(true), Some(false)]),
1860 Some(vec![None, Some(true), None]),
1861
1862 Some(vec![None, Some(false), Some(true)]),
1863 Some(vec![None, Some(false), Some(false)]),
1864 Some(vec![None, Some(false), None]),
1865
1866 Some(vec![None, None, Some(true)]),
1867 Some(vec![None, None, Some(false)]),
1868 Some(vec![None, None, None]),
1869
1870 None,
1871 ];
1872 test_every_config_sort_boolean_list_arrays(
1873 cases.clone(),
1874 Some(SortOptions {
1875 descending: true,
1876 nulls_first: false,
1877 }),
1878 expected_descending_true_nulls_first_false,
1879 );
1880
1881 #[rustfmt::skip]
1883 let expected_descending_true_nulls_first_true = vec![
1884 None,
1885
1886 Some(vec![None, None, None]),
1887 Some(vec![None, None, Some(true)]),
1888 Some(vec![None, None, Some(false)]),
1889
1890 Some(vec![None, Some(true), None]),
1891 Some(vec![None, Some(true), Some(true)]),
1892 Some(vec![None, Some(true), Some(false)]),
1893
1894 Some(vec![None, Some(false), None]),
1895 Some(vec![None, Some(false), Some(true)]),
1896 Some(vec![None, Some(false), Some(false)]),
1897
1898 Some(vec![Some(true), None, None]),
1899 Some(vec![Some(true), None, Some(true)]),
1900 Some(vec![Some(true), None, Some(false)]),
1901
1902 Some(vec![Some(true), Some(true), None]),
1903 Some(vec![Some(true), Some(true), Some(true)]),
1904 Some(vec![Some(true), Some(true), Some(false)]),
1905
1906 Some(vec![Some(true), Some(false), None]),
1907 Some(vec![Some(true), Some(false), Some(true)]),
1908 Some(vec![Some(true), Some(false), Some(false)]),
1909
1910 Some(vec![Some(false), None, None]),
1911 Some(vec![Some(false), None, Some(true)]),
1912 Some(vec![Some(false), None, Some(false)]),
1913
1914 Some(vec![Some(false), Some(true), None]),
1915 Some(vec![Some(false), Some(true), Some(true)]),
1916 Some(vec![Some(false), Some(true), Some(false)]),
1917
1918 Some(vec![Some(false), Some(false), None]),
1919 Some(vec![Some(false), Some(false), Some(true)]),
1920 Some(vec![Some(false), Some(false), Some(false)]),
1921 ];
1922 test_every_config_sort_boolean_list_arrays(
1924 cases.clone(),
1925 Some(SortOptions {
1926 descending: true,
1927 nulls_first: true,
1928 }),
1929 expected_descending_true_nulls_first_true,
1930 );
1931 }
1932
1933 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
1934 test_sort_to_indices_decimal_array::<T>(
1936 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1937 None,
1938 None,
1939 vec![0, 6, 4, 2, 3, 5, 1],
1940 precision,
1941 scale,
1942 );
1943 test_sort_to_indices_decimal_array::<T>(
1945 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1946 Some(SortOptions {
1947 descending: true,
1948 nulls_first: false,
1949 }),
1950 None,
1951 vec![1, 5, 3, 2, 4, 0, 6],
1952 precision,
1953 scale,
1954 );
1955 test_sort_to_indices_decimal_array::<T>(
1957 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1958 Some(SortOptions {
1959 descending: true,
1960 nulls_first: true,
1961 }),
1962 None,
1963 vec![0, 6, 1, 5, 3, 2, 4],
1964 precision,
1965 scale,
1966 );
1967 test_sort_to_indices_decimal_array::<T>(
1969 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1970 Some(SortOptions {
1971 descending: false,
1972 nulls_first: true,
1973 }),
1974 None,
1975 vec![0, 6, 4, 2, 3, 5, 1],
1976 precision,
1977 scale,
1978 );
1979 test_sort_to_indices_decimal_array::<T>(
1981 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1982 None,
1983 Some(3),
1984 vec![0, 6, 4],
1985 precision,
1986 scale,
1987 );
1988 test_sort_to_indices_decimal_array::<T>(
1990 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1991 Some(SortOptions {
1992 descending: true,
1993 nulls_first: false,
1994 }),
1995 Some(3),
1996 vec![1, 5, 3],
1997 precision,
1998 scale,
1999 );
2000 test_sort_to_indices_decimal_array::<T>(
2002 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2003 Some(SortOptions {
2004 descending: true,
2005 nulls_first: true,
2006 }),
2007 Some(3),
2008 vec![0, 6, 1],
2009 precision,
2010 scale,
2011 );
2012 test_sort_to_indices_decimal_array::<T>(
2014 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2015 Some(SortOptions {
2016 descending: false,
2017 nulls_first: true,
2018 }),
2019 Some(3),
2020 vec![0, 6, 4],
2021 precision,
2022 scale,
2023 );
2024 }
2025
2026 #[test]
2027 fn test_sort_indices_decimal128() {
2028 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2029 }
2030
2031 #[test]
2032 fn test_sort_indices_decimal256() {
2033 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2034 }
2035
2036 #[test]
2037 fn test_sort_indices_decimal256_max_min() {
2038 let data = vec![
2039 None,
2040 Some(i256::MIN),
2041 Some(i256::from_i128(1)),
2042 Some(i256::MAX),
2043 Some(i256::from_i128(-1)),
2044 ];
2045 test_sort_to_indices_decimal256_array(
2046 data.clone(),
2047 Some(SortOptions {
2048 descending: false,
2049 nulls_first: true,
2050 }),
2051 None,
2052 vec![0, 1, 4, 2, 3],
2053 );
2054
2055 test_sort_to_indices_decimal256_array(
2056 data.clone(),
2057 Some(SortOptions {
2058 descending: true,
2059 nulls_first: true,
2060 }),
2061 None,
2062 vec![0, 3, 2, 4, 1],
2063 );
2064
2065 test_sort_to_indices_decimal256_array(
2066 data.clone(),
2067 Some(SortOptions {
2068 descending: false,
2069 nulls_first: true,
2070 }),
2071 Some(4),
2072 vec![0, 1, 4, 2],
2073 );
2074
2075 test_sort_to_indices_decimal256_array(
2076 data.clone(),
2077 Some(SortOptions {
2078 descending: true,
2079 nulls_first: true,
2080 }),
2081 Some(4),
2082 vec![0, 3, 2, 4],
2083 );
2084 }
2085
2086 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2087 test_sort_decimal_array::<T>(
2089 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2090 None,
2091 None,
2092 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2093 precision,
2094 scale,
2095 );
2096 test_sort_decimal_array::<T>(
2098 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2099 Some(SortOptions {
2100 descending: true,
2101 nulls_first: false,
2102 }),
2103 None,
2104 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2105 precision,
2106 scale,
2107 );
2108 test_sort_decimal_array::<T>(
2110 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2111 Some(SortOptions {
2112 descending: true,
2113 nulls_first: true,
2114 }),
2115 None,
2116 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2117 precision,
2118 scale,
2119 );
2120 test_sort_decimal_array::<T>(
2122 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2123 Some(SortOptions {
2124 descending: false,
2125 nulls_first: true,
2126 }),
2127 None,
2128 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2129 precision,
2130 scale,
2131 );
2132 test_sort_decimal_array::<T>(
2134 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2135 None,
2136 Some(3),
2137 vec![None, None, Some(1)],
2138 precision,
2139 scale,
2140 );
2141 test_sort_decimal_array::<T>(
2143 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2144 Some(SortOptions {
2145 descending: true,
2146 nulls_first: false,
2147 }),
2148 Some(3),
2149 vec![Some(5), Some(4), Some(3)],
2150 precision,
2151 scale,
2152 );
2153 test_sort_decimal_array::<T>(
2155 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2156 Some(SortOptions {
2157 descending: true,
2158 nulls_first: true,
2159 }),
2160 Some(3),
2161 vec![None, None, Some(5)],
2162 precision,
2163 scale,
2164 );
2165 test_sort_decimal_array::<T>(
2167 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2168 Some(SortOptions {
2169 descending: false,
2170 nulls_first: true,
2171 }),
2172 Some(3),
2173 vec![None, None, Some(1)],
2174 precision,
2175 scale,
2176 );
2177 }
2178
2179 #[test]
2180 fn test_sort_decimal128() {
2181 test_sort_decimal::<Decimal128Type>(23, 6);
2182 }
2183
2184 #[test]
2185 fn test_sort_decimal256() {
2186 test_sort_decimal::<Decimal256Type>(53, 6);
2187 }
2188
2189 #[test]
2190 fn test_sort_decimal256_max_min() {
2191 test_sort_decimal256_array(
2192 vec![
2193 None,
2194 Some(i256::MIN),
2195 Some(i256::from_i128(1)),
2196 Some(i256::MAX),
2197 Some(i256::from_i128(-1)),
2198 None,
2199 ],
2200 Some(SortOptions {
2201 descending: false,
2202 nulls_first: true,
2203 }),
2204 None,
2205 vec![
2206 None,
2207 None,
2208 Some(i256::MIN),
2209 Some(i256::from_i128(-1)),
2210 Some(i256::from_i128(1)),
2211 Some(i256::MAX),
2212 ],
2213 );
2214
2215 test_sort_decimal256_array(
2216 vec![
2217 None,
2218 Some(i256::MIN),
2219 Some(i256::from_i128(1)),
2220 Some(i256::MAX),
2221 Some(i256::from_i128(-1)),
2222 None,
2223 ],
2224 Some(SortOptions {
2225 descending: true,
2226 nulls_first: true,
2227 }),
2228 None,
2229 vec![
2230 None,
2231 None,
2232 Some(i256::MAX),
2233 Some(i256::from_i128(1)),
2234 Some(i256::from_i128(-1)),
2235 Some(i256::MIN),
2236 ],
2237 );
2238
2239 test_sort_decimal256_array(
2240 vec![
2241 None,
2242 Some(i256::MIN),
2243 Some(i256::from_i128(1)),
2244 Some(i256::MAX),
2245 Some(i256::from_i128(-1)),
2246 None,
2247 ],
2248 Some(SortOptions {
2249 descending: false,
2250 nulls_first: true,
2251 }),
2252 Some(4),
2253 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2254 );
2255
2256 test_sort_decimal256_array(
2257 vec![
2258 None,
2259 Some(i256::MIN),
2260 Some(i256::from_i128(1)),
2261 Some(i256::MAX),
2262 Some(i256::from_i128(-1)),
2263 None,
2264 ],
2265 Some(SortOptions {
2266 descending: true,
2267 nulls_first: true,
2268 }),
2269 Some(4),
2270 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2271 );
2272 }
2273
2274 #[test]
2275 fn test_sort_primitives() {
2276 test_sort_primitive_arrays::<UInt8Type>(
2278 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2279 None,
2280 None,
2281 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2282 );
2283 test_sort_primitive_arrays::<UInt16Type>(
2284 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2285 None,
2286 None,
2287 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2288 );
2289 test_sort_primitive_arrays::<UInt32Type>(
2290 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2291 None,
2292 None,
2293 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2294 );
2295 test_sort_primitive_arrays::<UInt64Type>(
2296 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2297 None,
2298 None,
2299 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2300 );
2301
2302 test_sort_primitive_arrays::<Int8Type>(
2304 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2305 Some(SortOptions {
2306 descending: true,
2307 nulls_first: false,
2308 }),
2309 None,
2310 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2311 );
2312 test_sort_primitive_arrays::<Int16Type>(
2313 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2314 Some(SortOptions {
2315 descending: true,
2316 nulls_first: false,
2317 }),
2318 None,
2319 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2320 );
2321 test_sort_primitive_arrays::<Int32Type>(
2322 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2323 Some(SortOptions {
2324 descending: true,
2325 nulls_first: false,
2326 }),
2327 None,
2328 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2329 );
2330 test_sort_primitive_arrays::<Int16Type>(
2331 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2332 Some(SortOptions {
2333 descending: true,
2334 nulls_first: false,
2335 }),
2336 None,
2337 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2338 );
2339
2340 test_sort_primitive_arrays::<Int8Type>(
2342 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2343 Some(SortOptions {
2344 descending: true,
2345 nulls_first: true,
2346 }),
2347 None,
2348 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2349 );
2350 test_sort_primitive_arrays::<Int16Type>(
2351 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2352 Some(SortOptions {
2353 descending: true,
2354 nulls_first: true,
2355 }),
2356 None,
2357 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2358 );
2359 test_sort_primitive_arrays::<Int32Type>(
2360 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2361 Some(SortOptions {
2362 descending: true,
2363 nulls_first: true,
2364 }),
2365 None,
2366 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2367 );
2368 test_sort_primitive_arrays::<Int64Type>(
2369 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2370 Some(SortOptions {
2371 descending: true,
2372 nulls_first: true,
2373 }),
2374 None,
2375 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2376 );
2377
2378 test_sort_primitive_arrays::<Int64Type>(
2379 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2380 Some(SortOptions {
2381 descending: true,
2382 nulls_first: true,
2383 }),
2384 Some(3),
2385 vec![None, None, Some(2)],
2386 );
2387
2388 test_sort_primitive_arrays::<Float16Type>(
2389 vec![
2390 None,
2391 Some(f16::from_f32(0.0)),
2392 Some(f16::from_f32(2.0)),
2393 Some(f16::from_f32(-1.0)),
2394 Some(f16::from_f32(0.0)),
2395 None,
2396 ],
2397 Some(SortOptions {
2398 descending: true,
2399 nulls_first: true,
2400 }),
2401 None,
2402 vec![
2403 None,
2404 None,
2405 Some(f16::from_f32(2.0)),
2406 Some(f16::from_f32(0.0)),
2407 Some(f16::from_f32(0.0)),
2408 Some(f16::from_f32(-1.0)),
2409 ],
2410 );
2411
2412 test_sort_primitive_arrays::<Float32Type>(
2413 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2414 Some(SortOptions {
2415 descending: true,
2416 nulls_first: true,
2417 }),
2418 None,
2419 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2420 );
2421 test_sort_primitive_arrays::<Float64Type>(
2422 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2423 Some(SortOptions {
2424 descending: true,
2425 nulls_first: true,
2426 }),
2427 None,
2428 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2429 );
2430 test_sort_primitive_arrays::<Float64Type>(
2431 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2432 Some(SortOptions {
2433 descending: true,
2434 nulls_first: true,
2435 }),
2436 None,
2437 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2438 );
2439
2440 test_sort_primitive_arrays::<Int8Type>(
2442 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2443 Some(SortOptions {
2444 descending: false,
2445 nulls_first: true,
2446 }),
2447 None,
2448 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2449 );
2450 test_sort_primitive_arrays::<Int16Type>(
2451 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2452 Some(SortOptions {
2453 descending: false,
2454 nulls_first: true,
2455 }),
2456 None,
2457 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2458 );
2459 test_sort_primitive_arrays::<Int32Type>(
2460 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2461 Some(SortOptions {
2462 descending: false,
2463 nulls_first: true,
2464 }),
2465 None,
2466 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2467 );
2468 test_sort_primitive_arrays::<Int64Type>(
2469 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2470 Some(SortOptions {
2471 descending: false,
2472 nulls_first: true,
2473 }),
2474 None,
2475 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2476 );
2477 test_sort_primitive_arrays::<Float16Type>(
2478 vec![
2479 None,
2480 Some(f16::from_f32(0.0)),
2481 Some(f16::from_f32(2.0)),
2482 Some(f16::from_f32(-1.0)),
2483 Some(f16::from_f32(0.0)),
2484 None,
2485 ],
2486 Some(SortOptions {
2487 descending: false,
2488 nulls_first: true,
2489 }),
2490 None,
2491 vec![
2492 None,
2493 None,
2494 Some(f16::from_f32(-1.0)),
2495 Some(f16::from_f32(0.0)),
2496 Some(f16::from_f32(0.0)),
2497 Some(f16::from_f32(2.0)),
2498 ],
2499 );
2500 test_sort_primitive_arrays::<Float32Type>(
2501 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2502 Some(SortOptions {
2503 descending: false,
2504 nulls_first: true,
2505 }),
2506 None,
2507 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2508 );
2509 test_sort_primitive_arrays::<Float64Type>(
2510 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2511 Some(SortOptions {
2512 descending: false,
2513 nulls_first: true,
2514 }),
2515 None,
2516 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2517 );
2518 test_sort_primitive_arrays::<Float64Type>(
2519 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2520 Some(SortOptions {
2521 descending: false,
2522 nulls_first: true,
2523 }),
2524 None,
2525 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2526 );
2527
2528 test_sort_primitive_arrays::<Float64Type>(
2530 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2531 Some(SortOptions {
2532 descending: false,
2533 nulls_first: true,
2534 }),
2535 Some(2),
2536 vec![Some(1.0), Some(f64::NAN)],
2537 );
2538
2539 test_sort_primitive_arrays::<Float64Type>(
2541 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2542 Some(SortOptions {
2543 descending: false,
2544 nulls_first: true,
2545 }),
2546 Some(3),
2547 vec![Some(1.0), Some(2.0), Some(3.0)],
2548 );
2549
2550 test_sort_primitive_arrays::<Float64Type>(
2552 vec![Some(2.0), None, None, Some(1.0)],
2553 Some(SortOptions {
2554 descending: false,
2555 nulls_first: false,
2556 }),
2557 Some(3),
2558 vec![Some(1.0), Some(2.0), None],
2559 );
2560
2561 test_sort_primitive_arrays::<Float64Type>(
2562 vec![Some(2.0), None, None, Some(1.0)],
2563 Some(SortOptions {
2564 descending: false,
2565 nulls_first: true,
2566 }),
2567 Some(3),
2568 vec![None, None, Some(1.0)],
2569 );
2570
2571 test_sort_primitive_arrays::<Float64Type>(
2573 vec![Some(2.0), None, None, None],
2574 Some(SortOptions {
2575 descending: false,
2576 nulls_first: true,
2577 }),
2578 Some(2),
2579 vec![None, None],
2580 );
2581
2582 test_sort_primitive_arrays::<Float64Type>(
2583 vec![Some(2.0), None, None, None],
2584 Some(SortOptions {
2585 descending: false,
2586 nulls_first: false,
2587 }),
2588 Some(2),
2589 vec![Some(2.0), None],
2590 );
2591 }
2592
2593 #[test]
2594 fn test_sort_to_indices_strings() {
2595 test_sort_to_indices_string_arrays(
2596 vec![
2597 None,
2598 Some("bad"),
2599 Some("sad"),
2600 None,
2601 Some("glad"),
2602 Some("-ad"),
2603 ],
2604 None,
2605 None,
2606 vec![0, 3, 5, 1, 4, 2],
2607 );
2608
2609 test_sort_to_indices_string_arrays(
2610 vec![
2611 None,
2612 Some("bad"),
2613 Some("sad"),
2614 None,
2615 Some("glad"),
2616 Some("-ad"),
2617 ],
2618 Some(SortOptions {
2619 descending: true,
2620 nulls_first: false,
2621 }),
2622 None,
2623 vec![2, 4, 1, 5, 0, 3],
2624 );
2625
2626 test_sort_to_indices_string_arrays(
2627 vec![
2628 None,
2629 Some("bad"),
2630 Some("sad"),
2631 None,
2632 Some("glad"),
2633 Some("-ad"),
2634 ],
2635 Some(SortOptions {
2636 descending: false,
2637 nulls_first: true,
2638 }),
2639 None,
2640 vec![0, 3, 5, 1, 4, 2],
2641 );
2642
2643 test_sort_to_indices_string_arrays(
2644 vec![
2645 None,
2646 Some("bad"),
2647 Some("sad"),
2648 None,
2649 Some("glad"),
2650 Some("-ad"),
2651 ],
2652 Some(SortOptions {
2653 descending: true,
2654 nulls_first: true,
2655 }),
2656 None,
2657 vec![0, 3, 2, 4, 1, 5],
2658 );
2659
2660 test_sort_to_indices_string_arrays(
2661 vec![
2662 None,
2663 Some("bad"),
2664 Some("sad"),
2665 None,
2666 Some("glad"),
2667 Some("-ad"),
2668 ],
2669 Some(SortOptions {
2670 descending: true,
2671 nulls_first: true,
2672 }),
2673 Some(3),
2674 vec![0, 3, 2],
2675 );
2676
2677 test_sort_to_indices_string_arrays(
2679 vec![Some("def"), None, None, Some("abc")],
2680 Some(SortOptions {
2681 descending: false,
2682 nulls_first: false,
2683 }),
2684 Some(3),
2685 vec![3, 0, 1],
2686 );
2687
2688 test_sort_to_indices_string_arrays(
2689 vec![Some("def"), None, None, Some("abc")],
2690 Some(SortOptions {
2691 descending: false,
2692 nulls_first: true,
2693 }),
2694 Some(3),
2695 vec![1, 2, 3],
2696 );
2697
2698 test_sort_to_indices_string_arrays(
2700 vec![Some("def"), None, None, None],
2701 Some(SortOptions {
2702 descending: false,
2703 nulls_first: true,
2704 }),
2705 Some(2),
2706 vec![1, 2],
2707 );
2708
2709 test_sort_to_indices_string_arrays(
2710 vec![Some("def"), None, None, None],
2711 Some(SortOptions {
2712 descending: false,
2713 nulls_first: false,
2714 }),
2715 Some(2),
2716 vec![0, 1],
2717 );
2718 }
2719
2720 #[test]
2721 fn test_sort_strings() {
2722 test_sort_string_arrays(
2723 vec![
2724 None,
2725 Some("bad"),
2726 Some("sad"),
2727 Some("long string longer than 12 bytes"),
2728 None,
2729 Some("glad"),
2730 Some("lang string longer than 12 bytes"),
2731 Some("-ad"),
2732 ],
2733 None,
2734 None,
2735 vec![
2736 None,
2737 None,
2738 Some("-ad"),
2739 Some("bad"),
2740 Some("glad"),
2741 Some("lang string longer than 12 bytes"),
2742 Some("long string longer than 12 bytes"),
2743 Some("sad"),
2744 ],
2745 );
2746
2747 test_sort_string_arrays(
2748 vec![
2749 None,
2750 Some("bad"),
2751 Some("sad"),
2752 Some("long string longer than 12 bytes"),
2753 None,
2754 Some("glad"),
2755 Some("lang string longer than 12 bytes"),
2756 Some("-ad"),
2757 ],
2758 Some(SortOptions {
2759 descending: true,
2760 nulls_first: false,
2761 }),
2762 None,
2763 vec![
2764 Some("sad"),
2765 Some("long string longer than 12 bytes"),
2766 Some("lang string longer than 12 bytes"),
2767 Some("glad"),
2768 Some("bad"),
2769 Some("-ad"),
2770 None,
2771 None,
2772 ],
2773 );
2774
2775 test_sort_string_arrays(
2776 vec![
2777 None,
2778 Some("bad"),
2779 Some("long string longer than 12 bytes"),
2780 Some("sad"),
2781 None,
2782 Some("glad"),
2783 Some("lang string longer than 12 bytes"),
2784 Some("-ad"),
2785 ],
2786 Some(SortOptions {
2787 descending: false,
2788 nulls_first: true,
2789 }),
2790 None,
2791 vec![
2792 None,
2793 None,
2794 Some("-ad"),
2795 Some("bad"),
2796 Some("glad"),
2797 Some("lang string longer than 12 bytes"),
2798 Some("long string longer than 12 bytes"),
2799 Some("sad"),
2800 ],
2801 );
2802
2803 test_sort_string_arrays(
2804 vec![
2805 None,
2806 Some("bad"),
2807 Some("long string longer than 12 bytes"),
2808 Some("sad"),
2809 None,
2810 Some("glad"),
2811 Some("lang string longer than 12 bytes"),
2812 Some("-ad"),
2813 ],
2814 Some(SortOptions {
2815 descending: true,
2816 nulls_first: true,
2817 }),
2818 None,
2819 vec![
2820 None,
2821 None,
2822 Some("sad"),
2823 Some("long string longer than 12 bytes"),
2824 Some("lang string longer than 12 bytes"),
2825 Some("glad"),
2826 Some("bad"),
2827 Some("-ad"),
2828 ],
2829 );
2830
2831 test_sort_string_arrays(
2832 vec![
2833 None,
2834 Some("bad"),
2835 Some("long string longer than 12 bytes"),
2836 Some("sad"),
2837 None,
2838 Some("glad"),
2839 Some("lang string longer than 12 bytes"),
2840 Some("-ad"),
2841 ],
2842 Some(SortOptions {
2843 descending: true,
2844 nulls_first: true,
2845 }),
2846 Some(3),
2847 vec![None, None, Some("sad")],
2848 );
2849
2850 test_sort_string_arrays(
2852 vec![
2853 Some("def long string longer than 12"),
2854 None,
2855 None,
2856 Some("abc"),
2857 ],
2858 Some(SortOptions {
2859 descending: false,
2860 nulls_first: false,
2861 }),
2862 Some(3),
2863 vec![Some("abc"), Some("def long string longer than 12"), None],
2864 );
2865
2866 test_sort_string_arrays(
2867 vec![
2868 Some("def long string longer than 12"),
2869 None,
2870 None,
2871 Some("abc"),
2872 ],
2873 Some(SortOptions {
2874 descending: false,
2875 nulls_first: true,
2876 }),
2877 Some(3),
2878 vec![None, None, Some("abc")],
2879 );
2880
2881 test_sort_string_arrays(
2883 vec![Some("def long string longer than 12"), None, None, None],
2884 Some(SortOptions {
2885 descending: false,
2886 nulls_first: true,
2887 }),
2888 Some(2),
2889 vec![None, None],
2890 );
2891
2892 test_sort_string_arrays(
2893 vec![Some("def long string longer than 12"), None, None, None],
2894 Some(SortOptions {
2895 descending: false,
2896 nulls_first: false,
2897 }),
2898 Some(2),
2899 vec![Some("def long string longer than 12"), None],
2900 );
2901 }
2902
2903 #[test]
2904 fn test_sort_run_to_run() {
2905 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
2906 }
2907
2908 #[test]
2909 fn test_sort_run_to_indices() {
2910 test_sort_run_inner(|array, sort_options, limit| {
2911 let indices = sort_to_indices(array, sort_options, limit).unwrap();
2912 take(array, &indices, None)
2913 });
2914 }
2915
2916 fn test_sort_run_inner<F>(sort_fn: F)
2917 where
2918 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2919 {
2920 let total_len = 80;
2922 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
2923 let repeats: Vec<usize> = vec![1, 3, 2, 4];
2924 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
2925 for ix in 0_usize..32 {
2926 let repeat: usize = repeats[ix % repeats.len()];
2927 let val: Option<i32> = vals[ix % vals.len()];
2928 input_array.resize(input_array.len() + repeat, val);
2929 }
2930
2931 let mut builder =
2934 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
2935 builder.extend(input_array.iter().copied());
2936 let run_array = builder.finish();
2937
2938 let slice_lens = [
2940 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
2941 ];
2942 for slice_len in slice_lens {
2943 test_sort_run_inner2(
2944 input_array.as_slice(),
2945 &run_array,
2946 0,
2947 slice_len,
2948 None,
2949 &sort_fn,
2950 );
2951 test_sort_run_inner2(
2952 input_array.as_slice(),
2953 &run_array,
2954 total_len - slice_len,
2955 slice_len,
2956 None,
2957 &sort_fn,
2958 );
2959 if slice_len > 1 {
2961 test_sort_run_inner2(
2962 input_array.as_slice(),
2963 &run_array,
2964 0,
2965 slice_len,
2966 Some(slice_len / 2),
2967 &sort_fn,
2968 );
2969 test_sort_run_inner2(
2970 input_array.as_slice(),
2971 &run_array,
2972 total_len - slice_len,
2973 slice_len,
2974 Some(slice_len / 2),
2975 &sort_fn,
2976 );
2977 }
2978 }
2979 }
2980
2981 fn test_sort_run_inner2<F>(
2982 input_array: &[Option<i32>],
2983 run_array: &RunArray<Int16Type>,
2984 offset: usize,
2985 length: usize,
2986 limit: Option<usize>,
2987 sort_fn: &F,
2988 ) where
2989 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2990 {
2991 let sliced_array = run_array.slice(offset, length);
2993 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
2994 let sorted_run_array = sorted_sliced_array
2995 .as_any()
2996 .downcast_ref::<RunArray<Int16Type>>()
2997 .unwrap();
2998 let typed_run_array = sorted_run_array
2999 .downcast::<PrimitiveArray<Int32Type>>()
3000 .unwrap();
3001 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3002
3003 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3005 sliced_input.sort();
3006 let expected = if let Some(limit) = limit {
3007 sliced_input.iter().take(limit).copied().collect()
3008 } else {
3009 sliced_input
3010 };
3011
3012 assert_eq!(expected, actual)
3013 }
3014
3015 #[test]
3016 fn test_sort_string_dicts() {
3017 test_sort_string_dict_arrays::<Int8Type>(
3018 vec![
3019 None,
3020 Some("bad"),
3021 Some("sad"),
3022 None,
3023 Some("glad"),
3024 Some("-ad"),
3025 ],
3026 None,
3027 None,
3028 vec![
3029 None,
3030 None,
3031 Some("-ad"),
3032 Some("bad"),
3033 Some("glad"),
3034 Some("sad"),
3035 ],
3036 );
3037
3038 test_sort_string_dict_arrays::<Int16Type>(
3039 vec![
3040 None,
3041 Some("bad"),
3042 Some("sad"),
3043 None,
3044 Some("glad"),
3045 Some("-ad"),
3046 ],
3047 Some(SortOptions {
3048 descending: true,
3049 nulls_first: false,
3050 }),
3051 None,
3052 vec![
3053 Some("sad"),
3054 Some("glad"),
3055 Some("bad"),
3056 Some("-ad"),
3057 None,
3058 None,
3059 ],
3060 );
3061
3062 test_sort_string_dict_arrays::<Int32Type>(
3063 vec![
3064 None,
3065 Some("bad"),
3066 Some("sad"),
3067 None,
3068 Some("glad"),
3069 Some("-ad"),
3070 ],
3071 Some(SortOptions {
3072 descending: false,
3073 nulls_first: true,
3074 }),
3075 None,
3076 vec![
3077 None,
3078 None,
3079 Some("-ad"),
3080 Some("bad"),
3081 Some("glad"),
3082 Some("sad"),
3083 ],
3084 );
3085
3086 test_sort_string_dict_arrays::<Int16Type>(
3087 vec![
3088 None,
3089 Some("bad"),
3090 Some("sad"),
3091 None,
3092 Some("glad"),
3093 Some("-ad"),
3094 ],
3095 Some(SortOptions {
3096 descending: true,
3097 nulls_first: true,
3098 }),
3099 None,
3100 vec![
3101 None,
3102 None,
3103 Some("sad"),
3104 Some("glad"),
3105 Some("bad"),
3106 Some("-ad"),
3107 ],
3108 );
3109
3110 test_sort_string_dict_arrays::<Int16Type>(
3111 vec![
3112 None,
3113 Some("bad"),
3114 Some("sad"),
3115 None,
3116 Some("glad"),
3117 Some("-ad"),
3118 ],
3119 Some(SortOptions {
3120 descending: true,
3121 nulls_first: true,
3122 }),
3123 Some(3),
3124 vec![None, None, Some("sad")],
3125 );
3126
3127 test_sort_string_dict_arrays::<Int16Type>(
3129 vec![Some("def"), None, None, Some("abc")],
3130 Some(SortOptions {
3131 descending: false,
3132 nulls_first: false,
3133 }),
3134 Some(3),
3135 vec![Some("abc"), Some("def"), None],
3136 );
3137
3138 test_sort_string_dict_arrays::<Int16Type>(
3139 vec![Some("def"), None, None, Some("abc")],
3140 Some(SortOptions {
3141 descending: false,
3142 nulls_first: true,
3143 }),
3144 Some(3),
3145 vec![None, None, Some("abc")],
3146 );
3147
3148 test_sort_string_dict_arrays::<Int16Type>(
3150 vec![Some("def"), None, None, None],
3151 Some(SortOptions {
3152 descending: false,
3153 nulls_first: true,
3154 }),
3155 Some(2),
3156 vec![None, None],
3157 );
3158
3159 test_sort_string_dict_arrays::<Int16Type>(
3160 vec![Some("def"), None, None, None],
3161 Some(SortOptions {
3162 descending: false,
3163 nulls_first: false,
3164 }),
3165 Some(2),
3166 vec![Some("def"), None],
3167 );
3168 }
3169
3170 #[test]
3171 fn test_sort_list() {
3172 test_sort_list_arrays::<Int8Type>(
3173 vec![
3174 Some(vec![Some(1)]),
3175 Some(vec![Some(4)]),
3176 Some(vec![Some(2)]),
3177 Some(vec![Some(3)]),
3178 ],
3179 Some(SortOptions {
3180 descending: false,
3181 nulls_first: false,
3182 }),
3183 None,
3184 vec![
3185 Some(vec![Some(1)]),
3186 Some(vec![Some(2)]),
3187 Some(vec![Some(3)]),
3188 Some(vec![Some(4)]),
3189 ],
3190 Some(1),
3191 );
3192
3193 test_sort_list_arrays::<Float16Type>(
3194 vec![
3195 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3196 Some(vec![
3197 Some(f16::from_f32(4.0)),
3198 Some(f16::from_f32(3.0)),
3199 Some(f16::from_f32(2.0)),
3200 Some(f16::from_f32(1.0)),
3201 ]),
3202 Some(vec![
3203 Some(f16::from_f32(2.0)),
3204 Some(f16::from_f32(3.0)),
3205 Some(f16::from_f32(4.0)),
3206 ]),
3207 Some(vec![
3208 Some(f16::from_f32(3.0)),
3209 Some(f16::from_f32(3.0)),
3210 Some(f16::from_f32(3.0)),
3211 Some(f16::from_f32(3.0)),
3212 ]),
3213 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3214 ],
3215 Some(SortOptions {
3216 descending: false,
3217 nulls_first: false,
3218 }),
3219 None,
3220 vec![
3221 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3222 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3223 Some(vec![
3224 Some(f16::from_f32(2.0)),
3225 Some(f16::from_f32(3.0)),
3226 Some(f16::from_f32(4.0)),
3227 ]),
3228 Some(vec![
3229 Some(f16::from_f32(3.0)),
3230 Some(f16::from_f32(3.0)),
3231 Some(f16::from_f32(3.0)),
3232 Some(f16::from_f32(3.0)),
3233 ]),
3234 Some(vec![
3235 Some(f16::from_f32(4.0)),
3236 Some(f16::from_f32(3.0)),
3237 Some(f16::from_f32(2.0)),
3238 Some(f16::from_f32(1.0)),
3239 ]),
3240 ],
3241 None,
3242 );
3243
3244 test_sort_list_arrays::<Float32Type>(
3245 vec![
3246 Some(vec![Some(1.0), Some(0.0)]),
3247 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3248 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3249 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3250 Some(vec![Some(1.0), Some(1.0)]),
3251 ],
3252 Some(SortOptions {
3253 descending: false,
3254 nulls_first: false,
3255 }),
3256 None,
3257 vec![
3258 Some(vec![Some(1.0), Some(0.0)]),
3259 Some(vec![Some(1.0), Some(1.0)]),
3260 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3261 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3262 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3263 ],
3264 None,
3265 );
3266
3267 test_sort_list_arrays::<Float64Type>(
3268 vec![
3269 Some(vec![Some(1.0), Some(0.0)]),
3270 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3271 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3272 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3273 Some(vec![Some(1.0), Some(1.0)]),
3274 ],
3275 Some(SortOptions {
3276 descending: false,
3277 nulls_first: false,
3278 }),
3279 None,
3280 vec![
3281 Some(vec![Some(1.0), Some(0.0)]),
3282 Some(vec![Some(1.0), Some(1.0)]),
3283 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3284 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3285 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3286 ],
3287 None,
3288 );
3289
3290 test_sort_list_arrays::<Int32Type>(
3291 vec![
3292 Some(vec![Some(1), Some(0)]),
3293 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3294 Some(vec![Some(2), Some(3), Some(4)]),
3295 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3296 Some(vec![Some(1), Some(1)]),
3297 ],
3298 Some(SortOptions {
3299 descending: false,
3300 nulls_first: false,
3301 }),
3302 None,
3303 vec![
3304 Some(vec![Some(1), Some(0)]),
3305 Some(vec![Some(1), Some(1)]),
3306 Some(vec![Some(2), Some(3), Some(4)]),
3307 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3308 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3309 ],
3310 None,
3311 );
3312
3313 test_sort_list_arrays::<Int32Type>(
3314 vec![
3315 None,
3316 Some(vec![Some(4), None, Some(2)]),
3317 Some(vec![Some(2), Some(3), Some(4)]),
3318 None,
3319 Some(vec![Some(3), Some(3), None]),
3320 ],
3321 Some(SortOptions {
3322 descending: false,
3323 nulls_first: false,
3324 }),
3325 None,
3326 vec![
3327 Some(vec![Some(2), Some(3), Some(4)]),
3328 Some(vec![Some(3), Some(3), None]),
3329 Some(vec![Some(4), None, Some(2)]),
3330 None,
3331 None,
3332 ],
3333 Some(3),
3334 );
3335
3336 test_sort_list_arrays::<Int32Type>(
3337 vec![
3338 Some(vec![Some(1), Some(0)]),
3339 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3340 Some(vec![Some(2), Some(3), Some(4)]),
3341 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3342 Some(vec![Some(1), Some(1)]),
3343 ],
3344 Some(SortOptions {
3345 descending: false,
3346 nulls_first: false,
3347 }),
3348 Some(2),
3349 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3350 None,
3351 );
3352
3353 test_sort_list_arrays::<Int32Type>(
3355 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3356 Some(SortOptions {
3357 descending: false,
3358 nulls_first: false,
3359 }),
3360 Some(3),
3361 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3362 None,
3363 );
3364
3365 test_sort_list_arrays::<Int32Type>(
3366 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3367 Some(SortOptions {
3368 descending: false,
3369 nulls_first: true,
3370 }),
3371 Some(3),
3372 vec![None, None, Some(vec![Some(1)])],
3373 None,
3374 );
3375
3376 test_sort_list_arrays::<Int32Type>(
3378 vec![Some(vec![Some(1)]), None, None, None],
3379 Some(SortOptions {
3380 descending: false,
3381 nulls_first: true,
3382 }),
3383 Some(2),
3384 vec![None, None],
3385 None,
3386 );
3387
3388 test_sort_list_arrays::<Int32Type>(
3389 vec![Some(vec![Some(1)]), None, None, None],
3390 Some(SortOptions {
3391 descending: false,
3392 nulls_first: false,
3393 }),
3394 Some(2),
3395 vec![Some(vec![Some(1)]), None],
3396 None,
3397 );
3398 }
3399
3400 #[test]
3401 fn test_sort_binary() {
3402 test_sort_binary_arrays(
3403 vec![
3404 Some(vec![0, 0, 0]),
3405 Some(vec![0, 0, 5]),
3406 Some(vec![0, 0, 3]),
3407 Some(vec![0, 0, 7]),
3408 Some(vec![0, 0, 1]),
3409 ],
3410 Some(SortOptions {
3411 descending: false,
3412 nulls_first: false,
3413 }),
3414 None,
3415 vec![
3416 Some(vec![0, 0, 0]),
3417 Some(vec![0, 0, 1]),
3418 Some(vec![0, 0, 3]),
3419 Some(vec![0, 0, 5]),
3420 Some(vec![0, 0, 7]),
3421 ],
3422 Some(3),
3423 );
3424
3425 test_sort_binary_arrays(
3427 vec![
3428 Some(vec![0, 0, 0]),
3429 None,
3430 Some(vec![0, 0, 3]),
3431 Some(vec![0, 0, 7]),
3432 Some(vec![0, 0, 1]),
3433 None,
3434 ],
3435 Some(SortOptions {
3436 descending: false,
3437 nulls_first: false,
3438 }),
3439 None,
3440 vec![
3441 Some(vec![0, 0, 0]),
3442 Some(vec![0, 0, 1]),
3443 Some(vec![0, 0, 3]),
3444 Some(vec![0, 0, 7]),
3445 None,
3446 None,
3447 ],
3448 Some(3),
3449 );
3450
3451 test_sort_binary_arrays(
3452 vec![
3453 Some(vec![3, 5, 7]),
3454 None,
3455 Some(vec![1, 7, 1]),
3456 Some(vec![2, 7, 3]),
3457 None,
3458 Some(vec![1, 4, 3]),
3459 ],
3460 Some(SortOptions {
3461 descending: false,
3462 nulls_first: false,
3463 }),
3464 None,
3465 vec![
3466 Some(vec![1, 4, 3]),
3467 Some(vec![1, 7, 1]),
3468 Some(vec![2, 7, 3]),
3469 Some(vec![3, 5, 7]),
3470 None,
3471 None,
3472 ],
3473 Some(3),
3474 );
3475
3476 test_sort_binary_arrays(
3478 vec![
3479 Some(vec![0, 0, 0]),
3480 None,
3481 Some(vec![0, 0, 3]),
3482 Some(vec![0, 0, 7]),
3483 Some(vec![0, 0, 1]),
3484 None,
3485 ],
3486 Some(SortOptions {
3487 descending: true,
3488 nulls_first: false,
3489 }),
3490 None,
3491 vec![
3492 Some(vec![0, 0, 7]),
3493 Some(vec![0, 0, 3]),
3494 Some(vec![0, 0, 1]),
3495 Some(vec![0, 0, 0]),
3496 None,
3497 None,
3498 ],
3499 Some(3),
3500 );
3501
3502 test_sort_binary_arrays(
3504 vec![
3505 Some(vec![0, 0, 0]),
3506 None,
3507 Some(vec![0, 0, 3]),
3508 Some(vec![0, 0, 7]),
3509 Some(vec![0, 0, 1]),
3510 None,
3511 ],
3512 Some(SortOptions {
3513 descending: false,
3514 nulls_first: true,
3515 }),
3516 None,
3517 vec![
3518 None,
3519 None,
3520 Some(vec![0, 0, 0]),
3521 Some(vec![0, 0, 1]),
3522 Some(vec![0, 0, 3]),
3523 Some(vec![0, 0, 7]),
3524 ],
3525 Some(3),
3526 );
3527
3528 test_sort_binary_arrays(
3530 vec![
3531 Some(vec![0, 0, 0]),
3532 None,
3533 Some(vec![0, 0, 3]),
3534 Some(vec![0, 0, 7]),
3535 Some(vec![0, 0, 1]),
3536 None,
3537 ],
3538 Some(SortOptions {
3539 descending: false,
3540 nulls_first: true,
3541 }),
3542 Some(4),
3543 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3544 Some(3),
3545 );
3546
3547 test_sort_binary_arrays(
3549 vec![
3550 Some(b"Hello".to_vec()),
3551 None,
3552 Some(b"from".to_vec()),
3553 Some(b"Apache".to_vec()),
3554 Some(b"Arrow-rs".to_vec()),
3555 None,
3556 ],
3557 Some(SortOptions {
3558 descending: false,
3559 nulls_first: false,
3560 }),
3561 None,
3562 vec![
3563 Some(b"Apache".to_vec()),
3564 Some(b"Arrow-rs".to_vec()),
3565 Some(b"Hello".to_vec()),
3566 Some(b"from".to_vec()),
3567 None,
3568 None,
3569 ],
3570 None,
3571 );
3572
3573 test_sort_binary_arrays(
3575 vec![
3576 Some(b"Hello".to_vec()),
3577 None,
3578 Some(b"from".to_vec()),
3579 Some(b"Apache".to_vec()),
3580 Some(b"Arrow-rs".to_vec()),
3581 None,
3582 ],
3583 Some(SortOptions {
3584 descending: false,
3585 nulls_first: true,
3586 }),
3587 Some(4),
3588 vec![
3589 None,
3590 None,
3591 Some(b"Apache".to_vec()),
3592 Some(b"Arrow-rs".to_vec()),
3593 ],
3594 None,
3595 );
3596 }
3597
3598 #[test]
3599 fn test_lex_sort_single_column() {
3600 let input = vec![SortColumn {
3601 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3602 Some(17),
3603 Some(2),
3604 Some(-1),
3605 Some(0),
3606 ])) as ArrayRef,
3607 options: None,
3608 }];
3609 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3610 Some(-1),
3611 Some(0),
3612 Some(2),
3613 Some(17),
3614 ])) as ArrayRef];
3615 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3616 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3617
3618 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3620 Some(-1),
3621 Some(0),
3622 Some(2),
3623 ])) as ArrayRef];
3624 test_lex_sort_arrays(input, expected, Some(3));
3625 }
3626
3627 #[test]
3628 fn test_lex_sort_unaligned_rows() {
3629 let input = vec![
3630 SortColumn {
3631 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3632 as ArrayRef,
3633 options: None,
3634 },
3635 SortColumn {
3636 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3637 options: None,
3638 },
3639 ];
3640 assert!(
3641 lexsort(&input, None).is_err(),
3642 "lexsort should reject columns with different row counts"
3643 );
3644 }
3645
3646 #[test]
3647 fn test_lex_sort_mixed_types() {
3648 let input = vec![
3649 SortColumn {
3650 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3651 Some(0),
3652 Some(2),
3653 Some(-1),
3654 Some(0),
3655 ])) as ArrayRef,
3656 options: None,
3657 },
3658 SortColumn {
3659 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3660 Some(101),
3661 Some(8),
3662 Some(7),
3663 Some(102),
3664 ])) as ArrayRef,
3665 options: None,
3666 },
3667 SortColumn {
3668 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3669 Some(-1),
3670 Some(-2),
3671 Some(-3),
3672 Some(-4),
3673 ])) as ArrayRef,
3674 options: None,
3675 },
3676 ];
3677 let expected = vec![
3678 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3679 Some(-1),
3680 Some(0),
3681 Some(0),
3682 Some(2),
3683 ])) as ArrayRef,
3684 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3685 Some(7),
3686 Some(101),
3687 Some(102),
3688 Some(8),
3689 ])) as ArrayRef,
3690 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3691 Some(-3),
3692 Some(-1),
3693 Some(-4),
3694 Some(-2),
3695 ])) as ArrayRef,
3696 ];
3697 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3698 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3699
3700 let input = vec![
3702 SortColumn {
3703 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3704 Some(0),
3705 Some(2),
3706 Some(-1),
3707 Some(0),
3708 ])) as ArrayRef,
3709 options: Some(SortOptions {
3710 descending: true,
3711 nulls_first: true,
3712 }),
3713 },
3714 SortColumn {
3715 values: Arc::new(StringArray::from(vec![
3716 Some("foo"),
3717 Some("9"),
3718 Some("7"),
3719 Some("bar"),
3720 ])) as ArrayRef,
3721 options: Some(SortOptions {
3722 descending: true,
3723 nulls_first: true,
3724 }),
3725 },
3726 ];
3727 let expected = vec![
3728 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3729 Some(2),
3730 Some(0),
3731 Some(0),
3732 Some(-1),
3733 ])) as ArrayRef,
3734 Arc::new(StringArray::from(vec![
3735 Some("9"),
3736 Some("foo"),
3737 Some("bar"),
3738 Some("7"),
3739 ])) as ArrayRef,
3740 ];
3741 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3742 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3743
3744 let input = vec![
3746 SortColumn {
3747 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3748 None,
3749 Some(-1),
3750 Some(2),
3751 None,
3752 ])) as ArrayRef,
3753 options: Some(SortOptions {
3754 descending: true,
3755 nulls_first: true,
3756 }),
3757 },
3758 SortColumn {
3759 values: Arc::new(StringArray::from(vec![
3760 Some("foo"),
3761 Some("world"),
3762 Some("hello"),
3763 None,
3764 ])) as ArrayRef,
3765 options: Some(SortOptions {
3766 descending: true,
3767 nulls_first: true,
3768 }),
3769 },
3770 ];
3771 let expected = vec![
3772 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3773 None,
3774 None,
3775 Some(2),
3776 Some(-1),
3777 ])) as ArrayRef,
3778 Arc::new(StringArray::from(vec![
3779 None,
3780 Some("foo"),
3781 Some("hello"),
3782 Some("world"),
3783 ])) as ArrayRef,
3784 ];
3785 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3786 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3787
3788 let input = vec![
3790 SortColumn {
3791 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3792 None,
3793 Some(-1),
3794 Some(2),
3795 None,
3796 ])) as ArrayRef,
3797 options: Some(SortOptions {
3798 descending: true,
3799 nulls_first: false,
3800 }),
3801 },
3802 SortColumn {
3803 values: Arc::new(StringArray::from(vec![
3804 Some("foo"),
3805 Some("world"),
3806 Some("hello"),
3807 None,
3808 ])) as ArrayRef,
3809 options: Some(SortOptions {
3810 descending: true,
3811 nulls_first: false,
3812 }),
3813 },
3814 ];
3815 let expected = vec![
3816 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3817 Some(2),
3818 Some(-1),
3819 None,
3820 None,
3821 ])) as ArrayRef,
3822 Arc::new(StringArray::from(vec![
3823 Some("hello"),
3824 Some("world"),
3825 Some("foo"),
3826 None,
3827 ])) as ArrayRef,
3828 ];
3829 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3830 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3831
3832 let input = vec![
3834 SortColumn {
3835 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3836 None,
3837 Some(-1),
3838 Some(2),
3839 Some(-1),
3840 None,
3841 ])) as ArrayRef,
3842 options: Some(SortOptions {
3843 descending: false,
3844 nulls_first: false,
3845 }),
3846 },
3847 SortColumn {
3848 values: Arc::new(StringArray::from(vec![
3849 Some("foo"),
3850 Some("bar"),
3851 Some("world"),
3852 Some("hello"),
3853 None,
3854 ])) as ArrayRef,
3855 options: Some(SortOptions {
3856 descending: true,
3857 nulls_first: true,
3858 }),
3859 },
3860 ];
3861 let expected = vec![
3862 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3863 Some(-1),
3864 Some(-1),
3865 Some(2),
3866 None,
3867 None,
3868 ])) as ArrayRef,
3869 Arc::new(StringArray::from(vec![
3870 Some("hello"),
3871 Some("bar"),
3872 Some("world"),
3873 None,
3874 Some("foo"),
3875 ])) as ArrayRef,
3876 ];
3877 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3878 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3879
3880 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
3882
3883 let primitive_array_data = vec![
3887 Some(2),
3888 Some(3),
3889 Some(2),
3890 Some(0),
3891 None,
3892 Some(2),
3893 Some(1),
3894 Some(2),
3895 ];
3896 let list_array_data = vec![
3897 None,
3898 Some(vec![Some(4)]),
3899 Some(vec![Some(3)]),
3900 Some(vec![Some(1)]),
3901 Some(vec![Some(5)]),
3902 Some(vec![Some(0)]),
3903 Some(vec![Some(2)]),
3904 Some(vec![None]),
3905 ];
3906
3907 let expected_primitive_array_data = vec![
3908 None,
3909 Some(0),
3910 Some(1),
3911 Some(2),
3912 Some(2),
3913 Some(2),
3914 Some(2),
3915 Some(3),
3916 ];
3917 let expected_list_array_data = vec![
3918 Some(vec![Some(5)]),
3919 Some(vec![Some(1)]),
3920 Some(vec![Some(2)]),
3921 None, Some(vec![None]),
3923 Some(vec![Some(0)]),
3924 Some(vec![Some(3)]), Some(vec![Some(4)]),
3926 ];
3927 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3928 primitive_array_data.clone(),
3929 list_array_data.clone(),
3930 expected_primitive_array_data.clone(),
3931 expected_list_array_data,
3932 None,
3933 None,
3934 );
3935
3936 let primitive_array_options = SortOptions {
3938 descending: false,
3939 nulls_first: true,
3940 };
3941 let list_array_options = SortOptions {
3942 descending: false,
3943 nulls_first: false, };
3945 let expected_list_array_data = vec![
3946 Some(vec![Some(5)]),
3947 Some(vec![Some(1)]),
3948 Some(vec![Some(2)]),
3949 Some(vec![Some(0)]), Some(vec![Some(3)]),
3951 Some(vec![None]),
3952 None, Some(vec![Some(4)]),
3954 ];
3955 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3956 primitive_array_data.clone(),
3957 list_array_data.clone(),
3958 expected_primitive_array_data.clone(),
3959 expected_list_array_data,
3960 Some(primitive_array_options),
3961 Some(list_array_options),
3962 );
3963
3964 let primitive_array_options = SortOptions {
3966 descending: false,
3967 nulls_first: true,
3968 };
3969 let list_array_options = SortOptions {
3970 descending: true, nulls_first: true,
3972 };
3973 let expected_list_array_data = vec![
3974 Some(vec![Some(5)]),
3975 Some(vec![Some(1)]),
3976 Some(vec![Some(2)]),
3977 None, Some(vec![None]),
3979 Some(vec![Some(3)]),
3980 Some(vec![Some(0)]), Some(vec![Some(4)]),
3982 ];
3983 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3984 primitive_array_data.clone(),
3985 list_array_data.clone(),
3986 expected_primitive_array_data,
3987 expected_list_array_data,
3988 Some(primitive_array_options),
3989 Some(list_array_options),
3990 );
3991
3992 let list_array_data = vec![
3995 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4006 let primitive_array_data = vec![
4007 Some(0),
4008 Some(10),
4009 Some(1),
4010 Some(2),
4011 Some(3),
4012 None,
4013 Some(11),
4014 Some(4),
4015 Some(5),
4016 Some(6),
4017 ];
4018 let expected_list_array_data = vec![
4019 None,
4020 None,
4021 Some(vec![None]),
4022 Some(vec![None, Some(2)]),
4023 Some(vec![Some(0)]),
4024 Some(vec![Some(2), None]),
4025 Some(vec![Some(2), Some(0)]),
4026 Some(vec![Some(2), Some(1)]),
4027 Some(vec![Some(2), Some(1)]),
4028 Some(vec![Some(3)]),
4029 ];
4030 let expected_primitive_array_data = vec![
4031 Some(10),
4032 Some(11),
4033 Some(5),
4034 Some(3),
4035 None,
4036 Some(4),
4037 Some(2),
4038 Some(0),
4039 Some(6),
4040 Some(1),
4041 ];
4042 test_lex_sort_mixed_types_with_list::<Int32Type>(
4043 list_array_data.clone(),
4044 primitive_array_data.clone(),
4045 expected_list_array_data,
4046 expected_primitive_array_data,
4047 None,
4048 None,
4049 );
4050 }
4051
4052 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4053 primitive_array_data: Vec<Option<T::Native>>,
4054 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4055 expected_primitive_array_data: Vec<Option<T::Native>>,
4056 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4057 primitive_array_options: Option<SortOptions>,
4058 list_array_options: Option<SortOptions>,
4059 ) where
4060 T: ArrowPrimitiveType,
4061 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4062 {
4063 let input = vec![
4064 SortColumn {
4065 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4066 as ArrayRef,
4067 options: primitive_array_options,
4068 },
4069 SortColumn {
4070 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4071 list_array_data.clone(),
4072 1,
4073 )) as ArrayRef,
4074 options: list_array_options,
4075 },
4076 ];
4077
4078 let expected = vec![
4079 Arc::new(PrimitiveArray::<T>::from(
4080 expected_primitive_array_data.clone(),
4081 )) as ArrayRef,
4082 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4083 expected_list_array_data.clone(),
4084 1,
4085 )) as ArrayRef,
4086 ];
4087
4088 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4089 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4090 }
4091
4092 fn test_lex_sort_mixed_types_with_list<T>(
4093 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4094 primitive_array_data: Vec<Option<T::Native>>,
4095 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4096 expected_primitive_array_data: Vec<Option<T::Native>>,
4097 list_array_options: Option<SortOptions>,
4098 primitive_array_options: Option<SortOptions>,
4099 ) where
4100 T: ArrowPrimitiveType,
4101 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4102 {
4103 macro_rules! run_test {
4104 ($ARRAY_TYPE:ident) => {
4105 let input = vec![
4106 SortColumn {
4107 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4108 list_array_data.clone(),
4109 )) as ArrayRef,
4110 options: list_array_options.clone(),
4111 },
4112 SortColumn {
4113 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4114 as ArrayRef,
4115 options: primitive_array_options.clone(),
4116 },
4117 ];
4118
4119 let expected = vec![
4120 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4121 expected_list_array_data.clone(),
4122 )) as ArrayRef,
4123 Arc::new(PrimitiveArray::<T>::from(
4124 expected_primitive_array_data.clone(),
4125 )) as ArrayRef,
4126 ];
4127
4128 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4129 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4130 };
4131 }
4132 run_test!(ListArray);
4133 run_test!(LargeListArray);
4134 }
4135
4136 #[test]
4137 fn test_partial_sort() {
4138 let mut before: Vec<&str> = vec![
4139 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4140 ];
4141 let mut d = before.clone();
4142 d.sort_unstable();
4143
4144 for last in 0..before.len() {
4145 partial_sort(&mut before, last, |a, b| a.cmp(b));
4146 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4147 }
4148 }
4149
4150 #[test]
4151 fn test_partial_rand_sort() {
4152 let size = 1000u32;
4153 let mut rng = StdRng::seed_from_u64(42);
4154 let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect();
4155 let mut d = before.clone();
4156 let last = (rng.next_u32() % size) as usize;
4157 d.sort_unstable();
4158
4159 partial_sort(&mut before, last, |a, b| a.cmp(b));
4160 assert_eq!(&d[0..last], &before[0..last]);
4161 }
4162
4163 #[test]
4164 fn test_sort_int8_dicts() {
4165 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4166 let values = Int8Array::from(vec![1, 3, 5]);
4167 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4168 keys,
4169 values,
4170 None,
4171 None,
4172 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4173 );
4174
4175 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4176 let values = Int8Array::from(vec![1, 3, 5]);
4177 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4178 keys,
4179 values,
4180 Some(SortOptions {
4181 descending: true,
4182 nulls_first: false,
4183 }),
4184 None,
4185 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4186 );
4187
4188 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4189 let values = Int8Array::from(vec![1, 3, 5]);
4190 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4191 keys,
4192 values,
4193 Some(SortOptions {
4194 descending: false,
4195 nulls_first: false,
4196 }),
4197 None,
4198 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4199 );
4200
4201 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4202 let values = Int8Array::from(vec![1, 3, 5]);
4203 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4204 keys,
4205 values,
4206 Some(SortOptions {
4207 descending: true,
4208 nulls_first: true,
4209 }),
4210 Some(3),
4211 vec![None, None, Some(5)],
4212 );
4213
4214 let keys = Int8Array::from(vec![
4216 Some(1_i8),
4217 None,
4218 Some(3),
4219 None,
4220 Some(2),
4221 Some(3),
4222 Some(0),
4223 ]);
4224 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4225 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4226 keys,
4227 values,
4228 None,
4229 None,
4230 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4231 );
4232
4233 let keys = Int8Array::from(vec![
4234 Some(1_i8),
4235 None,
4236 Some(3),
4237 None,
4238 Some(2),
4239 Some(3),
4240 Some(0),
4241 ]);
4242 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4243 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4244 keys,
4245 values,
4246 Some(SortOptions {
4247 descending: false,
4248 nulls_first: false,
4249 }),
4250 None,
4251 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4252 );
4253
4254 let keys = Int8Array::from(vec![
4255 Some(1_i8),
4256 None,
4257 Some(3),
4258 None,
4259 Some(2),
4260 Some(3),
4261 Some(0),
4262 ]);
4263 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4264 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4265 keys,
4266 values,
4267 Some(SortOptions {
4268 descending: true,
4269 nulls_first: false,
4270 }),
4271 None,
4272 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4273 );
4274
4275 let keys = Int8Array::from(vec![
4276 Some(1_i8),
4277 None,
4278 Some(3),
4279 None,
4280 Some(2),
4281 Some(3),
4282 Some(0),
4283 ]);
4284 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4285 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4286 keys,
4287 values,
4288 Some(SortOptions {
4289 descending: true,
4290 nulls_first: true,
4291 }),
4292 None,
4293 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4294 );
4295 }
4296
4297 #[test]
4298 fn test_sort_f32_dicts() {
4299 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4300 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4301 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4302 keys,
4303 values,
4304 None,
4305 None,
4306 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4307 );
4308
4309 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4310 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4311 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4312 keys,
4313 values,
4314 Some(SortOptions {
4315 descending: true,
4316 nulls_first: false,
4317 }),
4318 None,
4319 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4320 );
4321
4322 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4323 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4324 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4325 keys,
4326 values,
4327 Some(SortOptions {
4328 descending: false,
4329 nulls_first: false,
4330 }),
4331 None,
4332 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4333 );
4334
4335 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4336 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4337 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4338 keys,
4339 values,
4340 Some(SortOptions {
4341 descending: true,
4342 nulls_first: true,
4343 }),
4344 Some(3),
4345 vec![None, None, Some(5.1)],
4346 );
4347
4348 let keys = Int8Array::from(vec![
4350 Some(1_i8),
4351 None,
4352 Some(3),
4353 None,
4354 Some(2),
4355 Some(3),
4356 Some(0),
4357 ]);
4358 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4359 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4360 keys,
4361 values,
4362 None,
4363 None,
4364 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4365 );
4366
4367 let keys = Int8Array::from(vec![
4368 Some(1_i8),
4369 None,
4370 Some(3),
4371 None,
4372 Some(2),
4373 Some(3),
4374 Some(0),
4375 ]);
4376 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4377 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4378 keys,
4379 values,
4380 Some(SortOptions {
4381 descending: false,
4382 nulls_first: false,
4383 }),
4384 None,
4385 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4386 );
4387
4388 let keys = Int8Array::from(vec![
4389 Some(1_i8),
4390 None,
4391 Some(3),
4392 None,
4393 Some(2),
4394 Some(3),
4395 Some(0),
4396 ]);
4397 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4398 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4399 keys,
4400 values,
4401 Some(SortOptions {
4402 descending: true,
4403 nulls_first: false,
4404 }),
4405 None,
4406 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4407 );
4408
4409 let keys = Int8Array::from(vec![
4410 Some(1_i8),
4411 None,
4412 Some(3),
4413 None,
4414 Some(2),
4415 Some(3),
4416 Some(0),
4417 ]);
4418 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4419 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4420 keys,
4421 values,
4422 Some(SortOptions {
4423 descending: true,
4424 nulls_first: true,
4425 }),
4426 None,
4427 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4428 );
4429 }
4430
4431 #[test]
4432 fn test_lexicographic_comparator_null_dict_values() {
4433 let values = Int32Array::new(
4434 vec![1, 2, 3, 4].into(),
4435 Some(NullBuffer::from(vec![true, false, false, true])),
4436 );
4437 let keys = Int32Array::new(
4438 vec![0, 1, 53, 3].into(),
4439 Some(NullBuffer::from(vec![true, true, false, true])),
4440 );
4441 let dict = DictionaryArray::new(keys, Arc::new(values));
4443
4444 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4445 values: Arc::new(dict),
4446 options: None,
4447 }])
4448 .unwrap();
4449 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4451 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4453 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4455 }
4456
4457 #[test]
4458 fn sort_list_equal() {
4459 let a = {
4460 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4461 for value in [[1, 5], [0, 3], [1, 3]] {
4462 builder.values().append_slice(&value);
4463 builder.append(true);
4464 }
4465 builder.finish()
4466 };
4467
4468 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4469 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4470
4471 let a = {
4472 let mut builder = ListBuilder::new(Int64Builder::new());
4473 for value in [[1, 5], [0, 3], [1, 3]] {
4474 builder.values().append_slice(&value);
4475 builder.append(true);
4476 }
4477 builder.finish()
4478 };
4479
4480 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4481 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4482 }
4483
4484 #[test]
4485 fn sort_struct_fallback_to_lexsort() {
4486 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4487 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4488
4489 let struct_array = StructArray::from(vec![
4490 (
4491 Arc::new(Field::new("b", DataType::Float32, false)),
4492 float.clone() as ArrayRef,
4493 ),
4494 (
4495 Arc::new(Field::new("c", DataType::Int32, false)),
4496 int.clone() as ArrayRef,
4497 ),
4498 ]);
4499
4500 assert!(!can_sort_to_indices(struct_array.data_type()));
4501 assert!(sort_to_indices(&struct_array, None, None)
4502 .err()
4503 .unwrap()
4504 .to_string()
4505 .contains("Sort not supported for data type"));
4506
4507 let sort_columns = vec![SortColumn {
4508 values: Arc::new(struct_array.clone()) as ArrayRef,
4509 options: None,
4510 }];
4511 let sorted = lexsort(&sort_columns, None).unwrap();
4512
4513 let expected_struct_array = Arc::new(StructArray::from(vec![
4514 (
4515 Arc::new(Field::new("b", DataType::Float32, false)),
4516 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4517 ),
4518 (
4519 Arc::new(Field::new("c", DataType::Int32, false)),
4520 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4521 ),
4522 ])) as ArrayRef;
4523
4524 assert_eq!(&sorted[0], &expected_struct_array);
4525 }
4526}