1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err};
26use crate::{DataFusionError, Result, ScalarValue};
27use arrow::array::{
28 cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29 OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34use sqlparser::ast::Ident;
35use sqlparser::dialect::GenericDialect;
36use sqlparser::parser::Parser;
37use std::borrow::{Borrow, Cow};
38use std::cmp::{min, Ordering};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45pub fn project_schema(
75 schema: &SchemaRef,
76 projection: Option<&Vec<usize>>,
77) -> Result<SchemaRef> {
78 let schema = match projection {
79 Some(columns) => Arc::new(schema.project(columns)?),
80 None => Arc::clone(schema),
81 };
82 Ok(schema)
83}
84
85pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
87 columns
88 .iter()
89 .map(|arr| ScalarValue::try_from_array(arr, idx))
90 .collect()
91}
92
93pub fn compare_rows(
95 x: &[ScalarValue],
96 y: &[ScalarValue],
97 sort_options: &[SortOptions],
98) -> Result<Ordering> {
99 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
100 for ((lhs, rhs), sort_options) in zip_it {
102 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
104 (true, false, false) | (false, true, true) => Ordering::Greater,
105 (true, false, true) | (false, true, false) => Ordering::Less,
106 (false, false, _) => if sort_options.descending {
107 rhs.partial_cmp(lhs)
108 } else {
109 lhs.partial_cmp(rhs)
110 }
111 .ok_or_else(|| {
112 _internal_datafusion_err!("Column array shouldn't be empty")
113 })?,
114 (true, true, _) => continue,
115 };
116 if result != Ordering::Equal {
117 return Ok(result);
118 }
119 }
120 Ok(Ordering::Equal)
121}
122
123pub fn bisect<const SIDE: bool>(
128 item_columns: &[ArrayRef],
129 target: &[ScalarValue],
130 sort_options: &[SortOptions],
131) -> Result<usize> {
132 let low: usize = 0;
133 let high: usize = item_columns
134 .first()
135 .ok_or_else(|| {
136 DataFusionError::Internal("Column array shouldn't be empty".to_string())
137 })?
138 .len();
139 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
140 let cmp = compare_rows(current, target, sort_options)?;
141 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
142 };
143 find_bisect_point(item_columns, target, compare_fn, low, high)
144}
145
146pub fn find_bisect_point<F>(
153 item_columns: &[ArrayRef],
154 target: &[ScalarValue],
155 compare_fn: F,
156 mut low: usize,
157 mut high: usize,
158) -> Result<usize>
159where
160 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
161{
162 while low < high {
163 let mid = ((high - low) / 2) + low;
164 let val = get_row_at_idx(item_columns, mid)?;
165 if compare_fn(&val, target)? {
166 low = mid + 1;
167 } else {
168 high = mid;
169 }
170 }
171 Ok(low)
172}
173
174pub fn linear_search<const SIDE: bool>(
179 item_columns: &[ArrayRef],
180 target: &[ScalarValue],
181 sort_options: &[SortOptions],
182) -> Result<usize> {
183 let low: usize = 0;
184 let high: usize = item_columns
185 .first()
186 .ok_or_else(|| {
187 DataFusionError::Internal("Column array shouldn't be empty".to_string())
188 })?
189 .len();
190 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
191 let cmp = compare_rows(current, target, sort_options)?;
192 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
193 };
194 search_in_slice(item_columns, target, compare_fn, low, high)
195}
196
197pub fn search_in_slice<F>(
202 item_columns: &[ArrayRef],
203 target: &[ScalarValue],
204 compare_fn: F,
205 mut low: usize,
206 high: usize,
207) -> Result<usize>
208where
209 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
210{
211 while low < high {
212 let val = get_row_at_idx(item_columns, low)?;
213 if !compare_fn(&val, target)? {
214 break;
215 }
216 low += 1;
217 }
218 Ok(low)
219}
220
221pub fn evaluate_partition_ranges(
226 num_rows: usize,
227 partition_columns: &[SortColumn],
228) -> Result<Vec<Range<usize>>> {
229 Ok(if partition_columns.is_empty() {
230 vec![Range {
231 start: 0,
232 end: num_rows,
233 }]
234 } else {
235 let cols: Vec<_> = partition_columns
236 .iter()
237 .map(|x| Arc::clone(&x.values))
238 .collect();
239 partition(&cols)?.ranges()
240 })
241}
242
243pub fn quote_identifier(s: &str) -> Cow<str> {
248 if needs_quotes(s) {
249 Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
250 } else {
251 Cow::Borrowed(s)
252 }
253}
254
255fn needs_quotes(s: &str) -> bool {
257 let mut chars = s.chars();
258
259 if let Some(first_char) = chars.next() {
261 if !(first_char.is_ascii_lowercase() || first_char == '_') {
262 return true;
263 }
264 }
265
266 !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
267}
268
269pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
270 let dialect = GenericDialect;
271 let mut parser = Parser::new(&dialect).try_with_sql(s)?;
272 let idents = parser.parse_multipart_identifier()?;
273 Ok(idents)
274}
275
276pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
277 parse_identifiers(s)
278 .unwrap_or_default()
279 .into_iter()
280 .map(|id| match id.quote_style {
281 Some(_) => id.value,
282 None if ignore_case => id.value,
283 _ => id.value.to_ascii_lowercase(),
284 })
285 .collect::<Vec<_>>()
286}
287
288pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
290 items: &[T],
291 indices: impl IntoIterator<Item = I>,
292) -> Result<Vec<T>> {
293 indices
294 .into_iter()
295 .map(|idx| items.get(*idx.borrow()).cloned())
296 .collect::<Option<Vec<T>>>()
297 .ok_or_else(|| {
298 DataFusionError::Execution(
299 "Expects indices to be in the range of searched vector".to_string(),
300 )
301 })
302}
303
304pub fn longest_consecutive_prefix<T: Borrow<usize>>(
310 sequence: impl IntoIterator<Item = T>,
311) -> usize {
312 let mut count = 0;
313 for item in sequence {
314 if !count.eq(item.borrow()) {
315 break;
316 }
317 count += 1;
318 }
319 count
320}
321
322#[derive(Debug, Clone)]
342pub struct SingleRowListArrayBuilder {
343 arr: ArrayRef,
345 nullable: bool,
347 field_name: Option<String>,
350}
351
352impl SingleRowListArrayBuilder {
353 pub fn new(arr: ArrayRef) -> Self {
355 Self {
356 arr,
357 nullable: true,
358 field_name: None,
359 }
360 }
361
362 pub fn with_nullable(mut self, nullable: bool) -> Self {
364 self.nullable = nullable;
365 self
366 }
367
368 pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
370 self.field_name = field_name;
371 self
372 }
373
374 pub fn with_field(self, field: &Field) -> Self {
376 self.with_field_name(Some(field.name().to_owned()))
377 .with_nullable(field.is_nullable())
378 }
379
380 pub fn build_list_array(self) -> ListArray {
382 let (field, arr) = self.into_field_and_arr();
383 let offsets = OffsetBuffer::from_lengths([arr.len()]);
384 ListArray::new(field, offsets, arr, None)
385 }
386
387 pub fn build_list_scalar(self) -> ScalarValue {
389 ScalarValue::List(Arc::new(self.build_list_array()))
390 }
391
392 pub fn build_large_list_array(self) -> LargeListArray {
394 let (field, arr) = self.into_field_and_arr();
395 let offsets = OffsetBuffer::from_lengths([arr.len()]);
396 LargeListArray::new(field, offsets, arr, None)
397 }
398
399 pub fn build_large_list_scalar(self) -> ScalarValue {
401 ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
402 }
403
404 pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
406 let (field, arr) = self.into_field_and_arr();
407 FixedSizeListArray::new(field, list_size as i32, arr, None)
408 }
409
410 pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
412 ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
413 }
414
415 fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
417 let Self {
418 arr,
419 nullable,
420 field_name,
421 } = self;
422 let data_type = arr.data_type().to_owned();
423 let field = match field_name {
424 Some(name) => Field::new(name, data_type, nullable),
425 None => Field::new_list_field(data_type, nullable),
426 };
427 (Arc::new(field), arr)
428 }
429}
430
431#[deprecated(
435 since = "44.0.0",
436 note = "please use `SingleRowListArrayBuilder` instead"
437)]
438pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray {
439 SingleRowListArrayBuilder::new(arr)
440 .with_nullable(true)
441 .build_list_array()
442}
443
444#[deprecated(
447 since = "44.0.0",
448 note = "please use `SingleRowListArrayBuilder` instead"
449)]
450pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
451 SingleRowListArrayBuilder::new(arr)
452 .with_nullable(nullable)
453 .build_list_array()
454}
455
456#[deprecated(
457 since = "44.0.0",
458 note = "please use `SingleRowListArrayBuilder` instead"
459)]
460pub fn array_into_list_array_with_field_name(
461 arr: ArrayRef,
462 nullable: bool,
463 field_name: &str,
464) -> ListArray {
465 SingleRowListArrayBuilder::new(arr)
466 .with_nullable(nullable)
467 .with_field_name(Some(field_name.to_string()))
468 .build_list_array()
469}
470
471#[deprecated(
474 since = "44.0.0",
475 note = "please use `SingleRowListArrayBuilder` instead"
476)]
477pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
478 SingleRowListArrayBuilder::new(arr).build_large_list_array()
479}
480
481#[deprecated(
482 since = "44.0.0",
483 note = "please use `SingleRowListArrayBuilder` instead"
484)]
485pub fn array_into_large_list_array_with_field_name(
486 arr: ArrayRef,
487 field_name: &str,
488) -> LargeListArray {
489 SingleRowListArrayBuilder::new(arr)
490 .with_field_name(Some(field_name.to_string()))
491 .build_large_list_array()
492}
493
494#[deprecated(
495 since = "44.0.0",
496 note = "please use `SingleRowListArrayBuilder` instead"
497)]
498pub fn array_into_fixed_size_list_array(
499 arr: ArrayRef,
500 list_size: usize,
501) -> FixedSizeListArray {
502 SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size)
503}
504
505#[deprecated(
506 since = "44.0.0",
507 note = "please use `SingleRowListArrayBuilder` instead"
508)]
509pub fn array_into_fixed_size_list_array_with_field_name(
510 arr: ArrayRef,
511 list_size: usize,
512 field_name: &str,
513) -> FixedSizeListArray {
514 SingleRowListArrayBuilder::new(arr)
515 .with_field_name(Some(field_name.to_string()))
516 .build_fixed_size_list_array(list_size)
517}
518
519pub fn arrays_into_list_array(
541 arr: impl IntoIterator<Item = ArrayRef>,
542) -> Result<ListArray> {
543 let arr = arr.into_iter().collect::<Vec<_>>();
544 if arr.is_empty() {
545 return _internal_err!("Cannot wrap empty array into list array");
546 }
547
548 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
549 let data_type = arr[0].data_type().to_owned();
551 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
552 Ok(ListArray::new(
553 Arc::new(Field::new_list_field(data_type, true)),
554 OffsetBuffer::from_lengths(lens),
555 arrow::compute::concat(values.as_slice())?,
556 None,
557 ))
558}
559
560pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
562 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
563}
564
565pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
567 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
568}
569
570pub fn base_type(data_type: &DataType) -> DataType {
585 match data_type {
586 DataType::List(field)
587 | DataType::LargeList(field)
588 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
589 _ => data_type.to_owned(),
590 }
591}
592
593#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
595pub enum ListCoercion {
596 FixedSizedListToList,
598}
599
600pub fn coerced_type_with_base_type_only(
613 data_type: &DataType,
614 base_type: &DataType,
615 array_coercion: Option<&ListCoercion>,
616) -> DataType {
617 match (data_type, array_coercion) {
618 (DataType::List(field), _)
619 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
620 {
621 let field_type = coerced_type_with_base_type_only(
622 field.data_type(),
623 base_type,
624 array_coercion,
625 );
626
627 DataType::List(Arc::new(Field::new(
628 field.name(),
629 field_type,
630 field.is_nullable(),
631 )))
632 }
633 (DataType::FixedSizeList(field, len), _) => {
634 let field_type = coerced_type_with_base_type_only(
635 field.data_type(),
636 base_type,
637 array_coercion,
638 );
639
640 DataType::FixedSizeList(
641 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
642 *len,
643 )
644 }
645 (DataType::LargeList(field), _) => {
646 let field_type = coerced_type_with_base_type_only(
647 field.data_type(),
648 base_type,
649 array_coercion,
650 );
651
652 DataType::LargeList(Arc::new(Field::new(
653 field.name(),
654 field_type,
655 field.is_nullable(),
656 )))
657 }
658
659 _ => base_type.clone(),
660 }
661}
662
663pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
665 match data_type {
666 DataType::List(field) | DataType::FixedSizeList(field, _) => {
667 let field_type = coerced_fixed_size_list_to_list(field.data_type());
668
669 DataType::List(Arc::new(Field::new(
670 field.name(),
671 field_type,
672 field.is_nullable(),
673 )))
674 }
675 DataType::LargeList(field) => {
676 let field_type = coerced_fixed_size_list_to_list(field.data_type());
677
678 DataType::LargeList(Arc::new(Field::new(
679 field.name(),
680 field_type,
681 field.is_nullable(),
682 )))
683 }
684
685 _ => data_type.clone(),
686 }
687}
688
689pub fn list_ndims(data_type: &DataType) -> u64 {
691 match data_type {
692 DataType::List(field)
693 | DataType::LargeList(field)
694 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
695 _ => 0,
696 }
697}
698
699pub mod datafusion_strsim {
701 use std::cmp::min;
704 use std::str::Chars;
705
706 struct StringWrapper<'a>(&'a str);
707
708 impl<'b> IntoIterator for &StringWrapper<'b> {
709 type Item = char;
710 type IntoIter = Chars<'b>;
711
712 fn into_iter(self) -> Self::IntoIter {
713 self.0.chars()
714 }
715 }
716
717 fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
720 a: &'a Iter1,
721 b: &'b Iter2,
722 ) -> usize
723 where
724 &'a Iter1: IntoIterator<Item = Elem1>,
725 &'b Iter2: IntoIterator<Item = Elem2>,
726 Elem1: PartialEq<Elem2>,
727 {
728 let b_len = b.into_iter().count();
729
730 if a.into_iter().next().is_none() {
731 return b_len;
732 }
733
734 let mut cache: Vec<usize> = (1..b_len + 1).collect();
735
736 let mut result = 0;
737
738 for (i, a_elem) in a.into_iter().enumerate() {
739 result = i + 1;
740 let mut distance_b = i;
741
742 for (j, b_elem) in b.into_iter().enumerate() {
743 let cost = if a_elem == b_elem { 0usize } else { 1usize };
744 let distance_a = distance_b + cost;
745 distance_b = cache[j];
746 result = min(result + 1, min(distance_a, distance_b + 1));
747 cache[j] = result;
748 }
749 }
750
751 result
752 }
753
754 pub fn levenshtein(a: &str, b: &str) -> usize {
763 generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
764 }
765
766 pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
780 if a.is_empty() && b.is_empty() {
781 return 1.0;
782 }
783 1.0 - (levenshtein(a, b) as f64)
784 / (a.chars().count().max(b.chars().count()) as f64)
785 }
786}
787
788pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
791 first: impl IntoIterator<Item = T>,
792 second: impl IntoIterator<Item = S>,
793) -> Vec<usize> {
794 let mut result: Vec<_> = first
795 .into_iter()
796 .map(|e| *e.borrow())
797 .chain(second.into_iter().map(|e| *e.borrow()))
798 .collect::<HashSet<_>>()
799 .into_iter()
800 .collect();
801 result.sort();
802 result
803}
804
805pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
808 first: impl IntoIterator<Item = T>,
809 second: impl IntoIterator<Item = S>,
810) -> Vec<usize> {
811 let set: HashSet<_> = second.into_iter().map(|e| *e.borrow()).collect();
812 first
813 .into_iter()
814 .map(|e| *e.borrow())
815 .filter(|e| !set.contains(e))
816 .collect()
817}
818
819#[deprecated(since = "45.0.0", note = "Use std::Iterator::is_sorted instead")]
821pub fn is_sorted<T: Borrow<usize>>(sequence: impl IntoIterator<Item = T>) -> bool {
822 let mut previous = 0;
824 for item in sequence.into_iter() {
825 let current = *item.borrow();
826 if current < previous {
827 return false;
828 }
829 previous = current;
830 }
831 true
832}
833
834pub fn find_indices<T: PartialEq, S: Borrow<T>>(
837 items: &[T],
838 targets: impl IntoIterator<Item = S>,
839) -> Result<Vec<usize>> {
840 targets
841 .into_iter()
842 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
843 .collect::<Option<_>>()
844 .ok_or_else(|| DataFusionError::Execution("Target not found".to_string()))
845}
846
847pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
849 match original.as_slice() {
850 [] => vec![],
851 [first, ..] => {
852 let mut result = (0..first.len()).map(|_| vec![]).collect::<Vec<_>>();
853 for row in original {
854 for (item, transposed_row) in row.into_iter().zip(&mut result) {
855 transposed_row.push(item);
856 }
857 }
858 result
859 }
860 }
861}
862
863pub fn combine_limit(
907 parent_skip: usize,
908 parent_fetch: Option<usize>,
909 child_skip: usize,
910 child_fetch: Option<usize>,
911) -> (usize, Option<usize>) {
912 let combined_skip = child_skip.saturating_add(parent_skip);
913
914 let combined_fetch = match (parent_fetch, child_fetch) {
915 (Some(parent_fetch), Some(child_fetch)) => {
916 Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
917 }
918 (Some(parent_fetch), None) => Some(parent_fetch),
919 (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
920 (None, None) => None,
921 };
922
923 (combined_skip, combined_fetch)
924}
925
926pub fn get_available_parallelism() -> usize {
931 available_parallelism()
932 .unwrap_or(NonZero::new(1).expect("literal value `1` shouldn't be zero"))
933 .get()
934}
935
936pub fn take_function_args<const N: usize, T>(
960 function_name: &str,
961 args: impl IntoIterator<Item = T>,
962) -> Result<[T; N]> {
963 let args = args.into_iter().collect::<Vec<_>>();
964 args.try_into().map_err(|v: Vec<T>| {
965 _exec_datafusion_err!(
966 "{} function requires {} {}, got {}",
967 function_name,
968 N,
969 if N == 1 { "argument" } else { "arguments" },
970 v.len()
971 )
972 })
973}
974
975#[cfg(test)]
976mod tests {
977 use super::*;
978 use crate::ScalarValue::Null;
979 use arrow::array::Float64Array;
980 use sqlparser::tokenizer::Span;
981
982 #[test]
983 fn test_bisect_linear_left_and_right() -> Result<()> {
984 let arrays: Vec<ArrayRef> = vec![
985 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
986 Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
987 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
988 Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
989 ];
990 let search_tuple: Vec<ScalarValue> = vec![
991 ScalarValue::Float64(Some(8.0)),
992 ScalarValue::Float64(Some(3.0)),
993 ScalarValue::Float64(Some(8.0)),
994 ScalarValue::Float64(Some(8.0)),
995 ];
996 let ords = [
997 SortOptions {
998 descending: false,
999 nulls_first: true,
1000 },
1001 SortOptions {
1002 descending: false,
1003 nulls_first: true,
1004 },
1005 SortOptions {
1006 descending: false,
1007 nulls_first: true,
1008 },
1009 SortOptions {
1010 descending: true,
1011 nulls_first: true,
1012 },
1013 ];
1014 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1015 assert_eq!(res, 2);
1016 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1017 assert_eq!(res, 3);
1018 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1019 assert_eq!(res, 2);
1020 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1021 assert_eq!(res, 3);
1022 Ok(())
1023 }
1024
1025 #[test]
1026 fn vector_ord() {
1027 assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
1028 assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
1029 assert!(
1030 vec![
1031 ScalarValue::Int32(Some(2)),
1032 Null,
1033 ScalarValue::Int32(Some(0)),
1034 ] < vec![
1035 ScalarValue::Int32(Some(2)),
1036 Null,
1037 ScalarValue::Int32(Some(1)),
1038 ]
1039 );
1040 assert!(
1041 vec![
1042 ScalarValue::Int32(Some(2)),
1043 ScalarValue::Int32(None),
1044 ScalarValue::Int32(Some(0)),
1045 ] < vec![
1046 ScalarValue::Int32(Some(2)),
1047 ScalarValue::Int32(None),
1048 ScalarValue::Int32(Some(1)),
1049 ]
1050 );
1051 }
1052
1053 #[test]
1054 fn ord_same_type() {
1055 assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
1056 }
1057
1058 #[test]
1059 fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
1060 let arrays: Vec<ArrayRef> =
1062 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1063 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1064 let ords = [SortOptions {
1065 descending: true,
1066 nulls_first: true,
1067 }];
1068 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1069 assert_eq!(res, 0);
1070 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1071 assert_eq!(res, 0);
1072
1073 let arrays: Vec<ArrayRef> =
1075 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1076 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1077 let ords = [SortOptions {
1078 descending: true,
1079 nulls_first: true,
1080 }];
1081 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1082 assert_eq!(res, 1);
1083 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1084 assert_eq!(res, 1);
1085
1086 let arrays: Vec<ArrayRef> =
1088 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1089 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1090 let ords = [SortOptions {
1091 descending: false,
1092 nulls_first: true,
1093 }];
1094 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1095 assert_eq!(res, 1);
1096 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1097 assert_eq!(res, 1);
1098
1099 let arrays: Vec<ArrayRef> =
1101 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1102 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1103 let ords = [SortOptions {
1104 descending: false,
1105 nulls_first: true,
1106 }];
1107 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1108 assert_eq!(res, 2);
1109 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1110 assert_eq!(res, 2);
1111
1112 let arrays: Vec<ArrayRef> = vec![
1113 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
1114 Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
1115 ];
1116 let search_tuple: Vec<ScalarValue> = vec![
1117 ScalarValue::Float64(Some(8.0)),
1118 ScalarValue::Float64(Some(8.0)),
1119 ];
1120 let ords = [
1121 SortOptions {
1122 descending: false,
1123 nulls_first: true,
1124 },
1125 SortOptions {
1126 descending: true,
1127 nulls_first: true,
1128 },
1129 ];
1130 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1131 assert_eq!(res, 3);
1132 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1133 assert_eq!(res, 3);
1134
1135 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1136 assert_eq!(res, 2);
1137 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1138 assert_eq!(res, 2);
1139 Ok(())
1140 }
1141
1142 #[test]
1143 fn test_evaluate_partition_ranges() -> Result<()> {
1144 let arrays: Vec<ArrayRef> = vec![
1145 Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
1146 Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
1147 ];
1148 let n_row = arrays[0].len();
1149 let options: Vec<SortOptions> = vec![
1150 SortOptions {
1151 descending: false,
1152 nulls_first: false,
1153 },
1154 SortOptions {
1155 descending: true,
1156 nulls_first: false,
1157 },
1158 ];
1159 let sort_columns = arrays
1160 .into_iter()
1161 .zip(options)
1162 .map(|(values, options)| SortColumn {
1163 values,
1164 options: Some(options),
1165 })
1166 .collect::<Vec<_>>();
1167 let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
1168 assert_eq!(ranges.len(), 4);
1169 assert_eq!(ranges[0], Range { start: 0, end: 2 });
1170 assert_eq!(ranges[1], Range { start: 2, end: 3 });
1171 assert_eq!(ranges[2], Range { start: 3, end: 4 });
1172 assert_eq!(ranges[3], Range { start: 4, end: 6 });
1173 Ok(())
1174 }
1175
1176 #[test]
1177 fn test_quote_identifier() -> Result<()> {
1178 let cases = vec![
1179 ("foo", r#"foo"#),
1180 ("_foo", r#"_foo"#),
1181 ("foo_bar", r#"foo_bar"#),
1182 ("foo-bar", r#""foo-bar""#),
1183 ("foo.bar", r#""foo.bar""#),
1185 ("Foo", r#""Foo""#),
1186 ("Foo.Bar", r#""Foo.Bar""#),
1187 ("test1", r#"test1"#),
1189 ("1test", r#""1test""#),
1190 ];
1191
1192 for (identifier, quoted_identifier) in cases {
1193 println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");
1194
1195 assert_eq!(quote_identifier(identifier), quoted_identifier);
1196
1197 let quote_style = if quoted_identifier.starts_with('"') {
1200 Some('"')
1201 } else {
1202 None
1203 };
1204
1205 let expected_parsed = vec![Ident {
1206 value: identifier.to_string(),
1207 quote_style,
1208 span: Span::empty(),
1209 }];
1210
1211 assert_eq!(
1212 parse_identifiers(quoted_identifier).unwrap(),
1213 expected_parsed
1214 );
1215 }
1216
1217 Ok(())
1218 }
1219
1220 #[test]
1221 fn test_get_at_indices() -> Result<()> {
1222 let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
1223 assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
1224 assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
1225 assert!(get_at_indices(&in_vec, [7]).is_err());
1227 Ok(())
1228 }
1229
1230 #[test]
1231 fn test_longest_consecutive_prefix() {
1232 assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
1233 assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
1234 assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
1235 assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
1236 }
1237
1238 #[test]
1239 fn test_merge_and_order_indices() {
1240 assert_eq!(
1241 merge_and_order_indices([0, 3, 4], [1, 3, 5]),
1242 vec![0, 1, 3, 4, 5]
1243 );
1244 assert_eq!(
1246 merge_and_order_indices([3, 0, 4], [5, 1, 3]),
1247 vec![0, 1, 3, 4, 5]
1248 );
1249 }
1250
1251 #[test]
1252 fn test_set_difference() {
1253 assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
1254 assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
1255 assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
1257 assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
1258 assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
1259 }
1260
1261 #[test]
1262 #[expect(deprecated)]
1263 fn test_is_sorted() {
1264 assert!(is_sorted::<usize>([]));
1265 assert!(is_sorted([0]));
1266 assert!(is_sorted([0, 3, 4]));
1267 assert!(is_sorted([0, 1, 2]));
1268 assert!(is_sorted([0, 1, 4]));
1269 assert!(is_sorted([0usize; 0]));
1270 assert!(is_sorted([1, 2]));
1271 assert!(!is_sorted([3, 2]));
1272 }
1273
1274 #[test]
1275 fn test_find_indices() -> Result<()> {
1276 assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
1277 assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
1278 assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
1279 assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
1280 assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
1281 Ok(())
1282 }
1283
1284 #[test]
1285 fn test_transpose() -> Result<()> {
1286 let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
1287 let transposed = transpose(in_data);
1288 let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
1289 assert_eq!(expected, transposed);
1290 Ok(())
1291 }
1292}