1use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use arrow::datatypes::{DataType, Schema, SchemaRef};
25
26#[derive(Clone, PartialEq, Eq, Default, Copy)]
29pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
30 Exact(T),
32 Inexact(T),
34 #[default]
36 Absent,
37}
38
39impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
40 pub fn get_value(&self) -> Option<&T> {
43 match self {
44 Precision::Exact(value) | Precision::Inexact(value) => Some(value),
45 Precision::Absent => None,
46 }
47 }
48
49 pub fn map<U, F>(self, f: F) -> Precision<U>
52 where
53 F: Fn(T) -> U,
54 U: Debug + Clone + PartialEq + Eq + PartialOrd,
55 {
56 match self {
57 Precision::Exact(val) => Precision::Exact(f(val)),
58 Precision::Inexact(val) => Precision::Inexact(f(val)),
59 _ => Precision::<U>::Absent,
60 }
61 }
62
63 pub fn is_exact(&self) -> Option<bool> {
66 match self {
67 Precision::Exact(_) => Some(true),
68 Precision::Inexact(_) => Some(false),
69 _ => None,
70 }
71 }
72
73 pub fn max(&self, other: &Precision<T>) -> Precision<T> {
77 match (self, other) {
78 (Precision::Exact(a), Precision::Exact(b)) => {
79 Precision::Exact(if a >= b { a.clone() } else { b.clone() })
80 }
81 (Precision::Inexact(a), Precision::Exact(b))
82 | (Precision::Exact(a), Precision::Inexact(b))
83 | (Precision::Inexact(a), Precision::Inexact(b)) => {
84 Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
85 }
86 (_, _) => Precision::Absent,
87 }
88 }
89
90 pub fn min(&self, other: &Precision<T>) -> Precision<T> {
94 match (self, other) {
95 (Precision::Exact(a), Precision::Exact(b)) => {
96 Precision::Exact(if a >= b { b.clone() } else { a.clone() })
97 }
98 (Precision::Inexact(a), Precision::Exact(b))
99 | (Precision::Exact(a), Precision::Inexact(b))
100 | (Precision::Inexact(a), Precision::Inexact(b)) => {
101 Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
102 }
103 (_, _) => Precision::Absent,
104 }
105 }
106
107 pub fn to_inexact(self) -> Self {
109 match self {
110 Precision::Exact(value) => Precision::Inexact(value),
111 _ => self,
112 }
113 }
114}
115
116impl Precision<usize> {
117 pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
121 match (self, other) {
122 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a + b),
123 (Precision::Inexact(a), Precision::Exact(b))
124 | (Precision::Exact(a), Precision::Inexact(b))
125 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a + b),
126 (_, _) => Precision::Absent,
127 }
128 }
129
130 pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
134 match (self, other) {
135 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a - b),
136 (Precision::Inexact(a), Precision::Exact(b))
137 | (Precision::Exact(a), Precision::Inexact(b))
138 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a - b),
139 (_, _) => Precision::Absent,
140 }
141 }
142
143 pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
147 match (self, other) {
148 (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a * b),
149 (Precision::Inexact(a), Precision::Exact(b))
150 | (Precision::Exact(a), Precision::Inexact(b))
151 | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a * b),
152 (_, _) => Precision::Absent,
153 }
154 }
155
156 pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
161 self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
162 .to_inexact()
163 }
164}
165
166impl Precision<ScalarValue> {
167 pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
171 match (self, other) {
172 (Precision::Exact(a), Precision::Exact(b)) => {
173 a.add(b).map(Precision::Exact).unwrap_or(Precision::Absent)
174 }
175 (Precision::Inexact(a), Precision::Exact(b))
176 | (Precision::Exact(a), Precision::Inexact(b))
177 | (Precision::Inexact(a), Precision::Inexact(b)) => a
178 .add(b)
179 .map(Precision::Inexact)
180 .unwrap_or(Precision::Absent),
181 (_, _) => Precision::Absent,
182 }
183 }
184
185 pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
189 match (self, other) {
190 (Precision::Exact(a), Precision::Exact(b)) => {
191 a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
192 }
193 (Precision::Inexact(a), Precision::Exact(b))
194 | (Precision::Exact(a), Precision::Inexact(b))
195 | (Precision::Inexact(a), Precision::Inexact(b)) => a
196 .sub(b)
197 .map(Precision::Inexact)
198 .unwrap_or(Precision::Absent),
199 (_, _) => Precision::Absent,
200 }
201 }
202
203 pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
207 match (self, other) {
208 (Precision::Exact(a), Precision::Exact(b)) => a
209 .mul_checked(b)
210 .map(Precision::Exact)
211 .unwrap_or(Precision::Absent),
212 (Precision::Inexact(a), Precision::Exact(b))
213 | (Precision::Exact(a), Precision::Inexact(b))
214 | (Precision::Inexact(a), Precision::Inexact(b)) => a
215 .mul_checked(b)
216 .map(Precision::Inexact)
217 .unwrap_or(Precision::Absent),
218 (_, _) => Precision::Absent,
219 }
220 }
221
222 pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
224 match self {
225 Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
226 Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
227 Precision::Absent => Ok(Precision::Absent),
228 }
229 }
230}
231
232impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
233 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234 match self {
235 Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
236 Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
237 Precision::Absent => write!(f, "Absent"),
238 }
239 }
240}
241
242impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
243 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
244 match self {
245 Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
246 Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
247 Precision::Absent => write!(f, "Absent"),
248 }
249 }
250}
251
252impl From<Precision<usize>> for Precision<ScalarValue> {
253 fn from(value: Precision<usize>) -> Self {
254 match value {
255 Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
256 Precision::Inexact(v) => {
257 Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
258 }
259 Precision::Absent => Precision::Absent,
260 }
261 }
262}
263
264#[derive(Debug, Clone, PartialEq, Eq)]
269pub struct Statistics {
270 pub num_rows: Precision<usize>,
272 pub total_byte_size: Precision<usize>,
274 pub column_statistics: Vec<ColumnStatistics>,
277}
278
279impl Statistics {
280 pub fn new_unknown(schema: &Schema) -> Self {
283 Self {
284 num_rows: Precision::Absent,
285 total_byte_size: Precision::Absent,
286 column_statistics: Statistics::unknown_column(schema),
287 }
288 }
289
290 pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
292 schema
293 .fields()
294 .iter()
295 .map(|_| ColumnStatistics::new_unknown())
296 .collect()
297 }
298
299 pub fn to_inexact(mut self) -> Self {
302 self.num_rows = self.num_rows.to_inexact();
303 self.total_byte_size = self.total_byte_size.to_inexact();
304 self.column_statistics = self
305 .column_statistics
306 .into_iter()
307 .map(|s| s.to_inexact())
308 .collect();
309 self
310 }
311
312 pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
318 let Some(projection) = projection else {
319 return self;
320 };
321
322 enum Slot {
323 Taken(usize),
325 Present(ColumnStatistics),
327 }
328
329 let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
331 .into_iter()
332 .map(Slot::Present)
333 .collect();
334
335 for idx in projection {
336 let next_idx = self.column_statistics.len();
337 let slot = std::mem::replace(
338 columns.get_mut(*idx).expect("projection out of bounds"),
339 Slot::Taken(next_idx),
340 );
341 match slot {
342 Slot::Present(col) => self.column_statistics.push(col),
344 Slot::Taken(prev_idx) => self
346 .column_statistics
347 .push(self.column_statistics[prev_idx].clone()),
348 }
349 }
350
351 self
352 }
353
354 pub fn with_fetch(
358 mut self,
359 schema: SchemaRef,
360 fetch: Option<usize>,
361 skip: usize,
362 n_partitions: usize,
363 ) -> Result<Self> {
364 let fetch_val = fetch.unwrap_or(usize::MAX);
365
366 self.num_rows = match self {
367 Statistics {
368 num_rows: Precision::Exact(nr),
369 ..
370 }
371 | Statistics {
372 num_rows: Precision::Inexact(nr),
373 ..
374 } => {
375 if nr <= skip {
377 Precision::Exact(0)
379 } else if nr <= fetch_val && skip == 0 {
380 return Ok(self);
386 } else if nr - skip <= fetch_val {
387 check_num_rows(
391 (nr - skip).checked_mul(n_partitions),
392 self.num_rows.is_exact().unwrap(),
394 )
395 } else {
396 check_num_rows(
402 fetch_val.checked_mul(n_partitions),
403 self.num_rows.is_exact().unwrap(),
405 )
406 }
407 }
408 Statistics {
409 num_rows: Precision::Absent,
410 ..
411 } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
412 };
413 self.column_statistics = Statistics::unknown_column(&schema);
414 self.total_byte_size = Precision::Absent;
415 Ok(self)
416 }
417}
418
419fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
422 if let Some(value) = value {
423 if is_exact {
424 Precision::Exact(value)
425 } else {
426 Precision::Inexact(value)
428 }
429 } else {
430 Precision::Absent
433 }
434}
435
436impl Display for Statistics {
437 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438 let column_stats = self
440 .column_statistics
441 .iter()
442 .enumerate()
443 .map(|(i, cs)| {
444 let s = format!("(Col[{}]:", i);
445 let s = if cs.min_value != Precision::Absent {
446 format!("{} Min={}", s, cs.min_value)
447 } else {
448 s
449 };
450 let s = if cs.max_value != Precision::Absent {
451 format!("{} Max={}", s, cs.max_value)
452 } else {
453 s
454 };
455 let s = if cs.sum_value != Precision::Absent {
456 format!("{} Sum={}", s, cs.sum_value)
457 } else {
458 s
459 };
460 let s = if cs.null_count != Precision::Absent {
461 format!("{} Null={}", s, cs.null_count)
462 } else {
463 s
464 };
465 let s = if cs.distinct_count != Precision::Absent {
466 format!("{} Distinct={}", s, cs.distinct_count)
467 } else {
468 s
469 };
470
471 s + ")"
472 })
473 .collect::<Vec<_>>()
474 .join(",");
475
476 write!(
477 f,
478 "Rows={}, Bytes={}, [{}]",
479 self.num_rows, self.total_byte_size, column_stats
480 )?;
481
482 Ok(())
483 }
484}
485
486#[derive(Clone, Debug, PartialEq, Eq, Default)]
488pub struct ColumnStatistics {
489 pub null_count: Precision<usize>,
491 pub max_value: Precision<ScalarValue>,
493 pub min_value: Precision<ScalarValue>,
495 pub sum_value: Precision<ScalarValue>,
497 pub distinct_count: Precision<usize>,
499}
500
501impl ColumnStatistics {
502 pub fn is_singleton(&self) -> bool {
504 match (&self.min_value, &self.max_value) {
505 (Precision::Exact(min), Precision::Exact(max)) => {
507 !min.is_null() && !max.is_null() && (min == max)
508 }
509 (_, _) => false,
510 }
511 }
512
513 pub fn new_unknown() -> Self {
515 Self {
516 null_count: Precision::Absent,
517 max_value: Precision::Absent,
518 min_value: Precision::Absent,
519 sum_value: Precision::Absent,
520 distinct_count: Precision::Absent,
521 }
522 }
523
524 pub fn to_inexact(mut self) -> Self {
528 self.null_count = self.null_count.to_inexact();
529 self.max_value = self.max_value.to_inexact();
530 self.min_value = self.min_value.to_inexact();
531 self.sum_value = self.sum_value.to_inexact();
532 self.distinct_count = self.distinct_count.to_inexact();
533 self
534 }
535}
536
537#[cfg(test)]
538mod tests {
539 use super::*;
540
541 #[test]
542 fn test_get_value() {
543 let exact_precision = Precision::Exact(42);
544 let inexact_precision = Precision::Inexact(23);
545 let absent_precision = Precision::<i32>::Absent;
546
547 assert_eq!(*exact_precision.get_value().unwrap(), 42);
548 assert_eq!(*inexact_precision.get_value().unwrap(), 23);
549 assert_eq!(absent_precision.get_value(), None);
550 }
551
552 #[test]
553 fn test_map() {
554 let exact_precision = Precision::Exact(42);
555 let inexact_precision = Precision::Inexact(23);
556 let absent_precision = Precision::Absent;
557
558 let squared = |x| x * x;
559
560 assert_eq!(exact_precision.map(squared), Precision::Exact(1764));
561 assert_eq!(inexact_precision.map(squared), Precision::Inexact(529));
562 assert_eq!(absent_precision.map(squared), Precision::Absent);
563 }
564
565 #[test]
566 fn test_is_exact() {
567 let exact_precision = Precision::Exact(42);
568 let inexact_precision = Precision::Inexact(23);
569 let absent_precision = Precision::<i32>::Absent;
570
571 assert_eq!(exact_precision.is_exact(), Some(true));
572 assert_eq!(inexact_precision.is_exact(), Some(false));
573 assert_eq!(absent_precision.is_exact(), None);
574 }
575
576 #[test]
577 fn test_max() {
578 let precision1 = Precision::Exact(42);
579 let precision2 = Precision::Inexact(23);
580 let precision3 = Precision::Exact(30);
581 let absent_precision = Precision::Absent;
582
583 assert_eq!(precision1.max(&precision2), Precision::Inexact(42));
584 assert_eq!(precision1.max(&precision3), Precision::Exact(42));
585 assert_eq!(precision2.max(&precision3), Precision::Inexact(30));
586 assert_eq!(precision1.max(&absent_precision), Precision::Absent);
587 }
588
589 #[test]
590 fn test_min() {
591 let precision1 = Precision::Exact(42);
592 let precision2 = Precision::Inexact(23);
593 let precision3 = Precision::Exact(30);
594 let absent_precision = Precision::Absent;
595
596 assert_eq!(precision1.min(&precision2), Precision::Inexact(23));
597 assert_eq!(precision1.min(&precision3), Precision::Exact(30));
598 assert_eq!(precision2.min(&precision3), Precision::Inexact(23));
599 assert_eq!(precision1.min(&absent_precision), Precision::Absent);
600 }
601
602 #[test]
603 fn test_to_inexact() {
604 let exact_precision = Precision::Exact(42);
605 let inexact_precision = Precision::Inexact(42);
606 let absent_precision = Precision::<i32>::Absent;
607
608 assert_eq!(exact_precision.to_inexact(), inexact_precision);
609 assert_eq!(inexact_precision.to_inexact(), inexact_precision);
610 assert_eq!(absent_precision.to_inexact(), absent_precision);
611 }
612
613 #[test]
614 fn test_add() {
615 let precision1 = Precision::Exact(42);
616 let precision2 = Precision::Inexact(23);
617 let precision3 = Precision::Exact(30);
618 let absent_precision = Precision::Absent;
619
620 assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
621 assert_eq!(precision1.add(&precision3), Precision::Exact(72));
622 assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
623 assert_eq!(precision1.add(&absent_precision), Precision::Absent);
624 }
625
626 #[test]
627 fn test_add_scalar() {
628 let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
629
630 assert_eq!(
631 precision.add(&Precision::Exact(ScalarValue::Int32(Some(23)))),
632 Precision::Exact(ScalarValue::Int32(Some(65))),
633 );
634 assert_eq!(
635 precision.add(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
636 Precision::Inexact(ScalarValue::Int32(Some(65))),
637 );
638 assert_eq!(
639 precision.add(&Precision::Exact(ScalarValue::Int32(None))),
640 Precision::Exact(ScalarValue::Int32(None)),
642 );
643 assert_eq!(precision.add(&Precision::Absent), Precision::Absent);
644 }
645
646 #[test]
647 fn test_sub() {
648 let precision1 = Precision::Exact(42);
649 let precision2 = Precision::Inexact(23);
650 let precision3 = Precision::Exact(30);
651 let absent_precision = Precision::Absent;
652
653 assert_eq!(precision1.sub(&precision2), Precision::Inexact(19));
654 assert_eq!(precision1.sub(&precision3), Precision::Exact(12));
655 assert_eq!(precision1.sub(&absent_precision), Precision::Absent);
656 }
657
658 #[test]
659 fn test_sub_scalar() {
660 let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
661
662 assert_eq!(
663 precision.sub(&Precision::Exact(ScalarValue::Int32(Some(23)))),
664 Precision::Exact(ScalarValue::Int32(Some(19))),
665 );
666 assert_eq!(
667 precision.sub(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
668 Precision::Inexact(ScalarValue::Int32(Some(19))),
669 );
670 assert_eq!(
671 precision.sub(&Precision::Exact(ScalarValue::Int32(None))),
672 Precision::Exact(ScalarValue::Int32(None)),
674 );
675 assert_eq!(precision.sub(&Precision::Absent), Precision::Absent);
676 }
677
678 #[test]
679 fn test_multiply() {
680 let precision1 = Precision::Exact(6);
681 let precision2 = Precision::Inexact(3);
682 let precision3 = Precision::Exact(5);
683 let absent_precision = Precision::Absent;
684
685 assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
686 assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
687 assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
688 assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
689 }
690
691 #[test]
692 fn test_multiply_scalar() {
693 let precision = Precision::Exact(ScalarValue::Int32(Some(6)));
694
695 assert_eq!(
696 precision.multiply(&Precision::Exact(ScalarValue::Int32(Some(5)))),
697 Precision::Exact(ScalarValue::Int32(Some(30))),
698 );
699 assert_eq!(
700 precision.multiply(&Precision::Inexact(ScalarValue::Int32(Some(5)))),
701 Precision::Inexact(ScalarValue::Int32(Some(30))),
702 );
703 assert_eq!(
704 precision.multiply(&Precision::Exact(ScalarValue::Int32(None))),
705 Precision::Exact(ScalarValue::Int32(None)),
707 );
708 assert_eq!(precision.multiply(&Precision::Absent), Precision::Absent);
709 }
710
711 #[test]
712 fn test_cast_to() {
713 assert_eq!(
715 Precision::Exact(ScalarValue::Int32(Some(42)))
716 .cast_to(&DataType::Int64)
717 .unwrap(),
718 Precision::Exact(ScalarValue::Int64(Some(42))),
719 );
720 assert_eq!(
721 Precision::Inexact(ScalarValue::Int32(Some(42)))
722 .cast_to(&DataType::Int64)
723 .unwrap(),
724 Precision::Inexact(ScalarValue::Int64(Some(42))),
725 );
726 assert_eq!(
728 Precision::Exact(ScalarValue::Int32(None))
729 .cast_to(&DataType::Int64)
730 .unwrap(),
731 Precision::Exact(ScalarValue::Int64(None)),
732 );
733 assert!(Precision::Exact(ScalarValue::Int32(Some(256)))
735 .cast_to(&DataType::Int8)
736 .is_err());
737 }
738
739 #[test]
740 fn test_precision_cloning() {
741 let precision: Precision<usize> = Precision::Exact(42);
743 let p2 = precision;
744 assert_eq!(precision, p2);
745
746 let precision: Precision<ScalarValue> =
748 Precision::Exact(ScalarValue::Int64(Some(42)));
749 #[allow(clippy::redundant_clone)]
751 let p2 = precision.clone();
752 assert_eq!(precision, p2);
753 }
754
755 #[test]
756 fn test_project_none() {
757 let projection = None;
758 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
759 assert_eq!(stats, make_stats(vec![10, 20, 30]));
760 }
761
762 #[test]
763 fn test_project_empty() {
764 let projection = Some(vec![]);
765 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
766 assert_eq!(stats, make_stats(vec![]));
767 }
768
769 #[test]
770 fn test_project_swap() {
771 let projection = Some(vec![2, 1]);
772 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
773 assert_eq!(stats, make_stats(vec![30, 20]));
774 }
775
776 #[test]
777 fn test_project_repeated() {
778 let projection = Some(vec![1, 2, 1, 1, 0, 2]);
779 let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
780 assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
781 }
782
783 fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
785 Statistics {
786 num_rows: Precision::Exact(42),
787 total_byte_size: Precision::Exact(500),
788 column_statistics: counts.into_iter().map(col_stats_i64).collect(),
789 }
790 }
791
792 fn col_stats_i64(null_count: usize) -> ColumnStatistics {
793 ColumnStatistics {
794 null_count: Precision::Exact(null_count),
795 max_value: Precision::Exact(ScalarValue::Int64(Some(42))),
796 min_value: Precision::Exact(ScalarValue::Int64(Some(64))),
797 sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))),
798 distinct_count: Precision::Exact(100),
799 }
800 }
801}