use std::fmt::{self, Debug, Display};
use crate::ScalarValue;
use arrow_schema::Schema;
/// A statistic value annotated with how much it can be trusted.
///
/// Operations that combine two `Precision`s are conservative: the result is
/// `Exact` only when both inputs are `Exact`, `Inexact` when both inputs carry
/// a value but at least one of them is `Inexact`, and `Absent` as soon as
/// either input is `Absent`. The default value is [`Precision::Absent`].
#[derive(Clone, PartialEq, Eq, Default)]
pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
    /// The wrapped value is known exactly.
    Exact(T),
    /// The wrapped value is only an estimate.
    Inexact(T),
    /// No value is available.
    #[default]
    Absent,
}

impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
    /// Returns a reference to the wrapped value of an `Exact` or `Inexact`
    /// precision, or `None` for `Absent`.
    pub fn get_value(&self) -> Option<&T> {
        match self {
            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
            Precision::Absent => None,
        }
    }

    /// Applies `f` to the wrapped value (if any) while keeping the
    /// exact/inexact tag. `Absent` maps to `Absent` and `f` is not called.
    ///
    /// `f` is invoked at most once, so any `FnOnce` closure is accepted.
    pub fn map<U, F>(self, f: F) -> Precision<U>
    where
        F: FnOnce(T) -> U,
        U: Debug + Clone + PartialEq + Eq + PartialOrd,
    {
        match self {
            Precision::Exact(val) => Precision::Exact(f(val)),
            Precision::Inexact(val) => Precision::Inexact(f(val)),
            Precision::Absent => Precision::Absent,
        }
    }

    /// Returns `Some(true)` for `Exact`, `Some(false)` for `Inexact` and
    /// `None` for `Absent`.
    pub fn is_exact(&self) -> Option<bool> {
        match self {
            Precision::Exact(_) => Some(true),
            Precision::Inexact(_) => Some(false),
            Precision::Absent => None,
        }
    }

    /// Returns the larger of the two wrapped values (cloned).
    ///
    /// The result is `Exact` only if both inputs are `Exact`, and `Absent` if
    /// either input is `Absent`. When `self >= other` does not hold (which
    /// includes values that are not comparable under `PartialOrd`), the value
    /// of `other` is returned.
    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Returns the smaller of the two wrapped values (cloned).
    ///
    /// The result is `Exact` only if both inputs are `Exact`, and `Absent` if
    /// either input is `Absent`. When `self >= other` does not hold (which
    /// includes values that are not comparable under `PartialOrd`), the value
    /// of `self` is returned.
    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => {
                Precision::Exact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Downgrades `Exact` to `Inexact`; `Inexact` and `Absent` are returned
    /// unchanged.
    pub fn to_inexact(self) -> Self {
        match self {
            Precision::Exact(value) => Precision::Inexact(value),
            unchanged @ (Precision::Inexact(_) | Precision::Absent) => unchanged,
        }
    }
}

impl Precision<usize> {
    /// Shared implementation of `add`, `sub` and `multiply`.
    ///
    /// `checked` is the overflow-detecting form of the operation and
    /// `saturating` its clamping form. Two `Exact` inputs stay `Exact` only
    /// when `checked` succeeds; otherwise the clamped result is reported as
    /// `Inexact`, because it no longer equals the true mathematical result.
    fn combine_with(
        &self,
        other: &Precision<usize>,
        checked: fn(usize, usize) -> Option<usize>,
        saturating: fn(usize, usize) -> usize,
    ) -> Precision<usize> {
        match (self, other) {
            (Precision::Exact(a), Precision::Exact(b)) => match checked(*a, *b) {
                Some(value) => Precision::Exact(value),
                None => Precision::Inexact(saturating(*a, *b)),
            },
            (Precision::Inexact(a), Precision::Exact(b))
            | (Precision::Exact(a), Precision::Inexact(b))
            | (Precision::Inexact(a), Precision::Inexact(b)) => {
                Precision::Inexact(saturating(*a, *b))
            }
            (Precision::Absent, _) | (_, Precision::Absent) => Precision::Absent,
        }
    }

    /// Adds two precisions, propagating exactness conservatively.
    ///
    /// On overflow the result saturates at `usize::MAX` and is reported as
    /// `Inexact` instead of panicking or wrapping around.
    pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
        self.combine_with(other, usize::checked_add, usize::saturating_add)
    }

    /// Subtracts `other` from `self`, propagating exactness conservatively.
    ///
    /// If `other` is larger than `self` the result saturates at `0` and is
    /// reported as `Inexact` instead of panicking or wrapping around.
    pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
        self.combine_with(other, usize::checked_sub, usize::saturating_sub)
    }

    /// Multiplies two precisions, propagating exactness conservatively.
    ///
    /// On overflow the result saturates at `usize::MAX` and is reported as
    /// `Inexact` instead of panicking or wrapping around.
    pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
        self.combine_with(other, usize::checked_mul, usize::saturating_mul)
    }

    /// Scales the wrapped value by `selectivity`, rounding up, and marks the
    /// result as `Inexact`. `Absent` stays `Absent`.
    pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
        // The float -> usize `as` cast saturates at the bounds of `usize` and
        // turns NaN into 0, so this cannot panic for any `selectivity`.
        self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
            .to_inexact()
    }
}
impl Precision<ScalarValue> {
    /// Adds two scalar precisions.
    ///
    /// The sum is `Exact` only when both operands are `Exact`; it is `Inexact`
    /// when both operands carry a value but at least one is `Inexact`. The
    /// result is `Absent` when either operand is `Absent` or when the
    /// underlying `ScalarValue` addition returns an error.
    pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
        // First work out the operands and whether exactness survives ...
        let (lhs, rhs, both_exact) = match (self, other) {
            (Precision::Exact(lhs), Precision::Exact(rhs)) => (lhs, rhs, true),
            (Precision::Inexact(lhs), Precision::Exact(rhs))
            | (Precision::Exact(lhs), Precision::Inexact(rhs))
            | (Precision::Inexact(lhs), Precision::Inexact(rhs)) => (lhs, rhs, false),
            (Precision::Absent, _) | (_, Precision::Absent) => return Precision::Absent,
        };

        // ... then perform the fallible addition exactly once.
        match lhs.add(rhs) {
            Ok(sum) if both_exact => Precision::Exact(sum),
            Ok(sum) => Precision::Inexact(sum),
            Err(_) => Precision::Absent,
        }
    }
}
/// Renders `Exact(<value>)`, `Inexact(<value>)` or `Absent`, where `<value>`
/// is the plain (`{:?}`) debug form of the wrapped value.
impl<T: fmt::Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the variant label and payload; `Absent` has no payload to print.
        let (label, value) = match self {
            Precision::Exact(value) => ("Exact", value),
            Precision::Inexact(value) => ("Inexact", value),
            Precision::Absent => return write!(f, "Absent"),
        };
        write!(f, "{}({:?})", label, value)
    }
}
impl<T: fmt::Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
Precision::Absent => write!(f, "Absent"),
}
}
}
/// Statistics made of two top-level figures plus a list of per-column
/// statistics. Every figure is wrapped in a [`Precision`], so it may be exact,
/// an estimate, or absent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
/// Row count (rendered as `Rows=` by the `Display` impl).
pub num_rows: Precision<usize>,
/// Total size in bytes (rendered as `Bytes=` by the `Display` impl).
/// NOTE(review): presumably the size of the rows being described; what
/// exactly is counted cannot be told from this file, so confirm.
pub total_byte_size: Precision<usize>,
/// Per-column statistics. `Statistics::new_unknown` creates one entry per
/// schema field; presumably entries follow schema field order (confirm).
pub column_statistics: Vec<ColumnStatistics>,
}
impl Statistics {
    /// Builds statistics in which nothing is known: both top-level figures are
    /// `Absent` and there is one all-`Absent` column entry per field of
    /// `schema`.
    pub fn new_unknown(schema: &Schema) -> Self {
        let column_statistics = Self::unknown_column(schema);
        Self {
            num_rows: Precision::Absent,
            total_byte_size: Precision::Absent,
            column_statistics,
        }
    }

    /// Returns one unknown [`ColumnStatistics`] per field of `schema`.
    pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
        let column_count = schema.fields().len();
        std::iter::repeat_with(ColumnStatistics::new_unknown)
            .take(column_count)
            .collect()
    }

    /// Consumes the statistics and downgrades every `Exact` figure, both
    /// top-level and per-column, to `Inexact`. `Inexact` and `Absent` figures
    /// are left as they are.
    pub fn into_inexact(self) -> Self {
        let Statistics {
            num_rows,
            total_byte_size,
            column_statistics,
        } = self;

        let mut relaxed_columns = Vec::with_capacity(column_statistics.len());
        for column in column_statistics {
            let ColumnStatistics {
                null_count,
                max_value,
                min_value,
                distinct_count,
            } = column;
            relaxed_columns.push(ColumnStatistics {
                null_count: null_count.to_inexact(),
                max_value: max_value.to_inexact(),
                min_value: min_value.to_inexact(),
                distinct_count: distinct_count.to_inexact(),
            });
        }

        Statistics {
            num_rows: num_rows.to_inexact(),
            total_byte_size: total_byte_size.to_inexact(),
            column_statistics: relaxed_columns,
        }
    }
}
/// Renders the statistics as `Rows=<n>, Bytes=<n>, [<columns>]`.
///
/// Each column is written as `(Col[<index>]: Min=.. Max=.. Null=.. Distinct=..)`
/// with every `Absent` figure left out, and columns are separated by `,`.
impl Display for Statistics {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered_columns = Vec::with_capacity(self.column_statistics.len());

        for (index, column) in self.column_statistics.iter().enumerate() {
            let mut entry = format!("(Col[{}]:", index);

            // Only figures that carry a value are printed, in a fixed order.
            if !matches!(column.min_value, Precision::Absent) {
                entry.push_str(&format!(" Min={}", column.min_value));
            }
            if !matches!(column.max_value, Precision::Absent) {
                entry.push_str(&format!(" Max={}", column.max_value));
            }
            if !matches!(column.null_count, Precision::Absent) {
                entry.push_str(&format!(" Null={}", column.null_count));
            }
            if !matches!(column.distinct_count, Precision::Absent) {
                entry.push_str(&format!(" Distinct={}", column.distinct_count));
            }

            entry.push(')');
            rendered_columns.push(entry);
        }

        write!(
            f,
            "Rows={}, Bytes={}, [{}]",
            self.num_rows,
            self.total_byte_size,
            rendered_columns.join(",")
        )
    }
}
/// Statistics for a single column. Every figure is wrapped in a [`Precision`]
/// and the derived `Default` leaves all of them `Absent`.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct ColumnStatistics {
/// Null count (rendered as `Null=` by `Statistics`' `Display` impl);
/// presumably the number of null values in the column (confirm).
pub null_count: Precision<usize>,
/// Maximum value (rendered as `Max=`); compared against `min_value` by
/// `is_singleton`.
pub max_value: Precision<ScalarValue>,
/// Minimum value (rendered as `Min=`); compared against `max_value` by
/// `is_singleton`.
pub min_value: Precision<ScalarValue>,
/// Distinct count (rendered as `Distinct=`); presumably the number of
/// distinct values in the column (confirm).
pub distinct_count: Precision<usize>,
}
impl ColumnStatistics {
    /// Returns `true` when the minimum and maximum are both `Exact`, both
    /// non-null, and equal to each other. Any other combination yields
    /// `false`.
    pub fn is_singleton(&self) -> bool {
        if let (Precision::Exact(lowest), Precision::Exact(highest)) =
            (&self.min_value, &self.max_value)
        {
            !(lowest.is_null() || highest.is_null()) && lowest == highest
        } else {
            false
        }
    }

    /// Returns column statistics in which every figure is `Absent`.
    pub fn new_unknown() -> ColumnStatistics {
        // The derived `Default` sets every field to `Precision::default()`,
        // which is `Precision::Absent`.
        Self::default()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `get_value` exposes the payload of `Exact`/`Inexact` and nothing for
    /// `Absent`.
    #[test]
    fn test_get_value() {
        let exact = Precision::Exact(42);
        let inexact = Precision::Inexact(23);
        let absent = Precision::<i32>::Absent;

        assert_eq!(exact.get_value(), Some(&42));
        assert_eq!(inexact.get_value(), Some(&23));
        assert_eq!(absent.get_value(), None);
    }

    /// `map` transforms the payload and keeps the variant.
    #[test]
    fn test_map() {
        let exact = Precision::Exact(42);
        let inexact = Precision::Inexact(23);
        let absent = Precision::Absent;
        let square = |x| x * x;

        assert_eq!(exact.map(square), Precision::Exact(1764));
        assert_eq!(inexact.map(square), Precision::Inexact(529));
        assert_eq!(absent.map(square), Precision::Absent);
    }

    /// `is_exact` distinguishes all three variants.
    #[test]
    fn test_is_exact() {
        let exact = Precision::Exact(42);
        let inexact = Precision::Inexact(23);
        let absent = Precision::<i32>::Absent;

        assert_eq!(exact.is_exact(), Some(true));
        assert_eq!(inexact.is_exact(), Some(false));
        assert_eq!(absent.is_exact(), None);
    }

    /// `max` picks the larger payload and degrades exactness conservatively.
    #[test]
    fn test_max() {
        let exact_42 = Precision::Exact(42);
        let inexact_23 = Precision::Inexact(23);
        let exact_30 = Precision::Exact(30);
        let absent = Precision::Absent;

        assert_eq!(exact_42.max(&inexact_23), Precision::Inexact(42));
        assert_eq!(exact_42.max(&exact_30), Precision::Exact(42));
        assert_eq!(inexact_23.max(&exact_30), Precision::Inexact(30));
        assert_eq!(exact_42.max(&absent), Precision::Absent);
    }

    /// `min` picks the smaller payload and degrades exactness conservatively.
    #[test]
    fn test_min() {
        let exact_42 = Precision::Exact(42);
        let inexact_23 = Precision::Inexact(23);
        let exact_30 = Precision::Exact(30);
        let absent = Precision::Absent;

        assert_eq!(exact_42.min(&inexact_23), Precision::Inexact(23));
        assert_eq!(exact_42.min(&exact_30), Precision::Exact(30));
        assert_eq!(inexact_23.min(&exact_30), Precision::Inexact(23));
        assert_eq!(exact_42.min(&absent), Precision::Absent);
    }

    /// `to_inexact` only changes `Exact` values.
    #[test]
    fn test_to_inexact() {
        let exact = Precision::Exact(42);
        let inexact = Precision::Inexact(42);
        let absent = Precision::<i32>::Absent;

        assert_eq!(exact.to_inexact(), inexact);
        assert_eq!(inexact.clone().to_inexact(), inexact);
        assert_eq!(absent.clone().to_inexact(), absent);
    }

    /// Addition of `usize` precisions.
    #[test]
    fn test_add() {
        let exact_42 = Precision::Exact(42);
        let inexact_23 = Precision::Inexact(23);
        let exact_30 = Precision::Exact(30);
        let absent = Precision::Absent;

        assert_eq!(exact_42.add(&inexact_23), Precision::Inexact(65));
        assert_eq!(exact_42.add(&exact_30), Precision::Exact(72));
        assert_eq!(inexact_23.add(&exact_30), Precision::Inexact(53));
        assert_eq!(exact_42.add(&absent), Precision::Absent);
    }

    /// Subtraction of `usize` precisions (minuend never smaller than the
    /// subtrahend in these cases).
    #[test]
    fn test_sub() {
        let exact_42 = Precision::Exact(42);
        let inexact_23 = Precision::Inexact(23);
        let exact_30 = Precision::Exact(30);
        let absent = Precision::Absent;

        assert_eq!(exact_42.sub(&inexact_23), Precision::Inexact(19));
        assert_eq!(exact_42.sub(&exact_30), Precision::Exact(12));
        assert_eq!(exact_42.sub(&absent), Precision::Absent);
    }

    /// Multiplication of `usize` precisions.
    #[test]
    fn test_multiply() {
        let exact_6 = Precision::Exact(6);
        let inexact_3 = Precision::Inexact(3);
        let exact_5 = Precision::Exact(5);
        let absent = Precision::Absent;

        assert_eq!(exact_6.multiply(&inexact_3), Precision::Inexact(18));
        assert_eq!(exact_6.multiply(&exact_5), Precision::Exact(30));
        assert_eq!(inexact_3.multiply(&exact_5), Precision::Inexact(15));
        assert_eq!(exact_6.multiply(&absent), Precision::Absent);
    }
}