datafusion_common/
stats.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module provides data structures to represent statistics
19
20use std::fmt::{self, Debug, Display};
21
22use crate::{Result, ScalarValue};
23
24use arrow::datatypes::{DataType, Schema, SchemaRef};
25
26/// Represents a value with a degree of certainty. `Precision` is used to
27/// propagate information the precision of statistical values.
28#[derive(Clone, PartialEq, Eq, Default, Copy)]
29pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
30    /// The exact value is known
31    Exact(T),
32    /// The value is not known exactly, but is likely close to this value
33    Inexact(T),
34    /// Nothing is known about the value
35    #[default]
36    Absent,
37}
38
39impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
40    /// If we have some value (exact or inexact), it returns that value.
41    /// Otherwise, it returns `None`.
42    pub fn get_value(&self) -> Option<&T> {
43        match self {
44            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
45            Precision::Absent => None,
46        }
47    }
48
49    /// Transform the value in this [`Precision`] object, if one exists, using
50    /// the given function. Preserves the exactness state.
51    pub fn map<U, F>(self, f: F) -> Precision<U>
52    where
53        F: Fn(T) -> U,
54        U: Debug + Clone + PartialEq + Eq + PartialOrd,
55    {
56        match self {
57            Precision::Exact(val) => Precision::Exact(f(val)),
58            Precision::Inexact(val) => Precision::Inexact(f(val)),
59            _ => Precision::<U>::Absent,
60        }
61    }
62
63    /// Returns `Some(true)` if we have an exact value, `Some(false)` if we
64    /// have an inexact value, and `None` if there is no value.
65    pub fn is_exact(&self) -> Option<bool> {
66        match self {
67            Precision::Exact(_) => Some(true),
68            Precision::Inexact(_) => Some(false),
69            _ => None,
70        }
71    }
72
73    /// Returns the maximum of two (possibly inexact) values, conservatively
74    /// propagating exactness information. If one of the input values is
75    /// [`Precision::Absent`], the result is `Absent` too.
76    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
77        match (self, other) {
78            (Precision::Exact(a), Precision::Exact(b)) => {
79                Precision::Exact(if a >= b { a.clone() } else { b.clone() })
80            }
81            (Precision::Inexact(a), Precision::Exact(b))
82            | (Precision::Exact(a), Precision::Inexact(b))
83            | (Precision::Inexact(a), Precision::Inexact(b)) => {
84                Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
85            }
86            (_, _) => Precision::Absent,
87        }
88    }
89
90    /// Returns the minimum of two (possibly inexact) values, conservatively
91    /// propagating exactness information. If one of the input values is
92    /// [`Precision::Absent`], the result is `Absent` too.
93    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
94        match (self, other) {
95            (Precision::Exact(a), Precision::Exact(b)) => {
96                Precision::Exact(if a >= b { b.clone() } else { a.clone() })
97            }
98            (Precision::Inexact(a), Precision::Exact(b))
99            | (Precision::Exact(a), Precision::Inexact(b))
100            | (Precision::Inexact(a), Precision::Inexact(b)) => {
101                Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
102            }
103            (_, _) => Precision::Absent,
104        }
105    }
106
107    /// Demotes the precision state from exact to inexact (if present).
108    pub fn to_inexact(self) -> Self {
109        match self {
110            Precision::Exact(value) => Precision::Inexact(value),
111            _ => self,
112        }
113    }
114}
115
116impl Precision<usize> {
117    /// Calculates the sum of two (possibly inexact) [`usize`] values,
118    /// conservatively propagating exactness information. If one of the input
119    /// values is [`Precision::Absent`], the result is `Absent` too.
120    pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
121        match (self, other) {
122            (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a + b),
123            (Precision::Inexact(a), Precision::Exact(b))
124            | (Precision::Exact(a), Precision::Inexact(b))
125            | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a + b),
126            (_, _) => Precision::Absent,
127        }
128    }
129
130    /// Calculates the difference of two (possibly inexact) [`usize`] values,
131    /// conservatively propagating exactness information. If one of the input
132    /// values is [`Precision::Absent`], the result is `Absent` too.
133    pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
134        match (self, other) {
135            (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a - b),
136            (Precision::Inexact(a), Precision::Exact(b))
137            | (Precision::Exact(a), Precision::Inexact(b))
138            | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a - b),
139            (_, _) => Precision::Absent,
140        }
141    }
142
143    /// Calculates the multiplication of two (possibly inexact) [`usize`] values,
144    /// conservatively propagating exactness information. If one of the input
145    /// values is [`Precision::Absent`], the result is `Absent` too.
146    pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
147        match (self, other) {
148            (Precision::Exact(a), Precision::Exact(b)) => Precision::Exact(a * b),
149            (Precision::Inexact(a), Precision::Exact(b))
150            | (Precision::Exact(a), Precision::Inexact(b))
151            | (Precision::Inexact(a), Precision::Inexact(b)) => Precision::Inexact(a * b),
152            (_, _) => Precision::Absent,
153        }
154    }
155
156    /// Return the estimate of applying a filter with estimated selectivity
157    /// `selectivity` to this Precision. A selectivity of `1.0` means that all
158    /// rows are selected. A selectivity of `0.5` means half the rows are
159    /// selected. Will always return inexact statistics.
160    pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
161        self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
162            .to_inexact()
163    }
164}
165
166impl Precision<ScalarValue> {
167    /// Calculates the sum of two (possibly inexact) [`ScalarValue`] values,
168    /// conservatively propagating exactness information. If one of the input
169    /// values is [`Precision::Absent`], the result is `Absent` too.
170    pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
171        match (self, other) {
172            (Precision::Exact(a), Precision::Exact(b)) => {
173                a.add(b).map(Precision::Exact).unwrap_or(Precision::Absent)
174            }
175            (Precision::Inexact(a), Precision::Exact(b))
176            | (Precision::Exact(a), Precision::Inexact(b))
177            | (Precision::Inexact(a), Precision::Inexact(b)) => a
178                .add(b)
179                .map(Precision::Inexact)
180                .unwrap_or(Precision::Absent),
181            (_, _) => Precision::Absent,
182        }
183    }
184
185    /// Calculates the difference of two (possibly inexact) [`ScalarValue`] values,
186    /// conservatively propagating exactness information. If one of the input
187    /// values is [`Precision::Absent`], the result is `Absent` too.
188    pub fn sub(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
189        match (self, other) {
190            (Precision::Exact(a), Precision::Exact(b)) => {
191                a.sub(b).map(Precision::Exact).unwrap_or(Precision::Absent)
192            }
193            (Precision::Inexact(a), Precision::Exact(b))
194            | (Precision::Exact(a), Precision::Inexact(b))
195            | (Precision::Inexact(a), Precision::Inexact(b)) => a
196                .sub(b)
197                .map(Precision::Inexact)
198                .unwrap_or(Precision::Absent),
199            (_, _) => Precision::Absent,
200        }
201    }
202
203    /// Calculates the multiplication of two (possibly inexact) [`ScalarValue`] values,
204    /// conservatively propagating exactness information. If one of the input
205    /// values is [`Precision::Absent`], the result is `Absent` too.
206    pub fn multiply(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
207        match (self, other) {
208            (Precision::Exact(a), Precision::Exact(b)) => a
209                .mul_checked(b)
210                .map(Precision::Exact)
211                .unwrap_or(Precision::Absent),
212            (Precision::Inexact(a), Precision::Exact(b))
213            | (Precision::Exact(a), Precision::Inexact(b))
214            | (Precision::Inexact(a), Precision::Inexact(b)) => a
215                .mul_checked(b)
216                .map(Precision::Inexact)
217                .unwrap_or(Precision::Absent),
218            (_, _) => Precision::Absent,
219        }
220    }
221
222    /// Casts the value to the given data type, propagating exactness information.
223    pub fn cast_to(&self, data_type: &DataType) -> Result<Precision<ScalarValue>> {
224        match self {
225            Precision::Exact(value) => value.cast_to(data_type).map(Precision::Exact),
226            Precision::Inexact(value) => value.cast_to(data_type).map(Precision::Inexact),
227            Precision::Absent => Ok(Precision::Absent),
228        }
229    }
230}
231
232impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
233    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234        match self {
235            Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
236            Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
237            Precision::Absent => write!(f, "Absent"),
238        }
239    }
240}
241
242impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
243    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
244        match self {
245            Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
246            Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
247            Precision::Absent => write!(f, "Absent"),
248        }
249    }
250}
251
252impl From<Precision<usize>> for Precision<ScalarValue> {
253    fn from(value: Precision<usize>) -> Self {
254        match value {
255            Precision::Exact(v) => Precision::Exact(ScalarValue::UInt64(Some(v as u64))),
256            Precision::Inexact(v) => {
257                Precision::Inexact(ScalarValue::UInt64(Some(v as u64)))
258            }
259            Precision::Absent => Precision::Absent,
260        }
261    }
262}
263
264/// Statistics for a relation
265/// Fields are optional and can be inexact because the sources
266/// sometimes provide approximate estimates for performance reasons
267/// and the transformations output are not always predictable.
268#[derive(Debug, Clone, PartialEq, Eq)]
269pub struct Statistics {
270    /// The number of table rows.
271    pub num_rows: Precision<usize>,
272    /// Total bytes of the table rows.
273    pub total_byte_size: Precision<usize>,
274    /// Statistics on a column level. It contains a [`ColumnStatistics`] for
275    /// each field in the schema of the table to which the [`Statistics`] refer.
276    pub column_statistics: Vec<ColumnStatistics>,
277}
278
279impl Statistics {
280    /// Returns a [`Statistics`] instance for the given schema by assigning
281    /// unknown statistics to each column in the schema.
282    pub fn new_unknown(schema: &Schema) -> Self {
283        Self {
284            num_rows: Precision::Absent,
285            total_byte_size: Precision::Absent,
286            column_statistics: Statistics::unknown_column(schema),
287        }
288    }
289
290    /// Returns an unbounded `ColumnStatistics` for each field in the schema.
291    pub fn unknown_column(schema: &Schema) -> Vec<ColumnStatistics> {
292        schema
293            .fields()
294            .iter()
295            .map(|_| ColumnStatistics::new_unknown())
296            .collect()
297    }
298
299    /// If the exactness of a [`Statistics`] instance is lost, this function relaxes
300    /// the exactness of all information by converting them [`Precision::Inexact`].
301    pub fn to_inexact(mut self) -> Self {
302        self.num_rows = self.num_rows.to_inexact();
303        self.total_byte_size = self.total_byte_size.to_inexact();
304        self.column_statistics = self
305            .column_statistics
306            .into_iter()
307            .map(|s| s.to_inexact())
308            .collect();
309        self
310    }
311
312    /// Project the statistics to the given column indices.
313    ///
314    /// For example, if we had statistics for columns `{"a", "b", "c"}`,
315    /// projecting to `vec![2, 1]` would return statistics for columns `{"c",
316    /// "b"}`.
317    pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
318        let Some(projection) = projection else {
319            return self;
320        };
321
322        enum Slot {
323            /// The column is taken and put into the specified statistics location
324            Taken(usize),
325            /// The original columns is present
326            Present(ColumnStatistics),
327        }
328
329        // Convert to Vec<Slot> so we can avoid copying the statistics
330        let mut columns: Vec<_> = std::mem::take(&mut self.column_statistics)
331            .into_iter()
332            .map(Slot::Present)
333            .collect();
334
335        for idx in projection {
336            let next_idx = self.column_statistics.len();
337            let slot = std::mem::replace(
338                columns.get_mut(*idx).expect("projection out of bounds"),
339                Slot::Taken(next_idx),
340            );
341            match slot {
342                // The column was there, so just move it
343                Slot::Present(col) => self.column_statistics.push(col),
344                // The column was taken, so copy from the previous location
345                Slot::Taken(prev_idx) => self
346                    .column_statistics
347                    .push(self.column_statistics[prev_idx].clone()),
348            }
349        }
350
351        self
352    }
353
354    /// Calculates the statistics after `fetch` and `skip` operations apply.
355    /// Here, `self` denotes per-partition statistics. Use the `n_partitions`
356    /// parameter to compute global statistics in a multi-partition setting.
357    pub fn with_fetch(
358        mut self,
359        schema: SchemaRef,
360        fetch: Option<usize>,
361        skip: usize,
362        n_partitions: usize,
363    ) -> Result<Self> {
364        let fetch_val = fetch.unwrap_or(usize::MAX);
365
366        self.num_rows = match self {
367            Statistics {
368                num_rows: Precision::Exact(nr),
369                ..
370            }
371            | Statistics {
372                num_rows: Precision::Inexact(nr),
373                ..
374            } => {
375                // Here, the inexact case gives us an upper bound on the number of rows.
376                if nr <= skip {
377                    // All input data will be skipped:
378                    Precision::Exact(0)
379                } else if nr <= fetch_val && skip == 0 {
380                    // If the input does not reach the `fetch` globally, and `skip`
381                    // is zero (meaning the input and output are identical), return
382                    // input stats as is.
383                    // TODO: Can input stats still be used, but adjusted, when `skip`
384                    //       is non-zero?
385                    return Ok(self);
386                } else if nr - skip <= fetch_val {
387                    // After `skip` input rows are skipped, the remaining rows are
388                    // less than or equal to the `fetch` values, so `num_rows` must
389                    // equal the remaining rows.
390                    check_num_rows(
391                        (nr - skip).checked_mul(n_partitions),
392                        // We know that we have an estimate for the number of rows:
393                        self.num_rows.is_exact().unwrap(),
394                    )
395                } else {
396                    // At this point we know that we were given a `fetch` value
397                    // as the `None` case would go into the branch above. Since
398                    // the input has more rows than `fetch + skip`, the number
399                    // of rows will be the `fetch`, but we won't be able to
400                    // predict the other statistics.
401                    check_num_rows(
402                        fetch_val.checked_mul(n_partitions),
403                        // We know that we have an estimate for the number of rows:
404                        self.num_rows.is_exact().unwrap(),
405                    )
406                }
407            }
408            Statistics {
409                num_rows: Precision::Absent,
410                ..
411            } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
412        };
413        self.column_statistics = Statistics::unknown_column(&schema);
414        self.total_byte_size = Precision::Absent;
415        Ok(self)
416    }
417}
418
419/// Creates an estimate of the number of rows in the output using the given
420/// optional value and exactness flag.
421fn check_num_rows(value: Option<usize>, is_exact: bool) -> Precision<usize> {
422    if let Some(value) = value {
423        if is_exact {
424            Precision::Exact(value)
425        } else {
426            // If the input stats are inexact, so are the output stats.
427            Precision::Inexact(value)
428        }
429    } else {
430        // If the estimate is not available (e.g. due to an overflow), we can
431        // not produce a reliable estimate.
432        Precision::Absent
433    }
434}
435
436impl Display for Statistics {
437    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438        // string of column statistics
439        let column_stats = self
440            .column_statistics
441            .iter()
442            .enumerate()
443            .map(|(i, cs)| {
444                let s = format!("(Col[{}]:", i);
445                let s = if cs.min_value != Precision::Absent {
446                    format!("{} Min={}", s, cs.min_value)
447                } else {
448                    s
449                };
450                let s = if cs.max_value != Precision::Absent {
451                    format!("{} Max={}", s, cs.max_value)
452                } else {
453                    s
454                };
455                let s = if cs.sum_value != Precision::Absent {
456                    format!("{} Sum={}", s, cs.sum_value)
457                } else {
458                    s
459                };
460                let s = if cs.null_count != Precision::Absent {
461                    format!("{} Null={}", s, cs.null_count)
462                } else {
463                    s
464                };
465                let s = if cs.distinct_count != Precision::Absent {
466                    format!("{} Distinct={}", s, cs.distinct_count)
467                } else {
468                    s
469                };
470
471                s + ")"
472            })
473            .collect::<Vec<_>>()
474            .join(",");
475
476        write!(
477            f,
478            "Rows={}, Bytes={}, [{}]",
479            self.num_rows, self.total_byte_size, column_stats
480        )?;
481
482        Ok(())
483    }
484}
485
486/// Statistics for a column within a relation
487#[derive(Clone, Debug, PartialEq, Eq, Default)]
488pub struct ColumnStatistics {
489    /// Number of null values on column
490    pub null_count: Precision<usize>,
491    /// Maximum value of column
492    pub max_value: Precision<ScalarValue>,
493    /// Minimum value of column
494    pub min_value: Precision<ScalarValue>,
495    /// Sum value of a column
496    pub sum_value: Precision<ScalarValue>,
497    /// Number of distinct values
498    pub distinct_count: Precision<usize>,
499}
500
501impl ColumnStatistics {
502    /// Column contains a single non null value (e.g constant).
503    pub fn is_singleton(&self) -> bool {
504        match (&self.min_value, &self.max_value) {
505            // Min and max values are the same and not infinity.
506            (Precision::Exact(min), Precision::Exact(max)) => {
507                !min.is_null() && !max.is_null() && (min == max)
508            }
509            (_, _) => false,
510        }
511    }
512
513    /// Returns a [`ColumnStatistics`] instance having all [`Precision::Absent`] parameters.
514    pub fn new_unknown() -> Self {
515        Self {
516            null_count: Precision::Absent,
517            max_value: Precision::Absent,
518            min_value: Precision::Absent,
519            sum_value: Precision::Absent,
520            distinct_count: Precision::Absent,
521        }
522    }
523
524    /// If the exactness of a [`ColumnStatistics`] instance is lost, this
525    /// function relaxes the exactness of all information by converting them
526    /// [`Precision::Inexact`].
527    pub fn to_inexact(mut self) -> Self {
528        self.null_count = self.null_count.to_inexact();
529        self.max_value = self.max_value.to_inexact();
530        self.min_value = self.min_value.to_inexact();
531        self.sum_value = self.sum_value.to_inexact();
532        self.distinct_count = self.distinct_count.to_inexact();
533        self
534    }
535}
536
537#[cfg(test)]
538mod tests {
539    use super::*;
540
541    #[test]
542    fn test_get_value() {
543        let exact_precision = Precision::Exact(42);
544        let inexact_precision = Precision::Inexact(23);
545        let absent_precision = Precision::<i32>::Absent;
546
547        assert_eq!(*exact_precision.get_value().unwrap(), 42);
548        assert_eq!(*inexact_precision.get_value().unwrap(), 23);
549        assert_eq!(absent_precision.get_value(), None);
550    }
551
552    #[test]
553    fn test_map() {
554        let exact_precision = Precision::Exact(42);
555        let inexact_precision = Precision::Inexact(23);
556        let absent_precision = Precision::Absent;
557
558        let squared = |x| x * x;
559
560        assert_eq!(exact_precision.map(squared), Precision::Exact(1764));
561        assert_eq!(inexact_precision.map(squared), Precision::Inexact(529));
562        assert_eq!(absent_precision.map(squared), Precision::Absent);
563    }
564
565    #[test]
566    fn test_is_exact() {
567        let exact_precision = Precision::Exact(42);
568        let inexact_precision = Precision::Inexact(23);
569        let absent_precision = Precision::<i32>::Absent;
570
571        assert_eq!(exact_precision.is_exact(), Some(true));
572        assert_eq!(inexact_precision.is_exact(), Some(false));
573        assert_eq!(absent_precision.is_exact(), None);
574    }
575
576    #[test]
577    fn test_max() {
578        let precision1 = Precision::Exact(42);
579        let precision2 = Precision::Inexact(23);
580        let precision3 = Precision::Exact(30);
581        let absent_precision = Precision::Absent;
582
583        assert_eq!(precision1.max(&precision2), Precision::Inexact(42));
584        assert_eq!(precision1.max(&precision3), Precision::Exact(42));
585        assert_eq!(precision2.max(&precision3), Precision::Inexact(30));
586        assert_eq!(precision1.max(&absent_precision), Precision::Absent);
587    }
588
589    #[test]
590    fn test_min() {
591        let precision1 = Precision::Exact(42);
592        let precision2 = Precision::Inexact(23);
593        let precision3 = Precision::Exact(30);
594        let absent_precision = Precision::Absent;
595
596        assert_eq!(precision1.min(&precision2), Precision::Inexact(23));
597        assert_eq!(precision1.min(&precision3), Precision::Exact(30));
598        assert_eq!(precision2.min(&precision3), Precision::Inexact(23));
599        assert_eq!(precision1.min(&absent_precision), Precision::Absent);
600    }
601
602    #[test]
603    fn test_to_inexact() {
604        let exact_precision = Precision::Exact(42);
605        let inexact_precision = Precision::Inexact(42);
606        let absent_precision = Precision::<i32>::Absent;
607
608        assert_eq!(exact_precision.to_inexact(), inexact_precision);
609        assert_eq!(inexact_precision.to_inexact(), inexact_precision);
610        assert_eq!(absent_precision.to_inexact(), absent_precision);
611    }
612
613    #[test]
614    fn test_add() {
615        let precision1 = Precision::Exact(42);
616        let precision2 = Precision::Inexact(23);
617        let precision3 = Precision::Exact(30);
618        let absent_precision = Precision::Absent;
619
620        assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
621        assert_eq!(precision1.add(&precision3), Precision::Exact(72));
622        assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
623        assert_eq!(precision1.add(&absent_precision), Precision::Absent);
624    }
625
626    #[test]
627    fn test_add_scalar() {
628        let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
629
630        assert_eq!(
631            precision.add(&Precision::Exact(ScalarValue::Int32(Some(23)))),
632            Precision::Exact(ScalarValue::Int32(Some(65))),
633        );
634        assert_eq!(
635            precision.add(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
636            Precision::Inexact(ScalarValue::Int32(Some(65))),
637        );
638        assert_eq!(
639            precision.add(&Precision::Exact(ScalarValue::Int32(None))),
640            // As per behavior of ScalarValue::add
641            Precision::Exact(ScalarValue::Int32(None)),
642        );
643        assert_eq!(precision.add(&Precision::Absent), Precision::Absent);
644    }
645
646    #[test]
647    fn test_sub() {
648        let precision1 = Precision::Exact(42);
649        let precision2 = Precision::Inexact(23);
650        let precision3 = Precision::Exact(30);
651        let absent_precision = Precision::Absent;
652
653        assert_eq!(precision1.sub(&precision2), Precision::Inexact(19));
654        assert_eq!(precision1.sub(&precision3), Precision::Exact(12));
655        assert_eq!(precision1.sub(&absent_precision), Precision::Absent);
656    }
657
658    #[test]
659    fn test_sub_scalar() {
660        let precision = Precision::Exact(ScalarValue::Int32(Some(42)));
661
662        assert_eq!(
663            precision.sub(&Precision::Exact(ScalarValue::Int32(Some(23)))),
664            Precision::Exact(ScalarValue::Int32(Some(19))),
665        );
666        assert_eq!(
667            precision.sub(&Precision::Inexact(ScalarValue::Int32(Some(23)))),
668            Precision::Inexact(ScalarValue::Int32(Some(19))),
669        );
670        assert_eq!(
671            precision.sub(&Precision::Exact(ScalarValue::Int32(None))),
672            // As per behavior of ScalarValue::sub
673            Precision::Exact(ScalarValue::Int32(None)),
674        );
675        assert_eq!(precision.sub(&Precision::Absent), Precision::Absent);
676    }
677
678    #[test]
679    fn test_multiply() {
680        let precision1 = Precision::Exact(6);
681        let precision2 = Precision::Inexact(3);
682        let precision3 = Precision::Exact(5);
683        let absent_precision = Precision::Absent;
684
685        assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
686        assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
687        assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
688        assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
689    }
690
691    #[test]
692    fn test_multiply_scalar() {
693        let precision = Precision::Exact(ScalarValue::Int32(Some(6)));
694
695        assert_eq!(
696            precision.multiply(&Precision::Exact(ScalarValue::Int32(Some(5)))),
697            Precision::Exact(ScalarValue::Int32(Some(30))),
698        );
699        assert_eq!(
700            precision.multiply(&Precision::Inexact(ScalarValue::Int32(Some(5)))),
701            Precision::Inexact(ScalarValue::Int32(Some(30))),
702        );
703        assert_eq!(
704            precision.multiply(&Precision::Exact(ScalarValue::Int32(None))),
705            // As per behavior of ScalarValue::mul_checked
706            Precision::Exact(ScalarValue::Int32(None)),
707        );
708        assert_eq!(precision.multiply(&Precision::Absent), Precision::Absent);
709    }
710
711    #[test]
712    fn test_cast_to() {
713        // Valid
714        assert_eq!(
715            Precision::Exact(ScalarValue::Int32(Some(42)))
716                .cast_to(&DataType::Int64)
717                .unwrap(),
718            Precision::Exact(ScalarValue::Int64(Some(42))),
719        );
720        assert_eq!(
721            Precision::Inexact(ScalarValue::Int32(Some(42)))
722                .cast_to(&DataType::Int64)
723                .unwrap(),
724            Precision::Inexact(ScalarValue::Int64(Some(42))),
725        );
726        // Null
727        assert_eq!(
728            Precision::Exact(ScalarValue::Int32(None))
729                .cast_to(&DataType::Int64)
730                .unwrap(),
731            Precision::Exact(ScalarValue::Int64(None)),
732        );
733        // Overflow returns error
734        assert!(Precision::Exact(ScalarValue::Int32(Some(256)))
735            .cast_to(&DataType::Int8)
736            .is_err());
737    }
738
739    #[test]
740    fn test_precision_cloning() {
741        // Precision<usize> is copy
742        let precision: Precision<usize> = Precision::Exact(42);
743        let p2 = precision;
744        assert_eq!(precision, p2);
745
746        // Precision<ScalarValue> is not copy (requires .clone())
747        let precision: Precision<ScalarValue> =
748            Precision::Exact(ScalarValue::Int64(Some(42)));
749        // Clippy would complain about this if it were Copy
750        #[allow(clippy::redundant_clone)]
751        let p2 = precision.clone();
752        assert_eq!(precision, p2);
753    }
754
755    #[test]
756    fn test_project_none() {
757        let projection = None;
758        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
759        assert_eq!(stats, make_stats(vec![10, 20, 30]));
760    }
761
762    #[test]
763    fn test_project_empty() {
764        let projection = Some(vec![]);
765        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
766        assert_eq!(stats, make_stats(vec![]));
767    }
768
769    #[test]
770    fn test_project_swap() {
771        let projection = Some(vec![2, 1]);
772        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
773        assert_eq!(stats, make_stats(vec![30, 20]));
774    }
775
776    #[test]
777    fn test_project_repeated() {
778        let projection = Some(vec![1, 2, 1, 1, 0, 2]);
779        let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
780        assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
781    }
782
783    // Make a Statistics structure with the specified null counts for each column
784    fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
785        Statistics {
786            num_rows: Precision::Exact(42),
787            total_byte_size: Precision::Exact(500),
788            column_statistics: counts.into_iter().map(col_stats_i64).collect(),
789        }
790    }
791
792    fn col_stats_i64(null_count: usize) -> ColumnStatistics {
793        ColumnStatistics {
794            null_count: Precision::Exact(null_count),
795            max_value: Precision::Exact(ScalarValue::Int64(Some(42))),
796            min_value: Precision::Exact(ScalarValue::Int64(Some(64))),
797            sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))),
798            distinct_count: Precision::Exact(100),
799        }
800    }
801}
datafusion_common/stats.rs

datafusion_common/
stats.rs