metrics_util/histogram.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
//! Helper functions and types related to histogram data.
/// A bucketed histogram.
///
/// This histogram tracks the number of samples that fall into pre-defined buckets,
/// rather than exposing any sort of quantiles.
///
/// This type is most useful with systems that prefer bucketed data, such as Prometheus'
/// histogram type, as opposed to its summary type, which deals with quantiles.
#[derive(Debug, Clone)]
pub struct Histogram {
count: u64,
bounds: Vec<f64>,
buckets: Vec<u64>,
sum: f64,
}
impl Histogram {
/// Creates a new `Histogram`.
///
/// If `bounds` is empty, returns `None`.
pub fn new(bounds: &[f64]) -> Option<Histogram> {
if bounds.is_empty() {
return None;
}
let buckets = vec![0u64; bounds.len()];
Some(Histogram { count: 0, bounds: Vec::from(bounds), buckets, sum: 0.0 })
}
/// Gets the sum of all samples.
pub fn sum(&self) -> f64 {
self.sum
}
/// Gets the sample count.
pub fn count(&self) -> u64 {
self.count
}
/// Gets the buckets.
///
/// Buckets are tuples, where the first element is the bucket limit itself, and the second
/// element is the count of samples in that bucket.
pub fn buckets(&self) -> Vec<(f64, u64)> {
self.bounds.iter().cloned().zip(self.buckets.iter().cloned()).collect()
}
/// Records a single sample.
pub fn record(&mut self, sample: f64) {
self.sum += sample;
self.count += 1;
// Add the sample to every bucket where the value is less than the bound.
for (idx, bucket) in self.bounds.iter().enumerate() {
if sample <= *bucket {
self.buckets[idx] += 1;
}
}
}
/// Records multiple samples.
pub fn record_many<'a, S>(&mut self, samples: S)
where
S: IntoIterator<Item = &'a f64> + 'a,
{
let mut bucketed = vec![0u64; self.buckets.len()];
let mut sum = 0.0;
let mut count = 0;
for sample in samples.into_iter() {
sum += *sample;
count += 1;
for (idx, bucket) in self.bounds.iter().enumerate() {
if sample <= bucket {
bucketed[idx] += 1;
break;
}
}
}
// Add each bucket to the next bucket to satisfy the "less than or equal to"
// behavior of the buckets.
if bucketed.len() >= 2 {
for idx in 0..(bucketed.len() - 1) {
bucketed[idx + 1] += bucketed[idx];
}
}
// Merge our temporary buckets to our main buckets.
for (idx, local) in bucketed.iter().enumerate() {
self.buckets[idx] += local;
}
self.sum += sum;
self.count += count;
}
}
#[cfg(test)]
mod tests {
use super::Histogram;
#[test]
fn test_histogram() {
// No buckets, can't do shit.
let histogram = Histogram::new(&[]);
assert!(histogram.is_none());
let buckets = &[10.0, 25.0, 100.0];
let values = vec![3.0, 2.0, 6.0, 12.0, 56.0, 82.0, 202.0, 100.0, 29.0];
let mut histogram = Histogram::new(buckets).expect("histogram should have been created");
histogram.record_many(&values);
histogram.record(89.0);
let result = histogram.buckets();
assert_eq!(result.len(), 3);
let (_, first) = result[0];
assert_eq!(first, 3);
let (_, second) = result[1];
assert_eq!(second, 4);
let (_, third) = result[2];
assert_eq!(third, 9);
assert_eq!(histogram.count(), values.len() as u64 + 1);
assert_eq!(histogram.sum(), 581.0);
}
}