average/moments/
mod.rs

1use num_traits::ToPrimitive;
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
5use super::{Estimate, Merge};
6
7include!("mean.rs");
8include!("variance.rs");
9#[cfg(any(feature = "std", feature = "libm"))]
10include!("skewness.rs");
11#[cfg(any(feature = "std", feature = "libm"))]
12include!("kurtosis.rs");
13
14/// Alias for `Variance`.
15pub type MeanWithError = Variance;
16
17#[doc(hidden)]
18#[macro_export]
19macro_rules! define_moments_common {
20    ($name:ident, $MAX_MOMENT:expr) => {
21        use num_traits::{pow, ToPrimitive};
22
23        /// An iterator over binomial coefficients.
24        struct IterBinomial {
25            a: u64,
26            n: u64,
27            k: u64,
28        }
29
30        impl IterBinomial {
31            /// For a given n, iterate over all binomial coefficients binomial(n, k), for k=0...n.
32            #[inline]
33            pub fn new(n: u64) -> IterBinomial {
34                IterBinomial { k: 0, a: 1, n }
35            }
36        }
37
38        impl Iterator for IterBinomial {
39            type Item = u64;
40
41            #[inline]
42            fn next(&mut self) -> Option<u64> {
43                if self.k > self.n {
44                    return None;
45                }
46                self.a = if !(self.k == 0) {
47                    self.a * (self.n - self.k + 1) / self.k
48                } else {
49                    1
50                };
51                self.k += 1;
52                Some(self.a)
53            }
54        }
55
56        /// The maximal order of the moment to be calculated.
57        const MAX_MOMENT: usize = $MAX_MOMENT;
58
59        impl $name {
60            /// Create a new moments estimator.
61            #[inline]
62            pub fn new() -> $name {
63                $name {
64                    n: 0,
65                    avg: 0.,
66                    m: [0.; MAX_MOMENT - 1],
67                }
68            }
69
70            /// Determine whether the sample is empty.
71            #[inline]
72            pub fn is_empty(&self) -> bool {
73                self.n == 0
74            }
75
76            /// Return the sample size.
77            #[inline]
78            pub fn len(&self) -> u64 {
79                self.n
80            }
81
82            /// Estimate the mean of the population.
83            ///
84            /// Returns NaN for an empty sample.
85            #[inline]
86            pub fn mean(&self) -> f64 {
87                if self.n > 0 { self.avg } else { f64::NAN }
88            }
89
90            /// Estimate the `p`th central moment of the population.
91            /// 
92            /// If `p` > 1, returns NaN for an empty sample.
93            #[inline]
94            pub fn central_moment(&self, p: usize) -> f64 {
95                let n = self.n.to_f64().unwrap();
96                match p {
97                    0 => 1.,
98                    1 => 0.,
99                    _ => if self.n > 0 { self.m[p - 2] / n } else { f64::NAN },
100                }
101            }
102
103            /// Estimate the `p`th standardized moment of the population.
104            #[cfg(any(feature = "std", feature = "libm"))]
105            #[inline]
106            pub fn standardized_moment(&self, p: usize) -> f64 {
107                match p {
108                    0 => self.n.to_f64().unwrap(),
109                    1 => 0.,
110                    2 => 1.,
111                    _ => {
112                        let variance = self.central_moment(2);
113                        assert_ne!(variance, 0.);
114                        self.central_moment(p) / pow(num_traits::Float::sqrt(variance), p)
115                    }
116                }
117            }
118
119            /// Calculate the sample variance.
120            ///
121            /// This is an unbiased estimator of the variance of the population.
122            /// 
123            /// Returns NaN for samples of size 1 or less.
124            #[inline]
125            pub fn sample_variance(&self) -> f64 {
126                if self.n < 2 {
127                    return f64::NAN;
128                }
129                self.m[0] / (self.n - 1).to_f64().unwrap()
130            }
131
132            /// Calculate the sample skewness.
133            /// 
134            /// Returns NaN for an empty sample.
135            #[cfg(any(feature = "std", feature = "libm"))]
136            #[inline]
137            pub fn sample_skewness(&self) -> f64 {
138                use num_traits::Float;
139
140                if self.n == 0 {
141                    return f64::NAN;
142                }
143                if self.n == 1 {
144                    return 0.;
145                }
146                let n = self.n.to_f64().unwrap();
147                if self.n < 3 {
148                    // Method of moments
149                    return self.central_moment(3)
150                        / Float::powf(n * (self.central_moment(2) / (n - 1.)), 1.5);
151                }
152                // Adjusted Fisher-Pearson standardized moment coefficient
153                Float::sqrt(n * (n - 1.)) / (n * (n - 2.))
154                    * Float::powf(self.central_moment(3) / (self.central_moment(2) / n), 1.5)
155            }
156
157            /// Calculate the sample excess kurtosis.
158            /// 
159            /// Returns NaN for samples of size 3 or less.
160            #[inline]
161            pub fn sample_excess_kurtosis(&self) -> f64 {
162                if self.n < 4 {
163                    return f64::NAN;
164                }
165                let n = self.n.to_f64().unwrap();
166                (n + 1.) * n * self.central_moment(4)
167                    / ((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2))
168                    - 3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
169            }
170
171            /// Add an observation sampled from the population.
172            #[inline]
173            pub fn add(&mut self, x: f64) {
174                self.n += 1;
175                let delta = x - self.avg;
176                let n = self.n.to_f64().unwrap();
177                self.avg += delta / n;
178
179                let mut coeff_delta = delta;
180                let over_n = 1. / n;
181                let mut term1 = (n - 1.) * (-over_n);
182                let factor1 = -over_n;
183                let mut term2 = (n - 1.) * over_n;
184                let factor2 = (n - 1.) * over_n;
185
186                let factor_coeff = -delta * over_n;
187
188                let prev_m = self.m;
189                for p in 2..=MAX_MOMENT {
190                    term1 *= factor1;
191                    term2 *= factor2;
192                    coeff_delta *= delta;
193                    self.m[p - 2] += (term1 + term2) * coeff_delta;
194
195                    let mut coeff = 1.;
196                    let mut binom = IterBinomial::new(p as u64);
197                    binom.next().unwrap(); // Skip k = 0.
198                    for k in 1..(p - 1) {
199                        coeff *= factor_coeff;
200                        self.m[p - 2] +=
201                            binom.next().unwrap().to_f64().unwrap() * prev_m[p - 2 - k] * coeff;
202                    }
203                }
204            }
205        }
206
207        impl $crate::Merge for $name {
208            #[inline]
209            fn merge(&mut self, other: &$name) {
210                if other.is_empty() {
211                    return;
212                }
213                if self.is_empty() {
214                    *self = other.clone();
215                    return;
216                }
217
218                let n_a = self.n.to_f64().unwrap();
219                let n_b = other.n.to_f64().unwrap();
220                let delta = other.avg - self.avg;
221
222                self.n += other.n;
223                let n = self.n.to_f64().unwrap();
224                let n_a_over_n = n_a / n;
225                let n_b_over_n = n_b / n;
226                self.avg += n_b_over_n * delta;
227
228                let factor_a = -n_b_over_n * delta;
229                let factor_b = n_a_over_n * delta;
230                let mut term_a = n_a * factor_a;
231                let mut term_b = n_b * factor_b;
232                let prev_m = self.m;
233                for p in 2..=MAX_MOMENT {
234                    term_a *= factor_a;
235                    term_b *= factor_b;
236                    self.m[p - 2] += other.m[p - 2] + term_a + term_b;
237
238                    let mut coeff_a = 1.;
239                    let mut coeff_b = 1.;
240                    let mut coeff_delta = 1.;
241                    let mut binom = IterBinomial::new(p as u64);
242                    binom.next().unwrap();
243                    for k in 1..(p - 1) {
244                        coeff_a *= -n_b_over_n;
245                        coeff_b *= n_a_over_n;
246                        coeff_delta *= delta;
247                        self.m[p - 2] += binom.next().unwrap().to_f64().unwrap()
248                            * coeff_delta
249                            * (prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
250                    }
251                }
252            }
253        }
254
255        impl core::default::Default for $name {
256            fn default() -> $name {
257                $name::new()
258            }
259        }
260
261        $crate::impl_from_iterator!($name);
262        $crate::impl_from_par_iterator!($name);
263        $crate::impl_extend!($name);
264    };
265}
266
267#[cfg(feature = "serde")]
268#[doc(hidden)]
269#[macro_export]
270macro_rules! define_moments_inner {
271    ($name:ident, $MAX_MOMENT:expr) => {
272        $crate::define_moments_common!($name, $MAX_MOMENT);
273
274        use serde::{Deserialize, Serialize};
275
276        /// Estimate the first N moments of a sequence of numbers ("population").
277        #[derive(Debug, Clone, Serialize, Deserialize)]
278        pub struct $name {
279            /// Number of samples.
280            ///
281            /// Technically, this is the same as m_0, but we want this to be an integer
282            /// to avoid numerical issues, so we store it separately.
283            n: u64,
284            /// Average.
285            avg: f64,
286            /// Moments times `n`.
287            ///
288            /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
289            m: [f64; MAX_MOMENT - 1],
290        }
291    };
292}
293
294#[cfg(not(feature = "serde"))]
295#[doc(hidden)]
296#[macro_export]
297macro_rules! define_moments_inner {
298    ($name:ident, $MAX_MOMENT:expr) => {
299        $crate::define_moments_common!($name, $MAX_MOMENT);
300
301        /// Estimate the first N moments of a sequence of numbers ("population").
302        #[derive(Debug, Clone)]
303        pub struct $name {
304            /// Number of samples.
305            ///
306            /// Technically, this is the same as m_0, but we want this to be an integer
307            /// to avoid numerical issues, so we store it separately.
308            n: u64,
309            /// Average.
310            avg: f64,
311            /// Moments times `n`.
312            ///
313            /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
314            m: [f64; MAX_MOMENT - 1],
315        }
316    };
317}
318
319/// Define an estimator of all moments up to a number given at compile time.
320///
321/// This uses a [general algorithm][paper] and is slightly less efficient than
322/// the specialized implementations (such as [`Mean`], [`Variance`],
323/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
324///
325/// (In practise, there is an upper limit due to integer overflow and possibly
326/// numerical issues.)
327///
328/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
329/// [`Mean`]: ./struct.Mean.html
330/// [`Variance`]: ./struct.Variance.html
331/// [`Skewness`]: ./struct.Skewness.html
332/// [`Kurtosis`]: ./struct.Kurtosis.html
333///
334///
335/// # Example
336///
337/// ```
338/// use average::{define_moments, assert_almost_eq};
339///
340/// define_moments!(Moments4, 4);
341///
342/// let mut a: Moments4 = (1..6).map(f64::from).collect();
343/// assert_eq!(a.len(), 5);
344/// assert_eq!(a.mean(), 3.0);
345/// assert_eq!(a.central_moment(0), 1.0);
346/// assert_eq!(a.central_moment(1), 0.0);
347/// assert_eq!(a.central_moment(2), 2.0);
348/// assert_eq!(a.standardized_moment(0), 5.0);
349/// assert_eq!(a.standardized_moment(1), 0.0);
350/// assert_eq!(a.standardized_moment(2), 1.0);
351/// a.add(1.0);
352/// // skewness
353/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
354/// // kurtosis
355/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
356/// ```
357#[macro_export]
358macro_rules! define_moments {
359    ($name:ident, $MAX_MOMENT:expr) => {
360        $crate::define_moments_inner!($name, $MAX_MOMENT);
361    };
362}