average/moments/mod.rs
1use num_traits::ToPrimitive;
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
5use super::{Estimate, Merge};
6
7include!("mean.rs");
8include!("variance.rs");
9#[cfg(any(feature = "std", feature = "libm"))]
10include!("skewness.rs");
11#[cfg(any(feature = "std", feature = "libm"))]
12include!("kurtosis.rs");
13
14/// Alias for `Variance`.
15pub type MeanWithError = Variance;
16
17#[doc(hidden)]
18#[macro_export]
19macro_rules! define_moments_common {
20 ($name:ident, $MAX_MOMENT:expr) => {
21 use num_traits::{pow, ToPrimitive};
22
23 /// An iterator over binomial coefficients.
24 struct IterBinomial {
25 a: u64,
26 n: u64,
27 k: u64,
28 }
29
30 impl IterBinomial {
31 /// For a given n, iterate over all binomial coefficients binomial(n, k), for k=0...n.
32 #[inline]
33 pub fn new(n: u64) -> IterBinomial {
34 IterBinomial { k: 0, a: 1, n }
35 }
36 }
37
38 impl Iterator for IterBinomial {
39 type Item = u64;
40
41 #[inline]
42 fn next(&mut self) -> Option<u64> {
43 if self.k > self.n {
44 return None;
45 }
46 self.a = if !(self.k == 0) {
47 self.a * (self.n - self.k + 1) / self.k
48 } else {
49 1
50 };
51 self.k += 1;
52 Some(self.a)
53 }
54 }
55
56 /// The maximal order of the moment to be calculated.
57 const MAX_MOMENT: usize = $MAX_MOMENT;
58
59 impl $name {
60 /// Create a new moments estimator.
61 #[inline]
62 pub fn new() -> $name {
63 $name {
64 n: 0,
65 avg: 0.,
66 m: [0.; MAX_MOMENT - 1],
67 }
68 }
69
70 /// Determine whether the sample is empty.
71 #[inline]
72 pub fn is_empty(&self) -> bool {
73 self.n == 0
74 }
75
76 /// Return the sample size.
77 #[inline]
78 pub fn len(&self) -> u64 {
79 self.n
80 }
81
82 /// Estimate the mean of the population.
83 ///
84 /// Returns NaN for an empty sample.
85 #[inline]
86 pub fn mean(&self) -> f64 {
87 if self.n > 0 { self.avg } else { f64::NAN }
88 }
89
90 /// Estimate the `p`th central moment of the population.
91 ///
92 /// If `p` > 1, returns NaN for an empty sample.
93 #[inline]
94 pub fn central_moment(&self, p: usize) -> f64 {
95 let n = self.n.to_f64().unwrap();
96 match p {
97 0 => 1.,
98 1 => 0.,
99 _ => if self.n > 0 { self.m[p - 2] / n } else { f64::NAN },
100 }
101 }
102
103 /// Estimate the `p`th standardized moment of the population.
104 #[cfg(any(feature = "std", feature = "libm"))]
105 #[inline]
106 pub fn standardized_moment(&self, p: usize) -> f64 {
107 match p {
108 0 => self.n.to_f64().unwrap(),
109 1 => 0.,
110 2 => 1.,
111 _ => {
112 let variance = self.central_moment(2);
113 assert_ne!(variance, 0.);
114 self.central_moment(p) / pow(num_traits::Float::sqrt(variance), p)
115 }
116 }
117 }
118
119 /// Calculate the sample variance.
120 ///
121 /// This is an unbiased estimator of the variance of the population.
122 ///
123 /// Returns NaN for samples of size 1 or less.
124 #[inline]
125 pub fn sample_variance(&self) -> f64 {
126 if self.n < 2 {
127 return f64::NAN;
128 }
129 self.m[0] / (self.n - 1).to_f64().unwrap()
130 }
131
132 /// Calculate the sample skewness.
133 ///
134 /// Returns NaN for an empty sample.
135 #[cfg(any(feature = "std", feature = "libm"))]
136 #[inline]
137 pub fn sample_skewness(&self) -> f64 {
138 use num_traits::Float;
139
140 if self.n == 0 {
141 return f64::NAN;
142 }
143 if self.n == 1 {
144 return 0.;
145 }
146 let n = self.n.to_f64().unwrap();
147 if self.n < 3 {
148 // Method of moments
149 return self.central_moment(3)
150 / Float::powf(n * (self.central_moment(2) / (n - 1.)), 1.5);
151 }
152 // Adjusted Fisher-Pearson standardized moment coefficient
153 Float::sqrt(n * (n - 1.)) / (n * (n - 2.))
154 * Float::powf(self.central_moment(3) / (self.central_moment(2) / n), 1.5)
155 }
156
157 /// Calculate the sample excess kurtosis.
158 ///
159 /// Returns NaN for samples of size 3 or less.
160 #[inline]
161 pub fn sample_excess_kurtosis(&self) -> f64 {
162 if self.n < 4 {
163 return f64::NAN;
164 }
165 let n = self.n.to_f64().unwrap();
166 (n + 1.) * n * self.central_moment(4)
167 / ((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2))
168 - 3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
169 }
170
171 /// Add an observation sampled from the population.
172 #[inline]
173 pub fn add(&mut self, x: f64) {
174 self.n += 1;
175 let delta = x - self.avg;
176 let n = self.n.to_f64().unwrap();
177 self.avg += delta / n;
178
179 let mut coeff_delta = delta;
180 let over_n = 1. / n;
181 let mut term1 = (n - 1.) * (-over_n);
182 let factor1 = -over_n;
183 let mut term2 = (n - 1.) * over_n;
184 let factor2 = (n - 1.) * over_n;
185
186 let factor_coeff = -delta * over_n;
187
188 let prev_m = self.m;
189 for p in 2..=MAX_MOMENT {
190 term1 *= factor1;
191 term2 *= factor2;
192 coeff_delta *= delta;
193 self.m[p - 2] += (term1 + term2) * coeff_delta;
194
195 let mut coeff = 1.;
196 let mut binom = IterBinomial::new(p as u64);
197 binom.next().unwrap(); // Skip k = 0.
198 for k in 1..(p - 1) {
199 coeff *= factor_coeff;
200 self.m[p - 2] +=
201 binom.next().unwrap().to_f64().unwrap() * prev_m[p - 2 - k] * coeff;
202 }
203 }
204 }
205 }
206
207 impl $crate::Merge for $name {
208 #[inline]
209 fn merge(&mut self, other: &$name) {
210 if other.is_empty() {
211 return;
212 }
213 if self.is_empty() {
214 *self = other.clone();
215 return;
216 }
217
218 let n_a = self.n.to_f64().unwrap();
219 let n_b = other.n.to_f64().unwrap();
220 let delta = other.avg - self.avg;
221
222 self.n += other.n;
223 let n = self.n.to_f64().unwrap();
224 let n_a_over_n = n_a / n;
225 let n_b_over_n = n_b / n;
226 self.avg += n_b_over_n * delta;
227
228 let factor_a = -n_b_over_n * delta;
229 let factor_b = n_a_over_n * delta;
230 let mut term_a = n_a * factor_a;
231 let mut term_b = n_b * factor_b;
232 let prev_m = self.m;
233 for p in 2..=MAX_MOMENT {
234 term_a *= factor_a;
235 term_b *= factor_b;
236 self.m[p - 2] += other.m[p - 2] + term_a + term_b;
237
238 let mut coeff_a = 1.;
239 let mut coeff_b = 1.;
240 let mut coeff_delta = 1.;
241 let mut binom = IterBinomial::new(p as u64);
242 binom.next().unwrap();
243 for k in 1..(p - 1) {
244 coeff_a *= -n_b_over_n;
245 coeff_b *= n_a_over_n;
246 coeff_delta *= delta;
247 self.m[p - 2] += binom.next().unwrap().to_f64().unwrap()
248 * coeff_delta
249 * (prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
250 }
251 }
252 }
253 }
254
255 impl core::default::Default for $name {
256 fn default() -> $name {
257 $name::new()
258 }
259 }
260
261 $crate::impl_from_iterator!($name);
262 $crate::impl_from_par_iterator!($name);
263 $crate::impl_extend!($name);
264 };
265}
266
267#[cfg(feature = "serde")]
268#[doc(hidden)]
269#[macro_export]
270macro_rules! define_moments_inner {
271 ($name:ident, $MAX_MOMENT:expr) => {
272 $crate::define_moments_common!($name, $MAX_MOMENT);
273
274 use serde::{Deserialize, Serialize};
275
276 /// Estimate the first N moments of a sequence of numbers ("population").
277 #[derive(Debug, Clone, Serialize, Deserialize)]
278 pub struct $name {
279 /// Number of samples.
280 ///
281 /// Technically, this is the same as m_0, but we want this to be an integer
282 /// to avoid numerical issues, so we store it separately.
283 n: u64,
284 /// Average.
285 avg: f64,
286 /// Moments times `n`.
287 ///
288 /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
289 m: [f64; MAX_MOMENT - 1],
290 }
291 };
292}
293
294#[cfg(not(feature = "serde"))]
295#[doc(hidden)]
296#[macro_export]
297macro_rules! define_moments_inner {
298 ($name:ident, $MAX_MOMENT:expr) => {
299 $crate::define_moments_common!($name, $MAX_MOMENT);
300
301 /// Estimate the first N moments of a sequence of numbers ("population").
302 #[derive(Debug, Clone)]
303 pub struct $name {
304 /// Number of samples.
305 ///
306 /// Technically, this is the same as m_0, but we want this to be an integer
307 /// to avoid numerical issues, so we store it separately.
308 n: u64,
309 /// Average.
310 avg: f64,
311 /// Moments times `n`.
312 ///
313 /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
314 m: [f64; MAX_MOMENT - 1],
315 }
316 };
317}
318
319/// Define an estimator of all moments up to a number given at compile time.
320///
321/// This uses a [general algorithm][paper] and is slightly less efficient than
322/// the specialized implementations (such as [`Mean`], [`Variance`],
323/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
324///
325/// (In practise, there is an upper limit due to integer overflow and possibly
326/// numerical issues.)
327///
328/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
329/// [`Mean`]: ./struct.Mean.html
330/// [`Variance`]: ./struct.Variance.html
331/// [`Skewness`]: ./struct.Skewness.html
332/// [`Kurtosis`]: ./struct.Kurtosis.html
333///
334///
335/// # Example
336///
337/// ```
338/// use average::{define_moments, assert_almost_eq};
339///
340/// define_moments!(Moments4, 4);
341///
342/// let mut a: Moments4 = (1..6).map(f64::from).collect();
343/// assert_eq!(a.len(), 5);
344/// assert_eq!(a.mean(), 3.0);
345/// assert_eq!(a.central_moment(0), 1.0);
346/// assert_eq!(a.central_moment(1), 0.0);
347/// assert_eq!(a.central_moment(2), 2.0);
348/// assert_eq!(a.standardized_moment(0), 5.0);
349/// assert_eq!(a.standardized_moment(1), 0.0);
350/// assert_eq!(a.standardized_moment(2), 1.0);
351/// a.add(1.0);
352/// // skewness
353/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
354/// // kurtosis
355/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
356/// ```
357#[macro_export]
358macro_rules! define_moments {
359 ($name:ident, $MAX_MOMENT:expr) => {
360 $crate::define_moments_inner!($name, $MAX_MOMENT);
361 };
362}