use std::collections::hash_map::{HashMap, Entry};
use std::fmt;
use std::hash::Hash;
use std::iter::{FromIterator, IntoIterator};
use std::default::Default;
use Commute;
/// A frequency table that tallies how many times each distinct value of type
/// `T` has been counted.
#[derive(Clone)]
pub struct Frequencies<T> {
// Maps each distinct value to the number of times it has been counted.
data: HashMap<T, u64>,
}
/// Debug output is exactly the debug output of the underlying value-to-count
/// map.
impl<T: fmt::Debug + Eq + Hash> fmt::Debug for Frequencies<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Hand the caller's formatter straight to the map instead of
        // re-formatting with a fresh "{:?}" spec, so that flags such as the
        // alternate/pretty form (`{:#?}`) are honored rather than dropped.
        fmt::Debug::fmt(&self.data, f)
    }
}
impl<T: Eq + Hash> Frequencies<T> {
pub fn new() -> Frequencies<T> {
Default::default()
}
pub fn add(&mut self, v: T) {
match self.data.entry(v) {
Entry::Vacant(count) => { count.insert(1); },
Entry::Occupied(mut count) => { *count.get_mut() += 1; },
}
}
pub fn count(&self, v: &T) -> u64 {
self.data.get(v).map(|&v| v).unwrap_or(0)
}
pub fn cardinality(&self) -> u64 {
self.len() as u64
}
pub fn mode(&self) -> Option<&T> {
let counts = self.most_frequent();
if counts.is_empty() {
None
} else if counts.len() >= 2 && counts[0].1 == counts[1].1 {
None
} else {
Some(counts[0].0)
}
}
pub fn most_frequent(&self) -> Vec<(&T, u64)> {
let mut counts: Vec<_> = self.data.iter()
.map(|(k, &v)| (k, v))
.collect();
counts.sort_by(|&(_, c1), &(_, c2)| c2.cmp(&c1));
counts
}
pub fn least_frequent(&self) -> Vec<(&T, u64)> {
let mut counts: Vec<_> = self.data.iter()
.map(|(k, &v)| (k, v))
.collect();
counts.sort_by(|&(_, c1), &(_, c2)| c1.cmp(&c2));
counts
}
pub fn len(&self) -> usize {
self.data.len()
}
}
/// Merging two tables adds their counts together, value by value.
impl<T: Eq + Hash> Commute for Frequencies<T> {
    fn merge(&mut self, v: Frequencies<T>) {
        for (key, incoming) in v.data {
            match self.data.entry(key) {
                // Seen on both sides: accumulate into the existing count.
                Entry::Occupied(mut slot) => *slot.get_mut() += incoming,
                // Only seen in `v`: adopt its count unchanged.
                Entry::Vacant(slot) => {
                    slot.insert(incoming);
                }
            }
        }
    }
}
/// The default table is empty, with its map pre-sized up front.
impl<T: Eq + Hash> Default for Frequencies<T> {
    fn default() -> Frequencies<T> {
        // Number of entries the backing map reserves room for on creation.
        const INITIAL_CAPACITY: usize = 100000;

        let data = HashMap::with_capacity(INITIAL_CAPACITY);
        Frequencies { data: data }
    }
}
/// Builds a table by counting every item the iterator yields.
impl<T: Eq + Hash> FromIterator<T> for Frequencies<T> {
    fn from_iter<I: IntoIterator<Item=T>>(it: I) -> Frequencies<T> {
        // Start from a fresh table and count the samples one by one.
        it.into_iter().fold(Frequencies::new(), |mut freqs, sample| {
            freqs.add(sample);
            freqs
        })
    }
}
/// Extending a table counts every item the iterator yields.
impl<T: Eq + Hash> Extend<T> for Frequencies<T> {
    fn extend<I: IntoIterator<Item=T>>(&mut self, it: I) {
        it.into_iter().for_each(|sample| self.add(sample));
    }
}
#[cfg(test)]
mod test {
    use super::Frequencies;

    /// The ranked views must start with the highest / lowest count.
    #[test]
    fn ranked() {
        // Tallies: 1 -> 2, 2 -> 5, 3 -> 1, 4 -> 3, so both extremes are unique.
        let samples = vec![1usize, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4];
        let mut counts = Frequencies::new();
        counts.extend(samples);

        let descending = counts.most_frequent();
        let ascending = counts.least_frequent();
        assert_eq!(descending[0], (&2, 5));
        assert_eq!(ascending[0], (&3, 1));
    }
}