tantivy_common/
group_by.rs

1use std::cell::RefCell;
2use std::iter::Peekable;
3use std::rc::Rc;
4
5pub trait GroupByIteratorExtended: Iterator {
6    /// Return an `Iterator` that groups iterator elements. Consecutive elements that map to the
7    /// same key are assigned to the same group.
8    ///
9    /// The returned Iterator item is `(K, impl Iterator)`, where Iterator are the items of the
10    /// group.
11    ///
12    /// ```
13    /// use tantivy_common::GroupByIteratorExtended;
14    ///
15    /// // group data into blocks of larger than zero or not.
16    /// let data: Vec<i32> = vec![1, 3, -2, -2, 1, 0, 1, 2];
17    /// // groups:               |---->|------>|--------->|
18    ///
19    /// let mut data_grouped = Vec::new();
20    /// // Note: group is an iterator
21    /// for (key, group) in data.into_iter().group_by(|val| *val >= 0) {
22    ///     data_grouped.push((key, group.collect()));
23    /// }
24    /// assert_eq!(data_grouped, vec![(true, vec![1, 3]), (false, vec![-2, -2]), (true, vec![1, 0, 1, 2])]);
25    /// ```
26    fn group_by<K, F>(self, key: F) -> GroupByIterator<Self, F, K>
27    where
28        Self: Sized,
29        F: FnMut(&Self::Item) -> K,
30        K: PartialEq + Clone,
31        Self::Item: Clone,
32    {
33        GroupByIterator::new(self, key)
34    }
35}
36impl<I: Iterator> GroupByIteratorExtended for I {}
37
38pub struct GroupByIterator<I, F, K: Clone>
39where
40    I: Iterator,
41    F: FnMut(&I::Item) -> K,
42{
43    // I really would like to avoid the Rc<RefCell>, but the Iterator is shared between
44    // `GroupByIterator` and `GroupIter`. In practice they are used consecutive and
45    // `GroupByIter` is finished before calling next on `GroupByIterator`. I'm not sure there
46    // is a solution with lifetimes for that, because we would need to enforce it in the usage
47    // somehow.
48    //
49    // One potential solution would be to replace the iterator approach with something similar.
50    inner: Rc<RefCell<GroupByShared<I, F, K>>>,
51}
52
53struct GroupByShared<I, F, K: Clone>
54where
55    I: Iterator,
56    F: FnMut(&I::Item) -> K,
57{
58    iter: Peekable<I>,
59    group_by_fn: F,
60}
61
62impl<I, F, K> GroupByIterator<I, F, K>
63where
64    I: Iterator,
65    F: FnMut(&I::Item) -> K,
66    K: Clone,
67{
68    fn new(inner: I, group_by_fn: F) -> Self {
69        let inner = GroupByShared {
70            iter: inner.peekable(),
71            group_by_fn,
72        };
73
74        Self {
75            inner: Rc::new(RefCell::new(inner)),
76        }
77    }
78}
79
80impl<I, F, K> Iterator for GroupByIterator<I, F, K>
81where
82    I: Iterator,
83    I::Item: Clone,
84    F: FnMut(&I::Item) -> K,
85    K: Clone,
86{
87    type Item = (K, GroupIterator<I, F, K>);
88
89    fn next(&mut self) -> Option<Self::Item> {
90        let mut inner = self.inner.borrow_mut();
91        let value = inner.iter.peek()?.clone();
92        let key = (inner.group_by_fn)(&value);
93
94        let inner = self.inner.clone();
95
96        let group_iter = GroupIterator {
97            inner,
98            group_key: key.clone(),
99        };
100        Some((key, group_iter))
101    }
102}
103
104pub struct GroupIterator<I, F, K: Clone>
105where
106    I: Iterator,
107    F: FnMut(&I::Item) -> K,
108{
109    inner: Rc<RefCell<GroupByShared<I, F, K>>>,
110    group_key: K,
111}
112
113impl<I, F, K: PartialEq + Clone> Iterator for GroupIterator<I, F, K>
114where
115    I: Iterator,
116    I::Item: Clone,
117    F: FnMut(&I::Item) -> K,
118{
119    type Item = I::Item;
120
121    fn next(&mut self) -> Option<Self::Item> {
122        let mut inner = self.inner.borrow_mut();
123        // peek if next value is in group
124        let peek_val = inner.iter.peek()?.clone();
125        if (inner.group_by_fn)(&peek_val) == self.group_key {
126            inner.iter.next()
127        } else {
128            None
129        }
130    }
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136
137    fn group_by_collect<I: Iterator<Item = u32>>(iter: I) -> Vec<(I::Item, Vec<I::Item>)> {
138        iter.group_by(|val| val / 10)
139            .map(|(el, iter)| (el, iter.collect::<Vec<_>>()))
140            .collect::<Vec<_>>()
141    }
142
143    #[test]
144    fn group_by_two_groups() {
145        let vals = vec![1u32, 4, 15];
146        let grouped_vals = group_by_collect(vals.into_iter());
147        assert_eq!(grouped_vals, vec![(0, vec![1, 4]), (1, vec![15])]);
148    }
149
150    #[test]
151    fn group_by_test_empty() {
152        let vals = vec![];
153        let grouped_vals = group_by_collect(vals.into_iter());
154        assert_eq!(grouped_vals, vec![]);
155    }
156
157    #[test]
158    fn group_by_three_groups() {
159        let vals = vec![1u32, 4, 15, 1];
160        let grouped_vals = group_by_collect(vals.into_iter());
161        assert_eq!(
162            grouped_vals,
163            vec![(0, vec![1, 4]), (1, vec![15]), (0, vec![1])]
164        );
165    }
166}