tantivy_bitpacker/
lib.rs

1mod bitpacker;
2mod blocked_bitpacker;
3mod filter_vec;
4
5use std::cmp::Ordering;
6
7pub use crate::bitpacker::{BitPacker, BitUnpacker};
8pub use crate::blocked_bitpacker::BlockedBitpacker;
9
10/// Computes the number of bits that will be used for bitpacking.
11///
12/// In general the target is the minimum number of bits
13/// required to express the amplitude given in argument.
14///
15/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
16///
17/// The logic is slightly more convoluted here as for optimization
18/// reasons, we want to ensure that a value spawns over at most 8 bytes
19/// of aligned bytes.
20///
21/// Spanning over 9 bytes is possible for instance, if we do
22/// bitpacking with an amplitude of 63 bits.
23/// In this case, the second int will start on bit
24/// 63 (which belongs to byte 7) and ends at byte 15;
25/// Hence 9 bytes (from byte 7 to byte 15 included).
26///
27/// To avoid this, we force the number of bits to 64bits
28/// when the result is greater than `64-8 = 56 bits`.
29///
30/// Note that this only affects rare use cases spawning over
31/// a very large range of values. Even in this case, it results
32/// in an extra cost of at most 12% compared to the optimal
33/// number of bits.
34pub fn compute_num_bits(n: u64) -> u8 {
35    let amplitude = (64u32 - n.leading_zeros()) as u8;
36    if amplitude <= 64 - 8 {
37        amplitude
38    } else {
39        64
40    }
41}
42
43/// Computes the (min, max) of an iterator of `PartialOrd` values.
44///
45/// For values implementing `Ord` (in a way consistent to their `PartialOrd` impl),
46/// this function behaves as expected.
47///
48/// For values with partial ordering, the behavior is non-trivial and may
49/// depends on the order of the values.
50/// For floats however, it simply returns the same results as if NaN were
51/// skipped.
52pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
53where
54    I: Iterator<Item = T>,
55    T: Copy + PartialOrd,
56{
57    let first_el = vals.find(|val| {
58        // We use this to make sure we skip all NaN values when
59        // working with a float type.
60        val.partial_cmp(val) == Some(Ordering::Equal)
61    })?;
62    let mut min_so_far: T = first_el;
63    let mut max_so_far: T = first_el;
64    for val in vals {
65        if val.partial_cmp(&min_so_far) == Some(Ordering::Less) {
66            min_so_far = val;
67        }
68        if val.partial_cmp(&max_so_far) == Some(Ordering::Greater) {
69            max_so_far = val;
70        }
71    }
72    Some((min_so_far, max_so_far))
73}
74
75#[cfg(test)]
76mod tests {
77    use super::*;
78
79    #[test]
80    fn test_compute_num_bits() {
81        assert_eq!(compute_num_bits(1), 1u8);
82        assert_eq!(compute_num_bits(0), 0u8);
83        assert_eq!(compute_num_bits(2), 2u8);
84        assert_eq!(compute_num_bits(3), 2u8);
85        assert_eq!(compute_num_bits(4), 3u8);
86        assert_eq!(compute_num_bits(255), 8u8);
87        assert_eq!(compute_num_bits(256), 9u8);
88        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
89    }
90
91    #[test]
92    fn test_minmax_empty() {
93        let vals: Vec<u32> = vec![];
94        assert_eq!(minmax(vals.into_iter()), None);
95    }
96
97    #[test]
98    fn test_minmax_one() {
99        assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
100    }
101
102    #[test]
103    fn test_minmax_two() {
104        assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
105        assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
106    }
107
108    #[test]
109    fn test_minmax_nan() {
110        assert_eq!(
111            minmax(vec![f64::NAN, 1f64, 2f64].into_iter()),
112            Some((1f64, 2f64))
113        );
114        assert_eq!(
115            minmax(vec![2f64, f64::NAN, 1f64].into_iter()),
116            Some((1f64, 2f64))
117        );
118        assert_eq!(
119            minmax(vec![2f64, 1f64, f64::NAN].into_iter()),
120            Some((1f64, 2f64))
121        );
122    }
123
124    #[test]
125    fn test_minmax_inf() {
126        assert_eq!(
127            minmax(vec![f64::INFINITY, 1f64, 2f64].into_iter()),
128            Some((1f64, f64::INFINITY))
129        );
130        assert_eq!(
131            minmax(vec![-f64::INFINITY, 1f64, 2f64].into_iter()),
132            Some((-f64::INFINITY, 2f64))
133        );
134        assert_eq!(
135            minmax(vec![2f64, f64::INFINITY, 1f64].into_iter()),
136            Some((1f64, f64::INFINITY))
137        );
138        assert_eq!(
139            minmax(vec![2f64, 1f64, -f64::INFINITY].into_iter()),
140            Some((-f64::INFINITY, 2f64))
141        );
142    }
143}