tantivy_common/
lib.rs

1#![allow(clippy::len_without_is_empty)]
2
3use std::ops::Deref;
4
5pub use byteorder::LittleEndian as Endianness;
6
7mod bitset;
8mod byte_count;
9mod datetime;
10pub mod file_slice;
11mod group_by;
12mod json_path_writer;
13mod serialize;
14mod vint;
15mod writer;
16pub use bitset::*;
17pub use byte_count::ByteCount;
18pub use datetime::{DateTime, DateTimePrecision};
19pub use group_by::GroupByIteratorExtended;
20pub use json_path_writer::JsonPathWriter;
21pub use ownedbytes::{OwnedBytes, StableDeref};
22pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
23pub use vint::{
24    read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
25};
26pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
27
/// Trait for types that can report a length.
pub trait HasLen {
    /// Returns the length of `self`.
    fn len(&self) -> usize;

    /// Returns `true` exactly when [`len()`](HasLen::len) is zero.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
38
39impl<T: Deref<Target = [u8]>> HasLen for T {
40    fn len(&self) -> usize {
41        self.deref().len()
42    }
43}
44
/// Mask with only the most significant bit (bit 63) of a `u64` set.
const HIGHEST_BIT: u64 = 0x8000_0000_0000_0000;
46
47/// Maps a `i64` to `u64`
48///
49/// For simplicity, tantivy internally handles `i64` as `u64`.
50/// The mapping is defined by this function.
51///
52/// Maps `i64` to `u64` so that
53/// `-2^63 .. 2^63-1` is mapped
54///     to
55/// `0 .. 2^64-1`
56/// in that order.
57///
58/// This is more suited than simply casting (`val as u64`)
59/// because of bitpacking.
60///
61/// Imagine a list of `i64` ranging from -10 to 10.
62/// When casting negative values, the negative values are projected
63/// to values over 2^63, and all values end up requiring 64 bits.
64///
65/// # See also
66/// The reverse mapping is [`u64_to_i64()`].
67#[inline]
68pub fn i64_to_u64(val: i64) -> u64 {
69    (val as u64) ^ HIGHEST_BIT
70}
71
72/// Reverse the mapping given by [`i64_to_u64()`].
73#[inline]
74pub fn u64_to_i64(val: u64) -> i64 {
75    (val ^ HIGHEST_BIT) as i64
76}
77
78/// Maps a `f64` to `u64`
79///
80/// For simplicity, tantivy internally handles `f64` as `u64`.
81/// The mapping is defined by this function.
82///
83/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
84///
85/// This is more suited than simply casting (`val as u64`)
86/// which would truncate the result
87///
88/// # Reference
89///
90/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
91/// explains the mapping in a clear manner.
92///
93/// # See also
94/// The reverse mapping is [`u64_to_f64()`].
95#[inline]
96pub fn f64_to_u64(val: f64) -> u64 {
97    let bits = val.to_bits();
98    if val.is_sign_positive() {
99        bits ^ HIGHEST_BIT
100    } else {
101        !bits
102    }
103}
104
105/// Reverse the mapping given by [`f64_to_u64()`].
106#[inline]
107pub fn u64_to_f64(val: u64) -> f64 {
108    f64::from_bits(if val & HIGHEST_BIT != 0 {
109        val ^ HIGHEST_BIT
110    } else {
111        !val
112    })
113}
114
/// Overwrites every occurrence of `needle` in `bytes` with `replacement`.
///
/// The needle is expected to be rare in `bytes`: the slice is first scanned
/// for it, and nothing is written when it is absent.
#[inline]
pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
    // Fast path: a read-only scan decides whether any write is needed.
    if bytes.contains(&needle) {
        bytes
            .iter_mut()
            .filter(|byte| **byte == needle)
            .for_each(|byte| *byte = replacement);
    }
}
130
#[cfg(test)]
pub mod test {

    use proptest::prelude::*;

    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};

    /// Asserts that `val` survives an `i64 -> u64 -> i64` round trip.
    fn assert_i64_roundtrip(val: i64) {
        let encoded = i64_to_u64(val);
        assert_eq!(u64_to_i64(encoded), val);
    }

    /// Asserts that `val` survives an `f64 -> u64 -> f64` round trip.
    fn assert_f64_roundtrip(val: f64) {
        let encoded = f64_to_u64(val);
        assert_eq!(u64_to_f64(encoded), val);
    }

    /// Serializes `O::default()` and asserts that the number of bytes
    /// written equals `O::SIZE_IN_BYTES`.
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let value = O::default();
        let mut serialized = Vec::new();
        value.serialize(&mut serialized).unwrap();
        assert_eq!(serialized.len(), O::SIZE_IN_BYTES);
    }

    proptest! {
        #[test]
        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
            // The encoded keys must compare exactly like the floats do.
            let keys_ascending = f64_to_u64(left) < f64_to_u64(right);
            let floats_ascending = left < right;
            assert_eq!(keys_ascending, floats_ascending);
        }
    }

    #[test]
    fn test_i64_converter() {
        // The extremes of `i64` land on the extremes of `u64`.
        assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
        assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
        for &val in &[0i64, i64::MIN, i64::MAX] {
            assert_i64_roundtrip(val);
        }
        (-1000i64..1000i64).for_each(assert_i64_roundtrip);
    }

    #[test]
    fn test_f64_converter() {
        let samples = [f64::INFINITY, f64::NEG_INFINITY, 0.0, -0.0, 1.0, -1.0];
        for &val in &samples {
            assert_f64_roundtrip(val);
        }
    }

    #[test]
    fn test_f64_order() {
        // nan is not a number: its key must fall outside the infinities' range
        let between_infinities = f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY);
        let nan_key = f64_to_u64(f64::NAN);
        assert!(!between_infinities.contains(&nan_key));

        assert!(f64_to_u64(1.0) < f64_to_u64(1.5)); // same exponent, different mantissa
        assert!(f64_to_u64(1.0) < f64_to_u64(2.0)); // same mantissa, different exponent
        assert!(f64_to_u64(1.5) < f64_to_u64(2.0)); // different exponent and mantissa
        assert!(f64_to_u64(-1.0) < f64_to_u64(1.0)); // neg < pos
        assert!(f64_to_u64(-1.0) > f64_to_u64(-1.5));
        assert!(f64_to_u64(1.0) > f64_to_u64(-2.0));
        assert!(f64_to_u64(-1.5) > f64_to_u64(-2.0));
    }

    #[test]
    fn test_replace_in_place() {
        /// Replaces `b'b'` with `b'c'` in a copy of `input` and compares
        /// the result with `expected`.
        fn check(input: &[u8], expected: &[u8]) {
            let mut buf: Vec<u8> = input.to_vec();
            super::replace_in_place(b'b', b'c', &mut buf);
            assert_eq!(buf.as_slice(), expected);
        }
        check(b"", b"");
        check(b"b", b"c");
        check(b"baaa", b"caaa");
        check(b"aaab", b"aaac");
        check(b"aaabaa", b"aaacaa");
        check(b"aaaaaa", b"aaaaaa");
        check(b"bbbb", b"cccc");
    }
}