tantivy_common/
vint.rs

1use std::io;
2use std::io::{Read, Write};
3
4use super::BinarySerializable;
5
6/// Variable int serializes a u128 number
7pub fn serialize_vint_u128(mut val: u128, output: &mut Vec<u8>) {
8    loop {
9        let next_byte: u8 = (val % 128u128) as u8;
10        val /= 128u128;
11        if val == 0 {
12            output.push(next_byte | STOP_BIT);
13            return;
14        } else {
15            output.push(next_byte);
16        }
17    }
18}
19
20///   Wrapper over a `u128` that serializes as a variable int.
21#[derive(Clone, Copy, Debug, Eq, PartialEq)]
22pub struct VIntU128(pub u128);
23
24impl BinarySerializable for VIntU128 {
25    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
26        let mut buffer = vec![];
27        serialize_vint_u128(self.0, &mut buffer);
28        writer.write_all(&buffer)
29    }
30
31    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
32        let mut bytes = reader.bytes();
33        let mut result = 0u128;
34        let mut shift = 0u64;
35        loop {
36            match bytes.next() {
37                Some(Ok(b)) => {
38                    result |= u128::from(b % 128u8) << shift;
39                    if b >= STOP_BIT {
40                        return Ok(VIntU128(result));
41                    }
42                    shift += 7;
43                }
44                _ => {
45                    return Err(io::Error::new(
46                        io::ErrorKind::InvalidData,
47                        "Reach end of buffer while reading VInt",
48                    ));
49                }
50            }
51        }
52    }
53}
54
55///   Wrapper over a `u64` that serializes as a variable int.
56#[derive(Clone, Copy, Debug, Eq, PartialEq)]
57pub struct VInt(pub u64);
58
59const STOP_BIT: u8 = 128;
60
61#[inline]
62pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
63    const START_2: u64 = 1 << 7;
64    const START_3: u64 = 1 << 14;
65    const START_4: u64 = 1 << 21;
66    const START_5: u64 = 1 << 28;
67
68    const MASK_1: u64 = 127;
69    const MASK_2: u64 = MASK_1 << 7;
70    const MASK_3: u64 = MASK_2 << 7;
71    const MASK_4: u64 = MASK_3 << 7;
72    const MASK_5: u64 = MASK_4 << 7;
73
74    let val = u64::from(val);
75    const STOP_BIT: u64 = 128u64;
76    let (res, num_bytes) = if val < START_2 {
77        (val | STOP_BIT, 1)
78    } else if val < START_3 {
79        (
80            (val & MASK_1) | ((val & MASK_2) << 1) | (STOP_BIT << (8)),
81            2,
82        )
83    } else if val < START_4 {
84        (
85            (val & MASK_1) | ((val & MASK_2) << 1) | ((val & MASK_3) << 2) | (STOP_BIT << (8 * 2)),
86            3,
87        )
88    } else if val < START_5 {
89        (
90            (val & MASK_1)
91                | ((val & MASK_2) << 1)
92                | ((val & MASK_3) << 2)
93                | ((val & MASK_4) << 3)
94                | (STOP_BIT << (8 * 3)),
95            4,
96        )
97    } else {
98        (
99            (val & MASK_1)
100                | ((val & MASK_2) << 1)
101                | ((val & MASK_3) << 2)
102                | ((val & MASK_4) << 3)
103                | ((val & MASK_5) << 4)
104                | (STOP_BIT << (8 * 4)),
105            5,
106        )
107    };
108    *buf = res.to_le_bytes();
109    &buf[0..num_bytes]
110}
111
112/// Returns the number of bytes covered by a
113/// serialized vint `u32`.
114///
115/// Expects a buffer data that starts
116/// by the serialized `vint`, scans at most 5 bytes ahead until
117/// it finds the vint final byte.
118///
119/// # May Panic
120/// If the payload does not start by a valid `vint`
121fn vint_len(data: &[u8]) -> usize {
122    for (i, &val) in data.iter().enumerate().take(5) {
123        if val >= STOP_BIT {
124            return i + 1;
125        }
126    }
127    panic!("Corrupted data. Invalid VInt 32");
128}
129
130/// Reads a vint `u32` from a buffer, and
131/// consumes its payload data.
132///
133/// # Panics
134///
135/// If the buffer does not start by a valid
136/// vint payload
137pub fn read_u32_vint(data: &mut &[u8]) -> u32 {
138    let (result, vlen) = read_u32_vint_no_advance(data);
139    *data = &data[vlen..];
140    result
141}
142
143pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
144    let vlen = vint_len(data);
145    let mut result = 0u32;
146    let mut shift = 0u64;
147    for &b in &data[..vlen] {
148        result |= u32::from(b & 127u8) << shift;
149        shift += 7;
150    }
151    (result, vlen)
152}
153/// Write a `u32` as a vint payload.
154pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
155    let mut buf = [0u8; 8];
156    let data = serialize_vint_u32(val, &mut buf);
157    writer.write_all(data)
158}
159
160impl VInt {
161    pub fn val(&self) -> u64 {
162        self.0
163    }
164
165    pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
166        VInt::deserialize(reader).map(|vint| vint.0)
167    }
168
169    pub fn serialize_into_vec(&self, output: &mut Vec<u8>) {
170        let mut buffer = [0u8; 10];
171        let num_bytes = self.serialize_into(&mut buffer);
172        output.extend(&buffer[0..num_bytes]);
173    }
174
175    pub fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
176        let mut remaining = self.0;
177        for (i, b) in buffer.iter_mut().enumerate() {
178            let next_byte: u8 = (remaining % 128u64) as u8;
179            remaining /= 128u64;
180            if remaining == 0u64 {
181                *b = next_byte | STOP_BIT;
182                return i + 1;
183            } else {
184                *b = next_byte;
185            }
186        }
187        unreachable!();
188    }
189}
190
191impl BinarySerializable for VInt {
192    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
193        let mut buffer = [0u8; 10];
194        let num_bytes = self.serialize_into(&mut buffer);
195        writer.write_all(&buffer[0..num_bytes])
196    }
197
198    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
199        let mut bytes = reader.bytes();
200        let mut result = 0u64;
201        let mut shift = 0u64;
202        loop {
203            match bytes.next() {
204                Some(Ok(b)) => {
205                    result |= u64::from(b % 128u8) << shift;
206                    if b >= STOP_BIT {
207                        return Ok(VInt(result));
208                    }
209                    shift += 7;
210                }
211                _ => {
212                    return Err(io::Error::new(
213                        io::ErrorKind::InvalidData,
214                        "Reach end of buffer while reading VInt",
215                    ));
216                }
217            }
218        }
219    }
220}
221
222#[cfg(test)]
223mod tests {
224
225    use super::{serialize_vint_u32, BinarySerializable, VInt};
226
227    fn aux_test_vint(val: u64) {
228        let mut v = [14u8; 10];
229        let num_bytes = VInt(val).serialize_into(&mut v);
230        for el in &v[num_bytes..10] {
231            assert_eq!(el, &14u8);
232        }
233        assert!(num_bytes > 0);
234        if num_bytes < 10 {
235            assert!(1u64 << (7 * num_bytes) > val);
236        }
237        if num_bytes > 1 {
238            assert!(1u64 << (7 * (num_bytes - 1)) <= val);
239        }
240        let serdeser_val = VInt::deserialize(&mut &v[..]).unwrap();
241        assert_eq!(val, serdeser_val.0);
242    }
243
244    #[test]
245    fn test_vint() {
246        aux_test_vint(0);
247        aux_test_vint(1);
248        aux_test_vint(5);
249        aux_test_vint(u64::MAX);
250        for i in 1..9 {
251            let power_of_128 = 1u64 << (7 * i);
252            aux_test_vint(power_of_128 - 1u64);
253            aux_test_vint(power_of_128);
254            aux_test_vint(power_of_128 + 1u64);
255        }
256        aux_test_vint(10);
257    }
258
259    fn aux_test_serialize_vint_u32(val: u32) {
260        let mut buffer = [0u8; 10];
261        let mut buffer2 = [0u8; 8];
262        let len_vint = VInt(val as u64).serialize_into(&mut buffer);
263        let res2 = serialize_vint_u32(val, &mut buffer2);
264        assert_eq!(&buffer[..len_vint], res2, "array wrong for {val}");
265    }
266
267    #[test]
268    fn test_vint_u32() {
269        aux_test_serialize_vint_u32(0);
270        aux_test_serialize_vint_u32(1);
271        aux_test_serialize_vint_u32(5);
272        for i in 1..3 {
273            let power_of_128 = 1u32 << (7 * i);
274            aux_test_serialize_vint_u32(power_of_128 - 1u32);
275            aux_test_serialize_vint_u32(power_of_128);
276            aux_test_serialize_vint_u32(power_of_128 + 1u32);
277        }
278        aux_test_serialize_vint_u32(u32::MAX);
279    }
280}