tantivy_common/
serialize.rs

1use std::borrow::Cow;
2use std::io::{Read, Write};
3use std::{fmt, io};
4
5use byteorder::{ReadBytesExt, WriteBytesExt};
6
7use crate::{Endianness, VInt};
8
/// Write sink that discards the data it receives and only keeps a running
/// total of how many bytes were written to it.
#[derive(Default)]
struct Counter(u64);

impl io::Write for Counter {
    /// Adds `buf.len()` to the running total and reports the whole buffer as
    /// written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = buf.len();
        self.0 += accepted as u64;
        Ok(accepted)
    }

    /// Adds `buf.len()` to the running total; never fails.
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        self.write(buf).map(|_| ())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
27
28/// Trait for a simple binary serialization.
29pub trait BinarySerializable: fmt::Debug + Sized {
30    /// Serialize
31    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()>;
32    /// Deserialize
33    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
34
35    fn num_bytes(&self) -> u64 {
36        let mut counter = Counter::default();
37        self.serialize(&mut counter).unwrap();
38        counter.0
39    }
40}
41
42pub trait DeserializeFrom<T: BinarySerializable> {
43    fn deserialize(&mut self) -> io::Result<T>;
44}
45
46/// Implement deserialize from &[u8] for all types which implement BinarySerializable.
47///
48/// TryFrom would actually be preferable, but not possible because of the orphan
49/// rules (not completely sure if this could be resolved)
50impl<T: BinarySerializable> DeserializeFrom<T> for &[u8] {
51    fn deserialize(&mut self) -> io::Result<T> {
52        T::deserialize(self)
53    }
54}
55
/// `FixedSize` marks a `BinarySerializable` as
/// always serializing to the same size.
pub trait FixedSize: BinarySerializable {
    /// Number of bytes the serialized form of every value of this type
    /// occupies.
    const SIZE_IN_BYTES: usize;
}
61
62impl BinarySerializable for () {
63    fn serialize<W: Write + ?Sized>(&self, _: &mut W) -> io::Result<()> {
64        Ok(())
65    }
66    fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
67        Ok(())
68    }
69}
70
71impl FixedSize for () {
72    const SIZE_IN_BYTES: usize = 0;
73}
74
75impl<T: BinarySerializable> BinarySerializable for Vec<T> {
76    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
77        VInt(self.len() as u64).serialize(writer)?;
78        for it in self {
79            it.serialize(writer)?;
80        }
81        Ok(())
82    }
83    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
84        let num_items = VInt::deserialize(reader)?.val();
85        let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
86        for _ in 0..num_items {
87            let item = T::deserialize(reader)?;
88            items.push(item);
89        }
90        Ok(items)
91    }
92}
93
94impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
95    fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
96        self.0.serialize(write)?;
97        self.1.serialize(write)
98    }
99    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
100        Ok((Left::deserialize(reader)?, Right::deserialize(reader)?))
101    }
102}
103impl<Left: BinarySerializable + FixedSize, Right: BinarySerializable + FixedSize> FixedSize
104    for (Left, Right)
105{
106    const SIZE_IN_BYTES: usize = Left::SIZE_IN_BYTES + Right::SIZE_IN_BYTES;
107}
108
109impl BinarySerializable for u32 {
110    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
111        writer.write_u32::<Endianness>(*self)
112    }
113
114    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
115        reader.read_u32::<Endianness>()
116    }
117}
118
119impl FixedSize for u32 {
120    const SIZE_IN_BYTES: usize = 4;
121}
122
123impl BinarySerializable for u16 {
124    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
125        writer.write_u16::<Endianness>(*self)
126    }
127
128    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
129        reader.read_u16::<Endianness>()
130    }
131}
132
133impl FixedSize for u16 {
134    const SIZE_IN_BYTES: usize = 2;
135}
136
137impl BinarySerializable for u64 {
138    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
139        writer.write_u64::<Endianness>(*self)
140    }
141    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
142        reader.read_u64::<Endianness>()
143    }
144}
145
146impl FixedSize for u64 {
147    const SIZE_IN_BYTES: usize = 8;
148}
149
150impl BinarySerializable for u128 {
151    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
152        writer.write_u128::<Endianness>(*self)
153    }
154    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
155        reader.read_u128::<Endianness>()
156    }
157}
158
159impl FixedSize for u128 {
160    const SIZE_IN_BYTES: usize = 16;
161}
162
163impl BinarySerializable for f32 {
164    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
165        writer.write_f32::<Endianness>(*self)
166    }
167    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
168        reader.read_f32::<Endianness>()
169    }
170}
171
172impl FixedSize for f32 {
173    const SIZE_IN_BYTES: usize = 4;
174}
175
176impl BinarySerializable for i64 {
177    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
178        writer.write_i64::<Endianness>(*self)
179    }
180    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
181        reader.read_i64::<Endianness>()
182    }
183}
184
185impl FixedSize for i64 {
186    const SIZE_IN_BYTES: usize = 8;
187}
188
189impl BinarySerializable for f64 {
190    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
191        writer.write_f64::<Endianness>(*self)
192    }
193    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
194        reader.read_f64::<Endianness>()
195    }
196}
197
198impl FixedSize for f64 {
199    const SIZE_IN_BYTES: usize = 8;
200}
201
202impl BinarySerializable for u8 {
203    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
204        writer.write_u8(*self)
205    }
206    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
207        reader.read_u8()
208    }
209}
210
211impl FixedSize for u8 {
212    const SIZE_IN_BYTES: usize = 1;
213}
214
215impl BinarySerializable for bool {
216    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
217        writer.write_u8(u8::from(*self))
218    }
219    fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
220        let val = reader.read_u8()?;
221        match val {
222            0 => Ok(false),
223            1 => Ok(true),
224            _ => Err(io::Error::new(
225                io::ErrorKind::InvalidData,
226                "invalid bool value on deserialization, data corrupted",
227            )),
228        }
229    }
230}
231
232impl FixedSize for bool {
233    const SIZE_IN_BYTES: usize = 1;
234}
235
236impl BinarySerializable for String {
237    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
238        let data: &[u8] = self.as_bytes();
239        VInt(data.len() as u64).serialize(writer)?;
240        writer.write_all(data)
241    }
242
243    fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
244        let string_length = VInt::deserialize(reader)?.val() as usize;
245        let mut result = String::with_capacity(string_length);
246        reader
247            .take(string_length as u64)
248            .read_to_string(&mut result)?;
249        Ok(result)
250    }
251}
252
253impl<'a> BinarySerializable for Cow<'a, str> {
254    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
255        let data: &[u8] = self.as_bytes();
256        VInt(data.len() as u64).serialize(writer)?;
257        writer.write_all(data)
258    }
259
260    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
261        let string_length = VInt::deserialize(reader)?.val() as usize;
262        let mut result = String::with_capacity(string_length);
263        reader
264            .take(string_length as u64)
265            .read_to_string(&mut result)?;
266        Ok(Cow::Owned(result))
267    }
268}
269
270impl<'a> BinarySerializable for Cow<'a, [u8]> {
271    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
272        VInt(self.len() as u64).serialize(writer)?;
273        for it in self.iter() {
274            it.serialize(writer)?;
275        }
276        Ok(())
277    }
278
279    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
280        let num_items = VInt::deserialize(reader)?.val();
281        let mut items: Vec<u8> = Vec::with_capacity(num_items as usize);
282        for _ in 0..num_items {
283            let item = u8::deserialize(reader)?;
284            items.push(item);
285        }
286        Ok(Cow::Owned(items))
287    }
288}
289
#[cfg(test)]
pub mod test {

    use super::*;

    /// Asserts that the default value of `O` serializes to exactly
    /// `O::SIZE_IN_BYTES` bytes.
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
    }

    /// Serializes `v`, deserializes it again, asserts that the round trip
    /// yields an equal value and returns the number of bytes written.
    fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
        let mut buffer: Vec<u8> = Vec::new();
        v.serialize(&mut buffer).unwrap();
        let num_bytes = buffer.len();
        let mut cursor = &buffer[..];
        let deser = T::deserialize(&mut cursor).unwrap();
        assert_eq!(deser, v);
        num_bytes
    }

    #[test]
    fn test_serialize_unit() {
        fixed_size_test::<()>();
        assert_eq!(0, serialize_test(()));
    }

    #[test]
    fn test_serialize_u8() {
        fixed_size_test::<u8>();
        assert_eq!(1, serialize_test(u8::MAX));
    }

    #[test]
    fn test_serialize_u16() {
        fixed_size_test::<u16>();
        assert_eq!(2, serialize_test(u16::MAX));
    }

    #[test]
    fn test_serialize_u32() {
        fixed_size_test::<u32>();
        assert_eq!(4, serialize_test(3u32));
        assert_eq!(4, serialize_test(5u32));
        assert_eq!(4, serialize_test(u32::MAX));
    }

    #[test]
    fn test_serialize_i64() {
        fixed_size_test::<i64>();
        assert_eq!(8, serialize_test(i64::MIN));
    }

    #[test]
    fn test_serialize_f32() {
        fixed_size_test::<f32>();
    }

    #[test]
    fn test_serialize_f64() {
        fixed_size_test::<f64>();
    }

    #[test]
    fn test_serialize_u64() {
        fixed_size_test::<u64>();
        assert_eq!(8, serialize_test(u64::MAX));
    }

    #[test]
    fn test_serialize_u128() {
        fixed_size_test::<u128>();
        assert_eq!(16, serialize_test(u128::MAX));
    }

    #[test]
    fn test_serialize_bool() {
        fixed_size_test::<bool>();
        assert_eq!(1, serialize_test(true));
        assert_eq!(1, serialize_test(false));
    }

    #[test]
    fn test_deserialize_invalid_bool() {
        // Only 0 and 1 are valid encodings of a bool.
        let mut corrupt: &[u8] = &[2u8];
        let err = bool::deserialize(&mut corrupt).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_serialize_pair() {
        fixed_size_test::<(u32, u64)>();
        assert_eq!(4 + 8, serialize_test((1u32, 2u64)));
    }

    #[test]
    fn test_serialize_string() {
        assert_eq!(serialize_test(String::from("")), 1);
        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
        assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
    }

    #[test]
    fn test_serialize_cow() {
        assert_eq!(serialize_test(Cow::<str>::Borrowed("")), 1);
        assert_eq!(serialize_test(Cow::<str>::Borrowed("abc")), 1 + 3);
        assert_eq!(serialize_test(Cow::<[u8]>::Borrowed(&[][..])), 1);
        assert_eq!(serialize_test(Cow::<[u8]>::Borrowed(&[1u8, 2, 3][..])), 1 + 3);
    }

    #[test]
    fn test_serialize_vec() {
        assert_eq!(serialize_test(Vec::<u8>::new()), 1);
        assert_eq!(serialize_test(vec![1u32, 3u32]), 1 + 4 * 2);
    }

    #[test]
    fn test_serialize_vint() {
        for i in 0..10_000 {
            serialize_test(VInt(i as u64));
        }
        assert_eq!(serialize_test(VInt(7u64)), 1);
        assert_eq!(serialize_test(VInt(127u64)), 1);
        assert_eq!(serialize_test(VInt(128u64)), 2);
        assert_eq!(serialize_test(VInt(129u64)), 2);
        assert_eq!(serialize_test(VInt(1234u64)), 2);
        assert_eq!(serialize_test(VInt(16_383u64)), 2);
        assert_eq!(serialize_test(VInt(16_384u64)), 3);
        assert_eq!(serialize_test(VInt(u64::MAX)), 10);
    }
}