tantivy_common/
serialize.rs1use std::borrow::Cow;
2use std::io::{Read, Write};
3use std::{fmt, io};
4
5use byteorder::{ReadBytesExt, WriteBytesExt};
6
7use crate::{Endianness, VInt};
8
/// Write sink that throws the data away and only keeps a running total of
/// how many bytes it was handed.
#[derive(Default)]
struct Counter(u64);

impl io::Write for Counter {
    /// Adds `buf.len()` to the total and reports the whole buffer as written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = buf.len();
        self.0 += accepted as u64;
        Ok(accepted)
    }

    /// Adds `buf.len()` to the total; never fails.
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        let Counter(total) = self;
        *total += buf.len() as u64;
        Ok(())
    }

    /// Nothing is buffered, so there is nothing to flush.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
27
28pub trait BinarySerializable: fmt::Debug + Sized {
30 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()>;
32 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
34
35 fn num_bytes(&self) -> u64 {
36 let mut counter = Counter::default();
37 self.serialize(&mut counter).unwrap();
38 counter.0
39 }
40}
41
42pub trait DeserializeFrom<T: BinarySerializable> {
43 fn deserialize(&mut self) -> io::Result<T>;
44}
45
46impl<T: BinarySerializable> DeserializeFrom<T> for &[u8] {
51 fn deserialize(&mut self) -> io::Result<T> {
52 T::deserialize(self)
53 }
54}
55
56pub trait FixedSize: BinarySerializable {
59 const SIZE_IN_BYTES: usize;
60}
61
62impl BinarySerializable for () {
63 fn serialize<W: Write + ?Sized>(&self, _: &mut W) -> io::Result<()> {
64 Ok(())
65 }
66 fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
67 Ok(())
68 }
69}
70
71impl FixedSize for () {
72 const SIZE_IN_BYTES: usize = 0;
73}
74
75impl<T: BinarySerializable> BinarySerializable for Vec<T> {
76 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
77 VInt(self.len() as u64).serialize(writer)?;
78 for it in self {
79 it.serialize(writer)?;
80 }
81 Ok(())
82 }
83 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
84 let num_items = VInt::deserialize(reader)?.val();
85 let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
86 for _ in 0..num_items {
87 let item = T::deserialize(reader)?;
88 items.push(item);
89 }
90 Ok(items)
91 }
92}
93
94impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
95 fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
96 self.0.serialize(write)?;
97 self.1.serialize(write)
98 }
99 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
100 Ok((Left::deserialize(reader)?, Right::deserialize(reader)?))
101 }
102}
103impl<Left: BinarySerializable + FixedSize, Right: BinarySerializable + FixedSize> FixedSize
104 for (Left, Right)
105{
106 const SIZE_IN_BYTES: usize = Left::SIZE_IN_BYTES + Right::SIZE_IN_BYTES;
107}
108
109impl BinarySerializable for u32 {
110 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
111 writer.write_u32::<Endianness>(*self)
112 }
113
114 fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
115 reader.read_u32::<Endianness>()
116 }
117}
118
119impl FixedSize for u32 {
120 const SIZE_IN_BYTES: usize = 4;
121}
122
123impl BinarySerializable for u16 {
124 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
125 writer.write_u16::<Endianness>(*self)
126 }
127
128 fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
129 reader.read_u16::<Endianness>()
130 }
131}
132
133impl FixedSize for u16 {
134 const SIZE_IN_BYTES: usize = 2;
135}
136
137impl BinarySerializable for u64 {
138 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
139 writer.write_u64::<Endianness>(*self)
140 }
141 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
142 reader.read_u64::<Endianness>()
143 }
144}
145
146impl FixedSize for u64 {
147 const SIZE_IN_BYTES: usize = 8;
148}
149
150impl BinarySerializable for u128 {
151 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
152 writer.write_u128::<Endianness>(*self)
153 }
154 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
155 reader.read_u128::<Endianness>()
156 }
157}
158
159impl FixedSize for u128 {
160 const SIZE_IN_BYTES: usize = 16;
161}
162
163impl BinarySerializable for f32 {
164 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
165 writer.write_f32::<Endianness>(*self)
166 }
167 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
168 reader.read_f32::<Endianness>()
169 }
170}
171
172impl FixedSize for f32 {
173 const SIZE_IN_BYTES: usize = 4;
174}
175
176impl BinarySerializable for i64 {
177 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
178 writer.write_i64::<Endianness>(*self)
179 }
180 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
181 reader.read_i64::<Endianness>()
182 }
183}
184
185impl FixedSize for i64 {
186 const SIZE_IN_BYTES: usize = 8;
187}
188
189impl BinarySerializable for f64 {
190 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
191 writer.write_f64::<Endianness>(*self)
192 }
193 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
194 reader.read_f64::<Endianness>()
195 }
196}
197
198impl FixedSize for f64 {
199 const SIZE_IN_BYTES: usize = 8;
200}
201
202impl BinarySerializable for u8 {
203 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
204 writer.write_u8(*self)
205 }
206 fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
207 reader.read_u8()
208 }
209}
210
211impl FixedSize for u8 {
212 const SIZE_IN_BYTES: usize = 1;
213}
214
215impl BinarySerializable for bool {
216 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
217 writer.write_u8(u8::from(*self))
218 }
219 fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
220 let val = reader.read_u8()?;
221 match val {
222 0 => Ok(false),
223 1 => Ok(true),
224 _ => Err(io::Error::new(
225 io::ErrorKind::InvalidData,
226 "invalid bool value on deserialization, data corrupted",
227 )),
228 }
229 }
230}
231
232impl FixedSize for bool {
233 const SIZE_IN_BYTES: usize = 1;
234}
235
236impl BinarySerializable for String {
237 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
238 let data: &[u8] = self.as_bytes();
239 VInt(data.len() as u64).serialize(writer)?;
240 writer.write_all(data)
241 }
242
243 fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
244 let string_length = VInt::deserialize(reader)?.val() as usize;
245 let mut result = String::with_capacity(string_length);
246 reader
247 .take(string_length as u64)
248 .read_to_string(&mut result)?;
249 Ok(result)
250 }
251}
252
253impl<'a> BinarySerializable for Cow<'a, str> {
254 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
255 let data: &[u8] = self.as_bytes();
256 VInt(data.len() as u64).serialize(writer)?;
257 writer.write_all(data)
258 }
259
260 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
261 let string_length = VInt::deserialize(reader)?.val() as usize;
262 let mut result = String::with_capacity(string_length);
263 reader
264 .take(string_length as u64)
265 .read_to_string(&mut result)?;
266 Ok(Cow::Owned(result))
267 }
268}
269
270impl<'a> BinarySerializable for Cow<'a, [u8]> {
271 fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
272 VInt(self.len() as u64).serialize(writer)?;
273 for it in self.iter() {
274 it.serialize(writer)?;
275 }
276 Ok(())
277 }
278
279 fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
280 let num_items = VInt::deserialize(reader)?.val();
281 let mut items: Vec<u8> = Vec::with_capacity(num_items as usize);
282 for _ in 0..num_items {
283 let item = u8::deserialize(reader)?;
284 items.push(item);
285 }
286 Ok(Cow::Owned(items))
287 }
288}
289
#[cfg(test)]
pub mod test {

    use super::*;

    /// Asserts that the default value of `O` serializes to exactly
    /// `O::SIZE_IN_BYTES` bytes.
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
    }

    /// Serializes `v`, deserializes it again and checks the round trip.
    ///
    /// Also checks that `num_bytes()` agrees with the serialized length and
    /// that deserialization consumed the whole buffer. Returns the number of
    /// serialized bytes.
    fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
        let mut buffer: Vec<u8> = Vec::new();
        v.serialize(&mut buffer).unwrap();
        let num_bytes = buffer.len();
        assert_eq!(v.num_bytes(), num_bytes as u64);
        let mut cursor = &buffer[..];
        let deser = T::deserialize(&mut cursor).unwrap();
        assert_eq!(deser, v);
        assert!(cursor.is_empty());
        num_bytes
    }

    #[test]
    fn test_serialize_unit() {
        fixed_size_test::<()>();
    }

    #[test]
    fn test_serialize_u8() {
        fixed_size_test::<u8>();
        assert_eq!(1, serialize_test(0u8));
        assert_eq!(1, serialize_test(u8::MAX));
    }

    #[test]
    fn test_serialize_u16() {
        fixed_size_test::<u16>();
        assert_eq!(2, serialize_test(3u16));
        assert_eq!(2, serialize_test(u16::MAX));
    }

    #[test]
    fn test_serialize_u32() {
        fixed_size_test::<u32>();
        assert_eq!(4, serialize_test(3u32));
        assert_eq!(4, serialize_test(5u32));
        assert_eq!(4, serialize_test(u32::MAX));
    }

    #[test]
    fn test_serialize_i64() {
        fixed_size_test::<i64>();
        assert_eq!(8, serialize_test(-1i64));
        assert_eq!(8, serialize_test(i64::MIN));
    }

    #[test]
    fn test_serialize_f32() {
        fixed_size_test::<f32>();
    }

    #[test]
    fn test_serialize_f64() {
        fixed_size_test::<f64>();
    }

    #[test]
    fn test_serialize_u64() {
        fixed_size_test::<u64>();
        assert_eq!(8, serialize_test(u64::MAX));
    }

    #[test]
    fn test_serialize_u128() {
        fixed_size_test::<u128>();
        assert_eq!(16, serialize_test(u128::MAX));
    }

    #[test]
    fn test_serialize_bool() {
        fixed_size_test::<bool>();
        assert_eq!(1, serialize_test(false));
        assert_eq!(1, serialize_test(true));
    }

    #[test]
    fn test_deserialize_invalid_bool() {
        let mut cursor: &[u8] = &[2u8];
        let err = <bool as BinarySerializable>::deserialize(&mut cursor).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_serialize_pair() {
        fixed_size_test::<(u32, u64)>();
        assert_eq!(4 + 8, serialize_test((1u32, 2u64)));
    }

    #[test]
    fn test_serialize_string() {
        assert_eq!(serialize_test(String::from("")), 1);
        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
        assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
    }

    #[test]
    fn test_serialize_cow_str() {
        let text: Cow<'static, str> = Cow::Borrowed("abc");
        assert_eq!(serialize_test(text), 1 + 3);
    }

    #[test]
    fn test_serialize_cow_bytes() {
        let bytes: &'static [u8] = b"abc";
        assert_eq!(serialize_test(Cow::Borrowed(bytes)), 1 + 3);
        assert_eq!(serialize_test(Cow::<'static, [u8]>::Owned(Vec::new())), 1);
    }

    #[test]
    fn test_serialize_vec() {
        assert_eq!(serialize_test(Vec::<u8>::new()), 1);
        assert_eq!(serialize_test(vec![1u32, 3u32]), 1 + 4 * 2);
    }

    #[test]
    fn test_serialize_vint() {
        for i in 0..10_000 {
            serialize_test(VInt(i as u64));
        }
        assert_eq!(serialize_test(VInt(7u64)), 1);
        assert_eq!(serialize_test(VInt(127u64)), 1);
        assert_eq!(serialize_test(VInt(128u64)), 2);
        assert_eq!(serialize_test(VInt(129u64)), 2);
        assert_eq!(serialize_test(VInt(1234u64)), 2);
        assert_eq!(serialize_test(VInt(16_383u64)), 2);
        assert_eq!(serialize_test(VInt(16_384u64)), 3);
        assert_eq!(serialize_test(VInt(u64::MAX)), 10);
    }
}