avro_rs/
rabin.rs

1//! Implementation of the Rabin fingerprint algorithm
2use byteorder::{ByteOrder, LittleEndian};
3use digest::{consts::U8, generic_array::GenericArray, FixedOutput, Reset, Update};
4use lazy_static::lazy_static;
5
6const EMPTY: i64 = -4513414715797952619;
7
8lazy_static! {
9    static ref FPTABLE: [i64; 256] = {
10        let mut fp_table: [i64; 256] = [0; 256];
11        for i in 0..256 {
12            let mut fp = i;
13            for _ in 0..8 {
14                fp = (fp as u64 >> 1) as i64 ^ (EMPTY & -(fp & 1));
15            }
16            fp_table[i as usize] = fp
17        }
18        fp_table
19    };
20}
21
/// Implementation of the Rabin fingerprint algorithm using the Digest trait as described in [schema_fingerprints](https://avro.apache.org/docs/current/spec.html#schema_fingerprints).
///
/// The digest is returned as the 8-byte little-endian encoding of the Rabin hash.
/// This is what is used for avro [single object encoding](https://avro.apache.org/docs/current/spec.html#single_object_encoding)
///
/// ```rust
/// use avro_rs::rabin::Rabin;
/// use digest::Digest;
/// use hex_literal::hex;
///
/// // create the Rabin hasher
/// let mut hasher = Rabin::new();
///
/// // add the data
/// hasher.update(b"hello world");
///
/// // read hash digest and consume hasher
/// let result = hasher.finalize();
///
/// assert_eq!(result[..], hex!("60335ba6d0415528"));
/// ```
///
/// To convert the digest to the commonly used 64-bit integer value, you can use the byteorder crate:
///
/// ```rust
/// # use avro_rs::rabin::Rabin;
/// # use digest::Digest;
/// # use hex_literal::hex;
///
/// # let mut hasher = Rabin::new();
///
/// # hasher.update(b"hello world");
///
/// # let result = hasher.finalize();
///
/// # assert_eq!(result[..], hex!("60335ba6d0415528"));
/// use byteorder::{ByteOrder, LittleEndian};
///
/// let i = LittleEndian::read_i64(&result.to_vec());
///
/// assert_eq!(i, 2906301498937520992)
/// ```
// `Debug` is derived so the hasher can be logged, used with `{:?}`, and
// embedded in other types that derive `Debug`.
#[derive(Clone, Debug)]
pub struct Rabin {
    /// Running fingerprint over all bytes fed to the hasher so far.
    result: i64,
}
68
69impl Default for Rabin {
70    fn default() -> Self {
71        Rabin { result: EMPTY }
72    }
73}
74
75impl Update for Rabin {
76    fn update(&mut self, input: impl AsRef<[u8]>) {
77        for b in input.as_ref() {
78            self.result = (self.result as u64 >> 8) as i64
79                ^ FPTABLE[((self.result ^ *b as i64) & 0xff) as usize];
80        }
81    }
82}
83
84impl FixedOutput for Rabin {
85    // 8-byte little-endian form of the i64
86    // See: https://avro.apache.org/docs/current/spec.html#single_object_encoding
87    type OutputSize = U8;
88
89    fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
90        LittleEndian::write_i64(out, self.result);
91    }
92
93    fn finalize_into_reset(&mut self, out: &mut GenericArray<u8, Self::OutputSize>) {
94        LittleEndian::write_i64(out, self.result);
95        self.result = EMPTY;
96    }
97}
98
99impl Reset for Rabin {
100    fn reset(&mut self) {
101        self.result = EMPTY;
102    }
103}
104
105digest::impl_write!(Rabin);
106
#[cfg(test)]
mod tests {
    use super::Rabin;
    use byteorder::{ByteOrder, LittleEndian};
    use digest::Digest;

    /// Checks fingerprints of canonical schemas against the reference values.
    ///
    /// A single hasher is reused for every case, so this also verifies that
    /// `finalize_reset` leaves it ready for the next input.
    // See: https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt
    #[test]
    fn test1() {
        let cases: &[(&str, i64)] = &[
            (r#""null""#, 7195948357588979594),
            (r#""boolean""#, -6970731678124411036),
            (
                r#"{"name":"foo","type":"fixed","size":15}"#,
                1756455273707447556,
            ),
            (
                r#"{"name":"PigValue","type":"record","fields":[{"name":"value","type":["null","int","long","PigValue"]}]}"#,
                -1759257747318642341,
            ),
        ];

        let mut hasher = Rabin::new();

        for &(schema, expected) in cases {
            hasher.update(schema.as_bytes());
            let digest = hasher.finalize_reset();
            let actual = LittleEndian::read_i64(digest.as_slice());
            assert_eq!(expected, actual);
        }
    }
}