// zlib_rs/adler32.rs

#![warn(unsafe_op_in_unsafe_fn)]

#[cfg(target_arch = "x86_64")]
mod avx2;
mod generic;
#[cfg(target_arch = "aarch64")]
mod neon;
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
mod wasm;

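/// Computes the Adler-32 checksum of `data`, continuing from `start_checksum`.
///
/// The checksum is resumable: per the zlib convention, `1` is the checksum of the
/// empty input, and the return value can be fed back in as `start_checksum` for the
/// next chunk. Dispatches at runtime to a SIMD implementation (AVX2, NEON, or wasm
/// simd128) when available, and falls back to the generic implementation otherwise.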
pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
    #[cfg(target_arch = "x86_64")]
    if crate::cpu_features::is_enabled_avx2() {
        return avx2::adler32_avx2(start_checksum, data);
    }

    #[cfg(target_arch = "aarch64")]
    if crate::cpu_features::is_enabled_neon() {
        return neon::adler32_neon(start_checksum, data);
    }

    #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
    if crate::cpu_features::is_enabled_simd128() {
        return wasm::adler32_wasm(start_checksum, data);
    }

    generic::adler32_rust(start_checksum, data)
}
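
// Illustrative example (not in the original source): the checksum is resumable, so
// feeding the input in pieces must match checksumming it in one call.
#[cfg(test)]
#[test]
fn adler32_is_resumable() {
    let data = b"an input split at an arbitrary boundary";
    let (head, tail) = data.split_at(13);
    assert_eq!(adler32(adler32(1, head), tail), adler32(1, data));
}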

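/// Copies `src` into the front of `dst` and returns the Adler-32 checksum of `src`,
/// continuing from `start_checksum`. `dst` must be at least as long as `src`.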
pub fn adler32_fold_copy(start_checksum: u32, dst: &mut [u8], src: &[u8]) -> u32 {
    debug_assert!(dst.len() >= src.len(), "{} < {}", dst.len(), src.len());

    // Integrating the memcpy into the adler32 computation itself did not show any
    // benefit; in fact it was a bit slower for very small chunk sizes.
    dst[..src.len()].copy_from_slice(src);
    adler32(start_checksum, src)
}
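
// Usage sketch (not in the original source): the destination receives the bytes, and
// the returned checksum matches a plain `adler32` over the same input.
#[cfg(test)]
#[test]
fn fold_copy_matches_adler32() {
    let src = [1u8, 2, 3, 4, 5];
    let mut dst = [0u8; 8]; // may be longer than `src`
    let checksum = adler32_fold_copy(1, &mut dst, &src);
    assert_eq!(&dst[..src.len()], src.as_slice());
    assert_eq!(checksum, adler32(1, &src));
}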

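/// Combines two Adler-32 checksums: given `adler1` over a sequence `A` and `adler2`
/// over a sequence `B`, returns the checksum of `A` and `B` concatenated. `len2` is
/// the length of `B` in bytes.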
pub fn adler32_combine(adler1: u32, adler2: u32, len2: u64) -> u32 {
    const BASE: u64 = self::BASE as u64;

    let rem = len2 % BASE;

    let adler1 = adler1 as u64;
    let adler2 = adler2 as u64;

    /* the derivation of this formula is left as an exercise for the reader */
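    //
    // One way to see it (a sketch, not from the original source): the checksum is
    // (sum2 << 16) | sum1, where sum1 is the byte sum plus the initial 1. For the
    // concatenation of A and B, sum1(AB) = sum1(A) + sum1(B) - 1 (mod BASE), since the
    // initial 1 must only be counted once. Processing B after A adds len(B) copies of
    // sum1(A) to the running sum2, so sum2(AB) = sum2(A) + sum2(B) + len(B) * sum1(A)
    // - len(B) (mod BASE), where the final - len(B) removes B's own implicit leading 1.
    // The + BASE terms below keep every intermediate value non-negative.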
    let mut sum1 = adler1 & 0xffff;
    let mut sum2 = rem * sum1;
    sum2 %= BASE;
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;

    if sum1 >= BASE {
        sum1 -= BASE;
    }
    if sum1 >= BASE {
        sum1 -= BASE;
    }
    if sum2 >= (BASE << 1) {
        sum2 -= BASE << 1;
    }
    if sum2 >= BASE {
        sum2 -= BASE;
    }

    (sum1 | (sum2 << 16)) as u32
}
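
// Concrete example (not in the original source): combining the checksums of two
// independently checksummed pieces reproduces the checksum of the concatenation.
#[cfg(test)]
#[test]
fn combine_matches_concatenation() {
    let a = b"first half of the stream, ";
    let b = b"and the remainder";
    let whole: Vec<u8> = [a.as_slice(), b.as_slice()].concat();

    let combined = adler32_combine(adler32(1, a), adler32(1, b), b.len() as u64);
    assert_eq!(combined, adler32(1, &whole));
}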

// inefficient but correct, useful for testing
#[cfg(test)]
fn naive_adler32(start_checksum: u32, data: &[u8]) -> u32 {
    const MOD_ADLER: u32 = 65521; // Largest prime smaller than 2^16

    let mut a = start_checksum & 0xFFFF;
    let mut b = (start_checksum >> 16) & 0xFFFF;

    for &byte in data {
        a = (a + byte as u32) % MOD_ADLER;
        b = (b + a) % MOD_ADLER;
    }

    (b << 16) | a
}

const BASE: u32 = 65521; /* largest prime smaller than 65536 */
const NMAX: u32 = 5552; /* largest n such that 255 * n * (n + 1) / 2 + (n + 1) * (BASE - 1) <= 2^32 - 1 */

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn naive_is_fancy_small_inputs() {
        for i in 0..128 {
            let v = (0u8..i).collect::<Vec<_>>();
            assert_eq!(naive_adler32(1, &v), generic::adler32_rust(1, &v));
        }
    }
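
    // Reference check (not in the original source): the Adler-32 of the ASCII string
    // "Wikipedia" is 0x11E60398, a commonly cited worked example.
    #[test]
    fn known_answer() {
        assert_eq!(adler32(1, b"Wikipedia"), 0x11E60398);
        assert_eq!(naive_adler32(1, b"Wikipedia"), 0x11E60398);
    }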

    #[test]
    fn test_adler32_combine() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(data: Vec<u8>) -> bool {
            let Some(buf_len) = data.first().copied() else {
                return true;
            };

            let buf_size = Ord::max(buf_len, 1) as usize;

            let mut adler1 = 1;
            let mut adler2 = 1;

            for chunk in data.chunks(buf_size) {
                adler1 = adler32(adler1, chunk);
            }

            adler2 = adler32(adler2, &data);

            assert_eq!(adler1, adler2);

            // Combining the checksums of two independently checksummed halves must
            // reproduce the checksum of the whole input.
            let mid = data.len() / 2;
            let adler_a = adler32(1, &data[..mid]);
            let adler_b = adler32(1, &data[mid..]);

            let combined = adler32_combine(adler_a, adler_b, (data.len() - mid) as u64);
            assert_eq!(combined, adler2);

            true
        }
    }
}