reed_solomon_simd/engine/
engine_nosimd.rs

1use std::iter::zip;
2
3use crate::engine::{
4    tables::{self, Mul16, Skew},
5    utils, Engine, GfElement, ShardsRefMut, GF_MODULUS,
6};
7
8// ======================================================================
9// NoSimd - PUBLIC
10
11/// Optimized [`Engine`] without SIMD.
12///
13/// [`NoSimd`] is a basic optimized engine which works on all CPUs.
14#[derive(Clone, Copy)]
15pub struct NoSimd {
16    mul16: &'static Mul16,
17    skew: &'static Skew,
18}
19
20impl NoSimd {
21    /// Creates new [`NoSimd`], initializing all [tables]
22    /// needed for encoding or decoding.
23    ///
24    /// Currently only difference between encoding/decoding is
25    /// [`LogWalsh`] (128 kiB) which is only needed for decoding.
26    ///
27    /// [`LogWalsh`]: crate::engine::tables::LogWalsh
28    pub fn new() -> Self {
29        let mul16 = &*tables::MUL16;
30        let skew = &*tables::SKEW;
31
32        Self { mul16, skew }
33    }
34}
35
36impl Engine for NoSimd {
37    fn fft(
38        &self,
39        data: &mut ShardsRefMut,
40        pos: usize,
41        size: usize,
42        truncated_size: usize,
43        skew_delta: usize,
44    ) {
45        self.fft_private(data, pos, size, truncated_size, skew_delta);
46    }
47
48    fn ifft(
49        &self,
50        data: &mut ShardsRefMut,
51        pos: usize,
52        size: usize,
53        truncated_size: usize,
54        skew_delta: usize,
55    ) {
56        self.ifft_private(data, pos, size, truncated_size, skew_delta);
57    }
58
59    fn mul(&self, x: &mut [[u8; 64]], log_m: GfElement) {
60        let lut = &self.mul16[log_m as usize];
61
62        for x_chunk in x.iter_mut() {
63            let (x_lo, x_hi) = x_chunk.split_at_mut(32);
64
65            for i in 0..32 {
66                let lo = x_lo[i];
67                let hi = x_hi[i];
68                let prod = lut[0][usize::from(lo & 15)]
69                    ^ lut[1][usize::from(lo >> 4)]
70                    ^ lut[2][usize::from(hi & 15)]
71                    ^ lut[3][usize::from(hi >> 4)];
72                x_lo[i] = prod as u8;
73                x_hi[i] = (prod >> 8) as u8;
74            }
75        }
76    }
77}
78
79// ======================================================================
80// NoSimd - IMPL Default
81
82impl Default for NoSimd {
83    fn default() -> Self {
84        Self::new()
85    }
86}
87
88// ======================================================================
89// NoSimd - PRIVATE
90
91impl NoSimd {
92    /// `x[] ^= y[] * log_m`
93    fn mul_add(&self, x: &mut [[u8; 64]], y: &[[u8; 64]], log_m: GfElement) {
94        let lut = &self.mul16[log_m as usize];
95
96        for (x_chunk, y_chunk) in zip(x.iter_mut(), y.iter()) {
97            let (x_lo, x_hi) = x_chunk.split_at_mut(32);
98            let (y_lo, y_hi) = y_chunk.split_at(32);
99
100            for i in 0..32 {
101                let lo = y_lo[i];
102                let hi = y_hi[i];
103                let prod = lut[0][usize::from(lo & 15)]
104                    ^ lut[1][usize::from(lo >> 4)]
105                    ^ lut[2][usize::from(hi & 15)]
106                    ^ lut[3][usize::from(hi >> 4)];
107                x_lo[i] ^= prod as u8;
108                x_hi[i] ^= (prod >> 8) as u8;
109            }
110        }
111    }
112}
113
114// ======================================================================
115// NoSimd - PRIVATE - FFT (fast Fourier transform)
116
117impl NoSimd {
118    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
119    #[inline(always)]
120    fn fft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) {
121        self.mul_add(x, y, log_m);
122        utils::xor(y, x);
123    }
124
125    #[inline(always)]
126    fn fft_butterfly_two_layers(
127        &self,
128        data: &mut ShardsRefMut,
129        pos: usize,
130        dist: usize,
131        log_m01: GfElement,
132        log_m23: GfElement,
133        log_m02: GfElement,
134    ) {
135        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
136
137        // FIRST LAYER
138
139        if log_m02 == GF_MODULUS {
140            utils::xor(s2, s0);
141            utils::xor(s3, s1);
142        } else {
143            self.fft_butterfly_partial(s0, s2, log_m02);
144            self.fft_butterfly_partial(s1, s3, log_m02);
145        }
146
147        // SECOND LAYER
148
149        if log_m01 == GF_MODULUS {
150            utils::xor(s1, s0);
151        } else {
152            self.fft_butterfly_partial(s0, s1, log_m01);
153        }
154
155        if log_m23 == GF_MODULUS {
156            utils::xor(s3, s2);
157        } else {
158            self.fft_butterfly_partial(s2, s3, log_m23);
159        }
160    }
161
162    #[inline(always)]
163    fn fft_private(
164        &self,
165        data: &mut ShardsRefMut,
166        pos: usize,
167        size: usize,
168        truncated_size: usize,
169        skew_delta: usize,
170    ) {
171        // TWO LAYERS AT TIME
172
173        let mut dist4 = size;
174        let mut dist = size >> 2;
175        while dist != 0 {
176            let mut r = 0;
177            while r < truncated_size {
178                let base = r + dist + skew_delta - 1;
179
180                let log_m01 = self.skew[base];
181                let log_m02 = self.skew[base + dist];
182                let log_m23 = self.skew[base + dist * 2];
183
184                for i in r..r + dist {
185                    self.fft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02);
186                }
187
188                r += dist4;
189            }
190            dist4 = dist;
191            dist >>= 2;
192        }
193
194        // FINAL ODD LAYER
195
196        if dist4 == 2 {
197            let mut r = 0;
198            while r < truncated_size {
199                let log_m = self.skew[r + skew_delta];
200
201                let (x, y) = data.dist2_mut(pos + r, 1);
202
203                if log_m == GF_MODULUS {
204                    utils::xor(y, x);
205                } else {
206                    self.fft_butterfly_partial(x, y, log_m);
207                }
208
209                r += 2;
210            }
211        }
212    }
213}
214
215// ======================================================================
216// NoSimd - PRIVATE - IFFT (inverse fast Fourier transform)
217
218impl NoSimd {
219    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
220    #[inline(always)]
221    fn ifft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) {
222        utils::xor(y, x);
223        self.mul_add(x, y, log_m);
224    }
225
226    #[inline(always)]
227    fn ifft_butterfly_two_layers(
228        &self,
229        data: &mut ShardsRefMut,
230        pos: usize,
231        dist: usize,
232        log_m01: GfElement,
233        log_m23: GfElement,
234        log_m02: GfElement,
235    ) {
236        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
237
238        // FIRST LAYER
239
240        if log_m01 == GF_MODULUS {
241            utils::xor(s1, s0);
242        } else {
243            self.ifft_butterfly_partial(s0, s1, log_m01);
244        }
245
246        if log_m23 == GF_MODULUS {
247            utils::xor(s3, s2);
248        } else {
249            self.ifft_butterfly_partial(s2, s3, log_m23);
250        }
251
252        // SECOND LAYER
253
254        if log_m02 == GF_MODULUS {
255            utils::xor(s2, s0);
256            utils::xor(s3, s1);
257        } else {
258            self.ifft_butterfly_partial(s0, s2, log_m02);
259            self.ifft_butterfly_partial(s1, s3, log_m02);
260        }
261    }
262
263    #[inline(always)]
264    fn ifft_private(
265        &self,
266        data: &mut ShardsRefMut,
267        pos: usize,
268        size: usize,
269        truncated_size: usize,
270        skew_delta: usize,
271    ) {
272        // TWO LAYERS AT TIME
273
274        let mut dist = 1;
275        let mut dist4 = 4;
276        while dist4 <= size {
277            let mut r = 0;
278            while r < truncated_size {
279                let base = r + dist + skew_delta - 1;
280
281                let log_m01 = self.skew[base];
282                let log_m02 = self.skew[base + dist];
283                let log_m23 = self.skew[base + dist * 2];
284
285                for i in r..r + dist {
286                    self.ifft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02);
287                }
288
289                r += dist4;
290            }
291            dist = dist4;
292            dist4 <<= 2;
293        }
294
295        // FINAL ODD LAYER
296
297        if dist < size {
298            let log_m = self.skew[dist + skew_delta - 1];
299            if log_m == GF_MODULUS {
300                utils::xor_within(data, pos + dist, pos, dist);
301            } else {
302                let (mut a, mut b) = data.split_at_mut(pos + dist);
303                for i in 0..dist {
304                    self.ifft_butterfly_partial(
305                        &mut a[pos + i], // data[pos + i]
306                        &mut b[i],       // data[pos + i + dist]
307                        log_m,
308                    );
309                }
310            }
311        }
312    }
313}
314
315// ======================================================================
316// TESTS
317
318// Engines are tested indirectly via roundtrip tests of HighRate and LowRate.
319
320#[cfg(test)]
321mod tests {
322    use crate::engine::{Engine, Naive, NoSimd};
323
324    use rand::{Rng, SeedableRng};
325    use rand_chacha::ChaCha8Rng;
326
327    #[test]
328    fn mul() {
329        let naive = Naive::default();
330        let nosimd = NoSimd::default();
331
332        let mut rng = ChaCha8Rng::from_seed([0; 32]);
333
334        for shard_chunks in 0..6 {
335            let mut data_nosimd = vec![[0; 64]; shard_chunks];
336            rng.fill(data_nosimd.as_flattened_mut());
337            let mut data_naive = data_nosimd.clone();
338
339            let log_m = rng.gen();
340
341            nosimd.mul(&mut data_nosimd, log_m);
342            naive.mul(&mut data_naive, log_m);
343
344            assert_eq!(data_nosimd, data_naive);
345        }
346    }
347}