reed_solomon_16/engine/
engine_nosimd.rs

1use crate::engine::{
2    self,
3    tables::{self, Mul16, Skew},
4    Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
5};
6
7// ======================================================================
8// NoSimd - PUBLIC
9
10/// Optimized [`Engine`] without SIMD.
11///
12/// [`NoSimd`] is a basic optimized engine which works on all CPUs.
13#[derive(Clone)]
14pub struct NoSimd {
15    mul16: &'static Mul16,
16    skew: &'static Skew,
17}
18
19impl NoSimd {
20    /// Creates new [`NoSimd`], initializing all tables
21    /// needed for encoding or decoding.
22    ///
23    /// Currently only difference between encoding/decoding is
24    /// `log_walsh` (128 kiB) which is only needed for decoding.
25    pub fn new() -> Self {
26        let mul16 = tables::initialize_mul16();
27        let skew = tables::initialize_skew();
28
29        // This is used in `Engine::eval_poly`.
30        tables::initialize_log_walsh::<Self>();
31
32        Self { mul16, skew }
33    }
34}
35
36impl Engine for NoSimd {
37    fn fft(
38        &self,
39        data: &mut ShardsRefMut,
40        pos: usize,
41        size: usize,
42        truncated_size: usize,
43        skew_delta: usize,
44    ) {
45        self.fft_private(data, pos, size, truncated_size, skew_delta);
46    }
47
48    fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize) {
49        Self::fwht_private(data, truncated_size);
50    }
51
52    fn ifft(
53        &self,
54        data: &mut ShardsRefMut,
55        pos: usize,
56        size: usize,
57        truncated_size: usize,
58        skew_delta: usize,
59    ) {
60        self.ifft_private(data, pos, size, truncated_size, skew_delta);
61    }
62
63    fn mul(&self, x: &mut [u8], log_m: GfElement) {
64        let lut = &self.mul16[log_m as usize];
65
66        let mut pos = 0;
67        while pos < x.len() {
68            for i in 0..32 {
69                let lo = x[pos + i] as usize;
70                let hi = x[pos + i + 32] as usize;
71                let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4];
72                x[pos + i] = prod as u8;
73                x[pos + i + 32] = (prod >> 8) as u8;
74            }
75            pos += 64;
76        }
77    }
78
79    fn xor(x: &mut [u8], y: &[u8]) {
80        let x64: &mut [u64] = bytemuck::cast_slice_mut(x);
81        let y64: &[u64] = bytemuck::cast_slice(y);
82
83        for i in 0..x64.len() {
84            x64[i] ^= y64[i];
85        }
86    }
87}
88
89// ======================================================================
90// NoSimd - IMPL Default
91
92impl Default for NoSimd {
93    fn default() -> Self {
94        Self::new()
95    }
96}
97
98// ======================================================================
99// NoSimd - PRIVATE
100
101impl NoSimd {
102    /// `x[] ^= y[] * log_m`
103    fn mul_add(&self, x: &mut [u8], y: &[u8], log_m: GfElement) {
104        let lut = &self.mul16[log_m as usize];
105
106        let mut pos = 0;
107        while pos < x.len() {
108            for i in 0..32 {
109                let lo = y[pos + i] as usize;
110                let hi = y[pos + i + 32] as usize;
111                let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4];
112                x[pos + i] ^= prod as u8;
113                x[pos + i + 32] ^= (prod >> 8) as u8;
114            }
115            pos += 64;
116        }
117    }
118}
119
120// ======================================================================
121// NoSimd - PRIVATE - FWHT (fast Walsh-Hadamard transform)
122
123impl NoSimd {
124    #[inline(always)]
125    fn fwht_2(a: &mut GfElement, b: &mut GfElement) {
126        let sum = engine::add_mod(*a, *b);
127        let dif = engine::sub_mod(*a, *b);
128        *a = sum;
129        *b = dif;
130    }
131
132    #[inline(always)]
133    fn fwht_4(data: &mut [GfElement], dist: usize) {
134        let mut t0 = data[0];
135        let mut t1 = data[dist];
136        let mut t2 = data[dist * 2];
137        let mut t3 = data[dist * 3];
138
139        Self::fwht_2(&mut t0, &mut t1);
140        Self::fwht_2(&mut t2, &mut t3);
141        Self::fwht_2(&mut t0, &mut t2);
142        Self::fwht_2(&mut t1, &mut t3);
143
144        data[0] = t0;
145        data[dist] = t1;
146        data[dist * 2] = t2;
147        data[dist * 3] = t3;
148    }
149
150    #[inline(always)]
151    fn fwht_private(data: &mut [GfElement; GF_ORDER], truncated_size: usize) {
152        // TWO LAYERS AT TIME
153
154        let mut dist = 1;
155        let mut dist4 = 4;
156        while dist4 <= GF_ORDER {
157            let mut r = 0;
158            while r < truncated_size {
159                for i in r..r + dist {
160                    Self::fwht_4(&mut data[i..], dist)
161                }
162                r += dist4;
163            }
164
165            dist = dist4;
166            dist4 <<= 2;
167        }
168
169        // FINAL ODD LAYER
170
171        if dist < GF_ORDER {
172            for i in 0..dist {
173                // inlined manually as Rust doesn't like
174                // `fwht_2(&mut data[i], &mut data[i + dist])`
175                let sum = engine::add_mod(data[i], data[i + dist]);
176                let dif = engine::sub_mod(data[i], data[i + dist]);
177                data[i] = sum;
178                data[i + dist] = dif;
179            }
180        }
181    }
182}
183
184// ======================================================================
185// NoSimd - PRIVATE - FFT (fast Fourier transform)
186
187impl NoSimd {
188    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
189    #[inline(always)]
190    fn fft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
191        self.mul_add(x, y, log_m);
192        Self::xor(y, x);
193    }
194
195    #[inline(always)]
196    fn fft_butterfly_two_layers(
197        &self,
198        data: &mut ShardsRefMut,
199        pos: usize,
200        dist: usize,
201        log_m01: GfElement,
202        log_m23: GfElement,
203        log_m02: GfElement,
204    ) {
205        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
206
207        // FIRST LAYER
208
209        if log_m02 == GF_MODULUS {
210            Self::xor(s2, s0);
211            Self::xor(s3, s1);
212        } else {
213            self.fft_butterfly_partial(s0, s2, log_m02);
214            self.fft_butterfly_partial(s1, s3, log_m02);
215        }
216
217        // SECOND LAYER
218
219        if log_m01 == GF_MODULUS {
220            Self::xor(s1, s0);
221        } else {
222            self.fft_butterfly_partial(s0, s1, log_m01);
223        }
224
225        if log_m23 == GF_MODULUS {
226            Self::xor(s3, s2);
227        } else {
228            self.fft_butterfly_partial(s2, s3, log_m23);
229        }
230    }
231
232    #[inline(always)]
233    fn fft_private(
234        &self,
235        data: &mut ShardsRefMut,
236        pos: usize,
237        size: usize,
238        truncated_size: usize,
239        skew_delta: usize,
240    ) {
241        // TWO LAYERS AT TIME
242
243        let mut dist4 = size;
244        let mut dist = size >> 2;
245        while dist != 0 {
246            let mut r = 0;
247            while r < truncated_size {
248                let base = r + dist + skew_delta - 1;
249
250                let log_m01 = self.skew[base];
251                let log_m02 = self.skew[base + dist];
252                let log_m23 = self.skew[base + dist * 2];
253
254                for i in r..r + dist {
255                    self.fft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
256                }
257
258                r += dist4;
259            }
260            dist4 = dist;
261            dist >>= 2;
262        }
263
264        // FINAL ODD LAYER
265
266        if dist4 == 2 {
267            let mut r = 0;
268            while r < truncated_size {
269                let log_m = self.skew[r + skew_delta];
270
271                let (x, y) = data.dist2_mut(pos + r, 1);
272
273                if log_m == GF_MODULUS {
274                    Self::xor(y, x);
275                } else {
276                    self.fft_butterfly_partial(x, y, log_m)
277                }
278
279                r += 2;
280            }
281        }
282    }
283}
284
285// ======================================================================
286// NoSimd - PRIVATE - IFFT (inverse fast Fourier transform)
287
288impl NoSimd {
289    // Partial butterfly, caller must do `GF_MODULUS` check with `xor`.
290    #[inline(always)]
291    fn ifft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
292        Self::xor(y, x);
293        self.mul_add(x, y, log_m);
294    }
295
296    #[inline(always)]
297    fn ifft_butterfly_two_layers(
298        &self,
299        data: &mut ShardsRefMut,
300        pos: usize,
301        dist: usize,
302        log_m01: GfElement,
303        log_m23: GfElement,
304        log_m02: GfElement,
305    ) {
306        let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
307
308        // FIRST LAYER
309
310        if log_m01 == GF_MODULUS {
311            Self::xor(s1, s0);
312        } else {
313            self.ifft_butterfly_partial(s0, s1, log_m01);
314        }
315
316        if log_m23 == GF_MODULUS {
317            Self::xor(s3, s2);
318        } else {
319            self.ifft_butterfly_partial(s2, s3, log_m23);
320        }
321
322        // SECOND LAYER
323
324        if log_m02 == GF_MODULUS {
325            Self::xor(s2, s0);
326            Self::xor(s3, s1);
327        } else {
328            self.ifft_butterfly_partial(s0, s2, log_m02);
329            self.ifft_butterfly_partial(s1, s3, log_m02);
330        }
331    }
332
333    #[inline(always)]
334    fn ifft_private(
335        &self,
336        data: &mut ShardsRefMut,
337        pos: usize,
338        size: usize,
339        truncated_size: usize,
340        skew_delta: usize,
341    ) {
342        // TWO LAYERS AT TIME
343
344        let mut dist = 1;
345        let mut dist4 = 4;
346        while dist4 <= size {
347            let mut r = 0;
348            while r < truncated_size {
349                let base = r + dist + skew_delta - 1;
350
351                let log_m01 = self.skew[base];
352                let log_m02 = self.skew[base + dist];
353                let log_m23 = self.skew[base + dist * 2];
354
355                for i in r..r + dist {
356                    self.ifft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
357                }
358
359                r += dist4;
360            }
361            dist = dist4;
362            dist4 <<= 2;
363        }
364
365        // FINAL ODD LAYER
366
367        if dist < size {
368            let log_m = self.skew[dist + skew_delta - 1];
369            if log_m == GF_MODULUS {
370                Self::xor_within(data, pos + dist, pos, dist);
371            } else {
372                let (mut a, mut b) = data.split_at_mut(pos + dist);
373                for i in 0..dist {
374                    self.ifft_butterfly_partial(
375                        &mut a[pos + i], // data[pos + i]
376                        &mut b[i],       // data[pos + i + dist]
377                        log_m,
378                    );
379                }
380            }
381        }
382    }
383}
384
385// ======================================================================
386// TESTS
387
388// Engines are tested indirectly via roundtrip tests of HighRate and LowRate.