reed_solomon_simd/engine/
engine_nosimd.rs1use std::iter::zip;
2
3use crate::engine::{
4 tables::{self, Mul16, Skew},
5 utils, Engine, GfElement, ShardsRefMut, GF_MODULUS,
6};
7
8#[derive(Clone, Copy)]
15pub struct NoSimd {
16 mul16: &'static Mul16,
17 skew: &'static Skew,
18}
19
20impl NoSimd {
21 pub fn new() -> Self {
29 let mul16 = &*tables::MUL16;
30 let skew = &*tables::SKEW;
31
32 Self { mul16, skew }
33 }
34}
35
36impl Engine for NoSimd {
37 fn fft(
38 &self,
39 data: &mut ShardsRefMut,
40 pos: usize,
41 size: usize,
42 truncated_size: usize,
43 skew_delta: usize,
44 ) {
45 self.fft_private(data, pos, size, truncated_size, skew_delta);
46 }
47
48 fn ifft(
49 &self,
50 data: &mut ShardsRefMut,
51 pos: usize,
52 size: usize,
53 truncated_size: usize,
54 skew_delta: usize,
55 ) {
56 self.ifft_private(data, pos, size, truncated_size, skew_delta);
57 }
58
59 fn mul(&self, x: &mut [[u8; 64]], log_m: GfElement) {
60 let lut = &self.mul16[log_m as usize];
61
62 for x_chunk in x.iter_mut() {
63 let (x_lo, x_hi) = x_chunk.split_at_mut(32);
64
65 for i in 0..32 {
66 let lo = x_lo[i];
67 let hi = x_hi[i];
68 let prod = lut[0][usize::from(lo & 15)]
69 ^ lut[1][usize::from(lo >> 4)]
70 ^ lut[2][usize::from(hi & 15)]
71 ^ lut[3][usize::from(hi >> 4)];
72 x_lo[i] = prod as u8;
73 x_hi[i] = (prod >> 8) as u8;
74 }
75 }
76 }
77}
78
79impl Default for NoSimd {
83 fn default() -> Self {
84 Self::new()
85 }
86}
87
88impl NoSimd {
92 fn mul_add(&self, x: &mut [[u8; 64]], y: &[[u8; 64]], log_m: GfElement) {
94 let lut = &self.mul16[log_m as usize];
95
96 for (x_chunk, y_chunk) in zip(x.iter_mut(), y.iter()) {
97 let (x_lo, x_hi) = x_chunk.split_at_mut(32);
98 let (y_lo, y_hi) = y_chunk.split_at(32);
99
100 for i in 0..32 {
101 let lo = y_lo[i];
102 let hi = y_hi[i];
103 let prod = lut[0][usize::from(lo & 15)]
104 ^ lut[1][usize::from(lo >> 4)]
105 ^ lut[2][usize::from(hi & 15)]
106 ^ lut[3][usize::from(hi >> 4)];
107 x_lo[i] ^= prod as u8;
108 x_hi[i] ^= (prod >> 8) as u8;
109 }
110 }
111 }
112}
113
114impl NoSimd {
118 #[inline(always)]
120 fn fft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) {
121 self.mul_add(x, y, log_m);
122 utils::xor(y, x);
123 }
124
125 #[inline(always)]
126 fn fft_butterfly_two_layers(
127 &self,
128 data: &mut ShardsRefMut,
129 pos: usize,
130 dist: usize,
131 log_m01: GfElement,
132 log_m23: GfElement,
133 log_m02: GfElement,
134 ) {
135 let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
136
137 if log_m02 == GF_MODULUS {
140 utils::xor(s2, s0);
141 utils::xor(s3, s1);
142 } else {
143 self.fft_butterfly_partial(s0, s2, log_m02);
144 self.fft_butterfly_partial(s1, s3, log_m02);
145 }
146
147 if log_m01 == GF_MODULUS {
150 utils::xor(s1, s0);
151 } else {
152 self.fft_butterfly_partial(s0, s1, log_m01);
153 }
154
155 if log_m23 == GF_MODULUS {
156 utils::xor(s3, s2);
157 } else {
158 self.fft_butterfly_partial(s2, s3, log_m23);
159 }
160 }
161
162 #[inline(always)]
163 fn fft_private(
164 &self,
165 data: &mut ShardsRefMut,
166 pos: usize,
167 size: usize,
168 truncated_size: usize,
169 skew_delta: usize,
170 ) {
171 let mut dist4 = size;
174 let mut dist = size >> 2;
175 while dist != 0 {
176 let mut r = 0;
177 while r < truncated_size {
178 let base = r + dist + skew_delta - 1;
179
180 let log_m01 = self.skew[base];
181 let log_m02 = self.skew[base + dist];
182 let log_m23 = self.skew[base + dist * 2];
183
184 for i in r..r + dist {
185 self.fft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02);
186 }
187
188 r += dist4;
189 }
190 dist4 = dist;
191 dist >>= 2;
192 }
193
194 if dist4 == 2 {
197 let mut r = 0;
198 while r < truncated_size {
199 let log_m = self.skew[r + skew_delta];
200
201 let (x, y) = data.dist2_mut(pos + r, 1);
202
203 if log_m == GF_MODULUS {
204 utils::xor(y, x);
205 } else {
206 self.fft_butterfly_partial(x, y, log_m);
207 }
208
209 r += 2;
210 }
211 }
212 }
213}
214
215impl NoSimd {
219 #[inline(always)]
221 fn ifft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) {
222 utils::xor(y, x);
223 self.mul_add(x, y, log_m);
224 }
225
226 #[inline(always)]
227 fn ifft_butterfly_two_layers(
228 &self,
229 data: &mut ShardsRefMut,
230 pos: usize,
231 dist: usize,
232 log_m01: GfElement,
233 log_m23: GfElement,
234 log_m02: GfElement,
235 ) {
236 let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
237
238 if log_m01 == GF_MODULUS {
241 utils::xor(s1, s0);
242 } else {
243 self.ifft_butterfly_partial(s0, s1, log_m01);
244 }
245
246 if log_m23 == GF_MODULUS {
247 utils::xor(s3, s2);
248 } else {
249 self.ifft_butterfly_partial(s2, s3, log_m23);
250 }
251
252 if log_m02 == GF_MODULUS {
255 utils::xor(s2, s0);
256 utils::xor(s3, s1);
257 } else {
258 self.ifft_butterfly_partial(s0, s2, log_m02);
259 self.ifft_butterfly_partial(s1, s3, log_m02);
260 }
261 }
262
263 #[inline(always)]
264 fn ifft_private(
265 &self,
266 data: &mut ShardsRefMut,
267 pos: usize,
268 size: usize,
269 truncated_size: usize,
270 skew_delta: usize,
271 ) {
272 let mut dist = 1;
275 let mut dist4 = 4;
276 while dist4 <= size {
277 let mut r = 0;
278 while r < truncated_size {
279 let base = r + dist + skew_delta - 1;
280
281 let log_m01 = self.skew[base];
282 let log_m02 = self.skew[base + dist];
283 let log_m23 = self.skew[base + dist * 2];
284
285 for i in r..r + dist {
286 self.ifft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02);
287 }
288
289 r += dist4;
290 }
291 dist = dist4;
292 dist4 <<= 2;
293 }
294
295 if dist < size {
298 let log_m = self.skew[dist + skew_delta - 1];
299 if log_m == GF_MODULUS {
300 utils::xor_within(data, pos + dist, pos, dist);
301 } else {
302 let (mut a, mut b) = data.split_at_mut(pos + dist);
303 for i in 0..dist {
304 self.ifft_butterfly_partial(
305 &mut a[pos + i], &mut b[i], log_m,
308 );
309 }
310 }
311 }
312 }
313}
314
315#[cfg(test)]
321mod tests {
322 use crate::engine::{Engine, Naive, NoSimd};
323
324 use rand::{Rng, SeedableRng};
325 use rand_chacha::ChaCha8Rng;
326
327 #[test]
328 fn mul() {
329 let naive = Naive::default();
330 let nosimd = NoSimd::default();
331
332 let mut rng = ChaCha8Rng::from_seed([0; 32]);
333
334 for shard_chunks in 0..6 {
335 let mut data_nosimd = vec![[0; 64]; shard_chunks];
336 rng.fill(data_nosimd.as_flattened_mut());
337 let mut data_naive = data_nosimd.clone();
338
339 let log_m = rng.gen();
340
341 nosimd.mul(&mut data_nosimd, log_m);
342 naive.mul(&mut data_naive, log_m);
343
344 assert_eq!(data_nosimd, data_naive);
345 }
346 }
347}