1use crate::engine::{
2 self,
3 tables::{self, Mul16, Skew},
4 Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
5};
6
7#[derive(Clone)]
14pub struct NoSimd {
15 mul16: &'static Mul16,
16 skew: &'static Skew,
17}
18
19impl NoSimd {
20 pub fn new() -> Self {
26 let mul16 = tables::initialize_mul16();
27 let skew = tables::initialize_skew();
28
29 tables::initialize_log_walsh::<Self>();
31
32 Self { mul16, skew }
33 }
34}
35
36impl Engine for NoSimd {
37 fn fft(
38 &self,
39 data: &mut ShardsRefMut,
40 pos: usize,
41 size: usize,
42 truncated_size: usize,
43 skew_delta: usize,
44 ) {
45 self.fft_private(data, pos, size, truncated_size, skew_delta);
46 }
47
48 fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize) {
49 Self::fwht_private(data, truncated_size);
50 }
51
52 fn ifft(
53 &self,
54 data: &mut ShardsRefMut,
55 pos: usize,
56 size: usize,
57 truncated_size: usize,
58 skew_delta: usize,
59 ) {
60 self.ifft_private(data, pos, size, truncated_size, skew_delta);
61 }
62
63 fn mul(&self, x: &mut [u8], log_m: GfElement) {
64 let lut = &self.mul16[log_m as usize];
65
66 let mut pos = 0;
67 while pos < x.len() {
68 for i in 0..32 {
69 let lo = x[pos + i] as usize;
70 let hi = x[pos + i + 32] as usize;
71 let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4];
72 x[pos + i] = prod as u8;
73 x[pos + i + 32] = (prod >> 8) as u8;
74 }
75 pos += 64;
76 }
77 }
78
79 fn xor(x: &mut [u8], y: &[u8]) {
80 let x64: &mut [u64] = bytemuck::cast_slice_mut(x);
81 let y64: &[u64] = bytemuck::cast_slice(y);
82
83 for i in 0..x64.len() {
84 x64[i] ^= y64[i];
85 }
86 }
87}
88
89impl Default for NoSimd {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98impl NoSimd {
102 fn mul_add(&self, x: &mut [u8], y: &[u8], log_m: GfElement) {
104 let lut = &self.mul16[log_m as usize];
105
106 let mut pos = 0;
107 while pos < x.len() {
108 for i in 0..32 {
109 let lo = y[pos + i] as usize;
110 let hi = y[pos + i + 32] as usize;
111 let prod = lut[0][lo & 15] ^ lut[1][lo >> 4] ^ lut[2][hi & 15] ^ lut[3][hi >> 4];
112 x[pos + i] ^= prod as u8;
113 x[pos + i + 32] ^= (prod >> 8) as u8;
114 }
115 pos += 64;
116 }
117 }
118}
119
120impl NoSimd {
124 #[inline(always)]
125 fn fwht_2(a: &mut GfElement, b: &mut GfElement) {
126 let sum = engine::add_mod(*a, *b);
127 let dif = engine::sub_mod(*a, *b);
128 *a = sum;
129 *b = dif;
130 }
131
132 #[inline(always)]
133 fn fwht_4(data: &mut [GfElement], dist: usize) {
134 let mut t0 = data[0];
135 let mut t1 = data[dist];
136 let mut t2 = data[dist * 2];
137 let mut t3 = data[dist * 3];
138
139 Self::fwht_2(&mut t0, &mut t1);
140 Self::fwht_2(&mut t2, &mut t3);
141 Self::fwht_2(&mut t0, &mut t2);
142 Self::fwht_2(&mut t1, &mut t3);
143
144 data[0] = t0;
145 data[dist] = t1;
146 data[dist * 2] = t2;
147 data[dist * 3] = t3;
148 }
149
150 #[inline(always)]
151 fn fwht_private(data: &mut [GfElement; GF_ORDER], truncated_size: usize) {
152 let mut dist = 1;
155 let mut dist4 = 4;
156 while dist4 <= GF_ORDER {
157 let mut r = 0;
158 while r < truncated_size {
159 for i in r..r + dist {
160 Self::fwht_4(&mut data[i..], dist)
161 }
162 r += dist4;
163 }
164
165 dist = dist4;
166 dist4 <<= 2;
167 }
168
169 if dist < GF_ORDER {
172 for i in 0..dist {
173 let sum = engine::add_mod(data[i], data[i + dist]);
176 let dif = engine::sub_mod(data[i], data[i + dist]);
177 data[i] = sum;
178 data[i + dist] = dif;
179 }
180 }
181 }
182}
183
184impl NoSimd {
188 #[inline(always)]
190 fn fft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
191 self.mul_add(x, y, log_m);
192 Self::xor(y, x);
193 }
194
195 #[inline(always)]
196 fn fft_butterfly_two_layers(
197 &self,
198 data: &mut ShardsRefMut,
199 pos: usize,
200 dist: usize,
201 log_m01: GfElement,
202 log_m23: GfElement,
203 log_m02: GfElement,
204 ) {
205 let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
206
207 if log_m02 == GF_MODULUS {
210 Self::xor(s2, s0);
211 Self::xor(s3, s1);
212 } else {
213 self.fft_butterfly_partial(s0, s2, log_m02);
214 self.fft_butterfly_partial(s1, s3, log_m02);
215 }
216
217 if log_m01 == GF_MODULUS {
220 Self::xor(s1, s0);
221 } else {
222 self.fft_butterfly_partial(s0, s1, log_m01);
223 }
224
225 if log_m23 == GF_MODULUS {
226 Self::xor(s3, s2);
227 } else {
228 self.fft_butterfly_partial(s2, s3, log_m23);
229 }
230 }
231
232 #[inline(always)]
233 fn fft_private(
234 &self,
235 data: &mut ShardsRefMut,
236 pos: usize,
237 size: usize,
238 truncated_size: usize,
239 skew_delta: usize,
240 ) {
241 let mut dist4 = size;
244 let mut dist = size >> 2;
245 while dist != 0 {
246 let mut r = 0;
247 while r < truncated_size {
248 let base = r + dist + skew_delta - 1;
249
250 let log_m01 = self.skew[base];
251 let log_m02 = self.skew[base + dist];
252 let log_m23 = self.skew[base + dist * 2];
253
254 for i in r..r + dist {
255 self.fft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
256 }
257
258 r += dist4;
259 }
260 dist4 = dist;
261 dist >>= 2;
262 }
263
264 if dist4 == 2 {
267 let mut r = 0;
268 while r < truncated_size {
269 let log_m = self.skew[r + skew_delta];
270
271 let (x, y) = data.dist2_mut(pos + r, 1);
272
273 if log_m == GF_MODULUS {
274 Self::xor(y, x);
275 } else {
276 self.fft_butterfly_partial(x, y, log_m)
277 }
278
279 r += 2;
280 }
281 }
282 }
283}
284
285impl NoSimd {
289 #[inline(always)]
291 fn ifft_butterfly_partial(&self, x: &mut [u8], y: &mut [u8], log_m: GfElement) {
292 Self::xor(y, x);
293 self.mul_add(x, y, log_m);
294 }
295
296 #[inline(always)]
297 fn ifft_butterfly_two_layers(
298 &self,
299 data: &mut ShardsRefMut,
300 pos: usize,
301 dist: usize,
302 log_m01: GfElement,
303 log_m23: GfElement,
304 log_m02: GfElement,
305 ) {
306 let (s0, s1, s2, s3) = data.dist4_mut(pos, dist);
307
308 if log_m01 == GF_MODULUS {
311 Self::xor(s1, s0);
312 } else {
313 self.ifft_butterfly_partial(s0, s1, log_m01);
314 }
315
316 if log_m23 == GF_MODULUS {
317 Self::xor(s3, s2);
318 } else {
319 self.ifft_butterfly_partial(s2, s3, log_m23);
320 }
321
322 if log_m02 == GF_MODULUS {
325 Self::xor(s2, s0);
326 Self::xor(s3, s1);
327 } else {
328 self.ifft_butterfly_partial(s0, s2, log_m02);
329 self.ifft_butterfly_partial(s1, s3, log_m02);
330 }
331 }
332
333 #[inline(always)]
334 fn ifft_private(
335 &self,
336 data: &mut ShardsRefMut,
337 pos: usize,
338 size: usize,
339 truncated_size: usize,
340 skew_delta: usize,
341 ) {
342 let mut dist = 1;
345 let mut dist4 = 4;
346 while dist4 <= size {
347 let mut r = 0;
348 while r < truncated_size {
349 let base = r + dist + skew_delta - 1;
350
351 let log_m01 = self.skew[base];
352 let log_m02 = self.skew[base + dist];
353 let log_m23 = self.skew[base + dist * 2];
354
355 for i in r..r + dist {
356 self.ifft_butterfly_two_layers(data, pos + i, dist, log_m01, log_m23, log_m02)
357 }
358
359 r += dist4;
360 }
361 dist = dist4;
362 dist4 <<= 2;
363 }
364
365 if dist < size {
368 let log_m = self.skew[dist + skew_delta - 1];
369 if log_m == GF_MODULUS {
370 Self::xor_within(data, pos + dist, pos, dist);
371 } else {
372 let (mut a, mut b) = data.split_at_mut(pos + dist);
373 for i in 0..dist {
374 self.ifft_butterfly_partial(
375 &mut a[pos + i], &mut b[i], log_m,
378 );
379 }
380 }
381 }
382 }
383}
384
385