// ppv_lite86/generic.rs

1#![allow(non_camel_case_types)]
2
3use crate::soft::{x2, x4};
4use crate::types::*;
5use core::ops::*;
6use zerocopy::{FromBytes, IntoBytes};
7
// 128 bits of plain storage that can be viewed either as four `u32` words
// (`d`) or as two `u64` words (`q`). Both fields are 16 bytes wide and, with
// `#[repr(C)]`, alias the same memory.
// NOTE(review): the traits generated by `cryptocorrosion_derive_traits!` are
// defined by zerocopy, not in this file.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(C)]
    #[derive(Clone, Copy)]
    pub union vec128_storage {
        // View as four 32-bit words.
        d: [u32; 4],
        // View as two 64-bit words.
        q: [u64; 2],
    }
}
16
17impl From<[u32; 4]> for vec128_storage {
18    #[inline(always)]
19    fn from(d: [u32; 4]) -> Self {
20        Self { d }
21    }
22}
23impl From<vec128_storage> for [u32; 4] {
24    #[inline(always)]
25    fn from(d: vec128_storage) -> Self {
26        unsafe { d.d }
27    }
28}
29impl From<[u64; 2]> for vec128_storage {
30    #[inline(always)]
31    fn from(q: [u64; 2]) -> Self {
32        Self { q }
33    }
34}
35impl From<vec128_storage> for [u64; 2] {
36    #[inline(always)]
37    fn from(q: vec128_storage) -> Self {
38        unsafe { q.q }
39    }
40}
41impl Default for vec128_storage {
42    #[inline(always)]
43    fn default() -> Self {
44        Self { q: [0, 0] }
45    }
46}
47impl Eq for vec128_storage {}
48impl PartialEq<vec128_storage> for vec128_storage {
49    #[inline(always)]
50    fn eq(&self, rhs: &Self) -> bool {
51        unsafe { self.q == rhs.q }
52    }
53}
54#[derive(Clone, Copy, PartialEq, Eq, Default)]
55pub struct vec256_storage {
56    v128: [vec128_storage; 2],
57}
58impl vec256_storage {
59    #[inline(always)]
60    pub fn new128(v128: [vec128_storage; 2]) -> Self {
61        Self { v128 }
62    }
63    #[inline(always)]
64    pub fn split128(self) -> [vec128_storage; 2] {
65        self.v128
66    }
67}
68impl From<vec256_storage> for [u64; 4] {
69    #[inline(always)]
70    fn from(q: vec256_storage) -> Self {
71        let [a, b]: [u64; 2] = q.v128[0].into();
72        let [c, d]: [u64; 2] = q.v128[1].into();
73        [a, b, c, d]
74    }
75}
76impl From<[u64; 4]> for vec256_storage {
77    #[inline(always)]
78    fn from([a, b, c, d]: [u64; 4]) -> Self {
79        Self {
80            v128: [[a, b].into(), [c, d].into()],
81        }
82    }
83}
84#[derive(Clone, Copy, PartialEq, Eq, Default)]
85pub struct vec512_storage {
86    v128: [vec128_storage; 4],
87}
88impl vec512_storage {
89    #[inline(always)]
90    pub fn new128(v128: [vec128_storage; 4]) -> Self {
91        Self { v128 }
92    }
93    #[inline(always)]
94    pub fn split128(self) -> [vec128_storage; 4] {
95        self.v128
96    }
97}
98
99#[inline(always)]
100fn dmap<T, F>(t: T, f: F) -> T
101where
102    T: Store<vec128_storage> + Into<vec128_storage>,
103    F: Fn(u32) -> u32,
104{
105    let t: vec128_storage = t.into();
106    let d = unsafe { t.d };
107    let d = vec128_storage {
108        d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
109    };
110    unsafe { T::unpack(d) }
111}
112
113fn dmap2<T, F>(a: T, b: T, f: F) -> T
114where
115    T: Store<vec128_storage> + Into<vec128_storage>,
116    F: Fn(u32, u32) -> u32,
117{
118    let a: vec128_storage = a.into();
119    let b: vec128_storage = b.into();
120    let ao = unsafe { a.d };
121    let bo = unsafe { b.d };
122    let d = vec128_storage {
123        d: [
124            f(ao[0], bo[0]),
125            f(ao[1], bo[1]),
126            f(ao[2], bo[2]),
127            f(ao[3], bo[3]),
128        ],
129    };
130    unsafe { T::unpack(d) }
131}
132
133#[inline(always)]
134fn qmap<T, F>(t: T, f: F) -> T
135where
136    T: Store<vec128_storage> + Into<vec128_storage>,
137    F: Fn(u64) -> u64,
138{
139    let t: vec128_storage = t.into();
140    let q = unsafe { t.q };
141    let q = vec128_storage {
142        q: [f(q[0]), f(q[1])],
143    };
144    unsafe { T::unpack(q) }
145}
146
147#[inline(always)]
148fn qmap2<T, F>(a: T, b: T, f: F) -> T
149where
150    T: Store<vec128_storage> + Into<vec128_storage>,
151    F: Fn(u64, u64) -> u64,
152{
153    let a: vec128_storage = a.into();
154    let b: vec128_storage = b.into();
155    let ao = unsafe { a.q };
156    let bo = unsafe { b.q };
157    let q = vec128_storage {
158        q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
159    };
160    unsafe { T::unpack(q) }
161}
162
/// Joins two 64-bit words into one 128-bit value; `q[0]` becomes the low half
/// and `q[1]` the high half.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
167
/// Splits a 128-bit value into two 64-bit words: `[low half, high half]`.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    // The `as u64` casts intentionally keep only the low 64 bits.
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
172
173#[inline(always)]
174fn omap<T, F>(a: T, f: F) -> T
175where
176    T: Store<vec128_storage> + Into<vec128_storage>,
177    F: Fn(u128) -> u128,
178{
179    let a: vec128_storage = a.into();
180    let ao = o_of_q(unsafe { a.q });
181    let o = vec128_storage { q: q_of_o(f(ao)) };
182    unsafe { T::unpack(o) }
183}
184
185#[inline(always)]
186fn omap2<T, F>(a: T, b: T, f: F) -> T
187where
188    T: Store<vec128_storage> + Into<vec128_storage>,
189    F: Fn(u128, u128) -> u128,
190{
191    let a: vec128_storage = a.into();
192    let b: vec128_storage = b.into();
193    let ao = o_of_q(unsafe { a.q });
194    let bo = o_of_q(unsafe { b.q });
195    let o = vec128_storage {
196        q: q_of_o(f(ao, bo)),
197    };
198    unsafe { T::unpack(o) }
199}
200
201impl RotateEachWord128 for u128x1_generic {}
202impl BitOps128 for u128x1_generic {}
203impl BitOps64 for u128x1_generic {}
204impl BitOps64 for u64x2_generic {}
205impl BitOps32 for u128x1_generic {}
206impl BitOps32 for u64x2_generic {}
207impl BitOps32 for u32x4_generic {}
208impl BitOps0 for u128x1_generic {}
209impl BitOps0 for u64x2_generic {}
210impl BitOps0 for u32x4_generic {}
211
212macro_rules! impl_bitops {
213    ($vec:ident) => {
214        impl Not for $vec {
215            type Output = Self;
216            #[inline(always)]
217            fn not(self) -> Self::Output {
218                omap(self, |x| !x)
219            }
220        }
221        impl BitAnd for $vec {
222            type Output = Self;
223            #[inline(always)]
224            fn bitand(self, rhs: Self) -> Self::Output {
225                omap2(self, rhs, |x, y| x & y)
226            }
227        }
228        impl BitOr for $vec {
229            type Output = Self;
230            #[inline(always)]
231            fn bitor(self, rhs: Self) -> Self::Output {
232                omap2(self, rhs, |x, y| x | y)
233            }
234        }
235        impl BitXor for $vec {
236            type Output = Self;
237            #[inline(always)]
238            fn bitxor(self, rhs: Self) -> Self::Output {
239                omap2(self, rhs, |x, y| x ^ y)
240            }
241        }
242        impl AndNot for $vec {
243            type Output = Self;
244            #[inline(always)]
245            fn andnot(self, rhs: Self) -> Self::Output {
246                omap2(self, rhs, |x, y| !x & y)
247            }
248        }
249        impl BitAndAssign for $vec {
250            #[inline(always)]
251            fn bitand_assign(&mut self, rhs: Self) {
252                *self = *self & rhs
253            }
254        }
255        impl BitOrAssign for $vec {
256            #[inline(always)]
257            fn bitor_assign(&mut self, rhs: Self) {
258                *self = *self | rhs
259            }
260        }
261        impl BitXorAssign for $vec {
262            #[inline(always)]
263            fn bitxor_assign(&mut self, rhs: Self) {
264                *self = *self ^ rhs
265            }
266        }
267
268        impl Swap64 for $vec {
269            #[inline(always)]
270            fn swap1(self) -> Self {
271                qmap(self, |x| {
272                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
273                })
274            }
275            #[inline(always)]
276            fn swap2(self) -> Self {
277                qmap(self, |x| {
278                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
279                })
280            }
281            #[inline(always)]
282            fn swap4(self) -> Self {
283                qmap(self, |x| {
284                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
285                })
286            }
287            #[inline(always)]
288            fn swap8(self) -> Self {
289                qmap(self, |x| {
290                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
291                })
292            }
293            #[inline(always)]
294            fn swap16(self) -> Self {
295                dmap(self, |x| x.rotate_left(16))
296            }
297            #[inline(always)]
298            fn swap32(self) -> Self {
299                qmap(self, |x| x.rotate_left(32))
300            }
301            #[inline(always)]
302            fn swap64(self) -> Self {
303                omap(self, |x| (x << 64) | (x >> 64))
304            }
305        }
306    };
307}
308impl_bitops!(u32x4_generic);
309impl_bitops!(u64x2_generic);
310impl_bitops!(u128x1_generic);
311
312impl RotateEachWord32 for u32x4_generic {
313    #[inline(always)]
314    fn rotate_each_word_right7(self) -> Self {
315        dmap(self, |x| x.rotate_right(7))
316    }
317    #[inline(always)]
318    fn rotate_each_word_right8(self) -> Self {
319        dmap(self, |x| x.rotate_right(8))
320    }
321    #[inline(always)]
322    fn rotate_each_word_right11(self) -> Self {
323        dmap(self, |x| x.rotate_right(11))
324    }
325    #[inline(always)]
326    fn rotate_each_word_right12(self) -> Self {
327        dmap(self, |x| x.rotate_right(12))
328    }
329    #[inline(always)]
330    fn rotate_each_word_right16(self) -> Self {
331        dmap(self, |x| x.rotate_right(16))
332    }
333    #[inline(always)]
334    fn rotate_each_word_right20(self) -> Self {
335        dmap(self, |x| x.rotate_right(20))
336    }
337    #[inline(always)]
338    fn rotate_each_word_right24(self) -> Self {
339        dmap(self, |x| x.rotate_right(24))
340    }
341    #[inline(always)]
342    fn rotate_each_word_right25(self) -> Self {
343        dmap(self, |x| x.rotate_right(25))
344    }
345}
346
347impl RotateEachWord32 for u64x2_generic {
348    #[inline(always)]
349    fn rotate_each_word_right7(self) -> Self {
350        qmap(self, |x| x.rotate_right(7))
351    }
352    #[inline(always)]
353    fn rotate_each_word_right8(self) -> Self {
354        qmap(self, |x| x.rotate_right(8))
355    }
356    #[inline(always)]
357    fn rotate_each_word_right11(self) -> Self {
358        qmap(self, |x| x.rotate_right(11))
359    }
360    #[inline(always)]
361    fn rotate_each_word_right12(self) -> Self {
362        qmap(self, |x| x.rotate_right(12))
363    }
364    #[inline(always)]
365    fn rotate_each_word_right16(self) -> Self {
366        qmap(self, |x| x.rotate_right(16))
367    }
368    #[inline(always)]
369    fn rotate_each_word_right20(self) -> Self {
370        qmap(self, |x| x.rotate_right(20))
371    }
372    #[inline(always)]
373    fn rotate_each_word_right24(self) -> Self {
374        qmap(self, |x| x.rotate_right(24))
375    }
376    #[inline(always)]
377    fn rotate_each_word_right25(self) -> Self {
378        qmap(self, |x| x.rotate_right(25))
379    }
380}
381impl RotateEachWord64 for u64x2_generic {
382    #[inline(always)]
383    fn rotate_each_word_right32(self) -> Self {
384        qmap(self, |x| x.rotate_right(32))
385    }
386}
387
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotates `x` right by `i` bits, matching `u128::rotate_right` for every `i`.
///
/// The rotation amount is reduced modulo 128, and the complementary left
/// shift is reduced the same way so that `i == 0` (or any multiple of 128)
/// never asks for a 128-bit shift, which would overflow.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let right = i % 128;
    // `(128 - 0) % 128 == 0`, so a zero rotation degenerates to `x | x`.
    let left = (128 - right) % 128;
    (x >> right) | (x << left)
}
/// Checks the hand-written rotation against `u128::rotate_right` for one
/// sample value and shift.
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    let expected = X.rotate_right(17);
    let actual = rotate_u128_right(X, 17);
    assert_eq!(actual, expected);
}
398
399impl RotateEachWord32 for u128x1_generic {
400    #[inline(always)]
401    fn rotate_each_word_right7(self) -> Self {
402        Self([rotate_u128_right(self.0[0], 7)])
403    }
404    #[inline(always)]
405    fn rotate_each_word_right8(self) -> Self {
406        Self([rotate_u128_right(self.0[0], 8)])
407    }
408    #[inline(always)]
409    fn rotate_each_word_right11(self) -> Self {
410        Self([rotate_u128_right(self.0[0], 11)])
411    }
412    #[inline(always)]
413    fn rotate_each_word_right12(self) -> Self {
414        Self([rotate_u128_right(self.0[0], 12)])
415    }
416    #[inline(always)]
417    fn rotate_each_word_right16(self) -> Self {
418        Self([rotate_u128_right(self.0[0], 16)])
419    }
420    #[inline(always)]
421    fn rotate_each_word_right20(self) -> Self {
422        Self([rotate_u128_right(self.0[0], 20)])
423    }
424    #[inline(always)]
425    fn rotate_each_word_right24(self) -> Self {
426        Self([rotate_u128_right(self.0[0], 24)])
427    }
428    #[inline(always)]
429    fn rotate_each_word_right25(self) -> Self {
430        Self([rotate_u128_right(self.0[0], 25)])
431    }
432}
433impl RotateEachWord64 for u128x1_generic {
434    #[inline(always)]
435    fn rotate_each_word_right32(self) -> Self {
436        Self([rotate_u128_right(self.0[0], 32)])
437    }
438}
439
440#[derive(Copy, Clone)]
441pub struct GenericMachine;
442impl Machine for GenericMachine {
443    type u32x4 = u32x4_generic;
444    type u64x2 = u64x2_generic;
445    type u128x1 = u128x1_generic;
446    type u32x4x2 = u32x4x2_generic;
447    type u64x2x2 = u64x2x2_generic;
448    type u64x4 = u64x4_generic;
449    type u128x2 = u128x2_generic;
450    type u32x4x4 = u32x4x4_generic;
451    type u64x2x4 = u64x2x4_generic;
452    type u128x4 = u128x4_generic;
453    #[inline(always)]
454    unsafe fn instance() -> Self {
455        Self
456    }
457}
458
// Portable vector of four 32-bit words; `#[repr(transparent)]` gives it the
// layout of the inner `[u32; 4]`.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Debug, PartialEq)]
    pub struct u32x4_generic([u32; 4]);
}
464
// Portable vector of two 64-bit words; `#[repr(transparent)]` gives it the
// layout of the inner `[u64; 2]`.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Debug, PartialEq)]
    pub struct u64x2_generic([u64; 2]);
}
470
// Portable vector of a single 128-bit word; `#[repr(transparent)]` gives it
// the layout of the inner `[u128; 1]`.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Debug, PartialEq)]
    pub struct u128x1_generic([u128; 1]);
}
476
477impl From<u32x4_generic> for vec128_storage {
478    #[inline(always)]
479    fn from(d: u32x4_generic) -> Self {
480        Self { d: d.0 }
481    }
482}
483impl From<u64x2_generic> for vec128_storage {
484    #[inline(always)]
485    fn from(q: u64x2_generic) -> Self {
486        Self { q: q.0 }
487    }
488}
489impl From<u128x1_generic> for vec128_storage {
490    #[inline(always)]
491    fn from(o: u128x1_generic) -> Self {
492        Self { q: q_of_o(o.0[0]) }
493    }
494}
495
496impl Store<vec128_storage> for u32x4_generic {
497    #[inline(always)]
498    unsafe fn unpack(s: vec128_storage) -> Self {
499        Self(s.d)
500    }
501}
502impl Store<vec128_storage> for u64x2_generic {
503    #[inline(always)]
504    unsafe fn unpack(s: vec128_storage) -> Self {
505        Self(s.q)
506    }
507}
508impl Store<vec128_storage> for u128x1_generic {
509    #[inline(always)]
510    unsafe fn unpack(s: vec128_storage) -> Self {
511        Self([o_of_q(s.q); 1])
512    }
513}
514
// `ArithOps` is implemented with an empty body for each 128-bit vector type;
// the arithmetic itself comes from the `Add`/`AddAssign` impls below.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
518
519impl Add for u32x4_generic {
520    type Output = Self;
521    #[inline(always)]
522    fn add(self, rhs: Self) -> Self::Output {
523        dmap2(self, rhs, |x, y| x.wrapping_add(y))
524    }
525}
526impl Add for u64x2_generic {
527    type Output = Self;
528    #[inline(always)]
529    fn add(self, rhs: Self) -> Self::Output {
530        qmap2(self, rhs, |x, y| x.wrapping_add(y))
531    }
532}
533impl Add for u128x1_generic {
534    type Output = Self;
535    #[inline(always)]
536    fn add(self, rhs: Self) -> Self::Output {
537        omap2(self, rhs, |x, y| x.wrapping_add(y))
538    }
539}
540impl AddAssign for u32x4_generic {
541    #[inline(always)]
542    fn add_assign(&mut self, rhs: Self) {
543        *self = *self + rhs
544    }
545}
546impl AddAssign for u64x2_generic {
547    #[inline(always)]
548    fn add_assign(&mut self, rhs: Self) {
549        *self = *self + rhs
550    }
551}
552impl AddAssign for u128x1_generic {
553    #[inline(always)]
554    fn add_assign(&mut self, rhs: Self) {
555        *self = *self + rhs
556    }
557}
558impl BSwap for u32x4_generic {
559    #[inline(always)]
560    fn bswap(self) -> Self {
561        dmap(self, |x| x.swap_bytes())
562    }
563}
564impl BSwap for u64x2_generic {
565    #[inline(always)]
566    fn bswap(self) -> Self {
567        qmap(self, |x| x.swap_bytes())
568    }
569}
570impl BSwap for u128x1_generic {
571    #[inline(always)]
572    fn bswap(self) -> Self {
573        omap(self, |x| x.swap_bytes())
574    }
575}
576impl StoreBytes for u32x4_generic {
577    #[inline(always)]
578    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
579        let x = u32x4_generic::read_from_bytes(input).unwrap();
580        dmap(x, |x| x.to_le())
581    }
582    #[inline(always)]
583    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
584        let x = u32x4_generic::read_from_bytes(input).unwrap();
585        dmap(x, |x| x.to_be())
586    }
587    #[inline(always)]
588    fn write_le(self, out: &mut [u8]) {
589        let x = dmap(self, |x| x.to_le());
590        x.write_to(out).unwrap();
591    }
592    #[inline(always)]
593    fn write_be(self, out: &mut [u8]) {
594        let x = dmap(self, |x| x.to_be());
595        x.write_to(out).unwrap();
596    }
597}
598impl StoreBytes for u64x2_generic {
599    #[inline(always)]
600    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
601        let x = u64x2_generic::read_from_bytes(input).unwrap();
602        qmap(x, |x| x.to_le())
603    }
604    #[inline(always)]
605    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
606        let x = u64x2_generic::read_from_bytes(input).unwrap();
607        qmap(x, |x| x.to_be())
608    }
609    #[inline(always)]
610    fn write_le(self, out: &mut [u8]) {
611        let x = qmap(self, |x| x.to_le());
612        x.write_to(out).unwrap();
613    }
614    #[inline(always)]
615    fn write_be(self, out: &mut [u8]) {
616        let x = qmap(self, |x| x.to_be());
617        x.write_to(out).unwrap();
618    }
619}
620
// Marker type used as the second type parameter of `x2` in the aliases below.
#[derive(Copy, Clone)]
pub struct G0;
// Second marker type: `u64x4_generic` uses it so that it is a different type
// from `u64x2x2_generic`, which wraps the same element type with `G0`.
#[derive(Copy, Clone)]
pub struct G1;
// 256-bit vectors: pairs of 128-bit vectors.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
// 512-bit vectors: quadruples of 128-bit vectors.
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
632
633impl Vector<[u32; 16]> for u32x4x4_generic {
634    fn to_scalars(self) -> [u32; 16] {
635        let [a, b, c, d] = self.0;
636        let a = a.0;
637        let b = b.0;
638        let c = c.0;
639        let d = d.0;
640        [
641            a[0], a[1], a[2], a[3], //
642            b[0], b[1], b[2], b[3], //
643            c[0], c[1], c[2], c[3], //
644            d[0], d[1], d[2], d[3], //
645        ]
646    }
647}
648
649impl MultiLane<[u32; 4]> for u32x4_generic {
650    #[inline(always)]
651    fn to_lanes(self) -> [u32; 4] {
652        self.0
653    }
654    #[inline(always)]
655    fn from_lanes(xs: [u32; 4]) -> Self {
656        Self(xs)
657    }
658}
659impl MultiLane<[u64; 2]> for u64x2_generic {
660    #[inline(always)]
661    fn to_lanes(self) -> [u64; 2] {
662        self.0
663    }
664    #[inline(always)]
665    fn from_lanes(xs: [u64; 2]) -> Self {
666        Self(xs)
667    }
668}
669impl MultiLane<[u64; 4]> for u64x4_generic {
670    #[inline(always)]
671    fn to_lanes(self) -> [u64; 4] {
672        let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
673        [a[0], a[1], b[0], b[1]]
674    }
675    #[inline(always)]
676    fn from_lanes(xs: [u64; 4]) -> Self {
677        let (a, b) = (
678            u64x2_generic::from_lanes([xs[0], xs[1]]),
679            u64x2_generic::from_lanes([xs[2], xs[3]]),
680        );
681        x2::new([a, b])
682    }
683}
684impl MultiLane<[u128; 1]> for u128x1_generic {
685    #[inline(always)]
686    fn to_lanes(self) -> [u128; 1] {
687        self.0
688    }
689    #[inline(always)]
690    fn from_lanes(xs: [u128; 1]) -> Self {
691        Self(xs)
692    }
693}
694impl Vec4<u32> for u32x4_generic {
695    #[inline(always)]
696    fn extract(self, i: u32) -> u32 {
697        self.0[i as usize]
698    }
699    #[inline(always)]
700    fn insert(mut self, v: u32, i: u32) -> Self {
701        self.0[i as usize] = v;
702        self
703    }
704}
705impl Vec4<u64> for u64x4_generic {
706    #[inline(always)]
707    fn extract(self, i: u32) -> u64 {
708        let d: [u64; 4] = self.to_lanes();
709        d[i as usize]
710    }
711    #[inline(always)]
712    fn insert(self, v: u64, i: u32) -> Self {
713        self.0[(i / 2) as usize].insert(v, i % 2);
714        self
715    }
716}
717impl Vec2<u64> for u64x2_generic {
718    #[inline(always)]
719    fn extract(self, i: u32) -> u64 {
720        self.0[i as usize]
721    }
722    #[inline(always)]
723    fn insert(mut self, v: u64, i: u32) -> Self {
724        self.0[i as usize] = v;
725        self
726    }
727}
728
729impl Words4 for u32x4_generic {
730    #[inline(always)]
731    fn shuffle2301(self) -> Self {
732        self.swap64()
733    }
734    #[inline(always)]
735    fn shuffle1230(self) -> Self {
736        let x = self.0;
737        Self([x[3], x[0], x[1], x[2]])
738    }
739    #[inline(always)]
740    fn shuffle3012(self) -> Self {
741        let x = self.0;
742        Self([x[1], x[2], x[3], x[0]])
743    }
744}
745impl LaneWords4 for u32x4_generic {
746    #[inline(always)]
747    fn shuffle_lane_words2301(self) -> Self {
748        self.shuffle2301()
749    }
750    #[inline(always)]
751    fn shuffle_lane_words1230(self) -> Self {
752        self.shuffle1230()
753    }
754    #[inline(always)]
755    fn shuffle_lane_words3012(self) -> Self {
756        self.shuffle3012()
757    }
758}
759
760impl Words4 for u64x4_generic {
761    #[inline(always)]
762    fn shuffle2301(self) -> Self {
763        x2::new([self.0[1], self.0[0]])
764    }
765    #[inline(always)]
766    fn shuffle1230(self) -> Self {
767        unimplemented!()
768    }
769    #[inline(always)]
770    fn shuffle3012(self) -> Self {
771        unimplemented!()
772    }
773}
774
775impl u32x4<GenericMachine> for u32x4_generic {}
776impl u64x2<GenericMachine> for u64x2_generic {}
777impl u128x1<GenericMachine> for u128x1_generic {}
778impl u32x4x2<GenericMachine> for u32x4x2_generic {}
779impl u64x2x2<GenericMachine> for u64x2x2_generic {}
780impl u64x4<GenericMachine> for u64x4_generic {}
781impl u128x2<GenericMachine> for u128x2_generic {}
782impl u32x4x4<GenericMachine> for u32x4x4_generic {}
783impl u64x2x4<GenericMachine> for u64x2x4_generic {}
784impl u128x4<GenericMachine> for u128x4_generic {}
785
/// Defines a function `$name` whose body is generic over a `Machine` type
/// `$MTy` and runs with `$mach` bound to a `GenericMachine` instance.
///
/// The function signature is written inside the braces, optionally preceded
/// by a bracketed visibility such as `[pub]` or `[pub(crate)]`. A signature
/// without a return type is treated as returning `()`.
///
/// The recursive call goes through `$crate::` so it resolves no matter how
/// (or under which name) the caller imported the macro.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this portable backend: defines `$name`
/// with a body generic over `$MTy: Machine`, run with `$mach` bound to a
/// `GenericMachine` instance.
///
/// A signature without a return type is treated as returning `()`. That arm
/// re-invokes this macro through `$crate::`, so it works even when
/// `dispatch!` itself is not in scope at the call site.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light128!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this portable backend: defines `$name`
/// with a body generic over `$MTy: Machine`, run with `$mach` bound to a
/// `GenericMachine` instance.
///
/// A signature without a return type is treated as returning `()`. That arm
/// re-invokes this macro through `$crate::`, so it works even when
/// `dispatch!` itself is not in scope at the call site.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light256!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this portable backend: defines `$name`
/// with a body generic over `$MTy: Machine`, run with `$mach` bound to a
/// `GenericMachine` instance.
///
/// A signature without a return type is treated as returning `()`. That arm
/// re-invokes this macro through `$crate::`, so it works even when
/// `dispatch!` itself is not in scope at the call site.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light512!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
854
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes of every 32-bit word
    /// independently.
    #[test]
    fn test_bswap32() {
        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let expected = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let mach = unsafe { GenericMachine::instance() };

        let loaded: <GenericMachine as Machine>::u32x4 = mach.vec(input);
        let swapped = loaded.bswap();

        let want = mach.vec(expected);
        assert_eq!(swapped, want);
    }
}