ppv_lite86/x86_64/
mod.rs

1// crate minimums: sse2, x86_64
2
3use crate::types::*;
4use core::arch::x86_64::{__m128i, __m256i};
5
6mod sse2;
7
/// Marker for the `S3` machine parameter: selected by the `SSSE3`, `SSE41` and `AVX` aliases.
#[derive(Clone, Copy)]
pub struct YesS3;
/// Marker for the `S3` machine parameter: selected by the baseline `SSE2` alias.
#[derive(Clone, Copy)]
pub struct NoS3;

/// Marker for the `S4` machine parameter: selected by the `SSE41` and `AVX` aliases.
#[derive(Clone, Copy)]
pub struct YesS4;
/// Marker for the `S4` machine parameter: selected by the `SSE2` and `SSSE3` aliases.
#[derive(Clone, Copy)]
pub struct NoS4;

/// Positive `A1` marker.
// NOTE(review): presumably stands for AVX; nothing in this module refers to it -- confirm.
#[derive(Clone, Copy)]
pub struct YesA1;
/// Negative `A1` marker.
#[derive(Clone, Copy)]
pub struct NoA1;

/// Positive `A2` marker.
// NOTE(review): presumably stands for AVX2; nothing in this module refers to it -- confirm.
#[derive(Clone, Copy)]
pub struct YesA2;
/// Negative `A2` marker.
#[derive(Clone, Copy)]
pub struct NoA2;

/// Positive marker for the `NI` machine parameter.
// NOTE(review): the exact instruction-set extension meant by "NI" is not visible here -- confirm.
#[derive(Clone, Copy)]
pub struct YesNI;
/// Negative marker for the `NI` machine parameter; used by every machine alias in this module.
#[derive(Clone, Copy)]
pub struct NoNI;
32
33use core::marker::PhantomData;
34
35#[derive(Copy, Clone)]
36pub struct SseMachine<S3, S4, NI>(PhantomData<(S3, S4, NI)>);
37impl<S3: Copy, S4: Copy, NI: Copy> Machine for SseMachine<S3, S4, NI>
38where
39    sse2::u128x1_sse2<S3, S4, NI>: Swap64,
40    sse2::u64x2_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
41    sse2::u32x4_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
42    sse2::u64x4_sse2<S3, S4, NI>: BSwap + Words4,
43    sse2::u128x1_sse2<S3, S4, NI>: BSwap,
44    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x2x2_sse2<S3, S4, NI>>,
45    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x4_sse2<S3, S4, NI>>,
46    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u32x4x2_sse2<S3, S4, NI>>,
47    sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u64x2x4_sse2<S3, S4, NI>>,
48    sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u32x4x4_sse2<S3, S4, NI>>,
49{
50    type u32x4 = sse2::u32x4_sse2<S3, S4, NI>;
51    type u64x2 = sse2::u64x2_sse2<S3, S4, NI>;
52    type u128x1 = sse2::u128x1_sse2<S3, S4, NI>;
53
54    type u32x4x2 = sse2::u32x4x2_sse2<S3, S4, NI>;
55    type u64x2x2 = sse2::u64x2x2_sse2<S3, S4, NI>;
56    type u64x4 = sse2::u64x4_sse2<S3, S4, NI>;
57    type u128x2 = sse2::u128x2_sse2<S3, S4, NI>;
58
59    type u32x4x4 = sse2::u32x4x4_sse2<S3, S4, NI>;
60    type u64x2x4 = sse2::u64x2x4_sse2<S3, S4, NI>;
61    type u128x4 = sse2::u128x4_sse2<S3, S4, NI>;
62
63    #[inline(always)]
64    unsafe fn instance() -> Self {
65        SseMachine(PhantomData)
66    }
67}
68
69#[derive(Copy, Clone)]
70pub struct Avx2Machine<NI>(PhantomData<NI>);
71impl<NI: Copy> Machine for Avx2Machine<NI>
72where
73    sse2::u128x1_sse2<YesS3, YesS4, NI>: BSwap + Swap64,
74    sse2::u64x2_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
75    sse2::u32x4_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
76    sse2::u64x4_sse2<YesS3, YesS4, NI>: BSwap + Words4,
77{
78    type u32x4 = sse2::u32x4_sse2<YesS3, YesS4, NI>;
79    type u64x2 = sse2::u64x2_sse2<YesS3, YesS4, NI>;
80    type u128x1 = sse2::u128x1_sse2<YesS3, YesS4, NI>;
81
82    type u32x4x2 = sse2::avx2::u32x4x2_avx2<NI>;
83    type u64x2x2 = sse2::u64x2x2_sse2<YesS3, YesS4, NI>;
84    type u64x4 = sse2::u64x4_sse2<YesS3, YesS4, NI>;
85    type u128x2 = sse2::u128x2_sse2<YesS3, YesS4, NI>;
86
87    type u32x4x4 = sse2::avx2::u32x4x4_avx2<NI>;
88    type u64x2x4 = sse2::u64x2x4_sse2<YesS3, YesS4, NI>;
89    type u128x4 = sse2::u128x4_sse2<YesS3, YesS4, NI>;
90
91    #[inline(always)]
92    unsafe fn instance() -> Self {
93        Avx2Machine(PhantomData)
94    }
95}
96
97pub type SSE2 = SseMachine<NoS3, NoS4, NoNI>;
98pub type SSSE3 = SseMachine<YesS3, NoS4, NoNI>;
99pub type SSE41 = SseMachine<YesS3, YesS4, NoNI>;
100/// AVX but not AVX2: only 128-bit integer operations, but use VEX versions of everything
101/// to avoid expensive SSE/VEX conflicts.
102pub type AVX = SseMachine<YesS3, YesS4, NoNI>;
103pub type AVX2 = Avx2Machine<NoNI>;
104
105zerocopy::cryptocorrosion_derive_traits! {
106    #[repr(C)]
107    /// Generic wrapper for unparameterized storage of any of the possible impls.
108    /// Converting into and out of this type should be essentially free, although it may be more
109    /// aligned than a particular impl requires.
110    #[allow(non_camel_case_types)]
111    #[derive(Copy, Clone)]
112    pub union vec128_storage {
113        u32x4: [u32; 4],
114        u64x2: [u64; 2],
115        u128x1: [u128; 1],
116        sse2: __m128i,
117    }
118}
119
120impl Store<vec128_storage> for vec128_storage {
121    #[inline(always)]
122    unsafe fn unpack(p: vec128_storage) -> Self {
123        p
124    }
125}
126impl<'a> From<&'a vec128_storage> for &'a [u32; 4] {
127    #[inline(always)]
128    fn from(x: &'a vec128_storage) -> Self {
129        unsafe { &x.u32x4 }
130    }
131}
132impl From<[u32; 4]> for vec128_storage {
133    #[inline(always)]
134    fn from(u32x4: [u32; 4]) -> Self {
135        vec128_storage { u32x4 }
136    }
137}
138impl Default for vec128_storage {
139    #[inline(always)]
140    fn default() -> Self {
141        vec128_storage { u128x1: [0] }
142    }
143}
144impl Eq for vec128_storage {}
145impl PartialEq for vec128_storage {
146    #[inline(always)]
147    fn eq(&self, rhs: &Self) -> bool {
148        unsafe { self.u128x1 == rhs.u128x1 }
149    }
150}
151
152#[allow(non_camel_case_types)]
153#[derive(Copy, Clone)]
154pub union vec256_storage {
155    u32x8: [u32; 8],
156    u64x4: [u64; 4],
157    u128x2: [u128; 2],
158    sse2: [vec128_storage; 2],
159    avx: __m256i,
160}
161impl From<[u64; 4]> for vec256_storage {
162    #[inline(always)]
163    fn from(u64x4: [u64; 4]) -> Self {
164        vec256_storage { u64x4 }
165    }
166}
167impl Default for vec256_storage {
168    #[inline(always)]
169    fn default() -> Self {
170        vec256_storage { u128x2: [0, 0] }
171    }
172}
173impl vec256_storage {
174    #[inline(always)]
175    pub fn new128(xs: [vec128_storage; 2]) -> Self {
176        Self { sse2: xs }
177    }
178    #[inline(always)]
179    pub fn split128(self) -> [vec128_storage; 2] {
180        unsafe { self.sse2 }
181    }
182}
183impl Eq for vec256_storage {}
184impl PartialEq for vec256_storage {
185    #[inline(always)]
186    fn eq(&self, rhs: &Self) -> bool {
187        unsafe { self.sse2 == rhs.sse2 }
188    }
189}
190
191#[allow(non_camel_case_types)]
192#[derive(Copy, Clone)]
193pub union vec512_storage {
194    u32x16: [u32; 16],
195    u64x8: [u64; 8],
196    u128x4: [u128; 4],
197    sse2: [vec128_storage; 4],
198    avx: [vec256_storage; 2],
199}
200impl Default for vec512_storage {
201    #[inline(always)]
202    fn default() -> Self {
203        vec512_storage {
204            u128x4: [0, 0, 0, 0],
205        }
206    }
207}
208impl vec512_storage {
209    #[inline(always)]
210    pub fn new128(xs: [vec128_storage; 4]) -> Self {
211        Self { sse2: xs }
212    }
213    #[inline(always)]
214    pub fn split128(self) -> [vec128_storage; 4] {
215        unsafe { self.sse2 }
216    }
217}
218impl Eq for vec512_storage {}
219impl PartialEq for vec512_storage {
220    #[inline(always)]
221    fn eq(&self, rhs: &Self) -> bool {
222        unsafe { self.avx == rhs.avx }
223    }
224}
225
226macro_rules! impl_into {
227    ($storage:ident, $array:ty, $name:ident) => {
228        impl From<$storage> for $array {
229            #[inline(always)]
230            fn from(vec: $storage) -> Self {
231                unsafe { vec.$name }
232            }
233        }
234    };
235}
236impl_into!(vec128_storage, [u32; 4], u32x4);
237impl_into!(vec128_storage, [u64; 2], u64x2);
238impl_into!(vec128_storage, [u128; 1], u128x1);
239impl_into!(vec256_storage, [u32; 8], u32x8);
240impl_into!(vec256_storage, [u64; 4], u64x4);
241impl_into!(vec256_storage, [u128; 2], u128x2);
242impl_into!(vec512_storage, [u32; 16], u32x16);
243impl_into!(vec512_storage, [u64; 8], u64x8);
244impl_into!(vec512_storage, [u128; 4], u128x4);
245
/// Expand one generic function into the complete set of feature-specific implementations, so
/// that the most capable supported instruction set is used.
///
/// Use this dispatcher when throughput matters most.
///
/// Invocation shape: `dispatch!(mach, MTy, { [vis] fn name(arg: Ty, ...) -> Ret { body } })`.
/// The `[vis]` part and the return type are both optional. Inside `body`, `mach` is a value of
/// the generic machine type `MTy`.
///
/// With the `std` feature the choice is made at run time, trying AVX2, AVX, SSE4.1, SSSE3 and
/// SSE2 in that order. Without it the choice is made from the compile-time target features.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        // Run-time dispatch: one `#[target_feature]` wrapper per supported feature level.
        #[cfg(feature = "std")]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            use std::arch::x86_64::*;
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
            }
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_ssse3($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
            }
            #[target_feature(enable = "sse4.1")]
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_sse41($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
            }
            #[target_feature(enable = "avx")]
            #[target_feature(enable = "sse4.1")]
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
                let out = fn_impl($crate::x86_64::AVX::instance(), $($arg),*);
                // Clear the upper register halves before returning to the caller.
                _mm256_zeroupper();
                out
            }
            #[target_feature(enable = "avx2")]
            unsafe fn impl_avx2($($arg: $argty),*) -> $ret {
                let out = fn_impl($crate::x86_64::AVX2::instance(), $($arg),*);
                // Clear the upper register halves before returning to the caller.
                _mm256_zeroupper();
                out
            }
            // Most capable feature set first.
            unsafe {
                if is_x86_feature_detected!("avx2") {
                    impl_avx2($($arg),*)
                } else if is_x86_feature_detected!("avx") {
                    impl_avx($($arg),*)
                } else if is_x86_feature_detected!("sse4.1") {
                    impl_sse41($($arg),*)
                } else if is_x86_feature_detected!("ssse3") {
                    impl_ssse3($($arg),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($arg),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time dispatch: the branch is picked from the enabled target features.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
                }
            }
        }
    };
    // No return type written: forward to the arm above with an explicit `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
326
/// Expand one generic function into just the implementations needed to work efficiently with
/// 128-bit vectors on this platform. For x86-64 that means SSE2 and AVX.
///
/// Use this dispatcher for vector code that gains little from advanced hardware features (for
/// example because it runs rarely), where keeping code size down matters more.
///
/// Invocation shape: `dispatch_light128!(mach, MTy, { [vis] fn name(arg: Ty, ...) -> Ret { body } })`.
/// The `[vis]` part and the return type are both optional.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        // Run-time dispatch between the AVX and SSE2 variants only.
        #[cfg(feature = "std")]
        $($pub $(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            use std::arch::x86_64::*;
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
            }
            #[target_feature(enable = "avx")]
            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
            }
            unsafe {
                if is_x86_feature_detected!("avx") {
                    impl_avx($($arg),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($arg),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time dispatch: the branch is picked from the enabled target features.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
                }
            }
        }
    };
    // No return type written: forward to the arm above with an explicit `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch_light128!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
384
/// Generate only the basic implementations necessary to be able to operate efficiently on 256-bit
/// vectors on this platform.
///
/// This dispatcher is suitable for vector operations that do not benefit from advanced hardware
/// features (e.g. because they are done infrequently), so minimizing their contribution to code
/// size is more important.
///
/// Invocation shape: `dispatch_light256!(mach, MTy, { [vis] fn name(arg: Ty, ...) -> Ret { body } })`.
/// The `[vis]` part (e.g. `[pub]` or `[pub(crate)]`) and the return type are both optional.
///
/// With the `std` feature, the run-time-detected path currently instantiates only the AVX and
/// SSE2 variants; no separate AVX2 variant is generated on that path. Without `std`, the variant
/// is chosen from the compile-time target features (AVX2, AVX, SSE4.1, SSSE3, else SSE2).
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        // The visibility must be transcribed WITHOUT the surrounding brackets: the brackets only
        // delimit it in the macro input. Emitting `[pub] fn ...` is not a valid item, and since
        // the expansion is parsed before `cfg` stripping it breaks every build that passes a
        // visibility, whether or not `std` is enabled.
        #[cfg(feature = "std")]
        $($pub $(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            use std::arch::x86_64::*;
            #[target_feature(enable = "avx")]
            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
            }
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
            }
            unsafe {
                if is_x86_feature_detected!("avx") {
                    impl_avx($($arg),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($arg),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time dispatch: the branch is picked from the enabled target features.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
                }
            }
        }
    };
    // No return type written: forward to the arm above with an explicit `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch_light256!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}