1use crate::types::*;
4use core::arch::x86_64::{__m128i, __m256i};
5
6mod sse2;
7
/// Type-level flag for the `S3` parameter of `SseMachine`: the SSSE3 code paths
/// may be used (this is the flag the `SSSE3` alias turns on).
#[derive(Clone, Copy)]
pub struct YesS3;

/// Type-level flag for the `S3` parameter of `SseMachine`: SSSE3 is not assumed
/// (used by the plain `SSE2` alias).
#[derive(Clone, Copy)]
pub struct NoS3;
12
/// Type-level flag for the `S4` parameter of `SseMachine`: the SSE4.1 code paths
/// may be used (this is the flag the `SSE41` alias turns on).
#[derive(Clone, Copy)]
pub struct YesS4;

/// Type-level flag for the `S4` parameter of `SseMachine`: SSE4.1 is not assumed.
#[derive(Clone, Copy)]
pub struct NoS4;
17
/// Type-level "yes" flag of the `A1` pair.
///
/// NOTE(review): not referenced by the machine types visible in this part of the
/// file; presumably stands for AVX support — confirm against the users of this type.
#[derive(Clone, Copy)]
pub struct YesA1;

/// Type-level "no" flag of the `A1` pair (see the note on `YesA1`).
#[derive(Clone, Copy)]
pub struct NoA1;
22
/// Type-level "yes" flag of the `A2` pair.
///
/// NOTE(review): not referenced by the machine types visible in this part of the
/// file; presumably stands for AVX2 support — confirm against the users of this type.
#[derive(Clone, Copy)]
pub struct YesA2;

/// Type-level "no" flag of the `A2` pair (see the note on `YesA2`).
#[derive(Clone, Copy)]
pub struct NoA2;
27
/// Type-level "yes" flag for the `NI` parameter of `SseMachine` / `Avx2Machine`.
///
/// NOTE(review): every machine alias in this file uses `NoNI`; presumably `NI`
/// refers to the AES-NI extension — confirm.
#[derive(Clone, Copy)]
pub struct YesNI;

/// Type-level "no" flag for the `NI` parameter; the value used by all machine
/// aliases defined in this file.
#[derive(Clone, Copy)]
pub struct NoNI;
32
33use core::marker::PhantomData;
34
35#[derive(Copy, Clone)]
36pub struct SseMachine<S3, S4, NI>(PhantomData<(S3, S4, NI)>);
37impl<S3: Copy, S4: Copy, NI: Copy> Machine for SseMachine<S3, S4, NI>
38where
39 sse2::u128x1_sse2<S3, S4, NI>: Swap64,
40 sse2::u64x2_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
41 sse2::u32x4_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
42 sse2::u64x4_sse2<S3, S4, NI>: BSwap + Words4,
43 sse2::u128x1_sse2<S3, S4, NI>: BSwap,
44 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x2x2_sse2<S3, S4, NI>>,
45 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x4_sse2<S3, S4, NI>>,
46 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u32x4x2_sse2<S3, S4, NI>>,
47 sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u64x2x4_sse2<S3, S4, NI>>,
48 sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u32x4x4_sse2<S3, S4, NI>>,
49{
50 type u32x4 = sse2::u32x4_sse2<S3, S4, NI>;
51 type u64x2 = sse2::u64x2_sse2<S3, S4, NI>;
52 type u128x1 = sse2::u128x1_sse2<S3, S4, NI>;
53
54 type u32x4x2 = sse2::u32x4x2_sse2<S3, S4, NI>;
55 type u64x2x2 = sse2::u64x2x2_sse2<S3, S4, NI>;
56 type u64x4 = sse2::u64x4_sse2<S3, S4, NI>;
57 type u128x2 = sse2::u128x2_sse2<S3, S4, NI>;
58
59 type u32x4x4 = sse2::u32x4x4_sse2<S3, S4, NI>;
60 type u64x2x4 = sse2::u64x2x4_sse2<S3, S4, NI>;
61 type u128x4 = sse2::u128x4_sse2<S3, S4, NI>;
62
63 #[inline(always)]
64 unsafe fn instance() -> Self {
65 SseMachine(PhantomData)
66 }
67}
68
69#[derive(Copy, Clone)]
70pub struct Avx2Machine<NI>(PhantomData<NI>);
71impl<NI: Copy> Machine for Avx2Machine<NI>
72where
73 sse2::u128x1_sse2<YesS3, YesS4, NI>: BSwap + Swap64,
74 sse2::u64x2_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
75 sse2::u32x4_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
76 sse2::u64x4_sse2<YesS3, YesS4, NI>: BSwap + Words4,
77{
78 type u32x4 = sse2::u32x4_sse2<YesS3, YesS4, NI>;
79 type u64x2 = sse2::u64x2_sse2<YesS3, YesS4, NI>;
80 type u128x1 = sse2::u128x1_sse2<YesS3, YesS4, NI>;
81
82 type u32x4x2 = sse2::avx2::u32x4x2_avx2<NI>;
83 type u64x2x2 = sse2::u64x2x2_sse2<YesS3, YesS4, NI>;
84 type u64x4 = sse2::u64x4_sse2<YesS3, YesS4, NI>;
85 type u128x2 = sse2::u128x2_sse2<YesS3, YesS4, NI>;
86
87 type u32x4x4 = sse2::avx2::u32x4x4_avx2<NI>;
88 type u64x2x4 = sse2::u64x2x4_sse2<YesS3, YesS4, NI>;
89 type u128x4 = sse2::u128x4_sse2<YesS3, YesS4, NI>;
90
91 #[inline(always)]
92 unsafe fn instance() -> Self {
93 Avx2Machine(PhantomData)
94 }
95}
96
97pub type SSE2 = SseMachine<NoS3, NoS4, NoNI>;
98pub type SSSE3 = SseMachine<YesS3, NoS4, NoNI>;
99pub type SSE41 = SseMachine<YesS3, YesS4, NoNI>;
100pub type AVX = SseMachine<YesS3, YesS4, NoNI>;
103pub type AVX2 = Avx2Machine<NoNI>;
104
105zerocopy::cryptocorrosion_derive_traits! {
106 #[repr(C)]
107 #[allow(non_camel_case_types)]
111 #[derive(Copy, Clone)]
112 pub union vec128_storage {
113 u32x4: [u32; 4],
114 u64x2: [u64; 2],
115 u128x1: [u128; 1],
116 sse2: __m128i,
117 }
118}
119
120impl Store<vec128_storage> for vec128_storage {
121 #[inline(always)]
122 unsafe fn unpack(p: vec128_storage) -> Self {
123 p
124 }
125}
126impl<'a> From<&'a vec128_storage> for &'a [u32; 4] {
127 #[inline(always)]
128 fn from(x: &'a vec128_storage) -> Self {
129 unsafe { &x.u32x4 }
130 }
131}
132impl From<[u32; 4]> for vec128_storage {
133 #[inline(always)]
134 fn from(u32x4: [u32; 4]) -> Self {
135 vec128_storage { u32x4 }
136 }
137}
138impl Default for vec128_storage {
139 #[inline(always)]
140 fn default() -> Self {
141 vec128_storage { u128x1: [0] }
142 }
143}
144impl Eq for vec128_storage {}
145impl PartialEq for vec128_storage {
146 #[inline(always)]
147 fn eq(&self, rhs: &Self) -> bool {
148 unsafe { self.u128x1 == rhs.u128x1 }
149 }
150}
151
152#[allow(non_camel_case_types)]
153#[derive(Copy, Clone)]
154pub union vec256_storage {
155 u32x8: [u32; 8],
156 u64x4: [u64; 4],
157 u128x2: [u128; 2],
158 sse2: [vec128_storage; 2],
159 avx: __m256i,
160}
161impl From<[u64; 4]> for vec256_storage {
162 #[inline(always)]
163 fn from(u64x4: [u64; 4]) -> Self {
164 vec256_storage { u64x4 }
165 }
166}
167impl Default for vec256_storage {
168 #[inline(always)]
169 fn default() -> Self {
170 vec256_storage { u128x2: [0, 0] }
171 }
172}
173impl vec256_storage {
174 #[inline(always)]
175 pub fn new128(xs: [vec128_storage; 2]) -> Self {
176 Self { sse2: xs }
177 }
178 #[inline(always)]
179 pub fn split128(self) -> [vec128_storage; 2] {
180 unsafe { self.sse2 }
181 }
182}
183impl Eq for vec256_storage {}
184impl PartialEq for vec256_storage {
185 #[inline(always)]
186 fn eq(&self, rhs: &Self) -> bool {
187 unsafe { self.sse2 == rhs.sse2 }
188 }
189}
190
191#[allow(non_camel_case_types)]
192#[derive(Copy, Clone)]
193pub union vec512_storage {
194 u32x16: [u32; 16],
195 u64x8: [u64; 8],
196 u128x4: [u128; 4],
197 sse2: [vec128_storage; 4],
198 avx: [vec256_storage; 2],
199}
200impl Default for vec512_storage {
201 #[inline(always)]
202 fn default() -> Self {
203 vec512_storage {
204 u128x4: [0, 0, 0, 0],
205 }
206 }
207}
208impl vec512_storage {
209 #[inline(always)]
210 pub fn new128(xs: [vec128_storage; 4]) -> Self {
211 Self { sse2: xs }
212 }
213 #[inline(always)]
214 pub fn split128(self) -> [vec128_storage; 4] {
215 unsafe { self.sse2 }
216 }
217}
218impl Eq for vec512_storage {}
219impl PartialEq for vec512_storage {
220 #[inline(always)]
221 fn eq(&self, rhs: &Self) -> bool {
222 unsafe { self.avx == rhs.avx }
223 }
224}
225
226macro_rules! impl_into {
227 ($storage:ident, $array:ty, $name:ident) => {
228 impl From<$storage> for $array {
229 #[inline(always)]
230 fn from(vec: $storage) -> Self {
231 unsafe { vec.$name }
232 }
233 }
234 };
235}
236impl_into!(vec128_storage, [u32; 4], u32x4);
237impl_into!(vec128_storage, [u64; 2], u64x2);
238impl_into!(vec128_storage, [u128; 1], u128x1);
239impl_into!(vec256_storage, [u32; 8], u32x8);
240impl_into!(vec256_storage, [u64; 4], u64x4);
241impl_into!(vec256_storage, [u128; 2], u128x2);
242impl_into!(vec512_storage, [u32; 16], u32x16);
243impl_into!(vec512_storage, [u64; 8], u64x8);
244impl_into!(vec512_storage, [u128; 4], u128x4);
245
/// Defines a function whose body runs against a `Machine` picked for the CPU.
///
/// Invocation shape:
///
/// ```ignore
/// dispatch!(m, M, {
///     [pub] fn name(arg: Ty) -> Ret { /* body; `m: M` is in scope */ }
/// });
/// ```
///
/// The bracketed visibility and the return type are both optional.
///
/// * With the `std` feature, the body is compiled once per instruction set (AVX2,
///   AVX, SSE4.1, SSSE3, SSE2) behind `#[target_feature]`, and the first set
///   reported by `is_x86_feature_detected!` is called. If none is reported the
///   generated function hits `unimplemented!()`.
/// * Without `std`, the machine is chosen at compile time through
///   `cfg!(target_feature = ...)`, falling back to SSE2.
#[macro_export]
macro_rules! dispatch {
    // Form with an explicit return type.
    (
        $m:ident, $M:ident,
        {
            $([$vis:tt $(($scope:tt))*])*
            fn $f:ident($($p:ident: $pty:ty),* $(,)*) -> $out:ty $code:block
        }
    ) => {
        // Runtime-detected variant.
        #[cfg(feature = "std")]
        $($vis $(($scope))*)* fn $f($($p: $pty),*) -> $out {
            // The user's body, generic over the machine; inlined into each of the
            // feature-specific wrappers below.
            #[inline(always)]
            fn fn_impl<$M: $crate::Machine>($m: $M, $($p: $pty),*) -> $out $code
            use std::arch::x86_64::*;
            #[target_feature(enable = "avx2")]
            unsafe fn impl_avx2($($p: $pty),*) -> $out {
                let result = fn_impl($crate::x86_64::AVX2::instance(), $($p),*);
                // Issue VZEROUPPER before returning (presumably to avoid AVX/SSE
                // transition penalties in the caller).
                _mm256_zeroupper();
                result
            }
            #[target_feature(enable = "avx")]
            #[target_feature(enable = "sse4.1")]
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_avx($($p: $pty),*) -> $out {
                let result = fn_impl($crate::x86_64::AVX::instance(), $($p),*);
                _mm256_zeroupper();
                result
            }
            #[target_feature(enable = "sse4.1")]
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_sse41($($p: $pty),*) -> $out {
                fn_impl($crate::x86_64::SSE41::instance(), $($p),*)
            }
            #[target_feature(enable = "ssse3")]
            unsafe fn impl_ssse3($($p: $pty),*) -> $out {
                fn_impl($crate::x86_64::SSSE3::instance(), $($p),*)
            }
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($p: $pty),*) -> $out {
                fn_impl($crate::x86_64::SSE2::instance(), $($p),*)
            }
            // Each wrapper is only called after its feature has been detected.
            unsafe {
                if is_x86_feature_detected!("avx2") {
                    impl_avx2($($p),*)
                } else if is_x86_feature_detected!("avx") {
                    impl_avx($($p),*)
                } else if is_x86_feature_detected!("sse4.1") {
                    impl_sse41($($p),*)
                } else if is_x86_feature_detected!("ssse3") {
                    impl_ssse3($($p),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($p),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time variant.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($vis $(($scope))*)* fn $f($($p: $pty),*) -> $out {
            unsafe fn fn_impl<$M: $crate::Machine>($m: $M, $($p: $pty),*) -> $out $code
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($p),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($p),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($p),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($p),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($p),*)
                }
            }
        }
    };
    // Form without a return type: forward to the form above with `-> ()`.
    (
        $m:ident, $M:ident,
        {
            $([$vis:tt $(($scope:tt))*])*
            fn $f:ident($($p:ident: $pty:ty),* $(,)*) $code:block
        }
    ) => {
        dispatch!($m, $M, {
            $([$vis $(($scope))*])* fn $f($($p: $pty),*) -> () $code
        });
    }
}
326
/// Like `dispatch!`, but the `std` build only generates two variants of the body.
///
/// Invocation shape:
///
/// ```ignore
/// dispatch_light128!(m, M, {
///     [pub] fn name(arg: Ty) -> Ret { /* body; `m: M` is in scope */ }
/// });
/// ```
///
/// The bracketed visibility and the return type are both optional.
///
/// * With the `std` feature, the body is compiled for AVX and for SSE2 behind
///   `#[target_feature]`; AVX is used when `is_x86_feature_detected!("avx")`
///   reports it, otherwise SSE2. If neither is reported the generated function hits
///   `unimplemented!()`.
/// * Without `std`, the machine is chosen at compile time through
///   `cfg!(target_feature = ...)` among AVX2, AVX, SSE4.1 and SSSE3, falling back
///   to SSE2.
#[macro_export]
macro_rules! dispatch_light128 {
    // Form with an explicit return type.
    (
        $m:ident, $M:ident,
        {
            $([$vis:tt $(($scope:tt))*])*
            fn $f:ident($($p:ident: $pty:ty),* $(,)*) -> $out:ty $code:block
        }
    ) => {
        // Runtime-detected variant.
        #[cfg(feature = "std")]
        $($vis $(($scope))*)* fn $f($($p: $pty),*) -> $out {
            // The user's body, generic over the machine; inlined into each of the
            // feature-specific wrappers below.
            #[inline(always)]
            fn fn_impl<$M: $crate::Machine>($m: $M, $($p: $pty),*) -> $out $code
            use std::arch::x86_64::*;
            #[target_feature(enable = "avx")]
            unsafe fn impl_avx($($p: $pty),*) -> $out {
                fn_impl($crate::x86_64::AVX::instance(), $($p),*)
            }
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($p: $pty),*) -> $out {
                fn_impl($crate::x86_64::SSE2::instance(), $($p),*)
            }
            // Each wrapper is only called after its feature has been detected.
            unsafe {
                if is_x86_feature_detected!("avx") {
                    impl_avx($($p),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($p),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time variant.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($vis $(($scope))*)* fn $f($($p: $pty),*) -> $out {
            unsafe fn fn_impl<$M: $crate::Machine>($m: $M, $($p: $pty),*) -> $out $code
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($p),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($p),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($p),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($p),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($p),*)
                }
            }
        }
    };
    // Form without a return type: forward to the form above with `-> ()`.
    (
        $m:ident, $M:ident,
        {
            $([$vis:tt $(($scope:tt))*])*
            fn $f:ident($($p:ident: $pty:ty),* $(,)*) $code:block
        }
    ) => {
        dispatch_light128!($m, $M, {
            $([$vis $(($scope))*])* fn $f($($p: $pty),*) -> () $code
        });
    }
}
384
/// Like `dispatch!`, but the `std` build only generates two variants of the body.
///
/// Invocation shape:
///
/// ```ignore
/// dispatch_light256!(m, M, {
///     [pub] fn name(arg: Ty) -> Ret { /* body; `m: M` is in scope */ }
/// });
/// ```
///
/// The bracketed visibility and the return type are both optional.
///
/// * With the `std` feature, the body is compiled for AVX and for SSE2 behind
///   `#[target_feature]`; AVX is used when `is_x86_feature_detected!("avx")`
///   reports it, otherwise SSE2. If neither is reported the generated function hits
///   `unimplemented!()`.
/// * Without `std`, the machine is chosen at compile time through
///   `cfg!(target_feature = ...)` among AVX2, AVX, SSE4.1 and SSSE3, falling back
///   to SSE2.
#[macro_export]
macro_rules! dispatch_light256 {
    // Form with an explicit return type.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        // Runtime-detected variant.
        //
        // The visibility is emitted WITHOUT the surrounding brackets. The brackets
        // are only part of the invocation syntax; emitting them here produced
        // `[pub] fn ...`, which does not parse as an item, so any invocation that
        // passed a visibility failed to compile.
        #[cfg(feature = "std")]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            // The user's body, generic over the machine; inlined into each of the
            // feature-specific wrappers below.
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            use std::arch::x86_64::*;
            #[target_feature(enable = "avx")]
            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
            }
            #[target_feature(enable = "sse2")]
            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
            }
            // Each wrapper is only called after its feature has been detected.
            unsafe {
                if is_x86_feature_detected!("avx") {
                    impl_avx($($arg),*)
                } else if is_x86_feature_detected!("sse2") {
                    impl_sse2($($arg),*)
                } else {
                    unimplemented!()
                }
            }
        }
        // Compile-time variant.
        #[cfg(not(feature = "std"))]
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            unsafe {
                if cfg!(target_feature = "avx2") {
                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
                } else if cfg!(target_feature = "avx") {
                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
                } else if cfg!(target_feature = "sse4.1") {
                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
                } else if cfg!(target_feature = "ssse3") {
                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
                } else {
                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
                }
            }
        }
    };
    // Form without a return type: forward to the form above with `-> ()`. Here the
    // brackets ARE re-emitted, because the output is matched by the macro again.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch_light256!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}