1use lazy_static::lazy_static;
5
/// The SIMD instruction-set tier selected for a given feature.
///
/// The variants name instruction-set families from several architectures;
/// `None` means that no SIMD path was selected and callers should use a
/// scalar implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SimdSupport {
    /// No SIMD support was detected.
    None,
    /// Arm NEON.
    Neon,
    /// x86 SSE.
    Sse,
    /// x86 AVX2.
    Avx2,
    /// x86 AVX-512.
    Avx512,
    /// LoongArch LSX.
    Lsx,
    /// LoongArch LASX.
    Lasx,
}
16
17lazy_static! {
18 pub static ref FP16_SIMD_SUPPORT: SimdSupport = {
20 #[cfg(target_arch = "aarch64")]
21 {
22 if aarch64::has_neon_f16_support() {
23 SimdSupport::Neon
24 } else {
25 SimdSupport::None
26 }
27 }
28 #[cfg(target_arch = "x86_64")]
29 {
30 if x86::has_avx512_f16_support() {
31 SimdSupport::Avx512
32 } else if is_x86_feature_detected!("avx2") {
33 SimdSupport::Avx2
34 } else {
35 SimdSupport::None
36 }
37 }
38 #[cfg(target_arch = "loongarch64")]
39 {
40 if loongarch64::has_lasx_support() {
41 SimdSupport::Lasx
42 } else if loongarch64::has_lsx_support() {
43 SimdSupport::Lsx
44 } else {
45 SimdSupport::None
46 }
47 }
48 };
49}
50
#[cfg(target_arch = "x86_64")]
mod x86 {
    use core::arch::x86_64::__cpuid;

    /// Mask selecting bit 23 of EDX as returned by CPUID leaf 7; this is the
    /// bit inspected to decide whether AVX512-FP16 is present.
    const AVX512_FP16_EDX_MASK: u32 = 1 << 23;

    /// Reports whether the CPU advertises AVX-512 half-precision support.
    ///
    /// Returns `false` right away when the `avx512f` feature is not detected
    /// at runtime; only then is CPUID leaf 7 queried and the FP16 bit tested.
    pub fn has_avx512_f16_support() -> bool {
        is_x86_feature_detected!("avx512f") && {
            // SAFETY: the CPUID instruction is part of the x86_64 baseline,
            // so executing it on this target cannot fault.
            let leaf7 = unsafe { __cpuid(7) };
            leaf7.edx & AVX512_FP16_EDX_MASK != 0
        }
    }
}
73
/// Runtime detection of half-precision (FP16) support on aarch64.
///
/// Exactly one `has_neon_f16_support` definition is compiled for any given
/// operating system, so the module exists on every aarch64 target instead of
/// only on macOS, Linux and Windows.
#[cfg(target_arch = "aarch64")]
mod aarch64 {
    /// On macOS FP16 support is reported unconditionally.
    #[cfg(target_os = "macos")]
    pub fn has_neon_f16_support() -> bool {
        true
    }

    /// On Linux, tests the `HWCAP_FPHP` bit of the `AT_HWCAP` auxiliary
    /// vector entry.
    #[cfg(target_os = "linux")]
    pub fn has_neon_f16_support() -> bool {
        // SAFETY: `getauxval` only reads the process's auxiliary vector and
        // has no preconditions.
        let flags = unsafe { libc::getauxval(libc::AT_HWCAP) };
        // NOTE(review): `HWCAP_FPHP` is the half-precision FP capability bit;
        // confirm whether `HWCAP_ASIMDHP` should be checked as well.
        flags & libc::HWCAP_FPHP != 0
    }

    /// On Windows and on every other operating system no detection is
    /// performed; FP16 support is conservatively reported as absent.
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    pub fn has_neon_f16_support() -> bool {
        false
    }
}
102
#[cfg(target_arch = "loongarch64")]
mod loongarch64 {
    /// Returns `true` when any bit of `mask` is set in the `AT_HWCAP`
    /// auxiliary vector entry.
    fn hwcap_has(mask: libc::c_ulong) -> bool {
        // SAFETY: `getauxval` only reads the process's auxiliary vector and
        // has no preconditions.
        let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
        (hwcap & mask) != 0
    }

    /// Reports whether the kernel advertises the LSX vector extension.
    pub fn has_lsx_support() -> bool {
        hwcap_has(libc::HWCAP_LOONGARCH_LSX)
    }

    /// Reports whether the kernel advertises the LASX vector extension.
    pub fn has_lasx_support() -> bool {
        hwcap_has(libc::HWCAP_LOONGARCH_LASX)
    }
}