sonic_simd/
bits.rs

1use super::traits::BitMask;
2
3macro_rules! impl_bits {
4    () => {};
5    ($($ty:ty)*) => {
6        $(
7            impl BitMask for $ty {
8                const LEN: usize = std::mem::size_of::<$ty>() * 8;
9
10                #[inline]
11                fn before(&self, rhs: &Self) -> bool {
12                    (self.as_little_endian()  & rhs.as_little_endian().wrapping_sub(1)) != 0
13                }
14
15                #[inline]
16                fn first_offset(&self) -> usize {
17                    self.as_little_endian().trailing_zeros() as usize
18                }
19
20                #[inline]
21                fn as_little_endian(&self) -> Self {
22                    #[cfg(target_endian = "little")]
23                    {
24                        self.clone()
25                    }
26                    #[cfg(target_endian = "big")]
27                    {
28                        self.swap_bytes()
29                    }
30                }
31
32                #[inline]
33                fn all_zero(&self) -> bool {
34                    *self == 0
35                }
36
37                #[inline]
38                fn clear_high_bits(&self, n: usize) -> Self {
39                    debug_assert!(n <= Self::LEN);
40                    *self & ((u64::MAX as $ty) >> n)
41                }
42            }
43        )*
44    };
45}
46
47impl_bits!(u16 u32 u64);
48
/// A `u64` that holds the bitmask of a Neon vector, using 4 bits per lane.
///
/// ```text
///         (low)
/// Vector: 00-ff-ff-ff-ff-00-00-00
/// Mask  : 0000-1111-1111-1111-1111-0000-0000-0000
///
/// first_offset() = 1
/// clear_high_bits(4) = Mask(0000-1111-1111-1111-[0000]-0000-0000-0000)
/// ```
///
/// Reference: <https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon>
pub struct NeonBits(u64);

impl NeonBits {
    /// Wraps the raw 64-bit mask value `u`.
    ///
    /// This is a `const fn`, so masks can also be built in constant contexts.
    #[inline]
    pub const fn new(u: u64) -> Self {
        Self(u)
    }
}
66
67impl BitMask for NeonBits {
68    const LEN: usize = 16;
69
70    #[inline]
71    fn first_offset(&self) -> usize {
72        (self.as_little_endian().0.trailing_zeros() as usize) >> 2
73    }
74
75    #[inline]
76    fn before(&self, rhs: &Self) -> bool {
77        (self.as_little_endian().0 & rhs.as_little_endian().0.wrapping_sub(1)) != 0
78    }
79
80    #[inline]
81    fn as_little_endian(&self) -> Self {
82        #[cfg(target_endian = "little")]
83        {
84            Self::new(self.0)
85        }
86        #[cfg(target_endian = "big")]
87        {
88            Self::new(self.0.swap_bytes())
89        }
90    }
91
92    #[inline]
93    fn all_zero(&self) -> bool {
94        self.0 == 0
95    }
96
97    #[inline]
98    fn clear_high_bits(&self, n: usize) -> Self {
99        debug_assert!(n <= Self::LEN);
100        Self(self.0 & u64::MAX >> (n * 4))
101    }
102}