polars_arrow/types/bit_chunk.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
use std::fmt::Binary;
use std::ops::{BitAndAssign, Not, Shl, ShlAssign, ShrAssign};
use num_traits::PrimInt;
use super::NativeType;
/// A chunk of bits. This is used to create masks of a given length
/// whose width is `1` bit. In `portable_simd` notation, this corresponds to `m1xY`.
///
/// This (sealed) trait is implemented for [`u8`], [`u16`], [`u32`] and [`u64`].
pub trait BitChunk:
super::private::Sealed
+ PrimInt
+ NativeType
+ Binary
+ ShlAssign
+ Not<Output = Self>
+ ShrAssign<usize>
+ ShlAssign<usize>
+ Shl<usize, Output = Self>
+ BitAndAssign
{
/// convert itself into bytes.
fn to_ne_bytes(self) -> Self::Bytes;
/// convert itself from bytes.
fn from_ne_bytes(v: Self::Bytes) -> Self;
}
macro_rules! bit_chunk {
($ty:ty) => {
impl BitChunk for $ty {
#[inline(always)]
fn to_ne_bytes(self) -> Self::Bytes {
self.to_ne_bytes()
}
#[inline(always)]
fn from_ne_bytes(v: Self::Bytes) -> Self {
Self::from_ne_bytes(v)
}
}
};
}
bit_chunk!(u8);
bit_chunk!(u16);
bit_chunk!(u32);
bit_chunk!(u64);
/// An [`Iterator<Item=bool>`] over a [`BitChunk`].
///
/// This iterator is often compiled to SIMD.
///
/// The [LSB](https://en.wikipedia.org/wiki/Bit_numbering#Least_significant_bit) corresponds
/// to the first slot, as defined by the arrow specification.
/// # Example
/// ```
/// use polars_arrow::types::BitChunkIter;
/// let a = 0b00010000u8;
/// let iter = BitChunkIter::new(a, 7);
/// let r = iter.collect::<Vec<_>>();
/// assert_eq!(r, vec![false, false, false, false, true, false, false]);
/// ```
pub struct BitChunkIter<T: BitChunk> {
value: T,
mask: T,
remaining: usize,
}
impl<T: BitChunk> BitChunkIter<T> {
/// Creates a new [`BitChunkIter`] with `len` bits.
#[inline]
pub fn new(value: T, len: usize) -> Self {
assert!(len <= size_of::<T>() * 8);
Self {
value,
remaining: len,
mask: T::one(),
}
}
}
impl<T: BitChunk> Iterator for BitChunkIter<T> {
type Item = bool;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.remaining == 0 {
return None;
};
let result = Some(self.value & self.mask != T::zero());
self.remaining -= 1;
self.mask <<= 1;
result
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.remaining, Some(self.remaining))
}
}
// # Safety
// a mathematical invariant of this iterator
unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkIter<T> {}
/// An [`Iterator<Item=usize>`] over a [`BitChunk`] returning the index of each bit set in the chunk
/// See <https://lemire.me/blog/2018/03/08/iterating-over-set-bits-quickly-simd-edition/> for details
/// # Example
/// ```
/// use polars_arrow::types::BitChunkOnes;
/// let a = 0b00010000u8;
/// let iter = BitChunkOnes::new(a);
/// let r = iter.collect::<Vec<_>>();
/// assert_eq!(r, vec![4]);
/// ```
pub struct BitChunkOnes<T: BitChunk> {
value: T,
remaining: usize,
}
impl<T: BitChunk> BitChunkOnes<T> {
/// Creates a new [`BitChunkOnes`] with `len` bits.
#[inline]
pub fn new(value: T) -> Self {
Self {
value,
remaining: value.count_ones() as usize,
}
}
#[inline]
pub fn from_known_count(value: T, remaining: usize) -> Self {
Self { value, remaining }
}
}
impl<T: BitChunk> Iterator for BitChunkOnes<T> {
type Item = usize;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.remaining == 0 {
return None;
}
let v = self.value.trailing_zeros() as usize;
self.value &= self.value - T::one();
self.remaining -= 1;
Some(v)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.remaining, Some(self.remaining))
}
}
// # Safety
// a mathematical invariant of this iterator
unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkOnes<T> {}