// polars_arrow/bitmap/utils/chunk_iterator/mod.rs
mod chunks_exact;
mod merge;
pub use chunks_exact::BitChunksExact;
pub(crate) use merge::merge_reversed;
use crate::trusted_len::TrustedLen;
pub use crate::types::BitChunk;
use crate::types::BitChunkIter;
/// A [`TrustedLen`] iterator over chunks of type `B` that can additionally
/// report a trailing "remainder" chunk and the number of bits it holds.
pub trait BitChunkIterExact<B: BitChunk>: TrustedLen<Item = B> {
    /// Returns the remainder, packed into a single `B`.
    fn remainder(&self) -> B;

    /// Returns the number of bits reported as belonging to the remainder.
    fn remainder_len(&self) -> usize;

    /// Builds a [`BitChunkIter`] from [`remainder`](Self::remainder) and
    /// [`remainder_len`](Self::remainder_len).
    #[inline]
    fn remainder_iter(&self) -> BitChunkIter<B> {
        let (bits, bit_count) = (self.remainder(), self.remainder_len());
        BitChunkIter::new(bits, bit_count)
    }
}
/// Iterator over a bit range of a byte slice, yielding chunks of type `T`.
///
/// Built by `BitChunks::new(slice, offset, len)`. Bits that do not fill a whole
/// `T` are not yielded by the iterator; they are exposed through `remainder`
/// and `remainder_len`.
#[derive(Debug)]
pub struct BitChunks<'a, T: BitChunk> {
// Yields the `size_of::<T>()`-byte chunks that have not been loaded yet.
chunk_iterator: std::slice::ChunksExact<'a, u8>,
// The chunk loaded ahead of time; it is the base of the next yielded item
// (zero when there was no chunk to load).
current: T,
// Bytes from which `remainder` is computed.
remainder_bytes: &'a [u8],
// A `T` whose first native-endian byte is the first remainder byte (zero if
// there is none); merged into the final chunk when `bit_offset != 0`.
last_chunk: T,
// Number of chunks still to be yielded by `next`.
remaining: usize,
// Bit offset inside the first byte (`offset % 8`).
bit_offset: usize,
// Length of the iterated range, in bits.
len: usize,
// Marker for the chunk type `T`.
phantom: std::marker::PhantomData<T>,
}
/// Fills `dst` with bytes obtained by merging each pair of neighbouring bytes
/// of `bytes` through `merge_reversed` with the given `bit_offset`.
///
/// The last byte of `bytes` is paired with a zero byte. At most
/// `size_of::<T>()` bytes of `dst` are written.
///
/// # Panics
/// Panics if `bytes` is empty.
#[inline]
fn copy_with_merge<T: BitChunk>(dst: &mut T::Bytes, bytes: &[u8], bit_offset: usize) {
    // The final input byte has no successor, so it is padded with zero.
    let padded_tail = [bytes[bytes.len() - 1], 0];
    let pairs = bytes
        .windows(2)
        .chain(std::iter::once(padded_tail.as_ref()));

    for (slot, pair) in pairs.take(size_of::<T>()).enumerate() {
        dst[slot] = merge_reversed(pair[0], pair[1], bit_offset);
    }
}
impl<'a, T: BitChunk> BitChunks<'a, T> {
    /// Creates an iterator over `len` bits of `slice`, starting at bit `offset`.
    ///
    /// # Panics
    /// Panics if `offset + len` exceeds the number of bits in `slice`.
    pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self {
        assert!(offset + len <= slice.len() * 8);

        // Skip the bytes that lie entirely before `offset`; what is left of the
        // offset is a shift within the first byte.
        let tail = &slice[offset / 8..];
        let bit_offset = offset % 8;

        let chunk_width = size_of::<T>();
        // Whole bytes covered by `len`, and bytes touched once the shift is
        // taken into account (rounded up).
        let whole_bytes = len / 8;
        let touched_bytes = (len + bit_offset + 7) / 8;

        let mut chunk_iterator = tail[..whole_bytes].chunks_exact(chunk_width);

        // Bytes after the last full chunk, up to the last touched byte.
        let leftover_start = whole_bytes - chunk_iterator.remainder().len();
        let leftover = &tail[leftover_start..touched_bytes];
        let remainder_bytes = if chunk_iterator.len() == 0 {
            tail
        } else {
            leftover
        };

        // Only the first remainder byte is needed to complete the final chunk.
        let last_chunk = match remainder_bytes.first() {
            Some(&byte) => {
                let mut raw = T::zero().to_ne_bytes();
                raw[0] = byte;
                T::from_ne_bytes(raw)
            },
            None => T::zero(),
        };

        // Count the chunks before the first one is pulled out below.
        let remaining = chunk_iterator.size_hint().0;
        let current = match chunk_iterator.next() {
            Some(bytes) => match bytes.try_into() {
                Ok(raw) => T::from_ne_bytes(raw),
                Err(_) => unreachable!(),
            },
            None => T::zero(),
        };

        Self {
            chunk_iterator,
            len,
            current,
            remaining,
            remainder_bytes,
            last_chunk,
            bit_offset,
            phantom: std::marker::PhantomData,
        }
    }

    /// Pulls the next chunk from the inner iterator into `self.current`.
    ///
    /// Panics if the inner iterator is exhausted.
    #[inline]
    fn load_next(&mut self) {
        let bytes = self.chunk_iterator.next().unwrap();
        self.current = match bytes.try_into() {
            Ok(raw) => T::from_ne_bytes(raw),
            Err(_) => unreachable!(),
        };
    }

    /// Returns the remainder chunk, built from at most `size_of::<T>()` of the
    /// remainder bytes; zero when there are no remainder bytes.
    pub fn remainder(&self) -> T {
        let mut packed = T::zero().to_ne_bytes();

        if !self.remainder_bytes.is_empty() {
            if self.bit_offset == 0 {
                // Byte-aligned: copy the bytes over as they are.
                let source = self.remainder_bytes.iter().take(size_of::<T>());
                for (slot, &byte) in source.enumerate() {
                    packed[slot] = byte;
                }
            } else {
                // Unaligned: every output byte is merged from two input bytes.
                copy_with_merge::<T>(&mut packed, self.remainder_bytes, self.bit_offset);
            }
        }

        T::from_ne_bytes(packed)
    }

    /// Returns the number of bits not covered by full `T`-sized chunks.
    pub fn remainder_len(&self) -> usize {
        let chunk_width = size_of::<T>();
        let full_chunks = (self.len / 8) / chunk_width;
        self.len - chunk_width * full_chunks * 8
    }
}
impl<T: BitChunk> Iterator for BitChunks<'_, T> {
    type Item = T;

    /// Yields the next full chunk, or `None` once `remaining` reaches zero.
    ///
    /// With a zero bit offset the pre-loaded chunk is returned unchanged.
    /// Otherwise it is combined, via `merge_reversed`, with the chunk that
    /// follows it (or with `last_chunk` for the final full chunk).
    #[inline]
    fn next(&mut self) -> Option<T> {
        if self.remaining == 0 {
            return None;
        }

        let head = self.current;

        // Load the following chunk whenever one exists, regardless of alignment.
        let has_successor = self.remaining >= 2;
        if has_successor {
            self.load_next();
        }

        let item = match self.bit_offset {
            0 => head,
            shift => {
                let follower = if has_successor {
                    self.current
                } else {
                    self.last_chunk
                };
                merge_reversed(head, follower, shift)
            },
        };

        self.remaining -= 1;
        Some(item)
    }

    /// Exact bounds: the number of chunks still to be yielded.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let pending = self.remaining;
        (pending, Some(pending))
    }
}
/// Exposes the inherent `remainder` / `remainder_len` methods of [`BitChunks`]
/// through the [`BitChunkIterExact`] trait.
impl<T: BitChunk> BitChunkIterExact<T> for BitChunks<'_, T> {
    #[inline]
    fn remainder(&self) -> T {
        // Inherent methods take precedence over trait methods of the same
        // name, so this forwards to `BitChunks::remainder` and does not recurse.
        Self::remainder(self)
    }

    #[inline]
    fn remainder_len(&self) -> usize {
        // Forwards to the inherent `BitChunks::remainder_len`.
        Self::remainder_len(self)
    }
}
impl<T: BitChunk> ExactSizeIterator for BitChunks<'_, T> {
    /// Returns the number of chunks still to be yielded.
    ///
    /// `ExactSizeIterator::len` must agree with `size_hint`, so this reports
    /// `remaining` rather than `chunk_iterator.len()`. The inner iterator runs
    /// one chunk ahead, because the next chunk is pre-loaded into `current`,
    /// so it under-counts by one while any chunk is still pending.
    #[inline]
    fn len(&self) -> usize {
        self.remaining
    }
}
// SAFETY: `size_hint` returns `(remaining, Some(remaining))`, and `next` yields
// an item and decrements `remaining` by one until it reaches zero, after which
// it returns `None`; the reported length is therefore exact.
unsafe impl<T: BitChunk> TrustedLen for BitChunks<'_, T> {}