polars_arrow/bitmap/utils/
mod.rsmod chunk_iterator;
mod chunks_exact_mut;
mod fmt;
mod iterator;
mod slice_iterator;
mod zip_validity;
pub(crate) use chunk_iterator::merge_reversed;
pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact};
pub use chunks_exact_mut::BitChunksExactMut;
pub use fmt::fmt;
pub use iterator::BitmapIter;
use polars_utils::slice::load_padded_le_u64;
pub use slice_iterator::SlicesIterator;
pub use zip_validity::{ZipValidity, ZipValidityIter};
use crate::bitmap::aligned::AlignedBitmapSlice;
#[inline]
pub fn is_set(byte: u8, i: usize) -> bool {
debug_assert!(i < 8);
byte & (1 << i) != 0
}
#[inline(always)]
pub fn set_bit_in_byte(byte: u8, i: usize, value: bool) -> u8 {
debug_assert!(i < 8);
let mask = !(1 << i);
let insert = (value as u8) << i;
(byte & mask) | insert
}
#[inline(always)]
pub unsafe fn get_bit_unchecked(bytes: &[u8], i: usize) -> bool {
let byte = *bytes.get_unchecked(i / 8);
let bit = (byte >> (i % 8)) & 1;
bit != 0
}
#[inline(always)]
pub unsafe fn set_bit_unchecked(bytes: &mut [u8], i: usize, value: bool) {
let byte = bytes.get_unchecked_mut(i / 8);
*byte = set_bit_in_byte(*byte, i % 8, value);
}
#[inline]
pub fn bytes_for(bits: usize) -> usize {
bits.saturating_add(7) / 8
}
pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}
assert!(8 * slice.len() >= offset + len);
let first_byte_idx = offset / 8;
let offset_in_byte = offset % 8;
if offset_in_byte + len <= 64 {
let mut word = load_padded_le_u64(&slice[first_byte_idx..]);
word >>= offset_in_byte;
word <<= 64 - len;
return len - word.count_ones() as usize;
}
let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
let ones_in_prefix = aligned.prefix().count_ones() as usize;
let ones_in_bulk: usize = aligned.bulk_iter().map(|w| w.count_ones() as usize).sum();
let ones_in_suffix = aligned.suffix().count_ones() as usize;
len - ones_in_prefix - ones_in_bulk - ones_in_suffix
}
pub fn leading_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}
assert!(8 * slice.len() >= offset + len);
let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
let leading_zeros_in_prefix =
(aligned.prefix().trailing_zeros() as usize).min(aligned.prefix_bitlen());
if leading_zeros_in_prefix < aligned.prefix_bitlen() {
return leading_zeros_in_prefix;
}
if let Some(full_zero_bulk_words) = aligned.bulk_iter().position(|w| w != 0) {
return aligned.prefix_bitlen()
+ full_zero_bulk_words * 64
+ aligned.bulk()[full_zero_bulk_words].trailing_zeros() as usize;
}
aligned.prefix_bitlen()
+ aligned.bulk_bitlen()
+ (aligned.suffix().trailing_zeros() as usize).min(aligned.suffix_bitlen())
}
pub fn leading_ones(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}
assert!(8 * slice.len() >= offset + len);
let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
let leading_ones_in_prefix = aligned.prefix().trailing_ones() as usize;
if leading_ones_in_prefix < aligned.prefix_bitlen() {
return leading_ones_in_prefix;
}
if let Some(full_one_bulk_words) = aligned.bulk_iter().position(|w| w != u64::MAX) {
return aligned.prefix_bitlen()
+ full_one_bulk_words * 64
+ aligned.bulk()[full_one_bulk_words].trailing_ones() as usize;
}
aligned.prefix_bitlen() + aligned.bulk_bitlen() + aligned.suffix().trailing_ones() as usize
}
pub fn trailing_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}
assert!(8 * slice.len() >= offset + len);
let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
let trailing_zeros_in_suffix = ((aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64))
.leading_zeros() as usize)
.min(aligned.suffix_bitlen());
if trailing_zeros_in_suffix < aligned.suffix_bitlen() {
return trailing_zeros_in_suffix;
}
if let Some(full_zero_bulk_words) = aligned.bulk_iter().rev().position(|w| w != 0) {
return aligned.suffix_bitlen()
+ full_zero_bulk_words * 64
+ aligned.bulk()[aligned.bulk().len() - full_zero_bulk_words - 1].leading_zeros()
as usize;
}
let trailing_zeros_in_prefix = ((aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64))
.leading_zeros() as usize)
.min(aligned.prefix_bitlen());
aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_zeros_in_prefix
}
pub fn trailing_ones(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}
assert!(8 * slice.len() >= offset + len);
let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
let trailing_ones_in_suffix =
(aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64)).leading_ones() as usize;
if trailing_ones_in_suffix < aligned.suffix_bitlen() {
return trailing_ones_in_suffix;
}
if let Some(full_one_bulk_words) = aligned.bulk_iter().rev().position(|w| w != u64::MAX) {
return aligned.suffix_bitlen()
+ full_one_bulk_words * 64
+ aligned.bulk()[aligned.bulk().len() - full_one_bulk_words - 1].leading_ones()
as usize;
}
let trailing_ones_in_prefix =
(aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64)).leading_ones() as usize;
aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_ones_in_prefix
}
#[cfg(test)]
mod tests {
use rand::Rng;
use super::*;
use crate::bitmap::Bitmap;
#[test]
fn leading_trailing() {
macro_rules! testcase {
($slice:expr, $offset:expr, $length:expr => lz=$lz:expr,lo=$lo:expr,tz=$tz:expr,to=$to:expr) => {
assert_eq!(
leading_zeros($slice, $offset, $length),
$lz,
"leading_zeros"
);
assert_eq!(leading_ones($slice, $offset, $length), $lo, "leading_ones");
assert_eq!(
trailing_zeros($slice, $offset, $length),
$tz,
"trailing_zeros"
);
assert_eq!(
trailing_ones($slice, $offset, $length),
$to,
"trailing_ones"
);
};
}
testcase!(&[], 0, 0 => lz=0,lo=0,tz=0,to=0);
testcase!(&[0], 0, 1 => lz=1,lo=0,tz=1,to=0);
testcase!(&[1], 0, 1 => lz=0,lo=1,tz=0,to=1);
testcase!(&[0b010], 0, 3 => lz=1,lo=0,tz=1,to=0);
testcase!(&[0b101], 0, 3 => lz=0,lo=1,tz=0,to=1);
testcase!(&[0b100], 0, 3 => lz=2,lo=0,tz=0,to=1);
testcase!(&[0b110], 0, 3 => lz=1,lo=0,tz=0,to=2);
testcase!(&[0b001], 0, 3 => lz=0,lo=1,tz=2,to=0);
testcase!(&[0b011], 0, 3 => lz=0,lo=2,tz=1,to=0);
testcase!(&[0b010], 1, 2 => lz=0,lo=1,tz=1,to=0);
testcase!(&[0b101], 1, 2 => lz=1,lo=0,tz=0,to=1);
testcase!(&[0b100], 1, 2 => lz=1,lo=0,tz=0,to=1);
testcase!(&[0b110], 1, 2 => lz=0,lo=2,tz=0,to=2);
testcase!(&[0b001], 1, 2 => lz=2,lo=0,tz=2,to=0);
testcase!(&[0b011], 1, 2 => lz=0,lo=1,tz=1,to=0);
}
#[ignore = "Fuzz test. Too slow"]
#[test]
fn leading_trailing_fuzz() {
let mut rng = rand::thread_rng();
const SIZE: usize = 1000;
const REPEATS: usize = 10_000;
let mut v = Vec::<bool>::with_capacity(SIZE);
for _ in 0..REPEATS {
v.clear();
let offset = rng.gen_range(0..SIZE);
let length = rng.gen_range(0..SIZE - offset);
let extra_padding = rng.gen_range(0..64);
let mut num_remaining = usize::min(SIZE, offset + length + extra_padding);
while num_remaining > 0 {
let chunk_size = rng.gen_range(1..=num_remaining);
v.extend(
rng.clone()
.sample_iter(rand::distributions::Slice::new(&[false, true]).unwrap())
.take(chunk_size),
);
num_remaining -= chunk_size;
}
let v_slice = &v[offset..offset + length];
let lz = v_slice.iter().take_while(|&v| !*v).count();
let lo = v_slice.iter().take_while(|&v| *v).count();
let tz = v_slice.iter().rev().take_while(|&v| !*v).count();
let to = v_slice.iter().rev().take_while(|&v| *v).count();
let bm = Bitmap::from_iter(v.iter().copied());
let (slice, _, _) = bm.as_slice();
assert_eq!(leading_zeros(slice, offset, length), lz);
assert_eq!(leading_ones(slice, offset, length), lo);
assert_eq!(trailing_zeros(slice, offset, length), tz);
assert_eq!(trailing_ones(slice, offset, length), to);
}
}
}