1use std::hint::unreachable_unchecked;
2
3use polars_error::{polars_bail, PolarsResult};
4use polars_utils::vec::PushUnchecked;
5
6use super::bitmask::BitMask;
7use super::utils::{count_zeros, fmt, BitChunk, BitChunks, BitChunksExactMut, BitmapIter};
8use super::{intersects_with_mut, Bitmap};
9use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
10use crate::storage::SharedStorage;
11use crate::trusted_len::TrustedLen;
12
/// A mutable, growable container of bits.
///
/// Bits are packed LSB-first: bit `i` lives in `buffer[i / 8]` at position
/// `i % 8` (see `set_bit_in_byte` / `get_bit_unchecked` usage below).
#[derive(Clone)]
pub struct MutableBitmap {
    // Packed bits. Invariant (established in `try_new`, upheld by `push`/`pop`
    // etc.): `buffer.len() == length.div_ceil(8)`.
    buffer: Vec<u8>,
    // Number of valid bits in `buffer`; trailing bits of the last byte are padding.
    length: usize,
}
52
impl std::fmt::Debug for MutableBitmap {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the shared bitmap formatter over the full bit range.
        fmt(&self.buffer, 0, self.len(), f)
    }
}
58
impl PartialEq for MutableBitmap {
    fn eq(&self, other: &Self) -> bool {
        // Compare bit by bit (lengths included, via iterator exhaustion) so
        // that padding bits in the last byte never influence equality.
        self.iter().eq(other.iter())
    }
}
64
65impl MutableBitmap {
    /// Initializes an empty [`MutableBitmap`].
    #[inline]
    pub fn new() -> Self {
        Self {
            buffer: Vec::new(),
            length: 0,
        }
    }

    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length in bits.
    ///
    /// # Errors
    /// Errors iff `length > bytes.len() * 8`, i.e. the bytes cannot hold
    /// `length` bits. Excess bytes beyond `length.div_ceil(8)` are dropped,
    /// establishing the invariant `buffer.len() == length.div_ceil(8)`.
    #[inline]
    pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
        if length > bytes.len().saturating_mul(8) {
            polars_bail!(InvalidOperation:
                "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
                length,
                bytes.len().saturating_mul(8)
            )
        }

        // Keep exactly the bytes needed for `length` bits.
        let min_byte_length_needed = length.div_ceil(8);
        bytes.drain(min_byte_length_needed..);
        Ok(Self {
            length,
            buffer: bytes,
        })
    }

    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length in bits.
    ///
    /// # Panics
    /// Panics iff `length > buffer.len() * 8` (see [`Self::try_new`]).
    #[inline]
    pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
        Self::try_new(buffer, length).unwrap()
    }

    /// Initializes an empty [`MutableBitmap`] with capacity for at least
    /// `capacity` bits.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            // Round the bit capacity up to whole bytes; `saturating_add`
            // guards against overflow near `usize::MAX`.
            buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
            length: 0,
        }
    }

    /// Appends a new bit, growing the buffer by one byte when a new byte starts.
    #[inline]
    pub fn push(&mut self, value: bool) {
        if self.length % 8 == 0 {
            // Previous byte is full (or the bitmap is empty): start a new one.
            self.buffer.push(0);
        }
        // SAFETY: the buffer is non-empty here — either it already had a
        // partially-filled last byte, or one was pushed just above.
        let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
        *byte = set_bit_in_byte(*byte, self.length % 8, value);
        self.length += 1;
    }

    /// Removes and returns the last bit, or `None` if the bitmap is empty.
    #[inline]
    pub fn pop(&mut self) -> Option<bool> {
        if self.is_empty() {
            return None;
        }

        self.length -= 1;
        // SAFETY: `self.length` now indexes the former last bit, which is in bounds.
        let value = unsafe { self.get_unchecked(self.length) };
        if self.length % 8 == 0 {
            // The last byte holds no valid bits anymore; drop it to keep
            // `buffer.len() == length.div_ceil(8)`.
            self.buffer.pop();
        }
        Some(value)
    }

    /// Returns whether the bit at position `index` is set.
    ///
    /// # Panics
    /// Panics iff `index >= self.len()`.
    #[inline]
    pub fn get(&self, index: usize) -> bool {
        assert!(index < self.len());
        // SAFETY: bounds checked by the assert above.
        unsafe { self.get_unchecked(index) }
    }

    /// Returns whether the bit at position `index` is set.
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
        get_bit_unchecked(&self.buffer, index)
    }

    /// Sets the bit at position `index` to `value`.
    ///
    /// # Panics
    /// Panics iff `index >= self.len()`.
    #[inline]
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len());
        // SAFETY: bounds checked by the assert above.
        unsafe {
            self.set_unchecked(index, value);
        }
    }

    /// ORs the bit at position `index` with `value` in place.
    ///
    /// The other bits of the byte are unaffected (OR with 0 is the identity).
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
    }
179
180 #[inline]
185 pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
186 *self.buffer.get_unchecked_mut(index / 8) &= (value as u8) << (index % 8);
187 }
188
    /// Returns an iterator over the bits of the [`MutableBitmap`].
    pub fn iter(&self) -> BitmapIter {
        BitmapIter::new(&self.buffer, 0, self.length)
    }

    /// Empties the [`MutableBitmap`], keeping the allocated buffer.
    #[inline]
    pub fn clear(&mut self) {
        self.length = 0;
        self.buffer.clear();
    }

    /// Extends the [`MutableBitmap`] by `additional` bits of constant `value`.
    ///
    /// Dispatches to byte-level fast paths instead of pushing bit by bit.
    #[inline]
    pub fn extend_constant(&mut self, additional: usize, value: bool) {
        if additional == 0 {
            return;
        }

        if value {
            self.extend_set(additional)
        } else {
            self.extend_unset(additional)
        }
    }

    /// Resizes the bitmap to `length` bits, filling new bits with `value`
    /// when growing and truncating when shrinking.
    pub fn resize(&mut self, length: usize, value: bool) {
        if let Some(additional) = length.checked_sub(self.len()) {
            // Growing (or unchanged): append the constant.
            self.extend_constant(additional, value);
        } else {
            // Shrinking: keep exactly the bytes needed for `length` bits.
            self.buffer.truncate(length.saturating_add(7) / 8);
            self.length = length;
        }
    }

    /// Initializes a [`MutableBitmap`] of `length` bits, all unset.
    #[inline]
    pub fn from_len_zeroed(length: usize) -> Self {
        Self {
            buffer: vec![0; length.saturating_add(7) / 8],
            length,
        }
    }

    /// Initializes a [`MutableBitmap`] of `length` bits, all set.
    #[inline]
    pub fn from_len_set(length: usize) -> Self {
        Self {
            buffer: vec![u8::MAX; length.saturating_add(7) / 8],
            length,
        }
    }

    /// Reserves capacity for at least `additional` more bits.
    #[inline(always)]
    pub fn reserve(&mut self, additional: usize) {
        // Translate the required bit capacity into extra bytes beyond the
        // bytes the buffer already holds.
        self.buffer
            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
    }

    /// Returns the capacity of the [`MutableBitmap`] in bits.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.buffer.capacity() * 8
    }

    /// Appends a new bit without checking the buffer's capacity.
    ///
    /// # Safety
    /// The caller must ensure the buffer has room for one more byte whenever
    /// `self.len() % 8 == 0` (e.g. after calling [`Self::reserve`]).
    #[inline]
    pub unsafe fn push_unchecked(&mut self, value: bool) {
        if self.length % 8 == 0 {
            // Start a new byte without a capacity check.
            self.buffer.push_unchecked(0);
        }
        let byte = self.buffer.last_mut().unwrap_unchecked();
        *byte = set_bit_in_byte(*byte, self.length % 8, value);
        self.length += 1;
    }

    /// Returns the number of unset bits in this [`MutableBitmap`].
    ///
    /// This is `O(self.len())`.
    pub fn unset_bits(&self) -> usize {
        count_zeros(&self.buffer, 0, self.length)
    }

    /// Returns the number of set bits in this [`MutableBitmap`].
    ///
    /// This is `O(self.len())` (it counts the zeros and subtracts).
    pub fn set_bits(&self) -> usize {
        self.length - self.unset_bits()
    }

    /// Returns the length of the [`MutableBitmap`] in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the [`MutableBitmap`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Sets the length of the bitmap (and of the backing byte buffer).
    ///
    /// # Safety
    /// The caller must ensure the first `len.div_ceil(8)` bytes of the buffer
    /// are initialized and within its capacity (`Vec::set_len` contract).
    #[inline]
    pub(crate) unsafe fn set_len(&mut self, len: usize) {
        self.buffer.set_len(len.saturating_add(7) / 8);
        self.length = len;
    }
310
    /// Extends the bitmap with `additional` set bits.
    fn extend_set(&mut self, mut additional: usize) {
        let offset = self.length % 8;
        let added = if offset != 0 {
            // offset != 0 => the buffer has at least one (partial) byte.
            let last_index = self.buffer.len() - 1;
            let last = &mut self.buffer[last_index];

            // Build a run of min(additional, 8 - offset) ones starting at bit
            // `offset` and OR it into the last byte.
            let remaining = 0b11111111u8;
            let remaining = remaining >> 8usize.saturating_sub(additional);
            let remaining = remaining << offset;
            *last |= remaining;
            std::cmp::min(additional, 8 - offset)
        } else {
            0
        };
        self.length += added;
        additional = additional.saturating_sub(added);
        if additional > 0 {
            // The remainder is now byte-aligned: append whole 0xFF bytes.
            debug_assert_eq!(self.length % 8, 0);
            let existing = self.length.saturating_add(7) / 8;
            let required = (self.length + additional).saturating_add(7) / 8;
            self.buffer
                .extend(std::iter::repeat(0b11111111u8).take(required - existing));
            self.length += additional;
        }
    }

    /// Extends the bitmap with `additional` unset bits.
    fn extend_unset(&mut self, mut additional: usize) {
        let offset = self.length % 8;
        let added = if offset != 0 {
            // offset != 0 => the buffer has at least one (partial) byte.
            let last_index = self.buffer.len() - 1;
            let last = &mut self.buffer[last_index];
            // Clear the padding bits at and above `offset`; the new bits are zero.
            *last &= 0b11111111u8 >> (8 - offset);
            std::cmp::min(additional, 8 - offset)
        } else {
            0
        };
        self.length += added;
        additional = additional.saturating_sub(added);
        if additional > 0 {
            // The remainder is now byte-aligned: append zeroed bytes.
            debug_assert_eq!(self.length % 8, 0);
            self.buffer
                .resize((self.length + additional).saturating_add(7) / 8, 0);
            self.length += additional;
        }
    }
359
    /// Sets the bit at position `index` to `value`.
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
        debug_assert!(index < self.len());
        let byte = self.buffer.get_unchecked_mut(index / 8);
        *byte = set_bit_in_byte(*byte, index % 8, value);
    }

    /// Shrinks the capacity of the backing buffer to fit its length.
    pub fn shrink_to_fit(&mut self) {
        self.buffer.shrink_to_fit();
    }

    /// Returns an iterator over the bits grouped in [`BitChunk`]s, useful for
    /// bulk bitwise operations.
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
        BitChunks::new(&self.buffer, 0, self.length)
    }

    /// Returns a mutable iterator over exact-size bit chunks (the trailing
    /// partial chunk is exposed separately by the iterator type).
    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<T> {
        BitChunksExactMut::new(&mut self.buffer, self.length)
    }

    /// Returns whether any position is set in both `self` and `other`.
    pub fn intersects_with(&self, other: &Self) -> bool {
        intersects_with_mut(self, other)
    }

    /// Converts this [`MutableBitmap`] into an immutable [`Bitmap`].
    pub fn freeze(self) -> Bitmap {
        self.into()
    }
395}
396
impl From<MutableBitmap> for Bitmap {
    #[inline]
    fn from(buffer: MutableBitmap) -> Self {
        // `try_new` cannot fail here: the `MutableBitmap` invariant guarantees
        // the buffer holds at least `length` bits.
        Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
    }
}
403
impl From<MutableBitmap> for Option<Bitmap> {
    /// Converts to `Some(bitmap)` only when at least one bit is unset;
    /// a fully-set bitmap becomes `None` (no validity mask needed).
    #[inline]
    fn from(buffer: MutableBitmap) -> Self {
        let unset_bits = buffer.unset_bits();
        if unset_bits > 0 {
            // SAFETY: the `MutableBitmap` invariant guarantees the buffer
            // holds `length` bits, and `unset_bits` was computed just above.
            let bitmap = unsafe {
                Bitmap::from_inner_unchecked(
                    SharedStorage::from_vec(buffer.buffer),
                    0,
                    buffer.length,
                    Some(unset_bits),
                )
            };
            Some(bitmap)
        } else {
            None
        }
    }
}
424
impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
    #[inline]
    fn from(slice: P) -> Self {
        // Slices have an exact length, so the trusted-len fast path applies.
        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}
431
impl Extend<bool> for MutableBitmap {
    fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
        let mut iterator = iter.into_iter();

        // Work on local copies to avoid repeated field accesses in the hot loop.
        let mut buffer = std::mem::take(&mut self.buffer);
        let mut length = std::mem::take(&mut self.length);

        // Pre-reserve based on the lower size hint, in bytes (rounded up).
        let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
        buffer.reserve(byte_capacity);

        loop {
            // Pack up to 8 bits into `byte_accum`, LSB-first; `mask` walks the
            // bit positions and becomes 0 after the eighth shift.
            let mut exhausted = false;
            let mut byte_accum: u8 = 0;
            let mut mask: u8 = 1;

            while mask != 0 {
                if let Some(value) = iterator.next() {
                    length += 1;
                    byte_accum |= match value {
                        true => mask,
                        false => 0,
                    };
                    mask <<= 1;
                } else {
                    exhausted = true;
                    break;
                }
            }

            // `mask == 1` means the iterator ended before yielding any bit for
            // this byte — nothing to push.
            if exhausted && mask == 1 {
                break;
            }

            // Grow if full, re-consulting the size hint which may have improved.
            if buffer.len() == buffer.capacity() {
                let additional_byte_capacity = 1usize.saturating_add(
                    iterator.size_hint().0.saturating_add(7) / 8, );
                buffer.reserve(additional_byte_capacity)
            }

            // Push the accumulated byte, even if only partially filled.
            buffer.push(byte_accum);
            if exhausted {
                break;
            }
        }

        self.buffer = buffer;
        self.length = length;
    }
}
487
488impl FromIterator<bool> for MutableBitmap {
489 fn from_iter<I>(iter: I) -> Self
490 where
491 I: IntoIterator<Item = bool>,
492 {
493 let mut bm = Self::new();
494 bm.extend(iter);
495 bm
496 }
497}
498
/// Packs the next 64 booleans from `iterator` into a `u64`, LSB-first
/// (item `i` becomes bit `i` of the result).
///
/// # Safety
/// The caller must guarantee that `iterator` yields at least 64 items.
#[inline]
unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
    let mut chunk = 0u64;
    for bit in 0..64 {
        let Some(is_set) = iterator.next() else {
            // SAFETY: the caller guarantees this branch is never taken.
            unsafe { unreachable_unchecked() }
        };
        chunk |= (is_set as u64) << bit;
    }
    chunk
}
524
/// Packs the next `len` (`<= 8`) booleans from `iterator` into a byte, LSB-first.
///
/// # Safety
/// The caller must guarantee that `iterator` yields at least `len` items.
#[inline]
unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
    let mut packed = 0u8;
    let mut bit = 1u8;
    for _ in 0..len {
        let Some(is_set) = iterator.next() else {
            // SAFETY: the caller guarantees this branch is never taken.
            unsafe { unreachable_unchecked() }
        };
        if is_set {
            packed |= bit;
        }
        bit <<= 1;
    }
    packed
}
545
/// Appends the bits of `iterator` to `buffer`, assuming `buffer` currently
/// ends on a byte boundary. Returns the number of bits appended (taken from
/// the iterator's upper size hint).
///
/// # Safety
/// The caller must guarantee that `iterator` reports an exact upper size hint
/// and yields exactly that many items (the [`TrustedLen`] contract).
#[inline]
unsafe fn extend_aligned_trusted_iter_unchecked(
    buffer: &mut Vec<u8>,
    mut iterator: impl Iterator<Item = bool>,
) -> usize {
    let additional_bits = iterator.size_hint().1.unwrap();
    let chunks = additional_bits / 64;
    let remainder = additional_bits % 64;

    // Sanity-check that the per-chunk accounting matches the byte count.
    let additional = (additional_bits + 7) / 8;
    assert_eq!(
        additional,
        chunks * 8 + remainder / 8 + (remainder % 8 > 0) as usize
    );
    buffer.reserve(additional);

    // Whole 64-bit chunks first.
    for _ in 0..chunks {
        // SAFETY: the iterator yields at least 64 more items here.
        let chunk = get_chunk_unchecked(&mut iterator);
        buffer.extend_from_slice(&chunk.to_le_bytes());
    }

    // Then the remaining whole bytes.
    for _ in 0..(remainder / 8) {
        // SAFETY: the iterator yields at least 8 more items here.
        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
        buffer.push(byte)
    }

    // And finally a trailing partial byte, if any.
    let remainder = remainder % 8;
    if remainder > 0 {
        // SAFETY: the iterator yields exactly `remainder` more items here.
        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
        buffer.push(byte)
    }
    additional_bits
}
586
587impl MutableBitmap {
    /// Extends `self` from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
        // SAFETY: `I` is `TrustedLen`, so its upper size hint is exact.
        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
    }
594
595 #[inline]
600 pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
601 &mut self,
602 mut iterator: I,
603 ) {
604 let mut length = iterator.size_hint().1.unwrap();
606
607 let bit_offset = self.length % 8;
608
609 if length < 8 - bit_offset {
610 if bit_offset == 0 {
611 self.buffer.push(0);
612 }
613 let byte = self.buffer.last_mut().unwrap();
615 let mut i = bit_offset;
616 for value in iterator {
617 *byte = set_bit_in_byte(*byte, i, value);
618 i += 1;
619 }
620 self.length += length;
621 return;
622 }
623
624 if bit_offset != 0 {
628 let byte = self.buffer.last_mut().unwrap();
630 (bit_offset..8).for_each(|i| {
631 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
632 });
633 self.length += 8 - bit_offset;
634 length -= 8 - bit_offset;
635 }
636
637 debug_assert_eq!(self.length % 8, 0);
639
640 unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
641 self.length += length;
642 }
643
    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
    ///
    /// # Safety
    /// The iterator's upper size hint must be exact and the iterator must
    /// yield exactly that many items (the [`TrustedLen`] contract).
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
    where
        I: Iterator<Item = bool>,
    {
        let mut buffer = Vec::<u8>::new();

        // An empty buffer is trivially byte-aligned.
        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);

        Self { buffer, length }
    }

    /// Creates a new [`MutableBitmap`] from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
    where
        I: TrustedLen<Item = bool>,
    {
        // SAFETY: `I` is `TrustedLen`, so its upper size hint is exact.
        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
    }

    /// Creates a new [`MutableBitmap`] from a [`TrustedLen`] iterator of
    /// fallible booleans, short-circuiting on the first error.
    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
    where
        I: TrustedLen<Item = std::result::Result<bool, E>>,
    {
        // SAFETY: `I` is `TrustedLen`, so its upper size hint is exact.
        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
    }

    /// Creates a new [`MutableBitmap`] from an iterator of fallible booleans,
    /// short-circuiting on the first error.
    ///
    /// # Safety
    /// The iterator's upper size hint must be exact and the iterator must
    /// yield exactly that many items (the [`TrustedLen`] contract).
    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
        mut iterator: I,
    ) -> std::result::Result<Self, E>
    where
        I: Iterator<Item = std::result::Result<bool, E>>,
    {
        let length = iterator.size_hint().1.unwrap();

        let mut buffer = vec![0u8; (length + 7) / 8];

        // Fill whole bytes first, then the trailing partial byte.
        let chunks = length / 8;
        let reminder = length % 8;

        let data = buffer.as_mut_slice();
        data[..chunks].iter_mut().try_for_each(|byte| {
            (0..8).try_for_each(|i| {
                // The trusted-len contract guarantees the item exists; `?`
                // propagates the inner error.
                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
                Ok(())
            })
        })?;

        if reminder != 0 {
            let last = &mut data[chunks];
            // The remaining items fill the low bits of the last byte.
            iterator.enumerate().try_for_each(|(i, value)| {
                *last = set_bit_in_byte(*last, i, value?);
                Ok(())
            })?;
        }

        Ok(Self { buffer, length })
    }
713
    /// Extends `self` with `length` bits from `slice` starting at byte-aligned
    /// bit `offset`, for the case where `self.length` is *not* byte-aligned.
    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
        let aligned_offset = offset / 8;
        let own_offset = self.length % 8;
        // Preconditions enforced by `extend_from_slice_unchecked`'s dispatch.
        debug_assert_eq!(offset % 8, 0);
        debug_assert!(own_offset != 0);
        let bytes_len = length.saturating_add(7) / 8;
        let items = &slice[aligned_offset..aligned_offset + bytes_len];
        let buffer = self.buffer.as_mut_slice();
        let last = &mut buffer[buffer.len() - 1];

        // Clear the padding bits at and above `own_offset`, then splice in the
        // low bits of the first input byte.
        *last &= 0b11111111u8 >> (8 - own_offset);
        *last |= items[0] << own_offset;

        if length + own_offset <= 8 {
            // Everything fit into the existing last byte.
            self.length += length;
            return;
        }
        // Bits still to append after topping up the last byte.
        let additional = length - (8 - own_offset);

        // Each new output byte combines the high bits of one input byte with
        // the low bits of the next (via `merge_reversed`); a zero byte is
        // appended so the final window has a right neighbour.
        let remaining = [items[items.len() - 1], 0];
        let bytes = items
            .windows(2)
            .chain(std::iter::once(remaining.as_ref()))
            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
            .take(additional.saturating_add(7) / 8);
        self.buffer.extend(bytes);

        self.length += length;
    }
753
754 fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
755 let aligned_offset = offset / 8;
756 let bytes_len = length.saturating_add(7) / 8;
757 let items = &slice[aligned_offset..aligned_offset + bytes_len];
758 self.buffer.extend_from_slice(items);
759 self.length += length;
760 }
761
    /// Extends the [`MutableBitmap`] from a slice of bits.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= slice.len() * 8`.
    #[inline]
    pub unsafe fn extend_from_slice_unchecked(
        &mut self,
        slice: &[u8],
        offset: usize,
        length: usize,
    ) {
        if length == 0 {
            return;
        };
        let is_aligned = self.length % 8 == 0;
        let other_is_aligned = offset % 8 == 0;
        match (is_aligned, other_is_aligned) {
            // Fast paths working on whole bytes.
            (true, true) => self.extend_aligned(slice, offset, length),
            (false, true) => self.extend_unaligned(slice, offset, length),
            // Slow path: copy bit by bit through an iterator.
            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
        }
        // Invariant check: the buffer holds exactly the bytes needed.
        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
    }

    /// Extends the [`MutableBitmap`] from a slice of bits.
    ///
    /// # Panics
    /// Panics iff `offset + length > slice.len() * 8`.
    #[inline]
    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
        assert!(offset + length <= slice.len() * 8);
        // SAFETY: bounds checked by the assert above.
        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
    }

    /// Extends the [`MutableBitmap`] from a [`BitMask`].
    #[inline]
    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
        let (slice, offset, length) = bitmask.inner();
        self.extend_from_slice(slice, offset, length)
    }

    /// Extends the [`MutableBitmap`] from a [`Bitmap`].
    #[inline]
    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
        let (slice, offset, length) = bitmap.as_slice();
        // SAFETY: `Bitmap::as_slice` returns an in-bounds
        // (slice, offset, length) triple by construction.
        unsafe {
            self.extend_from_slice_unchecked(slice, offset, length);
        }
    }

    /// Returns the bytes of this [`MutableBitmap`].
    ///
    /// Note that the last byte may only be partially used.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        let len = (self.length).saturating_add(7) / 8;
        &self.buffer[..len]
    }

    /// Returns the bytes of this [`MutableBitmap`] mutably.
    ///
    /// Note that the last byte may only be partially used.
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        let len = (self.length).saturating_add(7) / 8;
        &mut self.buffer[..len]
    }
835}
836
impl Default for MutableBitmap {
    fn default() -> Self {
        // An empty bitmap: zero bits, no allocation.
        Self::new()
    }
}
842
impl<'a> IntoIterator for &'a MutableBitmap {
    type Item = bool;
    type IntoIter = BitmapIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        // Iterate over all `length` bits, borrowing the buffer.
        BitmapIter::<'a>::new(&self.buffer, 0, self.length)
    }
}