polars_arrow/bitmap/
mutable.rs

1use std::hint::unreachable_unchecked;
2
3use polars_error::{polars_bail, PolarsResult};
4use polars_utils::vec::PushUnchecked;
5
6use super::bitmask::BitMask;
7use super::utils::{count_zeros, fmt, BitChunk, BitChunks, BitChunksExactMut, BitmapIter};
8use super::{intersects_with_mut, Bitmap};
9use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
10use crate::storage::SharedStorage;
11use crate::trusted_len::TrustedLen;
12
13/// A container of booleans. [`MutableBitmap`] is semantically equivalent
14/// to [`Vec<bool>`].
15///
16/// The two main differences against [`Vec<bool>`] is that each element stored as a single bit,
17/// thereby:
18/// * it uses 8x less memory
19/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetics).
20///
21/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
22/// # Examples
23/// ```
24/// use polars_arrow::bitmap::MutableBitmap;
25///
26/// let bitmap = MutableBitmap::from([true, false, true]);
27/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
28///
29/// // creation directly from bytes
30/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap();
31/// // note: the first bit is the left-most of the first byte
32/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
33/// // we can also get the slice:
34/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
35/// // debug helps :)
36/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
37///
38/// // It supports mutation in place
39/// bitmap.set(0, false);
40/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
41/// // and `O(1)` random access
42/// assert_eq!(bitmap.get(0), false);
43/// ```
44/// # Implementation
45/// This container is internally a [`Vec<u8>`].
46#[derive(Clone)]
47pub struct MutableBitmap {
48    buffer: Vec<u8>,
49    // invariant: length.saturating_add(7) / 8 == buffer.len();
50    length: usize,
51}
52
53impl std::fmt::Debug for MutableBitmap {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        fmt(&self.buffer, 0, self.len(), f)
56    }
57}
58
59impl PartialEq for MutableBitmap {
60    fn eq(&self, other: &Self) -> bool {
61        self.iter().eq(other.iter())
62    }
63}
64
65impl MutableBitmap {
66    /// Initializes an empty [`MutableBitmap`].
67    #[inline]
68    pub fn new() -> Self {
69        Self {
70            buffer: Vec::new(),
71            length: 0,
72        }
73    }
74
75    /// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length.
76    /// # Errors
77    /// This function errors iff `length > bytes.len() * 8`
78    #[inline]
79    pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
80        if length > bytes.len().saturating_mul(8) {
81            polars_bail!(InvalidOperation:
82                "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
83                length,
84                bytes.len().saturating_mul(8)
85            )
86        }
87
88        // Ensure invariant holds.
89        let min_byte_length_needed = length.div_ceil(8);
90        bytes.drain(min_byte_length_needed..);
91        Ok(Self {
92            length,
93            buffer: bytes,
94        })
95    }
96
97    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length.
98    /// This function is `O(1)`.
99    /// # Panic
100    /// Panics iff the length is larger than the length of the buffer times 8.
101    #[inline]
102    pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
103        Self::try_new(buffer, length).unwrap()
104    }
105
106    /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
107    #[inline]
108    pub fn with_capacity(capacity: usize) -> Self {
109        Self {
110            buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
111            length: 0,
112        }
113    }
114
115    /// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary.
116    #[inline]
117    pub fn push(&mut self, value: bool) {
118        if self.length % 8 == 0 {
119            self.buffer.push(0);
120        }
121        let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
122        *byte = set_bit_in_byte(*byte, self.length % 8, value);
123        self.length += 1;
124    }
125
126    /// Pop the last bit from the [`MutableBitmap`].
127    /// Note if the [`MutableBitmap`] is empty, this method will return None.
128    #[inline]
129    pub fn pop(&mut self) -> Option<bool> {
130        if self.is_empty() {
131            return None;
132        }
133
134        self.length -= 1;
135        let value = unsafe { self.get_unchecked(self.length) };
136        if self.length % 8 == 0 {
137            self.buffer.pop();
138        }
139        Some(value)
140    }
141
142    /// Returns whether the position `index` is set.
143    /// # Panics
144    /// Panics iff `index >= self.len()`.
145    #[inline]
146    pub fn get(&self, index: usize) -> bool {
147        assert!(index < self.len());
148        unsafe { self.get_unchecked(index) }
149    }
150
151    /// Returns whether the position `index` is set.
152    ///
153    /// # Safety
154    /// The caller must ensure `index < self.len()`.
155    #[inline]
156    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
157        get_bit_unchecked(&self.buffer, index)
158    }
159
160    /// Sets the position `index` to `value`
161    /// # Panics
162    /// Panics iff `index >= self.len()`.
163    #[inline]
164    pub fn set(&mut self, index: usize, value: bool) {
165        assert!(index < self.len());
166        unsafe {
167            self.set_unchecked(index, value);
168        }
169    }
170
171    /// Sets the position `index` to the OR of its original value and `value`.
172    ///
173    /// # Safety
174    /// It's undefined behavior if index >= self.len().
175    #[inline]
176    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
177        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
178    }
179
180    /// Sets the position `index` to the AND of its original value and `value`.
181    ///
182    /// # Safety
183    /// It's undefined behavior if index >= self.len().
184    #[inline]
185    pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
186        *self.buffer.get_unchecked_mut(index / 8) &= (value as u8) << (index % 8);
187    }
188
189    /// constructs a new iterator over the bits of [`MutableBitmap`].
190    pub fn iter(&self) -> BitmapIter {
191        BitmapIter::new(&self.buffer, 0, self.length)
192    }
193
194    /// Empties the [`MutableBitmap`].
195    #[inline]
196    pub fn clear(&mut self) {
197        self.length = 0;
198        self.buffer.clear();
199    }
200
201    /// Extends [`MutableBitmap`] by `additional` values of constant `value`.
202    /// # Implementation
203    /// This function is an order of magnitude faster than pushing element by element.
204    #[inline]
205    pub fn extend_constant(&mut self, additional: usize, value: bool) {
206        if additional == 0 {
207            return;
208        }
209
210        if value {
211            self.extend_set(additional)
212        } else {
213            self.extend_unset(additional)
214        }
215    }
216
217    /// Resizes the [`MutableBitmap`] to the specified length, inserting value
218    /// if the length is bigger than the current length.
219    pub fn resize(&mut self, length: usize, value: bool) {
220        if let Some(additional) = length.checked_sub(self.len()) {
221            self.extend_constant(additional, value);
222        } else {
223            self.buffer.truncate(length.saturating_add(7) / 8);
224            self.length = length;
225        }
226    }
227
228    /// Initializes a zeroed [`MutableBitmap`].
229    #[inline]
230    pub fn from_len_zeroed(length: usize) -> Self {
231        Self {
232            buffer: vec![0; length.saturating_add(7) / 8],
233            length,
234        }
235    }
236
237    /// Initializes a [`MutableBitmap`] with all values set to valid/ true.
238    #[inline]
239    pub fn from_len_set(length: usize) -> Self {
240        Self {
241            buffer: vec![u8::MAX; length.saturating_add(7) / 8],
242            length,
243        }
244    }
245
246    /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
247    #[inline(always)]
248    pub fn reserve(&mut self, additional: usize) {
249        self.buffer
250            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
251    }
252
253    /// Returns the capacity of [`MutableBitmap`] in number of bits.
254    #[inline]
255    pub fn capacity(&self) -> usize {
256        self.buffer.capacity() * 8
257    }
258
259    /// Pushes a new bit to the [`MutableBitmap`]
260    ///
261    /// # Safety
262    /// The caller must ensure that the [`MutableBitmap`] has sufficient capacity.
263    #[inline]
264    pub unsafe fn push_unchecked(&mut self, value: bool) {
265        if self.length % 8 == 0 {
266            self.buffer.push_unchecked(0);
267        }
268        let byte = self.buffer.last_mut().unwrap_unchecked();
269        *byte = set_bit_in_byte(*byte, self.length % 8, value);
270        self.length += 1;
271    }
272
273    /// Returns the number of unset bits on this [`MutableBitmap`].
274    ///
275    /// Guaranteed to be `<= self.len()`.
276    /// # Implementation
277    /// This function is `O(N)`
278    pub fn unset_bits(&self) -> usize {
279        count_zeros(&self.buffer, 0, self.length)
280    }
281
282    /// Returns the number of set bits on this [`MutableBitmap`].
283    ///
284    /// Guaranteed to be `<= self.len()`.
285    /// # Implementation
286    /// This function is `O(N)`
287    pub fn set_bits(&self) -> usize {
288        self.length - self.unset_bits()
289    }
290
291    /// Returns the length of the [`MutableBitmap`].
292    #[inline]
293    pub fn len(&self) -> usize {
294        self.length
295    }
296
297    /// Returns whether [`MutableBitmap`] is empty.
298    #[inline]
299    pub fn is_empty(&self) -> bool {
300        self.len() == 0
301    }
302
303    /// # Safety
304    /// The caller must ensure that the [`MutableBitmap`] was properly initialized up to `len`.
305    #[inline]
306    pub(crate) unsafe fn set_len(&mut self, len: usize) {
307        self.buffer.set_len(len.saturating_add(7) / 8);
308        self.length = len;
309    }
310
311    fn extend_set(&mut self, mut additional: usize) {
312        let offset = self.length % 8;
313        let added = if offset != 0 {
314            // offset != 0 => at least one byte in the buffer
315            let last_index = self.buffer.len() - 1;
316            let last = &mut self.buffer[last_index];
317
318            let remaining = 0b11111111u8;
319            let remaining = remaining >> 8usize.saturating_sub(additional);
320            let remaining = remaining << offset;
321            *last |= remaining;
322            std::cmp::min(additional, 8 - offset)
323        } else {
324            0
325        };
326        self.length += added;
327        additional = additional.saturating_sub(added);
328        if additional > 0 {
329            debug_assert_eq!(self.length % 8, 0);
330            let existing = self.length.saturating_add(7) / 8;
331            let required = (self.length + additional).saturating_add(7) / 8;
332            // add remaining as full bytes
333            self.buffer
334                .extend(std::iter::repeat(0b11111111u8).take(required - existing));
335            self.length += additional;
336        }
337    }
338
339    fn extend_unset(&mut self, mut additional: usize) {
340        let offset = self.length % 8;
341        let added = if offset != 0 {
342            // offset != 0 => at least one byte in the buffer
343            let last_index = self.buffer.len() - 1;
344            let last = &mut self.buffer[last_index];
345            *last &= 0b11111111u8 >> (8 - offset); // unset them
346            std::cmp::min(additional, 8 - offset)
347        } else {
348            0
349        };
350        self.length += added;
351        additional = additional.saturating_sub(added);
352        if additional > 0 {
353            debug_assert_eq!(self.length % 8, 0);
354            self.buffer
355                .resize((self.length + additional).saturating_add(7) / 8, 0);
356            self.length += additional;
357        }
358    }
359
360    /// Sets the position `index` to `value`
361    ///
362    /// # Safety
363    /// Caller must ensure that `index < self.len()`
364    #[inline]
365    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
366        debug_assert!(index < self.len());
367        let byte = self.buffer.get_unchecked_mut(index / 8);
368        *byte = set_bit_in_byte(*byte, index % 8, value);
369    }
370
371    /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
372    pub fn shrink_to_fit(&mut self) {
373        self.buffer.shrink_to_fit();
374    }
375
376    /// Returns an iterator over bits in bit chunks [`BitChunk`].
377    ///
378    /// This iterator is useful to operate over multiple bits via e.g. bitwise.
379    pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
380        BitChunks::new(&self.buffer, 0, self.length)
381    }
382
383    /// Returns an iterator over mutable slices, [`BitChunksExactMut`]
384    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<T> {
385        BitChunksExactMut::new(&mut self.buffer, self.length)
386    }
387
388    pub fn intersects_with(&self, other: &Self) -> bool {
389        intersects_with_mut(self, other)
390    }
391
392    pub fn freeze(self) -> Bitmap {
393        self.into()
394    }
395}
396
397impl From<MutableBitmap> for Bitmap {
398    #[inline]
399    fn from(buffer: MutableBitmap) -> Self {
400        Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
401    }
402}
403
404impl From<MutableBitmap> for Option<Bitmap> {
405    #[inline]
406    fn from(buffer: MutableBitmap) -> Self {
407        let unset_bits = buffer.unset_bits();
408        if unset_bits > 0 {
409            // SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
410            let bitmap = unsafe {
411                Bitmap::from_inner_unchecked(
412                    SharedStorage::from_vec(buffer.buffer),
413                    0,
414                    buffer.length,
415                    Some(unset_bits),
416                )
417            };
418            Some(bitmap)
419        } else {
420            None
421        }
422    }
423}
424
425impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
426    #[inline]
427    fn from(slice: P) -> Self {
428        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
429    }
430}
431
432impl Extend<bool> for MutableBitmap {
433    fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
434        let mut iterator = iter.into_iter();
435
436        let mut buffer = std::mem::take(&mut self.buffer);
437        let mut length = std::mem::take(&mut self.length);
438
439        let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
440        buffer.reserve(byte_capacity);
441
442        loop {
443            let mut exhausted = false;
444            let mut byte_accum: u8 = 0;
445            let mut mask: u8 = 1;
446
447            //collect (up to) 8 bits into a byte
448            while mask != 0 {
449                if let Some(value) = iterator.next() {
450                    length += 1;
451                    byte_accum |= match value {
452                        true => mask,
453                        false => 0,
454                    };
455                    mask <<= 1;
456                } else {
457                    exhausted = true;
458                    break;
459                }
460            }
461
462            // break if the iterator was exhausted before it provided a bool for this byte
463            if exhausted && mask == 1 {
464                break;
465            }
466
467            //ensure we have capacity to write the byte
468            if buffer.len() == buffer.capacity() {
469                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
470                let additional_byte_capacity = 1usize.saturating_add(
471                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
472                );
473                buffer.reserve(additional_byte_capacity)
474            }
475
476            // Soundness: capacity was allocated above
477            buffer.push(byte_accum);
478            if exhausted {
479                break;
480            }
481        }
482
483        self.buffer = buffer;
484        self.length = length;
485    }
486}
487
488impl FromIterator<bool> for MutableBitmap {
489    fn from_iter<I>(iter: I) -> Self
490    where
491        I: IntoIterator<Item = bool>,
492    {
493        let mut bm = Self::new();
494        bm.extend(iter);
495        bm
496    }
497}
498
499// [7, 6, 5, 4, 3, 2, 1, 0], [15, 14, 13, 12, 11, 10, 9, 8]
500// [00000001_00000000_00000000_00000000_...] // u64
501/// # Safety
502/// The iterator must be trustedLen and its len must be least `len`.
503#[inline]
504unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
505    let mut byte = 0u64;
506    let mut mask;
507    for i in 0..8 {
508        mask = 1u64 << (8 * i);
509        for _ in 0..8 {
510            let value = match iterator.next() {
511                Some(value) => value,
512                None => unsafe { unreachable_unchecked() },
513            };
514
515            byte |= match value {
516                true => mask,
517                false => 0,
518            };
519            mask <<= 1;
520        }
521    }
522    byte
523}
524
525/// # Safety
526/// The iterator must be trustedLen and its len must be least `len`.
527#[inline]
528unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
529    let mut byte_accum: u8 = 0;
530    let mut mask: u8 = 1;
531    for _ in 0..len {
532        let value = match iterator.next() {
533            Some(value) => value,
534            None => unsafe { unreachable_unchecked() },
535        };
536
537        byte_accum |= match value {
538            true => mask,
539            false => 0,
540        };
541        mask <<= 1;
542    }
543    byte_accum
544}
545
546/// Extends the [`Vec<u8>`] from `iterator`
547/// # Safety
548/// The iterator MUST be [`TrustedLen`].
549#[inline]
550unsafe fn extend_aligned_trusted_iter_unchecked(
551    buffer: &mut Vec<u8>,
552    mut iterator: impl Iterator<Item = bool>,
553) -> usize {
554    let additional_bits = iterator.size_hint().1.unwrap();
555    let chunks = additional_bits / 64;
556    let remainder = additional_bits % 64;
557
558    let additional = (additional_bits + 7) / 8;
559    assert_eq!(
560        additional,
561        // a hint of how the following calculation will be done
562        chunks * 8 + remainder / 8 + (remainder % 8 > 0) as usize
563    );
564    buffer.reserve(additional);
565
566    // chunks of 64 bits
567    for _ in 0..chunks {
568        let chunk = get_chunk_unchecked(&mut iterator);
569        buffer.extend_from_slice(&chunk.to_le_bytes());
570    }
571
572    // remaining complete bytes
573    for _ in 0..(remainder / 8) {
574        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
575        buffer.push(byte)
576    }
577
578    // remaining bits
579    let remainder = remainder % 8;
580    if remainder > 0 {
581        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
582        buffer.push(byte)
583    }
584    additional_bits
585}
586
587impl MutableBitmap {
588    /// Extends `self` from a [`TrustedLen`] iterator.
589    #[inline]
590    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
591        // SAFETY: I: TrustedLen
592        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
593    }
594
595    /// Extends `self` from an iterator of trusted len.
596    ///
597    /// # Safety
598    /// The caller must guarantee that the iterator has a trusted len.
599    #[inline]
600    pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
601        &mut self,
602        mut iterator: I,
603    ) {
604        // the length of the iterator throughout this function.
605        let mut length = iterator.size_hint().1.unwrap();
606
607        let bit_offset = self.length % 8;
608
609        if length < 8 - bit_offset {
610            if bit_offset == 0 {
611                self.buffer.push(0);
612            }
613            // the iterator will not fill the last byte
614            let byte = self.buffer.last_mut().unwrap();
615            let mut i = bit_offset;
616            for value in iterator {
617                *byte = set_bit_in_byte(*byte, i, value);
618                i += 1;
619            }
620            self.length += length;
621            return;
622        }
623
624        // at this point we know that length will hit a byte boundary and thus
625        // increase the buffer.
626
627        if bit_offset != 0 {
628            // we are in the middle of a byte; lets finish it
629            let byte = self.buffer.last_mut().unwrap();
630            (bit_offset..8).for_each(|i| {
631                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
632            });
633            self.length += 8 - bit_offset;
634            length -= 8 - bit_offset;
635        }
636
637        // everything is aligned; proceed with the bulk operation
638        debug_assert_eq!(self.length % 8, 0);
639
640        unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
641        self.length += length;
642    }
643
644    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
645    ///
646    /// # Safety
647    /// The iterator must report an accurate length.
648    #[inline]
649    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
650    where
651        I: Iterator<Item = bool>,
652    {
653        let mut buffer = Vec::<u8>::new();
654
655        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
656
657        Self { buffer, length }
658    }
659
660    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
661    #[inline]
662    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
663    where
664        I: TrustedLen<Item = bool>,
665    {
666        // SAFETY: Iterator is `TrustedLen`
667        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
668    }
669
670    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
671    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
672    where
673        I: TrustedLen<Item = std::result::Result<bool, E>>,
674    {
675        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
676    }
677
678    /// Creates a new [`MutableBitmap`] from an falible iterator of booleans.
679    ///
680    /// # Safety
681    /// The caller must guarantee that the iterator is `TrustedLen`.
682    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
683        mut iterator: I,
684    ) -> std::result::Result<Self, E>
685    where
686        I: Iterator<Item = std::result::Result<bool, E>>,
687    {
688        let length = iterator.size_hint().1.unwrap();
689
690        let mut buffer = vec![0u8; (length + 7) / 8];
691
692        let chunks = length / 8;
693        let reminder = length % 8;
694
695        let data = buffer.as_mut_slice();
696        data[..chunks].iter_mut().try_for_each(|byte| {
697            (0..8).try_for_each(|i| {
698                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
699                Ok(())
700            })
701        })?;
702
703        if reminder != 0 {
704            let last = &mut data[chunks];
705            iterator.enumerate().try_for_each(|(i, value)| {
706                *last = set_bit_in_byte(*last, i, value?);
707                Ok(())
708            })?;
709        }
710
711        Ok(Self { buffer, length })
712    }
713
714    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
715        // e.g.
716        // [a, b, --101010]     <- to be extended
717        // [00111111, 11010101] <- to extend
718        // [a, b, 11101010, --001111] expected result
719
720        let aligned_offset = offset / 8;
721        let own_offset = self.length % 8;
722        debug_assert_eq!(offset % 8, 0); // assumed invariant
723        debug_assert!(own_offset != 0); // assumed invariant
724
725        let bytes_len = length.saturating_add(7) / 8;
726        let items = &slice[aligned_offset..aligned_offset + bytes_len];
727        // self has some offset => we need to shift all `items`, and merge the first
728        let buffer = self.buffer.as_mut_slice();
729        let last = &mut buffer[buffer.len() - 1];
730
731        // --101010 | 00111111 << 6 = 11101010
732        // erase previous
733        *last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
734        *last |= items[0] << own_offset;
735
736        if length + own_offset <= 8 {
737            // no new bytes needed
738            self.length += length;
739            return;
740        }
741        let additional = length - (8 - own_offset);
742
743        let remaining = [items[items.len() - 1], 0];
744        let bytes = items
745            .windows(2)
746            .chain(std::iter::once(remaining.as_ref()))
747            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
748            .take(additional.saturating_add(7) / 8);
749        self.buffer.extend(bytes);
750
751        self.length += length;
752    }
753
754    fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
755        let aligned_offset = offset / 8;
756        let bytes_len = length.saturating_add(7) / 8;
757        let items = &slice[aligned_offset..aligned_offset + bytes_len];
758        self.buffer.extend_from_slice(items);
759        self.length += length;
760    }
761
762    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
763    /// This is the fastest way to extend a [`MutableBitmap`].
764    /// # Implementation
765    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
766    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
767    ///
768    /// # Safety
769    /// Caller must ensure `offset + length <= slice.len() * 8`
770    #[inline]
771    pub unsafe fn extend_from_slice_unchecked(
772        &mut self,
773        slice: &[u8],
774        offset: usize,
775        length: usize,
776    ) {
777        if length == 0 {
778            return;
779        };
780        let is_aligned = self.length % 8 == 0;
781        let other_is_aligned = offset % 8 == 0;
782        match (is_aligned, other_is_aligned) {
783            (true, true) => self.extend_aligned(slice, offset, length),
784            (false, true) => self.extend_unaligned(slice, offset, length),
785            // todo: further optimize the other branches.
786            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
787        }
788        // internal invariant:
789        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
790    }
791
792    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
793    /// This is the fastest way to extend a [`MutableBitmap`].
794    /// # Implementation
795    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
796    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
797    #[inline]
798    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
799        assert!(offset + length <= slice.len() * 8);
800        // SAFETY: invariant is asserted
801        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
802    }
803
804    #[inline]
805    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
806        let (slice, offset, length) = bitmask.inner();
807        self.extend_from_slice(slice, offset, length)
808    }
809
810    /// Extends the [`MutableBitmap`] from a [`Bitmap`].
811    #[inline]
812    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
813        let (slice, offset, length) = bitmap.as_slice();
814        // SAFETY: bitmap.as_slice adheres to the invariant
815        unsafe {
816            self.extend_from_slice_unchecked(slice, offset, length);
817        }
818    }
819
820    /// Returns the slice of bytes of this [`MutableBitmap`].
821    /// Note that the last byte may not be fully used.
822    #[inline]
823    pub fn as_slice(&self) -> &[u8] {
824        let len = (self.length).saturating_add(7) / 8;
825        &self.buffer[..len]
826    }
827
828    /// Returns the slice of bytes of this [`MutableBitmap`].
829    /// Note that the last byte may not be fully used.
830    #[inline]
831    pub fn as_mut_slice(&mut self) -> &mut [u8] {
832        let len = (self.length).saturating_add(7) / 8;
833        &mut self.buffer[..len]
834    }
835}
836
837impl Default for MutableBitmap {
838    fn default() -> Self {
839        Self::new()
840    }
841}
842
843impl<'a> IntoIterator for &'a MutableBitmap {
844    type Item = bool;
845    type IntoIter = BitmapIter<'a>;
846
847    fn into_iter(self) -> Self::IntoIter {
848        BitmapIter::<'a>::new(&self.buffer, 0, self.length)
849    }
850}