arrow_buffer/buffer/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_chunk_iterator::BitChunks;
19use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
20use crate::{
21    bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
22    BooleanBufferBuilder, Buffer, MutableBuffer,
23};
24
25use std::ops::{BitAnd, BitOr, BitXor, Not};
26
27/// A slice-able [`Buffer`] containing bit-packed booleans
28///
29/// `BooleanBuffer`s can be creating using [`BooleanBufferBuilder`]
30///
31/// # See Also
32///
33/// * [`NullBuffer`] for representing null values in Arrow arrays
34///
35/// [`NullBuffer`]: crate::NullBuffer
36#[derive(Debug, Clone, Eq)]
37pub struct BooleanBuffer {
38    buffer: Buffer,
39    offset: usize,
40    len: usize,
41}
42
43impl PartialEq for BooleanBuffer {
44    fn eq(&self, other: &Self) -> bool {
45        if self.len != other.len {
46            return false;
47        }
48
49        let lhs = self.bit_chunks().iter_padded();
50        let rhs = other.bit_chunks().iter_padded();
51        lhs.zip(rhs).all(|(a, b)| a == b)
52    }
53}
54
55impl BooleanBuffer {
56    /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
57    ///
58    /// # Panics
59    ///
60    /// This method will panic if `buffer` is not large enough
61    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
62        let total_len = offset.saturating_add(len);
63        let buffer_len = buffer.len();
64        let bit_len = buffer_len.saturating_mul(8);
65        assert!(
66            total_len <= bit_len,
67            "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})"
68        );
69        Self {
70            buffer,
71            offset,
72            len,
73        }
74    }
75
76    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
77    pub fn new_set(length: usize) -> Self {
78        let mut builder = BooleanBufferBuilder::new(length);
79        builder.append_n(length, true);
80        builder.finish()
81    }
82
83    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
84    pub fn new_unset(length: usize) -> Self {
85        let buffer = MutableBuffer::new_null(length).into_buffer();
86        Self {
87            buffer,
88            offset: 0,
89            len: length,
90        }
91    }
92
93    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
94    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
95        let buffer = MutableBuffer::collect_bool(len, f);
96        Self::new(buffer.into(), 0, len)
97    }
98
99    /// Returns the number of set bits in this buffer
100    pub fn count_set_bits(&self) -> usize {
101        self.buffer.count_set_bits_offset(self.offset, self.len)
102    }
103
104    /// Returns a `BitChunks` instance which can be used to iterate over
105    /// this buffer's bits in `u64` chunks
106    #[inline]
107    pub fn bit_chunks(&self) -> BitChunks {
108        BitChunks::new(self.values(), self.offset, self.len)
109    }
110
111    /// Returns the offset of this [`BooleanBuffer`] in bits
112    #[inline]
113    pub fn offset(&self) -> usize {
114        self.offset
115    }
116
117    /// Returns the length of this [`BooleanBuffer`] in bits
118    #[inline]
119    pub fn len(&self) -> usize {
120        self.len
121    }
122
123    /// Returns true if this [`BooleanBuffer`] is empty
124    #[inline]
125    pub fn is_empty(&self) -> bool {
126        self.len == 0
127    }
128
129    /// Free up unused memory.
130    pub fn shrink_to_fit(&mut self) {
131        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
132        self.buffer.shrink_to_fit();
133    }
134
135    /// Returns the boolean value at index `i`.
136    ///
137    /// # Panics
138    ///
139    /// Panics if `i >= self.len()`
140    #[inline]
141    pub fn value(&self, idx: usize) -> bool {
142        assert!(idx < self.len);
143        unsafe { self.value_unchecked(idx) }
144    }
145
146    /// Returns the boolean value at index `i`.
147    ///
148    /// # Safety
149    /// This doesn't check bounds, the caller must ensure that index < self.len()
150    #[inline]
151    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
152        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
153    }
154
155    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
156    #[inline]
157    pub fn values(&self) -> &[u8] {
158        &self.buffer
159    }
160
161    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
162    pub fn slice(&self, offset: usize, len: usize) -> Self {
163        assert!(
164            offset.saturating_add(len) <= self.len,
165            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
166        );
167        Self {
168            buffer: self.buffer.clone(),
169            offset: self.offset + offset,
170            len,
171        }
172    }
173
174    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
175    ///
176    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
177    pub fn sliced(&self) -> Buffer {
178        self.buffer.bit_slice(self.offset, self.len)
179    }
180
181    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
182    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
183    /// return false when the arrays are logically equal
184    pub fn ptr_eq(&self, other: &Self) -> bool {
185        self.buffer.as_ptr() == other.buffer.as_ptr()
186            && self.offset == other.offset
187            && self.len == other.len
188    }
189
190    /// Returns the inner [`Buffer`]
191    #[inline]
192    pub fn inner(&self) -> &Buffer {
193        &self.buffer
194    }
195
196    /// Returns the inner [`Buffer`], consuming self
197    pub fn into_inner(self) -> Buffer {
198        self.buffer
199    }
200
201    /// Returns an iterator over the bits in this [`BooleanBuffer`]
202    pub fn iter(&self) -> BitIterator<'_> {
203        self.into_iter()
204    }
205
206    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
207    pub fn set_indices(&self) -> BitIndexIterator<'_> {
208        BitIndexIterator::new(self.values(), self.offset, self.len)
209    }
210
211    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
212    pub fn set_slices(&self) -> BitSliceIterator<'_> {
213        BitSliceIterator::new(self.values(), self.offset, self.len)
214    }
215}
216
217impl Not for &BooleanBuffer {
218    type Output = BooleanBuffer;
219
220    fn not(self) -> Self::Output {
221        BooleanBuffer {
222            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
223            offset: 0,
224            len: self.len,
225        }
226    }
227}
228
229impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
230    type Output = BooleanBuffer;
231
232    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
233        assert_eq!(self.len, rhs.len);
234        BooleanBuffer {
235            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
236            offset: 0,
237            len: self.len,
238        }
239    }
240}
241
242impl BitOr<&BooleanBuffer> for &BooleanBuffer {
243    type Output = BooleanBuffer;
244
245    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
246        assert_eq!(self.len, rhs.len);
247        BooleanBuffer {
248            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
249            offset: 0,
250            len: self.len,
251        }
252    }
253}
254
255impl BitXor<&BooleanBuffer> for &BooleanBuffer {
256    type Output = BooleanBuffer;
257
258    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
259        assert_eq!(self.len, rhs.len);
260        BooleanBuffer {
261            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
262            offset: 0,
263            len: self.len,
264        }
265    }
266}
267
268impl<'a> IntoIterator for &'a BooleanBuffer {
269    type Item = bool;
270    type IntoIter = BitIterator<'a>;
271
272    fn into_iter(self) -> Self::IntoIter {
273        BitIterator::new(self.values(), self.offset, self.len)
274    }
275}
276
277impl From<&[bool]> for BooleanBuffer {
278    fn from(value: &[bool]) -> Self {
279        let mut builder = BooleanBufferBuilder::new(value.len());
280        builder.append_slice(value);
281        builder.finish()
282    }
283}
284
285impl From<Vec<bool>> for BooleanBuffer {
286    fn from(value: Vec<bool>) -> Self {
287        value.as_slice().into()
288    }
289}
290
291impl FromIterator<bool> for BooleanBuffer {
292    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
293        let iter = iter.into_iter();
294        let (hint, _) = iter.size_hint();
295        let mut builder = BooleanBufferBuilder::new(hint);
296        iter.for_each(|b| builder.append(b));
297        builder.finish()
298    }
299}
300
301#[cfg(test)]
302mod tests {
303    use super::*;
304
305    #[test]
306    fn test_boolean_new() {
307        let bytes = &[0, 1, 2, 3, 4];
308        let buf = Buffer::from(bytes);
309        let offset = 0;
310        let len = 24;
311
312        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
313        assert_eq!(bytes, boolean_buf.values());
314        assert_eq!(offset, boolean_buf.offset());
315        assert_eq!(len, boolean_buf.len());
316
317        assert_eq!(2, boolean_buf.count_set_bits());
318        assert_eq!(&buf, boolean_buf.inner());
319        assert_eq!(buf, boolean_buf.clone().into_inner());
320
321        assert!(!boolean_buf.is_empty())
322    }
323
324    #[test]
325    fn test_boolean_data_equality() {
326        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
327        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
328        assert_eq!(boolean_buf1, boolean_buf2);
329
330        // slice with same offset and same length should still preserve equality
331        let boolean_buf3 = boolean_buf1.slice(8, 16);
332        assert_ne!(boolean_buf1, boolean_buf3);
333        let boolean_buf4 = boolean_buf1.slice(0, 32);
334        assert_eq!(boolean_buf1, boolean_buf4);
335
336        // unequal because of different elements
337        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
338        assert_ne!(boolean_buf1, boolean_buf2);
339
340        // unequal because of different length
341        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
342        assert_ne!(boolean_buf1, boolean_buf2);
343
344        // ptr_eq
345        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
346        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
347        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
348    }
349
350    #[test]
351    fn test_boolean_slice() {
352        let bytes = &[0, 3, 2, 6, 2];
353        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
354        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
355
356        let boolean_slice1 = boolean_buf1.slice(16, 16);
357        let boolean_slice2 = boolean_buf2.slice(0, 16);
358        assert_eq!(boolean_slice1.values(), boolean_slice2.values());
359
360        assert_eq!(bytes, boolean_slice1.values());
361        assert_eq!(16, boolean_slice1.offset);
362        assert_eq!(16, boolean_slice1.len);
363
364        assert_eq!(bytes, boolean_slice2.values());
365        assert_eq!(0, boolean_slice2.offset);
366        assert_eq!(16, boolean_slice2.len);
367    }
368
369    #[test]
370    fn test_boolean_bitand() {
371        let offset = 0;
372        let len = 40;
373
374        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
375        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
376
377        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
378        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
379
380        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
381        assert_eq!(boolean_buf1 & boolean_buf2, expected);
382    }
383
384    #[test]
385    fn test_boolean_bitor() {
386        let offset = 0;
387        let len = 40;
388
389        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
390        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
391
392        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
393        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
394
395        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
396        assert_eq!(boolean_buf1 | boolean_buf2, expected);
397    }
398
399    #[test]
400    fn test_boolean_bitxor() {
401        let offset = 0;
402        let len = 40;
403
404        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
405        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
406
407        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
408        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
409
410        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
411        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
412    }
413
414    #[test]
415    fn test_boolean_not() {
416        let offset = 0;
417        let len = 40;
418
419        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
420        let boolean_buf = &BooleanBuffer::new(buf, offset, len);
421
422        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
423        assert_eq!(!boolean_buf, expected);
424    }
425
426    #[test]
427    fn test_boolean_from_slice_bool() {
428        let v = [true, false, false];
429        let buf = BooleanBuffer::from(&v[..]);
430        assert_eq!(buf.offset(), 0);
431        assert_eq!(buf.len(), 3);
432        assert_eq!(buf.values().len(), 1);
433        assert!(buf.value(0));
434    }
435}