polars_arrow/array/boolean/
mod.rs

1use either::Either;
2
3use super::{Array, Splitable};
4use crate::array::iterator::NonNullValuesIter;
5use crate::bitmap::utils::{BitmapIter, ZipValidity};
6use crate::bitmap::{Bitmap, MutableBitmap};
7use crate::datatypes::{ArrowDataType, PhysicalType};
8use crate::trusted_len::TrustedLen;
9
10mod ffi;
11pub(super) mod fmt;
12mod from;
13mod iterator;
14mod mutable;
15
16pub use mutable::*;
17use polars_error::{polars_bail, PolarsResult};
18
19/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<bool>>`.
20/// It implements [`Array`].
21///
22/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`
23/// where:
24/// * the first item is the array's logical type
25/// * the second is the immutable values
26/// * the third is the immutable validity (whether a value is null or not as a bitmap).
27///
28/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
29/// # Example
30/// ```
31/// use polars_arrow::array::BooleanArray;
32/// use polars_arrow::bitmap::Bitmap;
33/// use polars_arrow::buffer::Buffer;
34///
35/// let array = BooleanArray::from([Some(true), None, Some(false)]);
36/// assert_eq!(array.value(0), true);
37/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);
38/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);
39/// // the underlying representation
40/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));
41/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
42///
43/// ```
44#[derive(Clone)]
45pub struct BooleanArray {
46    dtype: ArrowDataType,
47    values: Bitmap,
48    validity: Option<Bitmap>,
49}
50
51impl BooleanArray {
52    /// The canonical method to create a [`BooleanArray`] out of low-end APIs.
53    /// # Errors
54    /// This function errors iff:
55    /// * The validity is not `None` and its length is different from `values`'s length
56    /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
57    pub fn try_new(
58        dtype: ArrowDataType,
59        values: Bitmap,
60        validity: Option<Bitmap>,
61    ) -> PolarsResult<Self> {
62        if validity
63            .as_ref()
64            .is_some_and(|validity| validity.len() != values.len())
65        {
66            polars_bail!(ComputeError: "validity mask length must match the number of values")
67        }
68
69        if dtype.to_physical_type() != PhysicalType::Boolean {
70            polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean")
71        }
72
73        Ok(Self {
74            dtype,
75            values,
76            validity,
77        })
78    }
79
80    /// Alias to `Self::try_new().unwrap()`
81    pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option<Bitmap>) -> Self {
82        Self::try_new(dtype, values, validity).unwrap()
83    }
84
85    /// Returns an iterator over the optional values of this [`BooleanArray`].
86    #[inline]
87    pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
88        ZipValidity::new_with_validity(self.values().iter(), self.validity())
89    }
90
91    /// Returns an iterator over the values of this [`BooleanArray`].
92    #[inline]
93    pub fn values_iter(&self) -> BitmapIter {
94        self.values().iter()
95    }
96
97    /// Returns an iterator of the non-null values.
98    #[inline]
99    pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {
100        NonNullValuesIter::new(self, self.validity())
101    }
102
103    /// Returns the length of this array
104    #[inline]
105    pub fn len(&self) -> usize {
106        self.values.len()
107    }
108
109    /// The values [`Bitmap`].
110    /// Values on null slots are undetermined (they can be anything).
111    #[inline]
112    pub fn values(&self) -> &Bitmap {
113        &self.values
114    }
115
116    /// Returns the optional validity.
117    #[inline]
118    pub fn validity(&self) -> Option<&Bitmap> {
119        self.validity.as_ref()
120    }
121
122    /// Returns the arrays' [`ArrowDataType`].
123    #[inline]
124    pub fn dtype(&self) -> &ArrowDataType {
125        &self.dtype
126    }
127
128    /// Returns the value at index `i`
129    /// # Panic
130    /// This function panics iff `i >= self.len()`.
131    #[inline]
132    pub fn value(&self, i: usize) -> bool {
133        self.values.get_bit(i)
134    }
135
136    /// Returns the element at index `i` as bool
137    ///
138    /// # Safety
139    /// Caller must be sure that `i < self.len()`
140    #[inline]
141    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
142        self.values.get_bit_unchecked(i)
143    }
144
145    /// Returns the element at index `i` or `None` if it is null
146    /// # Panics
147    /// iff `i >= self.len()`
148    #[inline]
149    pub fn get(&self, i: usize) -> Option<bool> {
150        if !self.is_null(i) {
151            // soundness: Array::is_null panics if i >= self.len
152            unsafe { Some(self.value_unchecked(i)) }
153        } else {
154            None
155        }
156    }
157
158    /// Slices this [`BooleanArray`].
159    /// # Implementation
160    /// This operation is `O(1)` as it amounts to increase up to two ref counts.
161    /// # Panic
162    /// This function panics iff `offset + length > self.len()`.
163    #[inline]
164    pub fn slice(&mut self, offset: usize, length: usize) {
165        assert!(
166            offset + length <= self.len(),
167            "the offset of the new Buffer cannot exceed the existing length"
168        );
169        unsafe { self.slice_unchecked(offset, length) }
170    }
171
172    /// Slices this [`BooleanArray`].
173    /// # Implementation
174    /// This operation is `O(1)` as it amounts to increase two ref counts.
175    ///
176    /// # Safety
177    /// The caller must ensure that `offset + length <= self.len()`.
178    #[inline]
179    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
180        self.validity = self
181            .validity
182            .take()
183            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
184            .filter(|bitmap| bitmap.unset_bits() > 0);
185        self.values.slice_unchecked(offset, length);
186    }
187
188    impl_sliced!();
189    impl_mut_validity!();
190    impl_into_array!();
191
192    /// Returns a clone of this [`BooleanArray`] with new values.
193    /// # Panics
194    /// This function panics iff `values.len() != self.len()`.
195    #[must_use]
196    pub fn with_values(&self, values: Bitmap) -> Self {
197        let mut out = self.clone();
198        out.set_values(values);
199        out
200    }
201
202    /// Sets the values of this [`BooleanArray`].
203    /// # Panics
204    /// This function panics iff `values.len() != self.len()`.
205    pub fn set_values(&mut self, values: Bitmap) {
206        assert_eq!(
207            values.len(),
208            self.len(),
209            "values length must be equal to this arrays length"
210        );
211        self.values = values;
212    }
213
214    /// Applies a function `f` to the values of this array, cloning the values
215    /// iff they are being shared with others
216    ///
217    /// This is an API to use clone-on-write
218    /// # Implementation
219    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
220    /// if it is being shared (since it results in a `O(N)` memcopy).
221    /// # Panics
222    /// This function panics if the function modifies the length of the [`MutableBitmap`].
223    pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
224        let values = std::mem::take(&mut self.values);
225        let mut values = values.make_mut();
226        f(&mut values);
227        if let Some(validity) = &self.validity {
228            assert_eq!(validity.len(), values.len());
229        }
230        self.values = values.into();
231    }
232
233    /// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
234    pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
235        use Either::*;
236
237        if let Some(bitmap) = self.validity {
238            match bitmap.into_mut() {
239                Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))),
240                Right(mutable_bitmap) => match self.values.into_mut() {
241                    Left(immutable) => Left(BooleanArray::new(
242                        self.dtype,
243                        immutable,
244                        Some(mutable_bitmap.into()),
245                    )),
246                    Right(mutable) => Right(
247                        MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap))
248                            .unwrap(),
249                    ),
250                },
251            }
252        } else {
253            match self.values.into_mut() {
254                Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)),
255                Right(mutable) => {
256                    Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap())
257                },
258            }
259        }
260    }
261
262    /// Returns a new empty [`BooleanArray`].
263    pub fn new_empty(dtype: ArrowDataType) -> Self {
264        Self::new(dtype, Bitmap::new(), None)
265    }
266
267    /// Returns a new [`BooleanArray`] whose all slots are null / `None`.
268    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
269        let bitmap = Bitmap::new_zeroed(length);
270        Self::new(dtype, bitmap.clone(), Some(bitmap))
271    }
272
273    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
274    #[inline]
275    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
276        MutableBooleanArray::from_trusted_len_values_iter(iterator).into()
277    }
278
279    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
280    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
281    /// but this crate does not mark it as such.
282    ///
283    /// # Safety
284    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
285    /// I.e. that `size_hint().1` correctly reports its length.
286    #[inline]
287    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
288        iterator: I,
289    ) -> Self {
290        MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()
291    }
292
293    /// Creates a new [`BooleanArray`] from a slice of `bool`.
294    #[inline]
295    pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
296        MutableBooleanArray::from_slice(slice).into()
297    }
298
299    /// Creates a [`BooleanArray`] from an iterator of trusted length.
300    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
301    /// but this crate does not mark it as such.
302    ///
303    /// # Safety
304    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
305    /// I.e. that `size_hint().1` correctly reports its length.
306    #[inline]
307    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
308    where
309        P: std::borrow::Borrow<bool>,
310        I: Iterator<Item = Option<P>>,
311    {
312        MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()
313    }
314
315    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
316    #[inline]
317    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
318    where
319        P: std::borrow::Borrow<bool>,
320        I: TrustedLen<Item = Option<P>>,
321    {
322        MutableBooleanArray::from_trusted_len_iter(iterator).into()
323    }
324
325    /// Creates a [`BooleanArray`] from an falible iterator of trusted length.
326    ///
327    /// # Safety
328    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
329    /// I.e. that `size_hint().1` correctly reports its length.
330    #[inline]
331    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
332    where
333        P: std::borrow::Borrow<bool>,
334        I: Iterator<Item = Result<Option<P>, E>>,
335    {
336        Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())
337    }
338
339    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
340    #[inline]
341    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>
342    where
343        P: std::borrow::Borrow<bool>,
344        I: TrustedLen<Item = Result<Option<P>, E>>,
345    {
346        Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())
347    }
348
349    /// Returns its internal representation
350    #[must_use]
351    pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option<Bitmap>) {
352        let Self {
353            dtype,
354            values,
355            validity,
356        } = self;
357        (dtype, values, validity)
358    }
359
360    /// Creates a [`BooleanArray`] from its internal representation.
361    /// This is the inverted from [`BooleanArray::into_inner`]
362    ///
363    /// # Safety
364    /// Callers must ensure all invariants of this struct are upheld.
365    pub unsafe fn from_inner_unchecked(
366        dtype: ArrowDataType,
367        values: Bitmap,
368        validity: Option<Bitmap>,
369    ) -> Self {
370        Self {
371            dtype,
372            values,
373            validity,
374        }
375    }
376}
377
378impl Array for BooleanArray {
379    impl_common_array!();
380
381    fn validity(&self) -> Option<&Bitmap> {
382        self.validity.as_ref()
383    }
384
385    #[inline]
386    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
387        Box::new(self.clone().with_validity(validity))
388    }
389}
390
391impl Splitable for BooleanArray {
392    fn check_bound(&self, offset: usize) -> bool {
393        offset <= self.len()
394    }
395
396    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
397        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
398        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
399
400        (
401            Self {
402                dtype: self.dtype.clone(),
403                values: lhs_values,
404                validity: lhs_validity,
405            },
406            Self {
407                dtype: self.dtype.clone(),
408                values: rhs_values,
409                validity: rhs_validity,
410            },
411        )
412    }
413}
414
415impl From<Bitmap> for BooleanArray {
416    fn from(values: Bitmap) -> Self {
417        Self {
418            dtype: ArrowDataType::Boolean,
419            values,
420            validity: None,
421        }
422    }
423}