1use crate::array::print_long_array;
19use crate::builder::BooleanBuilder;
20use crate::iterator::BooleanIter;
21use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::DataType;
25use std::any::Any;
26use std::sync::Arc;
27
/// An array of boolean values with optional per-slot nullability.
///
/// Values are held in a [`BooleanBuffer`]. When a null buffer is present,
/// [`BooleanArray::new`] asserts that it has the same length as the values.
#[derive(Clone)]
pub struct BooleanArray {
    /// The boolean value stored for every slot (null slots included).
    values: BooleanBuffer,
    /// Optional null buffer; `None` means no null information is attached.
    nulls: Option<NullBuffer>,
}
72
73impl std::fmt::Debug for BooleanArray {
74 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75 write!(f, "BooleanArray\n[\n")?;
76 print_long_array(self, f, |array, index, f| {
77 std::fmt::Debug::fmt(&array.value(index), f)
78 })?;
79 write!(f, "]")
80 }
81}
82
83impl BooleanArray {
84 pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
90 if let Some(n) = nulls.as_ref() {
91 assert_eq!(values.len(), n.len());
92 }
93 Self { values, nulls }
94 }
95
96 pub fn new_null(len: usize) -> Self {
98 Self {
99 values: BooleanBuffer::new_unset(len),
100 nulls: Some(NullBuffer::new_null(len)),
101 }
102 }
103
104 pub fn new_scalar(value: bool) -> Scalar<Self> {
106 let values = match value {
107 true => BooleanBuffer::new_set(1),
108 false => BooleanBuffer::new_unset(1),
109 };
110 Scalar::new(Self::new(values, None))
111 }
112
113 pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self {
119 BooleanBuffer::new(buffer.into(), offset, len).into()
120 }
121
122 pub fn new_from_u8(value: &[u8]) -> Self {
128 BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into()
129 }
130
131 pub fn len(&self) -> usize {
133 self.values.len()
134 }
135
136 pub fn is_empty(&self) -> bool {
138 self.values.is_empty()
139 }
140
141 pub fn slice(&self, offset: usize, length: usize) -> Self {
143 Self {
144 values: self.values.slice(offset, length),
145 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
146 }
147 }
148
149 pub fn builder(capacity: usize) -> BooleanBuilder {
151 BooleanBuilder::with_capacity(capacity)
152 }
153
154 pub fn values(&self) -> &BooleanBuffer {
156 &self.values
157 }
158
159 pub fn true_count(&self) -> usize {
161 match self.nulls() {
162 Some(nulls) => {
163 let null_chunks = nulls.inner().bit_chunks().iter_padded();
164 let value_chunks = self.values().bit_chunks().iter_padded();
165 null_chunks
166 .zip(value_chunks)
167 .map(|(a, b)| (a & b).count_ones() as usize)
168 .sum()
169 }
170 None => self.values().count_set_bits(),
171 }
172 }
173
174 pub fn false_count(&self) -> usize {
176 self.len() - self.null_count() - self.true_count()
177 }
178
179 pub unsafe fn value_unchecked(&self, i: usize) -> bool {
184 self.values.value_unchecked(i)
185 }
186
187 pub fn value(&self, i: usize) -> bool {
191 assert!(
192 i < self.len(),
193 "Trying to access an element at index {} from a BooleanArray of length {}",
194 i,
195 self.len()
196 );
197 unsafe { self.value_unchecked(i) }
200 }
201
202 pub fn take_iter<'a>(
204 &'a self,
205 indexes: impl Iterator<Item = Option<usize>> + 'a,
206 ) -> impl Iterator<Item = Option<bool>> + 'a {
207 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
208 }
209
210 pub unsafe fn take_iter_unchecked<'a>(
215 &'a self,
216 indexes: impl Iterator<Item = Option<usize>> + 'a,
217 ) -> impl Iterator<Item = Option<bool>> + 'a {
218 indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
219 }
220
221 pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
232 where
233 F: FnMut(T::Item) -> bool,
234 {
235 let nulls = left.logical_nulls();
236 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
237 op(left.value_unchecked(i))
239 });
240 Self::new(values, nulls)
241 }
242
243 pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
260 where
261 F: FnMut(T::Item, S::Item) -> bool,
262 {
263 assert_eq!(left.len(), right.len());
264
265 let nulls = NullBuffer::union(
266 left.logical_nulls().as_ref(),
267 right.logical_nulls().as_ref(),
268 );
269 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
270 op(left.value_unchecked(i), right.value_unchecked(i))
272 });
273 Self::new(values, nulls)
274 }
275
276 pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
278 (self.values, self.nulls)
279 }
280}
281
282impl Array for BooleanArray {
283 fn as_any(&self) -> &dyn Any {
284 self
285 }
286
287 fn to_data(&self) -> ArrayData {
288 self.clone().into()
289 }
290
291 fn into_data(self) -> ArrayData {
292 self.into()
293 }
294
295 fn data_type(&self) -> &DataType {
296 &DataType::Boolean
297 }
298
299 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
300 Arc::new(self.slice(offset, length))
301 }
302
303 fn len(&self) -> usize {
304 self.values.len()
305 }
306
307 fn is_empty(&self) -> bool {
308 self.values.is_empty()
309 }
310
311 fn shrink_to_fit(&mut self) {
312 self.values.shrink_to_fit();
313 if let Some(nulls) = &mut self.nulls {
314 nulls.shrink_to_fit();
315 }
316 }
317
318 fn offset(&self) -> usize {
319 self.values.offset()
320 }
321
322 fn nulls(&self) -> Option<&NullBuffer> {
323 self.nulls.as_ref()
324 }
325
326 fn logical_null_count(&self) -> usize {
327 self.null_count()
328 }
329
330 fn get_buffer_memory_size(&self) -> usize {
331 let mut sum = self.values.inner().capacity();
332 if let Some(x) = &self.nulls {
333 sum += x.buffer().capacity()
334 }
335 sum
336 }
337
338 fn get_array_memory_size(&self) -> usize {
339 std::mem::size_of::<Self>() + self.get_buffer_memory_size()
340 }
341}
342
343impl ArrayAccessor for &BooleanArray {
344 type Item = bool;
345
346 fn value(&self, index: usize) -> Self::Item {
347 BooleanArray::value(self, index)
348 }
349
350 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
351 BooleanArray::value_unchecked(self, index)
352 }
353}
354
355impl From<Vec<bool>> for BooleanArray {
356 fn from(data: Vec<bool>) -> Self {
357 let mut mut_buf = MutableBuffer::new_null(data.len());
358 {
359 let mut_slice = mut_buf.as_slice_mut();
360 for (i, b) in data.iter().enumerate() {
361 if *b {
362 bit_util::set_bit(mut_slice, i);
363 }
364 }
365 }
366 let array_data = ArrayData::builder(DataType::Boolean)
367 .len(data.len())
368 .add_buffer(mut_buf.into());
369
370 let array_data = unsafe { array_data.build_unchecked() };
371 BooleanArray::from(array_data)
372 }
373}
374
375impl From<Vec<Option<bool>>> for BooleanArray {
376 fn from(data: Vec<Option<bool>>) -> Self {
377 data.iter().collect()
378 }
379}
380
381impl From<ArrayData> for BooleanArray {
382 fn from(data: ArrayData) -> Self {
383 assert_eq!(
384 data.data_type(),
385 &DataType::Boolean,
386 "BooleanArray expected ArrayData with type {} got {}",
387 DataType::Boolean,
388 data.data_type()
389 );
390 assert_eq!(
391 data.buffers().len(),
392 1,
393 "BooleanArray data should contain a single buffer only (values buffer)"
394 );
395 let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
396
397 Self {
398 values,
399 nulls: data.nulls().cloned(),
400 }
401 }
402}
403
404impl From<BooleanArray> for ArrayData {
405 fn from(array: BooleanArray) -> Self {
406 let builder = ArrayDataBuilder::new(DataType::Boolean)
407 .len(array.values.len())
408 .offset(array.values.offset())
409 .nulls(array.nulls)
410 .buffers(vec![array.values.into_inner()]);
411
412 unsafe { builder.build_unchecked() }
413 }
414}
415
416impl<'a> IntoIterator for &'a BooleanArray {
417 type Item = Option<bool>;
418 type IntoIter = BooleanIter<'a>;
419
420 fn into_iter(self) -> Self::IntoIter {
421 BooleanIter::<'a>::new(self)
422 }
423}
424
425impl<'a> BooleanArray {
426 pub fn iter(&'a self) -> BooleanIter<'a> {
428 BooleanIter::<'a>::new(self)
429 }
430}
431
432impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
433 fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
434 let iter = iter.into_iter();
435 let (_, data_len) = iter.size_hint();
436 let data_len = data_len.expect("Iterator must be sized"); let num_bytes = bit_util::ceil(data_len, 8);
439 let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
440 let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
441
442 let data = val_builder.as_slice_mut();
443
444 let null_slice = null_builder.as_slice_mut();
445 iter.enumerate().for_each(|(i, item)| {
446 if let Some(a) = item.borrow() {
447 bit_util::set_bit(null_slice, i);
448 if *a {
449 bit_util::set_bit(data, i);
450 }
451 }
452 });
453
454 let data = unsafe {
455 ArrayData::new_unchecked(
456 DataType::Boolean,
457 data_len,
458 None,
459 Some(null_builder.into()),
460 0,
461 vec![val_builder.into()],
462 vec![],
463 )
464 };
465 BooleanArray::from(data)
466 }
467}
468
469impl From<BooleanBuffer> for BooleanArray {
470 fn from(values: BooleanBuffer) -> Self {
471 Self {
472 values,
473 nulls: None,
474 }
475 }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use rand::{thread_rng, Rng};

    /// Debug output of an array without nulls.
    #[test]
    fn test_boolean_fmt_debug() {
        let arr = BooleanArray::from(vec![true, false, false]);
        assert_eq!(
            "BooleanArray\n[\n true,\n false,\n false,\n]",
            format!("{arr:?}")
        );
    }

    /// Debug output renders null slots as `null`.
    #[test]
    fn test_boolean_with_null_fmt_debug() {
        let mut builder = BooleanArray::builder(3);
        builder.append_value(true);
        builder.append_null();
        builder.append_value(false);
        let arr = builder.finish();
        assert_eq!(
            "BooleanArray\n[\n true,\n null,\n false,\n]",
            format!("{arr:?}")
        );
    }

    /// `From<Vec<bool>>` packs values least-significant bit first:
    /// `[false, true, false, true]` is `0b1010 == 10`.
    #[test]
    fn test_boolean_array_from_vec() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![false, true, false, true]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// `From<Vec<Option<bool>>>` leaves the value bit of a null slot unset.
    #[test]
    fn test_boolean_array_from_vec_option() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(1, arr.null_count());
        for i in 0..4 {
            if i == 2 {
                assert!(arr.is_null(i));
                assert!(!arr.is_valid(i));
            } else {
                assert!(!arr.is_null(i));
                assert!(arr.is_valid(i));
                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
            }
        }
    }

    /// Bytes `[1, 2, 3]` expose bits 0, 9, 16 and 17 as `true`.
    #[test]
    fn test_boolean_array_from_packed() {
        let v = [1_u8, 2_u8, 3_u8];
        let arr = BooleanArray::new_from_packed(v, 0, 24);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls.is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// Same data as `test_boolean_array_from_packed`, via `new_from_u8`.
    #[test]
    fn test_boolean_array_from_slice_u8() {
        let v: Vec<u8> = vec![1, 2, 3];
        let slice = &v[..];
        let arr = BooleanArray::new_from_u8(slice);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// Collecting only `Some` items yields an array without a null buffer.
    #[test]
    fn test_boolean_array_from_iter() {
        let v = vec![Some(false), Some(true), Some(false), Some(true)];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        // Cover all four slots so the `i == 3` expectation is exercised too.
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// Collecting a mix of `Some` and `None` records the nulls.
    #[test]
    fn test_boolean_array_from_nullable_iter() {
        let v = vec![Some(true), None, Some(false), None];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(2, arr.null_count());
        assert!(arr.nulls().is_some());

        assert!(arr.is_valid(0));
        assert!(arr.is_null(1));
        assert!(arr.is_valid(2));
        assert!(arr.is_null(3));

        assert!(arr.value(0));
        assert!(!arr.value(2));
    }

    /// Builds from `ArrayData` with a non-zero offset.
    #[test]
    fn test_boolean_array_builder() {
        // 27 == 0b0001_1011; with offset 2 the visible slots are
        // [false, true, true, false, false].
        let buf = Buffer::from([27_u8]);
        let buf2 = buf.clone();
        let data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .offset(2)
            .add_buffer(buf)
            .build()
            .unwrap();
        let arr = BooleanArray::from(data);
        assert_eq!(&buf2, arr.values().inner());
        assert_eq!(5, arr.len());
        assert_eq!(2, arr.offset());
        assert_eq!(0, arr.null_count());
        // Only the first three slots follow the `i != 0` pattern.
        for i in 0..3 {
            assert_eq!(i != 0, arr.value(i), "failed at {i}");
        }
    }

    /// `value` panics for an out-of-bounds index.
    ///
    /// NOTE(review): the name mentions "fixed_size_binary" although the test
    /// exercises `BooleanArray`; kept unchanged so existing test filters still match.
    #[test]
    #[should_panic(
        expected = "Trying to access an element at index 4 from a BooleanArray of length 3"
    )]
    fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
        let v = vec![Some(true), None, Some(false)];
        let array = v.into_iter().collect::<BooleanArray>();

        array.value(4);
    }

    /// `From<ArrayData>` rejects data that carries no values buffer.
    #[test]
    #[should_panic(expected = "BooleanArray data should contain a single buffer only \
                               (values buffer)")]
    #[cfg(not(feature = "force_validate"))]
    fn test_boolean_array_invalid_buffer_len() {
        // Deliberately build invalid data (no buffers) without validation.
        let data = unsafe {
            ArrayData::builder(DataType::Boolean)
                .len(5)
                .build_unchecked()
        };
        drop(BooleanArray::from(data));
    }

    /// `From<ArrayData>` rejects data of a different type.
    #[test]
    #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
    fn test_from_array_data_validation() {
        let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
    }

    /// `true_count` / `false_count` agree with a naive count on random data,
    /// both without and with nulls.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_true_false_count() {
        let mut rng = thread_rng();

        for _ in 0..10 {
            // Without nulls.
            let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| **x).count();
            assert_eq!(b.true_count(), expected_true);
            assert_eq!(b.false_count(), d.len() - expected_true);

            // With nulls.
            let d: Vec<_> = (0..2000)
                .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
                .collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
            assert_eq!(b.true_count(), expected_true);

            let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
            assert_eq!(b.false_count(), expected_false);
        }
    }

    /// `into_parts` returns the packed values and the optional null buffer.
    #[test]
    fn test_into_parts() {
        let boolean_array = [Some(true), None, Some(false)]
            .into_iter()
            .collect::<BooleanArray>();
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b0000_0001]);
        assert!(nulls.is_some());
        assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]);

        let boolean_array =
            BooleanArray::from(vec![false, false, false, false, false, false, false, true]);
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b1000_0000]);
        assert!(nulls.is_none());
    }
}