1use crate::types::{ByteArrayType, GenericBinaryType};
19use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::DataType;
22
/// A [`GenericByteArray`] specialised to [`GenericBinaryType`]: an array whose
/// values are byte slices (`&[u8]`) and whose offsets have type `OffsetSize`.
///
/// See [`BinaryArray`] (`i32` offsets) and [`LargeBinaryArray`] (`i64` offsets)
/// for the two concrete aliases defined in this module.
pub type GenericBinaryArray<OffsetSize> = GenericByteArray<GenericBinaryType<OffsetSize>>;
25
26impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
27 pub fn from_vec(v: Vec<&[u8]>) -> Self {
31 Self::from_iter_values(v)
32 }
33
34 pub fn from_opt_vec(v: Vec<Option<&[u8]>>) -> Self {
36 v.into_iter().collect()
37 }
38
39 fn from_list(v: GenericListArray<OffsetSize>) -> Self {
40 let v = v.into_data();
41 assert_eq!(
42 v.child_data().len(),
43 1,
44 "BinaryArray can only be created from list array of u8 values \
45 (i.e. List<PrimitiveArray<u8>>)."
46 );
47 let child_data = &v.child_data()[0];
48
49 assert_eq!(
50 child_data.child_data().len(),
51 0,
52 "BinaryArray can only be created from list array of u8 values \
53 (i.e. List<PrimitiveArray<u8>>)."
54 );
55 assert_eq!(
56 child_data.data_type(),
57 &DataType::UInt8,
58 "BinaryArray can only be created from List<u8> arrays, mismatched data types."
59 );
60 assert_eq!(
61 child_data.null_count(),
62 0,
63 "The child array cannot contain null values."
64 );
65
66 let builder = ArrayData::builder(Self::DATA_TYPE)
67 .len(v.len())
68 .offset(v.offset())
69 .add_buffer(v.buffers()[0].clone())
70 .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
71 .nulls(v.nulls().cloned());
72
73 let data = unsafe { builder.build_unchecked() };
74 Self::from(data)
75 }
76
77 pub fn take_iter<'a>(
79 &'a self,
80 indexes: impl Iterator<Item = Option<usize>> + 'a,
81 ) -> impl Iterator<Item = Option<&'a [u8]>> {
82 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
83 }
84
85 pub unsafe fn take_iter_unchecked<'a>(
90 &'a self,
91 indexes: impl Iterator<Item = Option<usize>> + 'a,
92 ) -> impl Iterator<Item = Option<&'a [u8]>> {
93 indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
94 }
95}
96
97impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>> for GenericBinaryArray<OffsetSize> {
98 fn from(v: Vec<Option<&[u8]>>) -> Self {
99 Self::from_opt_vec(v)
100 }
101}
102
103impl<OffsetSize: OffsetSizeTrait> From<Vec<&[u8]>> for GenericBinaryArray<OffsetSize> {
104 fn from(v: Vec<&[u8]>) -> Self {
105 Self::from_iter_values(v)
106 }
107}
108
109impl<T: OffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
110 fn from(v: GenericListArray<T>) -> Self {
111 Self::from_list(v)
112 }
113}
114
115impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>>
116 for GenericBinaryArray<OffsetSize>
117{
118 fn from(value: GenericStringArray<OffsetSize>) -> Self {
119 let builder = value
120 .into_data()
121 .into_builder()
122 .data_type(GenericBinaryType::<OffsetSize>::DATA_TYPE);
123
124 Self::from(unsafe { builder.build_unchecked() })
127 }
128}
129
/// A [`GenericBinaryArray`] of byte slices that uses `i32` offsets.
pub type BinaryArray = GenericBinaryArray<i32>;
171
/// A [`GenericBinaryArray`] of byte slices that uses `i64` offsets.
pub type LargeBinaryArray = GenericBinaryArray<i64>;
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ListArray, StringArray};
    use arrow_buffer::Buffer;
    use arrow_schema::Field;
    use std::sync::Arc;

    /// A `Binary` array built from raw offsets/values exposes every slot.
    #[test]
    fn test_binary_array() {
        // Three slots: "hello", "" and "parquet".
        let bytes: [u8; 12] = *b"helloparquet";
        let value_offsets: [i32; 4] = [0, 5, 5, 12];

        let data = ArrayData::builder(DataType::Binary)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_buffer(Buffer::from_slice_ref(bytes))
            .build()
            .unwrap();
        let array = BinaryArray::from(data);

        assert_eq!(3, array.len());
        assert_eq!(0, array.null_count());

        assert_eq!(b"hello", array.value(0));
        assert_eq!(b"hello", unsafe { array.value_unchecked(0) });
        assert_eq!(b"", array.value(1));
        assert_eq!(b"", unsafe { array.value_unchecked(1) });
        assert_eq!(b"parquet", array.value(2));
        assert_eq!(b"parquet", unsafe { array.value_unchecked(2) });

        assert_eq!(5, array.value_offsets()[2]);
        assert_eq!(7, array.value_length(2));

        for slot in 0..3 {
            assert!(array.is_valid(slot));
            assert!(!array.is_null(slot));
        }
    }

    /// A `Binary` array with a non-zero array offset skips the leading slot.
    #[test]
    fn test_binary_array_with_offsets() {
        let bytes: [u8; 12] = *b"helloparquet";
        let value_offsets: [i32; 4] = [0, 5, 5, 12];

        // Offset 1, length 2: the visible slots are "" and "parquet".
        let data = ArrayData::builder(DataType::Binary)
            .len(2)
            .offset(1)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_buffer(Buffer::from_slice_ref(bytes))
            .build()
            .unwrap();
        let array = BinaryArray::from(data);

        assert_eq!(b"parquet", array.value(1));
        assert_eq!(5, array.value_offsets()[0]);
        assert_eq!(0, array.value_length(0));
        assert_eq!(5, array.value_offsets()[1]);
        assert_eq!(7, array.value_length(1));
    }

    /// Same as `test_binary_array`, but with `i64` offsets.
    #[test]
    fn test_large_binary_array() {
        // Three slots: "hello", "" and "parquet".
        let bytes: [u8; 12] = *b"helloparquet";
        let value_offsets: [i64; 4] = [0, 5, 5, 12];

        let data = ArrayData::builder(DataType::LargeBinary)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_buffer(Buffer::from_slice_ref(bytes))
            .build()
            .unwrap();
        let array = LargeBinaryArray::from(data);

        assert_eq!(3, array.len());
        assert_eq!(0, array.null_count());

        assert_eq!(b"hello", array.value(0));
        assert_eq!(b"hello", unsafe { array.value_unchecked(0) });
        assert_eq!(b"", array.value(1));
        assert_eq!(b"", unsafe { array.value_unchecked(1) });
        assert_eq!(b"parquet", array.value(2));
        assert_eq!(b"parquet", unsafe { array.value_unchecked(2) });

        assert_eq!(5, array.value_offsets()[2]);
        assert_eq!(7, array.value_length(2));

        for slot in 0..3 {
            assert!(array.is_valid(slot));
            assert!(!array.is_null(slot));
        }
    }

    /// Same as `test_binary_array_with_offsets`, but with `i64` offsets.
    #[test]
    fn test_large_binary_array_with_offsets() {
        let bytes: [u8; 12] = *b"helloparquet";
        let value_offsets: [i64; 4] = [0, 5, 5, 12];

        // Offset 1, length 2: the visible slots are "" and "parquet".
        let data = ArrayData::builder(DataType::LargeBinary)
            .len(2)
            .offset(1)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_buffer(Buffer::from_slice_ref(bytes))
            .build()
            .unwrap();
        let array = LargeBinaryArray::from(data);

        assert_eq!(b"parquet", array.value(1));
        assert_eq!(b"parquet", unsafe { array.value_unchecked(1) });
        assert_eq!(5, array.value_offsets()[0]);
        assert_eq!(0, array.value_length(0));
        assert_eq!(5, array.value_offsets()[1]);
        assert_eq!(7, array.value_length(1));
    }

    /// Builds the same logical data once as a binary array and once as a
    /// `List<u8>` converted to binary, then checks that both agree.
    fn _test_generic_binary_array_from_list_array<O: OffsetSizeTrait>() {
        let raw = b"helloparquet";
        let u8_child = ArrayData::builder(DataType::UInt8)
            .len(12)
            .add_buffer(Buffer::from(raw))
            .build()
            .unwrap();
        let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap());

        // Reference array, built directly with the binary data type.
        let direct_data = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(offsets))
            .add_buffer(Buffer::from_slice_ref(raw))
            .build()
            .unwrap();
        let expected = GenericBinaryArray::<O>::from(direct_data);

        // The same content expressed as a list of non-nullable `u8` items.
        let list_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
            Field::new_list_field(DataType::UInt8, false),
        ));
        let list_data = ArrayData::builder(list_type)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(offsets))
            .add_child_data(u8_child)
            .build()
            .unwrap();
        let list_array = GenericListArray::<O>::from(list_data);
        let converted = GenericBinaryArray::<O>::from(list_array);

        assert_eq!(expected.len(), converted.len());
        assert_eq!(expected.null_count(), converted.null_count());
        assert_eq!(expected.value_offsets(), converted.value_offsets());
        for slot in 0..expected.len() {
            assert_eq!(expected.value(slot), converted.value(slot));
            assert_eq!(expected.value(slot), unsafe {
                converted.value_unchecked(slot)
            });
            assert_eq!(expected.value_length(slot), converted.value_length(slot));
        }
    }

    #[test]
    fn test_binary_array_from_list_array() {
        _test_generic_binary_array_from_list_array::<i32>();
    }

    #[test]
    fn test_large_binary_array_from_list_array() {
        _test_generic_binary_array_from_list_array::<i64>();
    }

    /// Converts a sliced list (with a sliced child and a null bitmap) and
    /// checks that offsets and nulls carry over.
    fn _test_generic_binary_array_from_list_array_with_offset<O: OffsetSizeTrait>() {
        let raw = b"HelloArrowAndParquet";
        // Child offset 5, length 15: the visible bytes are b"ArrowAndParquet".
        let u8_child = ArrayData::builder(DataType::UInt8)
            .len(15)
            .offset(5)
            .add_buffer(Buffer::from(raw))
            .build()
            .unwrap();

        let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
        let validity = Buffer::from_slice_ref([0b101]);
        let list_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
            Field::new_list_field(DataType::UInt8, false),
        ));

        // List offset 1, length 2: expected slots are [null, b"Parquet"].
        let list_data = ArrayData::builder(list_type)
            .len(2)
            .offset(1)
            .add_buffer(Buffer::from_slice_ref(offsets))
            .null_bit_buffer(Some(validity))
            .add_child_data(u8_child)
            .build()
            .unwrap();
        let list_array = GenericListArray::<O>::from(list_data);
        let converted = GenericBinaryArray::<O>::from(list_array);

        assert_eq!(2, converted.len());
        assert_eq!(1, converted.null_count());
        assert!(converted.is_null(0));
        assert!(converted.is_valid(1));
        assert_eq!(b"Parquet", converted.value(1));
    }

    #[test]
    fn test_binary_array_from_list_array_with_offset() {
        _test_generic_binary_array_from_list_array_with_offset::<i32>();
    }

    #[test]
    fn test_large_binary_array_from_list_array_with_offset() {
        _test_generic_binary_array_from_list_array_with_offset::<i64>();
    }

    /// Converting a list whose `u8` child contains nulls is expected to panic.
    fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
        let raw = b"HelloArrow";
        let u8_child = ArrayData::builder(DataType::UInt8)
            .len(10)
            .add_buffer(Buffer::from(raw))
            .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010])))
            .build()
            .unwrap();

        let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
        let list_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
            Field::new_list_field(DataType::UInt8, true),
        ));

        let list_data = ArrayData::builder(list_type)
            .len(2)
            .add_buffer(Buffer::from_slice_ref(offsets))
            .add_child_data(u8_child)
            .build()
            .unwrap();
        let list_array = GenericListArray::<O>::from(list_data);
        drop(GenericBinaryArray::<O>::from(list_array));
    }

    #[test]
    #[should_panic(expected = "The child array cannot contain null values.")]
    fn test_binary_array_from_list_array_with_child_nulls_failed() {
        _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i32>();
    }

    #[test]
    #[should_panic(expected = "The child array cannot contain null values.")]
    fn test_large_binary_array_from_list_array_with_child_nulls_failed() {
        _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i64>();
    }

    /// `from_opt_vec` keeps values in order and turns `None` into a null slot.
    fn test_generic_binary_array_from_opt_vec<T: OffsetSizeTrait>() {
        let input: Vec<Option<&[u8]>> =
            vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
        let array = GenericBinaryArray::<T>::from_opt_vec(input);

        assert_eq!(array.len(), 5);
        assert_eq!(array.value(0), b"one");
        assert_eq!(array.value(1), b"two");
        assert_eq!(array.value(3), b"");
        assert_eq!(array.value(4), b"three");

        // Only slot 2 (the `None`) is null.
        for (slot, &expect_null) in [false, false, true, false, false].iter().enumerate() {
            assert_eq!(array.is_null(slot), expect_null);
        }
    }

    #[test]
    fn test_large_binary_array_from_opt_vec() {
        test_generic_binary_array_from_opt_vec::<i64>()
    }

    #[test]
    fn test_binary_array_from_opt_vec() {
        test_generic_binary_array_from_opt_vec::<i32>()
    }

    /// Collecting from an iterator whose size hint over-reports still yields
    /// exactly the produced items.
    #[test]
    fn test_binary_array_from_unbound_iter() {
        // The source iterator is unbounded; `scan` stops it after 10 items.
        let value_iter = (0..)
            .scan(0usize, |emitted, i| {
                if *emitted < 10 {
                    *emitted += 1;
                    Some(Some(format!("value {i}")))
                } else {
                    // Ends the iteration after ten values.
                    None
                }
            })
            // `take` supplies an upper size bound of 100.
            .take(100);

        let (_, upper_size_bound) = value_iter.size_hint();
        assert_eq!(upper_size_bound, Some(100));

        let array: BinaryArray = value_iter.collect();
        assert_eq!(array.len(), 10);
    }

    /// Converting a `List<u32>` must be rejected.
    #[test]
    #[should_panic(
        expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types."
    )]
    fn test_binary_array_from_incorrect_list_array() {
        let numbers: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
        let u32_child = ArrayData::builder(DataType::UInt32)
            .len(12)
            .add_buffer(Buffer::from_slice_ref(numbers))
            .build()
            .unwrap();
        let value_offsets: [i32; 4] = [0, 5, 5, 12];

        let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false)));
        let list_data = ArrayData::builder(list_type)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_child_data(u32_child)
            .build()
            .unwrap();
        let list_array = ListArray::from(list_data);
        drop(BinaryArray::from(list_array));
    }

    /// `value` with an index past the end must panic with a descriptive message.
    #[test]
    #[should_panic(
        expected = "Trying to access an element at index 4 from a BinaryArray of length 3"
    )]
    fn test_binary_array_get_value_index_out_of_bound() {
        let bytes: [u8; 12] = *b"helloparquet";
        let value_offsets: [i32; 4] = [0, 5, 5, 12];
        let data = ArrayData::builder(DataType::Binary)
            .len(3)
            .add_buffer(Buffer::from_slice_ref(value_offsets))
            .add_buffer(Buffer::from_slice_ref(bytes))
            .build()
            .unwrap();
        let array = BinaryArray::from(data);
        array.value(4);
    }

    /// Feeding `Binary` data to `LargeBinaryArray` must panic.
    #[test]
    #[should_panic(expected = "LargeBinaryArray expects DataType::LargeBinary")]
    fn test_binary_array_validation() {
        let small = BinaryArray::from_iter_values([&[1, 2]]);
        drop(LargeBinaryArray::from(small.into_data()));
    }

    /// An array holding a single null passes full validation.
    #[test]
    fn test_binary_array_all_null() {
        let only_null = vec![None];
        let array = BinaryArray::from(only_null);
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// Same as `test_binary_array_all_null`, but with `i64` offsets.
    #[test]
    fn test_large_binary_array_all_null() {
        let only_null = vec![None];
        let array = LargeBinaryArray::from(only_null);
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// Arrays built from empty buffers still report a single `0` offset.
    #[test]
    fn test_empty_offsets() {
        let small = BinaryArray::from(
            ArrayData::builder(DataType::Binary)
                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
                .build()
                .unwrap(),
        );
        assert_eq!(small.value_offsets(), &[0]);

        let large = LargeBinaryArray::from(
            ArrayData::builder(DataType::LargeBinary)
                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
                .build()
                .unwrap(),
        );
        assert_eq!(large.len(), 0);
        assert_eq!(large.value_offsets(), &[0]);
    }

    /// String -> binary -> string round-trips to an equal array.
    #[test]
    fn test_to_from_string() {
        let original = StringArray::from_iter_values(["a", "b", "c", "d"]);
        let as_binary = BinaryArray::from(original.clone());
        let round_tripped = StringArray::from(as_binary);
        assert_eq!(original, round_tripped);
    }
}