1use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
23use arrow_data::ArrayDataBuilder;
24use std::any::Any;
25use std::sync::Arc;
26
27pub struct GenericByteBuilder<T: ByteArrayType> {
32 value_builder: UInt8BufferBuilder,
33 offsets_builder: BufferBuilder<T::Offset>,
34 null_buffer_builder: NullBufferBuilder,
35}
36
37impl<T: ByteArrayType> GenericByteBuilder<T> {
38 pub fn new() -> Self {
40 Self::with_capacity(1024, 1024)
41 }
42
43 pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50 let mut offsets_builder = BufferBuilder::<T::Offset>::new(item_capacity + 1);
51 offsets_builder.append(T::Offset::from_usize(0).unwrap());
52 Self {
53 value_builder: UInt8BufferBuilder::new(data_capacity),
54 offsets_builder,
55 null_buffer_builder: NullBufferBuilder::new(item_capacity),
56 }
57 }
58
59 pub unsafe fn new_from_buffer(
66 offsets_buffer: MutableBuffer,
67 value_buffer: MutableBuffer,
68 null_buffer: Option<MutableBuffer>,
69 ) -> Self {
70 let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
71 let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
72
73 let null_buffer_builder = null_buffer
74 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
75 .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
76
77 Self {
78 offsets_builder,
79 value_builder,
80 null_buffer_builder,
81 }
82 }
83
84 #[inline]
85 fn next_offset(&self) -> T::Offset {
86 T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
87 }
88
89 #[inline]
105 pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
106 self.value_builder.append_slice(value.as_ref().as_ref());
107 self.null_buffer_builder.append(true);
108 self.offsets_builder.append(self.next_offset());
109 }
110
111 #[inline]
118 pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
119 match value {
120 None => self.append_null(),
121 Some(v) => self.append_value(v),
122 };
123 }
124
125 #[inline]
127 pub fn append_null(&mut self) {
128 self.null_buffer_builder.append(false);
129 self.offsets_builder.append(self.next_offset());
130 }
131
132 pub fn finish(&mut self) -> GenericByteArray<T> {
134 let array_type = T::DATA_TYPE;
135 let array_builder = ArrayDataBuilder::new(array_type)
136 .len(self.len())
137 .add_buffer(self.offsets_builder.finish())
138 .add_buffer(self.value_builder.finish())
139 .nulls(self.null_buffer_builder.finish());
140
141 self.offsets_builder.append(self.next_offset());
142 let array_data = unsafe { array_builder.build_unchecked() };
143 GenericByteArray::from(array_data)
144 }
145
146 pub fn finish_cloned(&self) -> GenericByteArray<T> {
148 let array_type = T::DATA_TYPE;
149 let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
150 let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
151 let array_builder = ArrayDataBuilder::new(array_type)
152 .len(self.len())
153 .add_buffer(offset_buffer)
154 .add_buffer(value_buffer)
155 .nulls(self.null_buffer_builder.finish_cloned());
156
157 let array_data = unsafe { array_builder.build_unchecked() };
158 GenericByteArray::from(array_data)
159 }
160
161 pub fn values_slice(&self) -> &[u8] {
163 self.value_builder.as_slice()
164 }
165
166 pub fn offsets_slice(&self) -> &[T::Offset] {
168 self.offsets_builder.as_slice()
169 }
170
171 pub fn validity_slice(&self) -> Option<&[u8]> {
173 self.null_buffer_builder.as_slice()
174 }
175
176 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
178 self.null_buffer_builder.as_slice_mut()
179 }
180}
181
182impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
183 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
184 write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
185 f.debug_struct("")
186 .field("value_builder", &self.value_builder)
187 .field("offsets_builder", &self.offsets_builder)
188 .field("null_buffer_builder", &self.null_buffer_builder)
189 .finish()
190 }
191}
192
193impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
194 fn default() -> Self {
195 Self::new()
196 }
197}
198
199impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
200 fn len(&self) -> usize {
202 self.null_buffer_builder.len()
203 }
204
205 fn finish(&mut self) -> ArrayRef {
207 Arc::new(self.finish())
208 }
209
210 fn finish_cloned(&self) -> ArrayRef {
212 Arc::new(self.finish_cloned())
213 }
214
215 fn as_any(&self) -> &dyn Any {
217 self
218 }
219
220 fn as_any_mut(&mut self) -> &mut dyn Any {
222 self
223 }
224
225 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
227 self
228 }
229}
230
231impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
232 #[inline]
233 fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
234 for v in iter {
235 self.append_option(v)
236 }
237 }
238}
239
240pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
290
291impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
292 fn write_str(&mut self, s: &str) -> std::fmt::Result {
293 self.value_builder.append_slice(s.as_bytes());
294 Ok(())
295 }
296}
297
298pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
344
345impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
346 fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
347 self.value_builder.append_slice(bs);
348 Ok(bs.len())
349 }
350
351 fn flush(&mut self) -> std::io::Result<()> {
352 Ok(())
353 }
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::Array;
    use crate::GenericStringArray;
    use std::fmt::Write as _;
    use std::io::Write as _;

    /// Appends values and one null, then checks contents, offsets and lengths.
    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
        let mut bin = GenericBinaryBuilder::<O>::new();

        bin.append_value(b"hello");
        bin.append_value(b"");
        bin.append_null();
        bin.append_value(b"rust");

        let finished = bin.finish();

        assert_eq!(4, finished.len());
        assert_eq!(1, finished.null_count());
        assert_eq!(b"hello", finished.value(0));
        assert_eq!([] as [u8; 0], finished.value(1));
        assert!(finished.is_null(2));
        assert_eq!(b"rust", finished.value(3));
        assert_eq!(O::from_usize(5).unwrap(), finished.value_offsets()[2]);
        assert_eq!(O::from_usize(4).unwrap(), finished.value_length(3));
    }

    #[test]
    fn test_binary_builder() {
        _test_generic_binary_builder::<i32>();
    }

    #[test]
    fn test_large_binary_builder() {
        _test_generic_binary_builder::<i64>();
    }

    /// A builder holding only nulls reports them all as null after finishing.
    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
        let mut bin = GenericBinaryBuilder::<O>::new();
        bin.append_null();
        bin.append_null();
        bin.append_null();
        assert_eq!(3, bin.len());
        assert!(!bin.is_empty());

        let finished = bin.finish();
        assert_eq!(3, finished.null_count());
        assert_eq!(3, finished.len());
        for idx in 0..3 {
            assert!(finished.is_null(idx));
        }
    }

    #[test]
    fn test_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i32>();
    }

    #[test]
    fn test_large_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i64>();
    }

    /// Finishing empties the builder, and a second batch starts from offset zero.
    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
        let mut bin = GenericBinaryBuilder::<O>::new();

        // First batch: built and discarded, only the reset matters.
        bin.append_value(b"hello");
        bin.append_value(b"");
        bin.append_null();
        bin.append_value(b"rust");
        bin.finish();

        assert!(bin.is_empty());

        // Second batch on the reused builder.
        bin.append_value(b"parquet");
        bin.append_null();
        bin.append_value(b"arrow");
        bin.append_value(b"");
        let second = bin.finish();

        assert_eq!(4, second.len());
        assert_eq!(1, second.null_count());
        assert_eq!(b"parquet", second.value(0));
        assert!(second.is_null(1));
        assert_eq!(b"arrow", second.value(2));
        assert_eq!(b"", second.value(1));
        assert_eq!(O::zero(), second.value_offsets()[0]);
        assert_eq!(O::from_usize(7).unwrap(), second.value_offsets()[2]);
        assert_eq!(O::from_usize(5).unwrap(), second.value_length(2));
    }

    #[test]
    fn test_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i32>();
    }

    #[test]
    fn test_large_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i64>();
    }

    /// Mixes `append_value`, `append_null` and `append_option` with borrowed
    /// and owned inputs, then compares against an array built from a `Vec`.
    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
        let mut strings = GenericStringBuilder::<O>::new();
        let owned = "arrow".to_owned();

        strings.append_value("hello");
        strings.append_value("");
        strings.append_value(&owned);
        strings.append_null();
        strings.append_option(Some("rust"));
        strings.append_option(None::<&str>);
        strings.append_option(None::<String>);
        assert_eq!(7, strings.len());

        let expected = GenericStringArray::<O>::from(vec![
            Some("hello"),
            Some(""),
            Some("arrow"),
            None,
            Some("rust"),
            None,
            None,
        ]);
        assert_eq!(expected, strings.finish());
    }

    #[test]
    fn test_string_array_builder() {
        _test_generic_string_array_builder::<i32>();
    }

    #[test]
    fn test_large_string_array_builder() {
        _test_generic_string_array_builder::<i64>();
    }

    /// `finish` leaves only the leading zero offset behind, and a later batch
    /// without nulls produces an array without a null buffer.
    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
        let mut strings = GenericStringBuilder::<O>::with_capacity(3, 11);

        strings.append_value("hello");
        strings.append_value("rust");
        strings.append_null();

        strings.finish();
        assert!(strings.is_empty());
        assert_eq!(&[O::zero()], strings.offsets_slice());

        strings.append_value("arrow");
        strings.append_value("parquet");
        let second = strings.finish();
        assert!(second.nulls().is_none());
        let expected = GenericStringArray::<O>::from(vec!["arrow", "parquet"]);
        assert_eq!(expected, second);
    }

    #[test]
    fn test_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i32>();
    }

    #[test]
    fn test_large_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i64>();
    }

    /// `finish_cloned` snapshots the builder without resetting it, so a later
    /// `finish` still contains the earlier items.
    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
        let mut strings = GenericStringBuilder::<O>::with_capacity(3, 11);

        strings.append_value("hello");
        strings.append_value("rust");
        strings.append_null();

        let snapshot = strings.finish_cloned();
        assert!(!strings.is_empty());
        assert_eq!(3, snapshot.len());

        strings.append_value("arrow");
        strings.append_value("parquet");
        let complete = strings.finish();

        assert!(complete.nulls().is_some());
        assert_eq!(&[O::zero()], strings.offsets_slice());
        assert_eq!(5, complete.len());
    }

    #[test]
    fn test_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i32>();
    }

    #[test]
    fn test_large_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i64>();
    }

    #[test]
    fn test_extend() {
        let first_batch = ["a", "b", "c", "", "a", "b", "c"];
        let second_batch = ["d", "cupcakes", "hello"];

        let mut strings = GenericStringBuilder::<i32>::new();
        strings.extend(first_batch.into_iter().map(Some));
        strings.extend(second_batch.into_iter().map(Some));
        let finished = strings.finish();

        assert_eq!(
            finished.value_offsets(),
            &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]
        );
        assert_eq!(finished.value_data(), b"abcabcdcupcakeshello");
    }

    #[test]
    fn test_write_str() {
        let mut strings = GenericStringBuilder::<i32>::new();

        // Each `append_value("")` closes the value written so far.
        write!(strings, "foo").unwrap();
        strings.append_value("");

        writeln!(strings, "bar").unwrap();
        strings.append_value("");

        write!(strings, "fiz").unwrap();
        write!(strings, "buz").unwrap();
        strings.append_value("");

        let finished = strings.finish();
        let collected: Vec<_> = finished.iter().flatten().collect();
        assert_eq!(collected, &["foo", "bar\n", "fizbuz"]);
    }

    #[test]
    fn test_write_bytes() {
        let mut bin = GenericBinaryBuilder::<i32>::new();

        // Each `append_value("")` closes the value written so far.
        write!(bin, "foo").unwrap();
        bin.append_value("");

        writeln!(bin, "bar").unwrap();
        bin.append_value("");

        write!(bin, "fiz").unwrap();
        write!(bin, "buz").unwrap();
        bin.append_value("");

        let finished = bin.finish();
        let collected: Vec<_> = finished.iter().flatten().collect();
        assert_eq!(
            collected,
            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
        );
    }
}