1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::types::*;
20use crate::{ArrayRef, PrimitiveArray};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{Buffer, MutableBuffer};
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
// Integer and floating-point builders.

/// Builder for primitive arrays whose element type is [`Int8Type`].
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// Builder for primitive arrays whose element type is [`Int16Type`].
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// Builder for primitive arrays whose element type is [`Int32Type`].
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// Builder for primitive arrays whose element type is [`Int64Type`].
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// Builder for primitive arrays whose element type is [`UInt8Type`].
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// Builder for primitive arrays whose element type is [`UInt16Type`].
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// Builder for primitive arrays whose element type is [`UInt32Type`].
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// Builder for primitive arrays whose element type is [`UInt64Type`].
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// Builder for primitive arrays whose element type is [`Float16Type`].
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// Builder for primitive arrays whose element type is [`Float32Type`].
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// Builder for primitive arrays whose element type is [`Float64Type`].
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

// Timestamp builders, one per time-unit marker type.

/// Builder for primitive arrays whose element type is [`TimestampSecondType`].
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// Builder for primitive arrays whose element type is [`TimestampMillisecondType`].
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// Builder for primitive arrays whose element type is [`TimestampMicrosecondType`].
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// Builder for primitive arrays whose element type is [`TimestampNanosecondType`].
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;

// Date builders.

/// Builder for primitive arrays whose element type is [`Date32Type`].
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// Builder for primitive arrays whose element type is [`Date64Type`].
pub type Date64Builder = PrimitiveBuilder<Date64Type>;

// Time-of-day builders.

/// Builder for primitive arrays whose element type is [`Time32SecondType`].
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// Builder for primitive arrays whose element type is [`Time32MillisecondType`].
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// Builder for primitive arrays whose element type is [`Time64MicrosecondType`].
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// Builder for primitive arrays whose element type is [`Time64NanosecondType`].
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

// Interval builders.

/// Builder for primitive arrays whose element type is [`IntervalYearMonthType`].
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// Builder for primitive arrays whose element type is [`IntervalDayTimeType`].
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// Builder for primitive arrays whose element type is [`IntervalMonthDayNanoType`].
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;

// Duration builders.

/// Builder for primitive arrays whose element type is [`DurationSecondType`].
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// Builder for primitive arrays whose element type is [`DurationMillisecondType`].
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// Builder for primitive arrays whose element type is [`DurationMicrosecondType`].
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// Builder for primitive arrays whose element type is [`DurationNanosecondType`].
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

// Decimal builders; precision and scale are set with
// `PrimitiveBuilder::with_precision_and_scale`.

/// Builder for primitive arrays whose element type is [`Decimal128Type`].
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// Builder for primitive arrays whose element type is [`Decimal256Type`].
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
94
/// Incrementally builds a [`PrimitiveArray`] of element type `T`.
///
/// Values and validity are accumulated in two separate builders that are
/// advanced together by every `append_*` method; `finish` / `finish_cloned`
/// combine them into an array tagged with `data_type`.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    // Accumulates the native values, one slot per appended element (nulls included).
    values_builder: BufferBuilder<T::Native>,
    // Accumulates the validity information, one entry per appended element.
    null_buffer_builder: NullBufferBuilder,
    // Logical type stamped on the finished array; starts as `T::DATA_TYPE`
    // and can be replaced through the `with_*` methods.
    data_type: DataType,
}
102
103impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
104 fn as_any(&self) -> &dyn Any {
106 self
107 }
108
109 fn as_any_mut(&mut self) -> &mut dyn Any {
111 self
112 }
113
114 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
116 self
117 }
118
119 fn len(&self) -> usize {
121 self.values_builder.len()
122 }
123
124 fn finish(&mut self) -> ArrayRef {
126 Arc::new(self.finish())
127 }
128
129 fn finish_cloned(&self) -> ArrayRef {
131 Arc::new(self.finish_cloned())
132 }
133}
134
135impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
136 fn default() -> Self {
137 Self::new()
138 }
139}
140
141impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
142 pub fn new() -> Self {
144 Self::with_capacity(1024)
145 }
146
147 pub fn with_capacity(capacity: usize) -> Self {
149 Self {
150 values_builder: BufferBuilder::<T::Native>::new(capacity),
151 null_buffer_builder: NullBufferBuilder::new(capacity),
152 data_type: T::DATA_TYPE,
153 }
154 }
155
156 pub fn new_from_buffer(
158 values_buffer: MutableBuffer,
159 null_buffer: Option<MutableBuffer>,
160 ) -> Self {
161 let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
162
163 let null_buffer_builder = null_buffer
164 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
165 .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
166
167 Self {
168 values_builder,
169 null_buffer_builder,
170 data_type: T::DATA_TYPE,
171 }
172 }
173
174 pub fn with_data_type(self, data_type: DataType) -> Self {
184 assert!(
185 PrimitiveArray::<T>::is_compatible(&data_type),
186 "incompatible data type for builder, expected {} got {}",
187 T::DATA_TYPE,
188 data_type
189 );
190 Self { data_type, ..self }
191 }
192
193 pub fn capacity(&self) -> usize {
195 self.values_builder.capacity()
196 }
197
198 #[inline]
200 pub fn append_value(&mut self, v: T::Native) {
201 self.null_buffer_builder.append_non_null();
202 self.values_builder.append(v);
203 }
204
205 #[inline]
207 pub fn append_value_n(&mut self, v: T::Native, n: usize) {
208 self.null_buffer_builder.append_n_non_nulls(n);
209 self.values_builder.append_n(n, v);
210 }
211
212 #[inline]
214 pub fn append_null(&mut self) {
215 self.null_buffer_builder.append_null();
216 self.values_builder.advance(1);
217 }
218
219 #[inline]
221 pub fn append_nulls(&mut self, n: usize) {
222 self.null_buffer_builder.append_n_nulls(n);
223 self.values_builder.advance(n);
224 }
225
226 #[inline]
228 pub fn append_option(&mut self, v: Option<T::Native>) {
229 match v {
230 None => self.append_null(),
231 Some(v) => self.append_value(v),
232 };
233 }
234
235 #[inline]
237 pub fn append_slice(&mut self, v: &[T::Native]) {
238 self.null_buffer_builder.append_n_non_nulls(v.len());
239 self.values_builder.append_slice(v);
240 }
241
242 #[inline]
248 pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
249 assert_eq!(
250 values.len(),
251 is_valid.len(),
252 "Value and validity lengths must be equal"
253 );
254 self.null_buffer_builder.append_slice(is_valid);
255 self.values_builder.append_slice(values);
256 }
257
258 #[inline]
264 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
265 let iter = iter.into_iter();
266 let len = iter
267 .size_hint()
268 .1
269 .expect("append_trusted_len_iter requires an upper bound");
270
271 self.null_buffer_builder.append_n_non_nulls(len);
272 self.values_builder.append_trusted_len_iter(iter);
273 }
274
275 pub fn finish(&mut self) -> PrimitiveArray<T> {
277 let len = self.len();
278 let nulls = self.null_buffer_builder.finish();
279 let builder = ArrayData::builder(self.data_type.clone())
280 .len(len)
281 .add_buffer(self.values_builder.finish())
282 .nulls(nulls);
283
284 let array_data = unsafe { builder.build_unchecked() };
285 PrimitiveArray::<T>::from(array_data)
286 }
287
288 pub fn finish_cloned(&self) -> PrimitiveArray<T> {
290 let len = self.len();
291 let nulls = self.null_buffer_builder.finish_cloned();
292 let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
293 let builder = ArrayData::builder(self.data_type.clone())
294 .len(len)
295 .add_buffer(values_buffer)
296 .nulls(nulls);
297
298 let array_data = unsafe { builder.build_unchecked() };
299 PrimitiveArray::<T>::from(array_data)
300 }
301
302 pub fn values_slice(&self) -> &[T::Native] {
304 self.values_builder.as_slice()
305 }
306
307 pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
309 self.values_builder.as_slice_mut()
310 }
311
312 pub fn validity_slice(&self) -> Option<&[u8]> {
314 self.null_buffer_builder.as_slice()
315 }
316
317 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
319 self.null_buffer_builder.as_slice_mut()
320 }
321
322 pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
324 (
325 self.values_builder.as_slice_mut(),
326 self.null_buffer_builder.as_slice_mut(),
327 )
328 }
329}
330
331impl<P: DecimalType> PrimitiveBuilder<P> {
332 pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
334 validate_decimal_precision_and_scale::<P>(precision, scale)?;
335 Ok(Self {
336 data_type: P::TYPE_CONSTRUCTOR(precision, scale),
337 ..self
338 })
339 }
340}
341
342impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
343 pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
345 self.with_timezone_opt(Some(timezone.into()))
346 }
347
348 pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
350 Self {
351 data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
352 ..self
353 }
354 }
355}
356
357impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
358 #[inline]
359 fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
360 for v in iter {
361 self.append_option(v)
362 }
363 }
364}
365
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::{Array, BooleanArray, Date32Array, Int32Array, TimestampSecondArray};
    use arrow_schema::TimeUnit;

    /// Asserts that two `Int32Array`s agree on length, offset, null count,
    /// per-slot validity, and the value held in every valid slot.
    fn assert_int32_arrays_match(expected: &Int32Array, actual: &Int32Array) {
        assert_eq!(expected.len(), actual.len());
        assert_eq!(expected.offset(), actual.offset());
        assert_eq!(expected.null_count(), actual.null_count());
        for slot in 0..expected.len() {
            assert_eq!(expected.is_null(slot), actual.is_null(slot));
            assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
            if expected.is_valid(slot) {
                assert_eq!(expected.value(slot), actual.value(slot));
            }
        }
    }

    // Appending values one at a time yields a dense, fully valid i32 array.
    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        (0..5).for_each(|value| builder.append_value(value));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for slot in 0..5 {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(slot as i32, arr.value(slot));
        }
    }

    // The trusted-length iterator path yields the same array as per-value appends.
    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        // SAFETY: a `Range<i32>` reports an exact upper bound.
        unsafe { builder.append_trusted_len_iter(0..5) };

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for slot in 0..5 {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(slot as i32, arr.value(slot));
        }
    }

    // Bulk-appending nulls marks every slot as null.
    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        for slot in 0..5 {
            assert!(arr.is_null(slot));
            assert!(!arr.is_valid(slot));
        }
    }

    // Same dense-append check for a Date32 builder.
    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        (0..5).for_each(|value| builder.append_value(value));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for slot in 0..5 {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(slot as i32, arr.value(slot));
        }
    }

    // Same dense-append check for a second-resolution timestamp builder.
    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        (0..5).for_each(|value| builder.append_value(value));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for slot in 0..5 {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(slot as i64, arr.value(slot));
        }
    }

    // Boolean builder: positions 3, 6 and 9 are true, everything else false.
    #[test]
    fn test_primitive_array_builder_bool() {
        // Bits 3 and 6 of the first byte (8 + 64 = 72), bit 1 of the second byte (2).
        let buf = Buffer::from([72_u8, 2_u8]);
        let mut builder = BooleanArray::builder(10);
        for i in 0..10 {
            builder.append_value(matches!(i, 3 | 6 | 9));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            let expected = matches!(i, 3 | 6 | 9);
            assert_eq!(expected, arr.value(i), "failed at {i}")
        }
    }

    // `append_option` must match an array built directly from `Option`s.
    #[test]
    fn test_primitive_array_builder_append_option() {
        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        for item in [Some(0), None, Some(2), None, Some(4)] {
            builder.append_option(item);
        }
        let arr2 = builder.finish();

        assert_int32_arrays_match(&arr1, &arr2);
    }

    // Mixing `append_value` and `append_null` gives the expected validity.
    #[test]
    fn test_primitive_array_builder_append_null() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_int32_arrays_match(&arr1, &arr2);
    }

    // `append_slice` followed by nulls and a value gives the expected array.
    #[test]
    fn test_primitive_array_builder_append_slice() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_int32_arrays_match(&arr1, &arr2);
    }

    // `finish` resets the builder so it can be reused for a second array.
    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    // `finish_cloned` snapshots the contents without resetting the builder.
    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));

        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let drained = builder.finish();
        assert_eq!(7, drained.len());
        assert_eq!(drained, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let reused = builder.finish();
        assert_eq!(5, reused.len());
        assert_eq!(0, builder.len());
    }

    // A compatible data type set through `with_data_type` reaches the array.
    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let decimal_type = DataType::Decimal128(1, 2);
        let mut builder = Decimal128Builder::new().with_data_type(decimal_type);
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.precision(), 1);
        assert_eq!(array.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.data_type(), &data_type);
    }

    // An incompatible data type must be rejected with a descriptive panic.
    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        Int32Builder::new().with_data_type(DataType::Int64);
    }

    // Successive `extend` calls append in order.
    #[test]
    fn test_extend() {
        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
        builder.extend([2, 4, 6, 2].into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }
}