arrow_array/builder/
primitive_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::types::*;
20use crate::{ArrayRef, PrimitiveArray};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{Buffer, MutableBuffer};
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
/// A signed 8-bit integer array builder.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// A signed 16-bit integer array builder.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// A signed 32-bit integer array builder.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// A signed 64-bit integer array builder.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// An unsigned 8-bit integer array builder.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// An unsigned 16-bit integer array builder.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// An unsigned 32-bit integer array builder.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// An unsigned 64-bit integer array builder.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// A 16-bit floating point array builder.
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// A 32-bit floating point array builder.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// A 64-bit floating point array builder.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

/// A timestamp second array builder.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// A timestamp millisecond array builder.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// A timestamp microsecond array builder.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// A timestamp nanosecond array builder.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;

/// A 32-bit date array builder.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// A 64-bit date array builder.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;

/// A 32-bit elapsed time in seconds array builder.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// A 32-bit elapsed time in milliseconds array builder.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// A 64-bit elapsed time in microseconds array builder.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// A 64-bit elapsed time in nanoseconds array builder.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

/// A “calendar” interval in months array builder.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// A “calendar” interval in days and milliseconds array builder.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// A “calendar” interval in months, days, and nanoseconds array builder.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;

/// An elapsed time in seconds array builder.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// An elapsed time in milliseconds array builder.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// An elapsed time in microseconds array builder.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// An elapsed time in nanoseconds array builder.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

/// A decimal 128 array builder.
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// A decimal 256 array builder.
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
94
/// Builder for [`PrimitiveArray`]
///
/// Slots are accumulated in a typed values buffer alongside a null buffer;
/// `finish` or `finish_cloned` turns the accumulated slots into an array.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Accumulates the native values, one entry per appended slot (null slots
    /// advance this buffer too).
    values_builder: BufferBuilder<T::Native>,
    /// Records which appended slots are null.
    null_buffer_builder: NullBufferBuilder,
    /// Data type given to the arrays this builder produces; initialised to
    /// `T::DATA_TYPE` by the constructors.
    data_type: DataType,
}
102
103impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
104    /// Returns the builder as a non-mutable `Any` reference.
105    fn as_any(&self) -> &dyn Any {
106        self
107    }
108
109    /// Returns the builder as a mutable `Any` reference.
110    fn as_any_mut(&mut self) -> &mut dyn Any {
111        self
112    }
113
114    /// Returns the boxed builder as a box of `Any`.
115    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
116        self
117    }
118
119    /// Returns the number of array slots in the builder
120    fn len(&self) -> usize {
121        self.values_builder.len()
122    }
123
124    /// Builds the array and reset this builder.
125    fn finish(&mut self) -> ArrayRef {
126        Arc::new(self.finish())
127    }
128
129    /// Builds the array without resetting the builder.
130    fn finish_cloned(&self) -> ArrayRef {
131        Arc::new(self.finish_cloned())
132    }
133}
134
135impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
136    fn default() -> Self {
137        Self::new()
138    }
139}
140
141impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
142    /// Creates a new primitive array builder
143    pub fn new() -> Self {
144        Self::with_capacity(1024)
145    }
146
147    /// Creates a new primitive array builder with capacity no of items
148    pub fn with_capacity(capacity: usize) -> Self {
149        Self {
150            values_builder: BufferBuilder::<T::Native>::new(capacity),
151            null_buffer_builder: NullBufferBuilder::new(capacity),
152            data_type: T::DATA_TYPE,
153        }
154    }
155
156    /// Creates a new primitive array builder from buffers
157    pub fn new_from_buffer(
158        values_buffer: MutableBuffer,
159        null_buffer: Option<MutableBuffer>,
160    ) -> Self {
161        let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
162
163        let null_buffer_builder = null_buffer
164            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
165            .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
166
167        Self {
168            values_builder,
169            null_buffer_builder,
170            data_type: T::DATA_TYPE,
171        }
172    }
173
174    /// By default [`PrimitiveBuilder`] uses [`ArrowPrimitiveType::DATA_TYPE`] as the
175    /// data type of the generated array.
176    ///
177    /// This method allows overriding the data type, to allow specifying timezones
178    /// for [`DataType::Timestamp`] or precision and scale for [`DataType::Decimal128`] and [`DataType::Decimal256`]
179    ///
180    /// # Panics
181    ///
182    /// This method panics if `data_type` is not [PrimitiveArray::is_compatible]
183    pub fn with_data_type(self, data_type: DataType) -> Self {
184        assert!(
185            PrimitiveArray::<T>::is_compatible(&data_type),
186            "incompatible data type for builder, expected {} got {}",
187            T::DATA_TYPE,
188            data_type
189        );
190        Self { data_type, ..self }
191    }
192
193    /// Returns the capacity of this builder measured in slots of type `T`
194    pub fn capacity(&self) -> usize {
195        self.values_builder.capacity()
196    }
197
198    /// Appends a value of type `T` into the builder
199    #[inline]
200    pub fn append_value(&mut self, v: T::Native) {
201        self.null_buffer_builder.append_non_null();
202        self.values_builder.append(v);
203    }
204
205    /// Appends a value of type `T` into the builder `n` times
206    #[inline]
207    pub fn append_value_n(&mut self, v: T::Native, n: usize) {
208        self.null_buffer_builder.append_n_non_nulls(n);
209        self.values_builder.append_n(n, v);
210    }
211
212    /// Appends a null slot into the builder
213    #[inline]
214    pub fn append_null(&mut self) {
215        self.null_buffer_builder.append_null();
216        self.values_builder.advance(1);
217    }
218
219    /// Appends `n` no. of null's into the builder
220    #[inline]
221    pub fn append_nulls(&mut self, n: usize) {
222        self.null_buffer_builder.append_n_nulls(n);
223        self.values_builder.advance(n);
224    }
225
226    /// Appends an `Option<T>` into the builder
227    #[inline]
228    pub fn append_option(&mut self, v: Option<T::Native>) {
229        match v {
230            None => self.append_null(),
231            Some(v) => self.append_value(v),
232        };
233    }
234
235    /// Appends a slice of type `T` into the builder
236    #[inline]
237    pub fn append_slice(&mut self, v: &[T::Native]) {
238        self.null_buffer_builder.append_n_non_nulls(v.len());
239        self.values_builder.append_slice(v);
240    }
241
242    /// Appends values from a slice of type `T` and a validity boolean slice
243    ///
244    /// # Panics
245    ///
246    /// Panics if `values` and `is_valid` have different lengths
247    #[inline]
248    pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
249        assert_eq!(
250            values.len(),
251            is_valid.len(),
252            "Value and validity lengths must be equal"
253        );
254        self.null_buffer_builder.append_slice(is_valid);
255        self.values_builder.append_slice(values);
256    }
257
258    /// Appends values from a trusted length iterator.
259    ///
260    /// # Safety
261    /// This requires the iterator be a trusted length. This could instead require
262    /// the iterator implement `TrustedLen` once that is stabilized.
263    #[inline]
264    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
265        let iter = iter.into_iter();
266        let len = iter
267            .size_hint()
268            .1
269            .expect("append_trusted_len_iter requires an upper bound");
270
271        self.null_buffer_builder.append_n_non_nulls(len);
272        self.values_builder.append_trusted_len_iter(iter);
273    }
274
275    /// Builds the [`PrimitiveArray`] and reset this builder.
276    pub fn finish(&mut self) -> PrimitiveArray<T> {
277        let len = self.len();
278        let nulls = self.null_buffer_builder.finish();
279        let builder = ArrayData::builder(self.data_type.clone())
280            .len(len)
281            .add_buffer(self.values_builder.finish())
282            .nulls(nulls);
283
284        let array_data = unsafe { builder.build_unchecked() };
285        PrimitiveArray::<T>::from(array_data)
286    }
287
288    /// Builds the [`PrimitiveArray`] without resetting the builder.
289    pub fn finish_cloned(&self) -> PrimitiveArray<T> {
290        let len = self.len();
291        let nulls = self.null_buffer_builder.finish_cloned();
292        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
293        let builder = ArrayData::builder(self.data_type.clone())
294            .len(len)
295            .add_buffer(values_buffer)
296            .nulls(nulls);
297
298        let array_data = unsafe { builder.build_unchecked() };
299        PrimitiveArray::<T>::from(array_data)
300    }
301
302    /// Returns the current values buffer as a slice
303    pub fn values_slice(&self) -> &[T::Native] {
304        self.values_builder.as_slice()
305    }
306
307    /// Returns the current values buffer as a mutable slice
308    pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
309        self.values_builder.as_slice_mut()
310    }
311
312    /// Returns the current null buffer as a slice
313    pub fn validity_slice(&self) -> Option<&[u8]> {
314        self.null_buffer_builder.as_slice()
315    }
316
317    /// Returns the current null buffer as a mutable slice
318    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
319        self.null_buffer_builder.as_slice_mut()
320    }
321
322    /// Returns the current values buffer and null buffer as a slice
323    pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
324        (
325            self.values_builder.as_slice_mut(),
326            self.null_buffer_builder.as_slice_mut(),
327        )
328    }
329}
330
331impl<P: DecimalType> PrimitiveBuilder<P> {
332    /// Sets the precision and scale
333    pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
334        validate_decimal_precision_and_scale::<P>(precision, scale)?;
335        Ok(Self {
336            data_type: P::TYPE_CONSTRUCTOR(precision, scale),
337            ..self
338        })
339    }
340}
341
342impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
343    /// Sets the timezone
344    pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
345        self.with_timezone_opt(Some(timezone.into()))
346    }
347
348    /// Sets an optional timezone
349    pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
350        Self {
351            data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
352            ..self
353        }
354    }
355}
356
357impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
358    #[inline]
359    fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
360        for v in iter {
361            self.append_option(v)
362        }
363    }
364}
365
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::TimeUnit;

    use crate::array::{Array, BooleanArray, Date32Array, Int32Array, TimestampSecondArray};

    /// Asserts that `actual` agrees with `expected` on length, offset, null count,
    /// per-slot validity, and the value of every valid slot.
    fn assert_same_i32_contents(expected: &Int32Array, actual: &Int32Array) {
        assert_eq!(expected.len(), actual.len());
        assert_eq!(expected.offset(), actual.offset());
        assert_eq!(expected.null_count(), actual.null_count());
        for slot in 0..expected.len() {
            assert_eq!(expected.is_null(slot), actual.is_null(slot));
            assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
            if expected.is_valid(slot) {
                assert_eq!(expected.value(slot), actual.value(slot));
            }
        }
    }

    /// Values appended one by one come back non-null and in order.
    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (slot, expected) in (0..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    /// Values appended through a trusted length iterator come back non-null and in order.
    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        unsafe { builder.append_trusted_len_iter(0..5) };

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (slot, expected) in (0..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    /// `append_nulls` yields the requested number of null slots.
    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        for slot in 0..5 {
            assert!(arr.is_null(slot));
            assert!(!arr.is_valid(slot));
        }
    }

    /// Same round trip as the i32 test, for a `Date32` builder.
    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (slot, expected) in (0..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    /// Same round trip as the i32 test, for a second-resolution timestamp builder.
    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (slot, expected) in (0..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    /// Booleans set at slots 3, 6 and 9 produce the expected packed bytes.
    #[test]
    fn test_primitive_array_builder_bool() {
        // 00000010 01001000
        let buf = Buffer::from([72_u8, 2_u8]);
        let mut builder = BooleanArray::builder(10);
        for i in 0..10 {
            builder.append_value(matches!(i, 3 | 6 | 9));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(matches!(i, 3 | 6 | 9), arr.value(i), "failed at {i}")
        }
    }

    /// `append_option` builds the same array as converting a `Vec<Option<_>>`.
    #[test]
    fn test_primitive_array_builder_append_option() {
        let expected = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_option(Some(0));
        builder.append_option(None);
        builder.append_option(Some(2));
        builder.append_option(None);
        builder.append_option(Some(4));
        let built = builder.finish();

        assert_same_i32_contents(&expected, &built);
    }

    /// Mixing `append_value` and `append_null` builds the expected array.
    #[test]
    fn test_primitive_array_builder_append_null() {
        let expected = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let built = builder.finish();

        assert_same_i32_contents(&expected, &built);
    }

    /// Mixing `append_slice`, `append_null` and `append_value` builds the expected array.
    #[test]
    fn test_primitive_array_builder_append_slice() {
        let expected = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let built = builder.finish();

        assert_same_i32_contents(&expected, &built);
    }

    /// `finish` resets the builder so it can be reused for a second array.
    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    /// `finish_cloned` snapshots the contents without resetting the builder.
    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));
        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(7, first.len());
        assert_eq!(first, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    /// `with_data_type` carries decimal and timestamp parameters into the built array.
    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let mut decimal_builder =
            Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        decimal_builder.append_value(1);
        let decimals = decimal_builder.finish();
        assert_eq!(decimals.precision(), 1);
        assert_eq!(decimals.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut ts_builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        ts_builder.append_value(1);
        let timestamps = ts_builder.finish();
        assert_eq!(timestamps.data_type(), &data_type);
    }

    /// `with_data_type` panics when given a data type the builder cannot produce.
    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        let builder = Int32Builder::new();
        builder.with_data_type(DataType::Int64);
    }

    /// Successive `extend` calls append their items in order.
    #[test]
    fn test_extend() {
        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
        builder.extend([2, 4, 6, 2].into_iter().map(Some));

        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }
}