arrow_array/builder/
primitive_run_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{any::Any, sync::Arc};
19
20use crate::{types::RunEndIndexType, ArrayRef, ArrowPrimitiveType, RunArray};
21
22use super::{ArrayBuilder, PrimitiveBuilder};
23
24use arrow_buffer::ArrowNativeType;
25
26/// Builder for [`RunArray`] of [`PrimitiveArray`](crate::array::PrimitiveArray)
27///
28/// # Example:
29///
30/// ```
31///
32/// # use arrow_array::builder::PrimitiveRunBuilder;
33/// # use arrow_array::cast::AsArray;
34/// # use arrow_array::types::{UInt32Type, Int16Type};
35/// # use arrow_array::{Array, UInt32Array, Int16Array};
36///
37/// let mut builder =
38/// PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
39/// builder.append_value(1234);
40/// builder.append_value(1234);
41/// builder.append_value(1234);
42/// builder.append_null();
43/// builder.append_value(5678);
44/// builder.append_value(5678);
45/// let array = builder.finish();
46///
47/// assert_eq!(array.run_ends().values(), &[3, 4, 6]);
48///
49/// let av = array.values();
50///
51/// assert!(!av.is_null(0));
52/// assert!(av.is_null(1));
53/// assert!(!av.is_null(2));
54///
55/// // Values are polymorphic and so require a downcast.
56/// let ava: &UInt32Array = av.as_primitive::<UInt32Type>();
57///
58/// assert_eq!(ava, &UInt32Array::from(vec![Some(1234), None, Some(5678)]));
59/// ```
60#[derive(Debug)]
61pub struct PrimitiveRunBuilder<R, V>
62where
63    R: RunEndIndexType,
64    V: ArrowPrimitiveType,
65{
66    run_ends_builder: PrimitiveBuilder<R>,
67    values_builder: PrimitiveBuilder<V>,
68    current_value: Option<V::Native>,
69    current_run_end_index: usize,
70    prev_run_end_index: usize,
71}
72
73impl<R, V> Default for PrimitiveRunBuilder<R, V>
74where
75    R: RunEndIndexType,
76    V: ArrowPrimitiveType,
77{
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl<R, V> PrimitiveRunBuilder<R, V>
84where
85    R: RunEndIndexType,
86    V: ArrowPrimitiveType,
87{
88    /// Creates a new `PrimitiveRunBuilder`
89    pub fn new() -> Self {
90        Self {
91            run_ends_builder: PrimitiveBuilder::new(),
92            values_builder: PrimitiveBuilder::new(),
93            current_value: None,
94            current_run_end_index: 0,
95            prev_run_end_index: 0,
96        }
97    }
98
99    /// Creates a new `PrimitiveRunBuilder` with the provided capacity
100    ///
101    /// `capacity`: the expected number of run-end encoded values.
102    pub fn with_capacity(capacity: usize) -> Self {
103        Self {
104            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
105            values_builder: PrimitiveBuilder::with_capacity(capacity),
106            current_value: None,
107            current_run_end_index: 0,
108            prev_run_end_index: 0,
109        }
110    }
111}
112
113impl<R, V> ArrayBuilder for PrimitiveRunBuilder<R, V>
114where
115    R: RunEndIndexType,
116    V: ArrowPrimitiveType,
117{
118    /// Returns the builder as a non-mutable `Any` reference.
119    fn as_any(&self) -> &dyn Any {
120        self
121    }
122
123    /// Returns the builder as a mutable `Any` reference.
124    fn as_any_mut(&mut self) -> &mut dyn Any {
125        self
126    }
127
128    /// Returns the boxed builder as a box of `Any`.
129    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
130        self
131    }
132
133    /// Returns the length of logical array encoded by
134    /// the eventual runs array.
135    fn len(&self) -> usize {
136        self.current_run_end_index
137    }
138
139    /// Builds the array and reset this builder.
140    fn finish(&mut self) -> ArrayRef {
141        Arc::new(self.finish())
142    }
143
144    /// Builds the array without resetting the builder.
145    fn finish_cloned(&self) -> ArrayRef {
146        Arc::new(self.finish_cloned())
147    }
148}
149
150impl<R, V> PrimitiveRunBuilder<R, V>
151where
152    R: RunEndIndexType,
153    V: ArrowPrimitiveType,
154{
155    /// Appends optional value to the logical array encoded by the RunArray.
156    pub fn append_option(&mut self, value: Option<V::Native>) {
157        if self.current_run_end_index == 0 {
158            self.current_run_end_index = 1;
159            self.current_value = value;
160            return;
161        }
162        if self.current_value != value {
163            self.append_run_end();
164            self.current_value = value;
165        }
166
167        self.current_run_end_index += 1;
168    }
169
170    /// Appends value to the logical array encoded by the run-ends array.
171    pub fn append_value(&mut self, value: V::Native) {
172        self.append_option(Some(value))
173    }
174
175    /// Appends null to the logical array encoded by the run-ends array.
176    pub fn append_null(&mut self) {
177        self.append_option(None)
178    }
179
180    /// Creates the RunArray and resets the builder.
181    /// Panics if RunArray cannot be built.
182    pub fn finish(&mut self) -> RunArray<R> {
183        // write the last run end to the array.
184        self.append_run_end();
185
186        // reset the run index to zero.
187        self.current_value = None;
188        self.current_run_end_index = 0;
189
190        // build the run encoded array by adding run_ends and values array as its children.
191        let run_ends_array = self.run_ends_builder.finish();
192        let values_array = self.values_builder.finish();
193        RunArray::<R>::try_new(&run_ends_array, &values_array).unwrap()
194    }
195
196    /// Creates the RunArray and without resetting the builder.
197    /// Panics if RunArray cannot be built.
198    pub fn finish_cloned(&self) -> RunArray<R> {
199        let mut run_ends_array = self.run_ends_builder.finish_cloned();
200        let mut values_array = self.values_builder.finish_cloned();
201
202        // Add current run if one exists
203        if self.prev_run_end_index != self.current_run_end_index {
204            let mut run_end_builder = run_ends_array.into_builder().unwrap();
205            let mut values_builder = values_array.into_builder().unwrap();
206            self.append_run_end_with_builders(&mut run_end_builder, &mut values_builder);
207            run_ends_array = run_end_builder.finish();
208            values_array = values_builder.finish();
209        }
210
211        RunArray::try_new(&run_ends_array, &values_array).unwrap()
212    }
213
214    // Appends the current run to the array.
215    fn append_run_end(&mut self) {
216        // empty array or the function called without appending any value.
217        if self.prev_run_end_index == self.current_run_end_index {
218            return;
219        }
220        let run_end_index = self.run_end_index_as_native();
221        self.run_ends_builder.append_value(run_end_index);
222        self.values_builder.append_option(self.current_value);
223        self.prev_run_end_index = self.current_run_end_index;
224    }
225
226    // Similar to `append_run_end` but on custom builders.
227    // Used in `finish_cloned` which is not suppose to mutate `self`.
228    fn append_run_end_with_builders(
229        &self,
230        run_ends_builder: &mut PrimitiveBuilder<R>,
231        values_builder: &mut PrimitiveBuilder<V>,
232    ) {
233        let run_end_index = self.run_end_index_as_native();
234        run_ends_builder.append_value(run_end_index);
235        values_builder.append_option(self.current_value);
236    }
237
238    fn run_end_index_as_native(&self) -> R::Native {
239        R::Native::from_usize(self.current_run_end_index)
240        .unwrap_or_else(|| panic!(
241                "Cannot convert `current_run_end_index` {} from `usize` to native form of arrow datatype {}",
242                self.current_run_end_index,
243                R::DATA_TYPE
244        ))
245    }
246}
247
248impl<R, V> Extend<Option<V::Native>> for PrimitiveRunBuilder<R, V>
249where
250    R: RunEndIndexType,
251    V: ArrowPrimitiveType,
252{
253    fn extend<T: IntoIterator<Item = Option<V::Native>>>(&mut self, iter: T) {
254        for elem in iter {
255            self.append_option(elem);
256        }
257    }
258}
259
#[cfg(test)]
mod tests {
    use crate::builder::PrimitiveRunBuilder;
    use crate::cast::AsArray;
    use crate::types::{Int16Type, UInt32Type};
    use crate::{Array, UInt32Array};

    /// Three runs (value, null, value) appended one element at a time.
    #[test]
    fn test_primitive_ree_array_builder() {
        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
        for _ in 0..3 {
            builder.append_value(1234);
        }
        builder.append_null();
        for _ in 0..2 {
            builder.append_value(5678);
        }

        let array = builder.finish();

        // Nulls live in the values child, so only the logical count sees them.
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 1);
        assert_eq!(array.len(), 6);

        assert_eq!(array.run_ends().values(), &[3, 4, 6]);

        let values = array.values();
        let null_flags: Vec<bool> = (0..3).map(|idx| values.is_null(idx)).collect();
        assert_eq!(null_flags, vec![false, true, false]);

        // Values are polymorphic and so require a downcast.
        let expected = UInt32Array::from(vec![Some(1234), None, Some(5678)]);
        let typed_values: &UInt32Array = values.as_primitive::<UInt32Type>();
        assert_eq!(typed_values, &expected);
    }

    /// A run that spans two `extend` calls must be merged into one.
    #[test]
    fn test_extend() {
        let first_batch = [1, 2, 2, 5, 5, 4, 4];
        let second_batch = [4, 4, 6, 2];

        let mut builder = PrimitiveRunBuilder::<Int16Type, Int16Type>::new();
        builder.extend(first_batch.into_iter().map(Some));
        builder.extend(second_batch.into_iter().map(Some));
        let array = builder.finish();

        assert_eq!(array.len(), 11);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 0);
        assert_eq!(array.run_ends().values(), &[1, 3, 5, 9, 10, 11]);

        let typed_values = array.values().as_primitive::<Int16Type>();
        assert_eq!(typed_values.values(), &[1, 2, 5, 4, 6, 2]);
    }
}