arrow_array/builder/
fixed_size_binary_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
19use crate::{ArrayRef, FixedSizeBinaryArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::NullBufferBuilder;
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`FixedSizeBinaryArray`]
28/// ```
29/// # use arrow_array::builder::FixedSizeBinaryBuilder;
30/// # use arrow_array::Array;
31/// #
32/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
33/// // [b"hello", null, b"arrow"]
34/// builder.append_value(b"hello").unwrap();
35/// builder.append_null();
36/// builder.append_value(b"arrow").unwrap();
37///
38/// let array = builder.finish();
39/// assert_eq!(array.value(0), b"hello");
40/// assert!(array.is_null(1));
41/// assert_eq!(array.value(2), b"arrow");
42/// ```
43#[derive(Debug)]
44pub struct FixedSizeBinaryBuilder {
45    values_builder: UInt8BufferBuilder,
46    null_buffer_builder: NullBufferBuilder,
47    value_length: i32,
48}
49
50impl FixedSizeBinaryBuilder {
51    /// Creates a new [`FixedSizeBinaryBuilder`]
52    pub fn new(byte_width: i32) -> Self {
53        Self::with_capacity(1024, byte_width)
54    }
55
56    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
57    /// that can be appended without reallocating
58    pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
59        assert!(
60            byte_width >= 0,
61            "value length ({byte_width}) of the array must >= 0"
62        );
63        Self {
64            values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
65            null_buffer_builder: NullBufferBuilder::new(capacity),
66            value_length: byte_width,
67        }
68    }
69
70    /// Appends a byte slice into the builder.
71    ///
72    /// Automatically update the null buffer to delimit the slice appended in as a
73    /// distinct value element.
74    #[inline]
75    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
76        if self.value_length != value.as_ref().len() as i32 {
77            Err(ArrowError::InvalidArgumentError(
78                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
79                    .to_string(),
80            ))
81        } else {
82            self.values_builder.append_slice(value.as_ref());
83            self.null_buffer_builder.append_non_null();
84            Ok(())
85        }
86    }
87
88    /// Append a null value to the array.
89    #[inline]
90    pub fn append_null(&mut self) {
91        self.values_builder
92            .append_slice(&vec![0u8; self.value_length as usize][..]);
93        self.null_buffer_builder.append_null();
94    }
95
96    /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
97    pub fn finish(&mut self) -> FixedSizeBinaryArray {
98        let array_length = self.len();
99        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
100            .add_buffer(self.values_builder.finish())
101            .nulls(self.null_buffer_builder.finish())
102            .len(array_length);
103        let array_data = unsafe { array_data_builder.build_unchecked() };
104        FixedSizeBinaryArray::from(array_data)
105    }
106
107    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
108    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
109        let array_length = self.len();
110        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
111        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
112            .add_buffer(values_buffer)
113            .nulls(self.null_buffer_builder.finish_cloned())
114            .len(array_length);
115        let array_data = unsafe { array_data_builder.build_unchecked() };
116        FixedSizeBinaryArray::from(array_data)
117    }
118
119    /// Returns the current null buffer as a slice
120    pub fn validity_slice(&self) -> Option<&[u8]> {
121        self.null_buffer_builder.as_slice()
122    }
123}
124
125impl ArrayBuilder for FixedSizeBinaryBuilder {
126    /// Returns the builder as a non-mutable `Any` reference.
127    fn as_any(&self) -> &dyn Any {
128        self
129    }
130
131    /// Returns the builder as a mutable `Any` reference.
132    fn as_any_mut(&mut self) -> &mut dyn Any {
133        self
134    }
135
136    /// Returns the boxed builder as a box of `Any`.
137    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
138        self
139    }
140
141    /// Returns the number of array slots in the builder
142    fn len(&self) -> usize {
143        self.null_buffer_builder.len()
144    }
145
146    /// Builds the array and reset this builder.
147    fn finish(&mut self) -> ArrayRef {
148        Arc::new(self.finish())
149    }
150
151    /// Builds the array without resetting the builder.
152    fn finish_cloned(&self) -> ArrayRef {
153        Arc::new(self.finish_cloned())
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Appends `[first, null, last]` to `builder`, panicking if either value
    /// is rejected.
    fn append_around_null(builder: &mut FixedSizeBinaryBuilder, first: &[u8], last: &[u8]) {
        builder.append_value(first).unwrap();
        builder.append_null();
        builder.append_value(last).unwrap();
    }

    #[test]
    fn test_fixed_size_binary_builder() {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

        //  [b"hello", null, "arrow"]
        append_around_null(&mut builder, b"hello", b"arrow");
        let built: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), built.data_type());
        assert_eq!(3, built.len());
        assert_eq!(1, built.null_count());
        assert_eq!(10, built.value_offset(2));
        assert_eq!(5, built.value_length());
    }

    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

        //  [b"hello", null, "arrow"]
        append_around_null(&mut builder, b"hello", b"arrow");

        // Snapshot taken without resetting the builder.
        let snapshot: FixedSizeBinaryArray = builder.finish_cloned();

        assert_eq!(&DataType::FixedSizeBinary(5), snapshot.data_type());
        assert_eq!(3, snapshot.len());
        assert_eq!(1, snapshot.null_count());
        assert_eq!(10, snapshot.value_offset(2));
        assert_eq!(5, snapshot.value_length());

        //  [b"finis", null, "clone"]
        append_around_null(&mut builder, b"finis", b"clone");

        // The builder kept its first three slots, so the final array has six.
        let complete: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), complete.data_type());
        assert_eq!(6, complete.len());
        assert_eq!(2, complete.null_count());
        assert_eq!(25, complete.value_offset(5));
        assert_eq!(5, complete.value_length());
    }

    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut builder = FixedSizeBinaryBuilder::new(0);

        //  [b"", null, b""]
        append_around_null(&mut builder, b"", b"");
        assert!(!builder.is_empty());

        let built: FixedSizeBinaryArray = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(0), built.data_type());
        assert_eq!(3, built.len());
        assert_eq!(1, built.null_count());
        assert_eq!(0, built.value_offset(2));
        assert_eq!(0, built.value_length());
        assert_eq!(b"", built.value(0));
        assert_eq!(b"", built.value(2));
    }

    #[test]
    #[should_panic(
        expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
    )]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        // A 5-byte value into a 4-byte-wide builder must be rejected.
        let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
        builder.append_value(b"hello").unwrap();
    }

    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut builder = FixedSizeBinaryBuilder::new(5);
        assert!(builder.is_empty());

        let empty_array = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(5), empty_array.data_type());
        assert_eq!(0, empty_array.len());
    }

    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }
}
255}