arrow_buffer/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::{iter, marker::PhantomData};
30
31/// Builder for creating a [Buffer] object.
32///
33/// A [Buffer] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// # Example:
39///
40/// ```
41/// # use arrow_buffer::builder::BufferBuilder;
42///
43/// let mut builder = BufferBuilder::<u8>::new(100);
44/// builder.append_slice(&[42, 43, 44]);
45/// builder.append(45);
46/// let buffer = builder.finish();
47///
48/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
49/// ```
50#[derive(Debug)]
51pub struct BufferBuilder<T: ArrowNativeType> {
52 buffer: MutableBuffer,
53 len: usize,
54 _marker: PhantomData<T>,
55}
56
57impl<T: ArrowNativeType> BufferBuilder<T> {
58 /// Creates a new builder with initial capacity for _at least_ `capacity`
59 /// elements of type `T`.
60 ///
61 /// The capacity can later be manually adjusted with the
62 /// [`reserve()`](BufferBuilder::reserve) method.
63 /// Also the
64 /// [`append()`](BufferBuilder::append),
65 /// [`append_slice()`](BufferBuilder::append_slice) and
66 /// [`advance()`](BufferBuilder::advance)
67 /// methods automatically increase the capacity if needed.
68 ///
69 /// # Example:
70 ///
71 /// ```
72 /// # use arrow_buffer::builder::BufferBuilder;
73 ///
74 /// let mut builder = BufferBuilder::<u8>::new(10);
75 ///
76 /// assert!(builder.capacity() >= 10);
77 /// ```
78 #[inline]
79 pub fn new(capacity: usize) -> Self {
80 let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
81
82 Self {
83 buffer,
84 len: 0,
85 _marker: PhantomData,
86 }
87 }
88
89 /// Creates a new builder from a [`MutableBuffer`]
90 pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
91 let buffer_len = buffer.len();
92 Self {
93 buffer,
94 len: buffer_len / std::mem::size_of::<T>(),
95 _marker: PhantomData,
96 }
97 }
98
99 /// Returns the current number of array elements in the internal buffer.
100 ///
101 /// # Example:
102 ///
103 /// ```
104 /// # use arrow_buffer::builder::BufferBuilder;
105 ///
106 /// let mut builder = BufferBuilder::<u8>::new(10);
107 /// builder.append(42);
108 ///
109 /// assert_eq!(builder.len(), 1);
110 /// ```
111 pub fn len(&self) -> usize {
112 self.len
113 }
114
115 /// Returns whether the internal buffer is empty.
116 ///
117 /// # Example:
118 ///
119 /// ```
120 /// # use arrow_buffer::builder::BufferBuilder;
121 ///
122 /// let mut builder = BufferBuilder::<u8>::new(10);
123 /// builder.append(42);
124 ///
125 /// assert_eq!(builder.is_empty(), false);
126 /// ```
127 pub fn is_empty(&self) -> bool {
128 self.len == 0
129 }
130
131 /// Returns the actual capacity (number of elements) of the internal buffer.
132 ///
133 /// Note: the internal capacity returned by this method might be larger than
134 /// what you'd expect after setting the capacity in the `new()` or `reserve()`
135 /// functions.
136 pub fn capacity(&self) -> usize {
137 let byte_capacity = self.buffer.capacity();
138 byte_capacity / std::mem::size_of::<T>()
139 }
140
141 /// Increases the number of elements in the internal buffer by `n`
142 /// and resizes the buffer as needed.
143 ///
144 /// The values of the newly added elements are 0.
145 /// This method is usually used when appending `NULL` values to the buffer
146 /// as they still require physical memory space.
147 ///
148 /// # Example:
149 ///
150 /// ```
151 /// # use arrow_buffer::builder::BufferBuilder;
152 ///
153 /// let mut builder = BufferBuilder::<u8>::new(10);
154 /// builder.advance(2);
155 ///
156 /// assert_eq!(builder.len(), 2);
157 /// ```
158 #[inline]
159 pub fn advance(&mut self, i: usize) {
160 self.buffer.extend_zeros(i * std::mem::size_of::<T>());
161 self.len += i;
162 }
163
164 /// Reserves memory for _at least_ `n` more elements of type `T`.
165 ///
166 /// # Example:
167 ///
168 /// ```
169 /// # use arrow_buffer::builder::BufferBuilder;
170 ///
171 /// let mut builder = BufferBuilder::<u8>::new(10);
172 /// builder.reserve(10);
173 ///
174 /// assert!(builder.capacity() >= 20);
175 /// ```
176 #[inline]
177 pub fn reserve(&mut self, n: usize) {
178 self.buffer.reserve(n * std::mem::size_of::<T>());
179 }
180
181 /// Appends a value of type `T` into the builder,
182 /// growing the internal buffer as needed.
183 ///
184 /// # Example:
185 ///
186 /// ```
187 /// # use arrow_buffer::builder::BufferBuilder;
188 ///
189 /// let mut builder = BufferBuilder::<u8>::new(10);
190 /// builder.append(42);
191 ///
192 /// assert_eq!(builder.len(), 1);
193 /// ```
194 #[inline]
195 pub fn append(&mut self, v: T) {
196 self.reserve(1);
197 self.buffer.push(v);
198 self.len += 1;
199 }
200
201 /// Appends a value of type `T` into the builder N times,
202 /// growing the internal buffer as needed.
203 ///
204 /// # Example:
205 ///
206 /// ```
207 /// # use arrow_buffer::builder::BufferBuilder;
208 ///
209 /// let mut builder = BufferBuilder::<u8>::new(10);
210 /// builder.append_n(10, 42);
211 ///
212 /// assert_eq!(builder.len(), 10);
213 /// ```
214 #[inline]
215 pub fn append_n(&mut self, n: usize, v: T) {
216 self.reserve(n);
217 self.extend(iter::repeat(v).take(n))
218 }
219
220 /// Appends `n`, zero-initialized values
221 ///
222 /// # Example:
223 ///
224 /// ```
225 /// # use arrow_buffer::builder::BufferBuilder;
226 ///
227 /// let mut builder = BufferBuilder::<u32>::new(10);
228 /// builder.append_n_zeroed(3);
229 ///
230 /// assert_eq!(builder.len(), 3);
231 /// assert_eq!(builder.as_slice(), &[0, 0, 0])
232 #[inline]
233 pub fn append_n_zeroed(&mut self, n: usize) {
234 self.buffer.extend_zeros(n * std::mem::size_of::<T>());
235 self.len += n;
236 }
237
238 /// Appends a slice of type `T`, growing the internal buffer as needed.
239 ///
240 /// # Example:
241 ///
242 /// ```
243 /// # use arrow_buffer::builder::BufferBuilder;
244 ///
245 /// let mut builder = BufferBuilder::<u8>::new(10);
246 /// builder.append_slice(&[42, 44, 46]);
247 ///
248 /// assert_eq!(builder.len(), 3);
249 /// ```
250 #[inline]
251 pub fn append_slice(&mut self, slice: &[T]) {
252 self.buffer.extend_from_slice(slice);
253 self.len += slice.len();
254 }
255
256 /// View the contents of this buffer as a slice
257 ///
258 /// ```
259 /// # use arrow_buffer::builder::BufferBuilder;
260 ///
261 /// let mut builder = BufferBuilder::<f64>::new(10);
262 /// builder.append(1.3);
263 /// builder.append_n(2, 2.3);
264 ///
265 /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
266 /// ```
267 #[inline]
268 pub fn as_slice(&self) -> &[T] {
269 // SAFETY
270 //
271 // - MutableBuffer is aligned and initialized for len elements of T
272 // - MutableBuffer corresponds to a single allocation
273 // - MutableBuffer does not support modification whilst active immutable borrows
274 unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
275 }
276
277 /// View the contents of this buffer as a mutable slice
278 ///
279 /// # Example:
280 ///
281 /// ```
282 /// # use arrow_buffer::builder::BufferBuilder;
283 ///
284 /// let mut builder = BufferBuilder::<f32>::new(10);
285 ///
286 /// builder.append_slice(&[1., 2., 3.4]);
287 /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
288 ///
289 /// builder.as_slice_mut()[1] = 4.2;
290 /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
291 /// ```
292 #[inline]
293 pub fn as_slice_mut(&mut self) -> &mut [T] {
294 // SAFETY
295 //
296 // - MutableBuffer is aligned and initialized for len elements of T
297 // - MutableBuffer corresponds to a single allocation
298 // - MutableBuffer does not support modification whilst active immutable borrows
299 unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
300 }
301
302 /// Shorten this BufferBuilder to `len` items
303 ///
304 /// If `len` is greater than the builder's current length, this has no effect
305 ///
306 /// # Example:
307 ///
308 /// ```
309 /// # use arrow_buffer::builder::BufferBuilder;
310 ///
311 /// let mut builder = BufferBuilder::<u16>::new(10);
312 ///
313 /// builder.append_slice(&[42, 44, 46]);
314 /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
315 ///
316 /// builder.truncate(2);
317 /// assert_eq!(builder.as_slice(), &[42, 44]);
318 ///
319 /// builder.append(12);
320 /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
321 /// ```
322 #[inline]
323 pub fn truncate(&mut self, len: usize) {
324 self.buffer.truncate(len * std::mem::size_of::<T>());
325 self.len = len;
326 }
327
328 /// # Safety
329 /// This requires the iterator be a trusted length. This could instead require
330 /// the iterator implement `TrustedLen` once that is stabilized.
331 #[inline]
332 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
333 let iter = iter.into_iter();
334 let len = iter
335 .size_hint()
336 .1
337 .expect("append_trusted_len_iter expects upper bound");
338 self.reserve(len);
339 self.extend(iter);
340 }
341
342 /// Resets this builder and returns an immutable [Buffer].
343 ///
344 /// # Example:
345 ///
346 /// ```
347 /// # use arrow_buffer::builder::BufferBuilder;
348 ///
349 /// let mut builder = BufferBuilder::<u8>::new(10);
350 /// builder.append_slice(&[42, 44, 46]);
351 ///
352 /// let buffer = builder.finish();
353 ///
354 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355 /// ```
356 #[inline]
357 pub fn finish(&mut self) -> Buffer {
358 let buf = std::mem::take(&mut self.buffer);
359 self.len = 0;
360 buf.into()
361 }
362}
363
364impl<T: ArrowNativeType> Default for BufferBuilder<T> {
365 fn default() -> Self {
366 Self::new(0)
367 }
368}
369
370impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
371 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
372 self.buffer.extend(iter.into_iter().inspect(|_| {
373 self.len += 1;
374 }))
375 }
376}
377
378impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
379 fn from(value: Vec<T>) -> Self {
380 Self::new_from_buffer(MutableBuffer::from(value))
381 }
382}
383
384impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
385 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
386 let mut builder = Self::default();
387 builder.extend(iter);
388 builder
389 }
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    /// A default builder reports no elements, no bytes and zero byte capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting counts elements, while the backing buffer counts bytes.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected: BufferBuilder<u16> = values.into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * std::mem::size_of::<u16>());
    }

    /// Extending an already populated builder adds to its length.
    #[test]
    fn extend() {
        let mut grown: BufferBuilder<i32> = [1, 2].into_iter().collect();
        assert_eq!(grown.len(), 2);
        grown.extend([3, 4]);
        assert_eq!(grown.len(), 4);
    }
}
421}