arrow_buffer/buffer/
mutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::{handle_alloc_error, Layout};
19use std::mem;
20use std::ptr::NonNull;
21
22use crate::alloc::{Deallocation, ALIGNMENT};
23use crate::{
24    bytes::Bytes,
25    native::{ArrowNativeType, ToByteSlice},
26    util::bit_util,
27};
28
29use super::Buffer;
30
/// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
///
/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to have their pointer
/// aligned along cache lines and in multiples of 64 bytes.
///
/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
/// to insert many items, and `into` to convert it to [`Buffer`].
///
/// For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
///
/// Note: this may be deprecated in a future release ([#1176](https://github.com/apache/arrow-rs/issues/1176))
///
/// # Example
///
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::new(0);
/// buffer.push(256u32);
/// buffer.extend_from_slice(&[1u32]);
/// let buffer: Buffer = buffer.into();
/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
/// ```
#[derive(Debug)]
pub struct MutableBuffer {
    // Start of the backing allocation.
    // dangling iff capacity = 0
    data: NonNull<u8>,
    // Number of bytes currently in use.
    // invariant: len <= capacity
    len: usize,
    // Layout the backing memory was allocated with; `layout.size()` is the capacity in bytes.
    layout: Layout,
}
61
62impl MutableBuffer {
63    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
64    ///
65    /// See [`MutableBuffer::with_capacity`].
66    #[inline]
67    pub fn new(capacity: usize) -> Self {
68        Self::with_capacity(capacity)
69    }
70
71    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
72    ///
73    /// # Panics
74    ///
75    /// If `capacity`, when rounded up to the nearest multiple of [`ALIGNMENT`], is greater
76    /// then `isize::MAX`, then this function will panic.
77    #[inline]
78    pub fn with_capacity(capacity: usize) -> Self {
79        let capacity = bit_util::round_upto_multiple_of_64(capacity);
80        let layout = Layout::from_size_align(capacity, ALIGNMENT)
81            .expect("failed to create layout for MutableBuffer");
82        let data = match layout.size() {
83            0 => dangling_ptr(),
84            _ => {
85                // Safety: Verified size != 0
86                let raw_ptr = unsafe { std::alloc::alloc(layout) };
87                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
88            }
89        };
90        Self {
91            data,
92            len: 0,
93            layout,
94        }
95    }
96
97    /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
98    /// all bytes are guaranteed to be `0u8`.
99    /// # Example
100    /// ```
101    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
102    /// let mut buffer = MutableBuffer::from_len_zeroed(127);
103    /// assert_eq!(buffer.len(), 127);
104    /// assert!(buffer.capacity() >= 127);
105    /// let data = buffer.as_slice_mut();
106    /// assert_eq!(data[126], 0u8);
107    /// ```
108    pub fn from_len_zeroed(len: usize) -> Self {
109        let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
110        let data = match layout.size() {
111            0 => dangling_ptr(),
112            _ => {
113                // Safety: Verified size != 0
114                let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
115                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
116            }
117        };
118        Self { data, len, layout }
119    }
120
121    /// Allocates a new [MutableBuffer] from given `Bytes`.
122    pub(crate) fn from_bytes(bytes: Bytes) -> Result<Self, Bytes> {
123        let layout = match bytes.deallocation() {
124            Deallocation::Standard(layout) => *layout,
125            _ => return Err(bytes),
126        };
127
128        let len = bytes.len();
129        let data = bytes.ptr();
130        mem::forget(bytes);
131
132        Ok(Self { data, len, layout })
133    }
134
135    /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
136    /// This is useful to create a buffer for packed bitmaps.
137    pub fn new_null(len: usize) -> Self {
138        let num_bytes = bit_util::ceil(len, 8);
139        MutableBuffer::from_len_zeroed(num_bytes)
140    }
141
142    /// Set the bits in the range of `[0, end)` to 0 (if `val` is false), or 1 (if `val`
143    /// is true). Also extend the length of this buffer to be `end`.
144    ///
145    /// This is useful when one wants to clear (or set) the bits and then manipulate
146    /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
147    /// from `data_mut()`).
148    pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
149        assert!(end <= self.layout.size());
150        let v = if val { 255 } else { 0 };
151        unsafe {
152            std::ptr::write_bytes(self.data.as_ptr(), v, end);
153            self.len = end;
154        }
155        self
156    }
157
158    /// Ensure that `count` bytes from `start` contain zero bits
159    ///
160    /// This is used to initialize the bits in a buffer, however, it has no impact on the
161    /// `len` of the buffer and so can be used to initialize the memory region from
162    /// `len` to `capacity`.
163    pub fn set_null_bits(&mut self, start: usize, count: usize) {
164        assert!(
165            start.saturating_add(count) <= self.layout.size(),
166            "range start index {start} and count {count} out of bounds for \
167            buffer of length {}",
168            self.layout.size(),
169        );
170
171        // Safety: `self.data[start..][..count]` is in-bounds and well-aligned for `u8`
172        unsafe {
173            std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
174        }
175    }
176
177    /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
178    /// `self.len + additional > capacity`.
179    /// # Example
180    /// ```
181    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
182    /// let mut buffer = MutableBuffer::new(0);
183    /// buffer.reserve(253); // allocates for the first time
184    /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
185    /// let buffer: Buffer = buffer.into();
186    /// assert_eq!(buffer.len(), 253);
187    /// ```
188    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
189    // exits.
190    #[inline(always)]
191    pub fn reserve(&mut self, additional: usize) {
192        let required_cap = self.len + additional;
193        if required_cap > self.layout.size() {
194            let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
195            let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
196            self.reallocate(new_capacity)
197        }
198    }
199
200    #[cold]
201    fn reallocate(&mut self, capacity: usize) {
202        let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
203        if new_layout.size() == 0 {
204            if self.layout.size() != 0 {
205                // Safety: data was allocated with layout
206                unsafe { std::alloc::dealloc(self.as_mut_ptr(), self.layout) };
207                self.layout = new_layout
208            }
209            return;
210        }
211
212        let data = match self.layout.size() {
213            // Safety: new_layout is not empty
214            0 => unsafe { std::alloc::alloc(new_layout) },
215            // Safety: verified new layout is valid and not empty
216            _ => unsafe { std::alloc::realloc(self.as_mut_ptr(), self.layout, capacity) },
217        };
218        self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
219        self.layout = new_layout;
220    }
221
222    /// Truncates this buffer to `len` bytes
223    ///
224    /// If `len` is greater than the buffer's current length, this has no effect
225    #[inline(always)]
226    pub fn truncate(&mut self, len: usize) {
227        if len > self.len {
228            return;
229        }
230        self.len = len;
231    }
232
233    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
234    /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
235    /// # Example
236    /// ```
237    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
238    /// let mut buffer = MutableBuffer::new(0);
239    /// buffer.resize(253, 2); // allocates for the first time
240    /// assert_eq!(buffer.as_slice()[252], 2u8);
241    /// ```
242    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
243    // exits.
244    #[inline(always)]
245    pub fn resize(&mut self, new_len: usize, value: u8) {
246        if new_len > self.len {
247            let diff = new_len - self.len;
248            self.reserve(diff);
249            // write the value
250            unsafe { self.data.as_ptr().add(self.len).write_bytes(value, diff) };
251        }
252        // this truncates the buffer when new_len < self.len
253        self.len = new_len;
254    }
255
256    /// Shrinks the capacity of the buffer as much as possible.
257    /// The new capacity will aligned to the nearest 64 bit alignment.
258    ///
259    /// # Example
260    /// ```
261    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
262    /// // 2 cache lines
263    /// let mut buffer = MutableBuffer::new(128);
264    /// assert_eq!(buffer.capacity(), 128);
265    /// buffer.push(1);
266    /// buffer.push(2);
267    ///
268    /// buffer.shrink_to_fit();
269    /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
270    /// ```
271    pub fn shrink_to_fit(&mut self) {
272        let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
273        if new_capacity < self.layout.size() {
274            self.reallocate(new_capacity)
275        }
276    }
277
278    /// Returns whether this buffer is empty or not.
279    #[inline]
280    pub const fn is_empty(&self) -> bool {
281        self.len == 0
282    }
283
    /// Returns the length of this buffer, i.e. the number of bytes written so far.
    ///
    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len
    }
290
    /// Returns the total capacity of this buffer, in bytes.
    ///
    /// This is the size of the layout the backing memory was allocated with.
    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
    #[inline]
    pub const fn capacity(&self) -> usize {
        self.layout.size()
    }
298
299    /// Clear all existing data from this buffer.
300    pub fn clear(&mut self) {
301        self.len = 0
302    }
303
304    /// Returns the data stored in this buffer as a slice.
305    pub fn as_slice(&self) -> &[u8] {
306        self
307    }
308
309    /// Returns the data stored in this buffer as a mutable slice.
310    pub fn as_slice_mut(&mut self) -> &mut [u8] {
311        self
312    }
313
    /// Returns a raw pointer to the start of this buffer's internal memory.
    ///
    /// This pointer is guaranteed to be aligned along cache-lines.
    /// For a buffer with zero capacity the pointer is dangling and must not be dereferenced.
    #[inline]
    pub const fn as_ptr(&self) -> *const u8 {
        self.data.as_ptr()
    }
320
    /// Returns a mutable raw pointer to the start of this buffer's internal memory.
    ///
    /// This pointer is guaranteed to be aligned along cache-lines.
    /// For a buffer with zero capacity the pointer is dangling and must not be dereferenced.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.data.as_ptr()
    }
327
328    #[inline]
329    pub(super) fn into_buffer(self) -> Buffer {
330        let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
331        std::mem::forget(self);
332        Buffer::from(bytes)
333    }
334
335    /// View this buffer as a mutable slice of a specific type.
336    ///
337    /// # Panics
338    ///
339    /// This function panics if the underlying buffer is not aligned
340    /// correctly for type `T`.
341    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
342        // SAFETY
343        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
344        // implementation outside this crate, and this method checks alignment
345        let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
346        assert!(prefix.is_empty() && suffix.is_empty());
347        offsets
348    }
349
350    /// View buffer as a immutable slice of a specific type.
351    ///
352    /// # Panics
353    ///
354    /// This function panics if the underlying buffer is not aligned
355    /// correctly for type `T`.
356    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
357        // SAFETY
358        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
359        // implementation outside this crate, and this method checks alignment
360        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
361        assert!(prefix.is_empty() && suffix.is_empty());
362        offsets
363    }
364
365    /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
366    /// # Example
367    /// ```
368    /// # use arrow_buffer::buffer::MutableBuffer;
369    /// let mut buffer = MutableBuffer::new(0);
370    /// buffer.extend_from_slice(&[2u32, 0]);
371    /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
372    /// ```
373    #[inline]
374    pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
375        let additional = mem::size_of_val(items);
376        self.reserve(additional);
377        unsafe {
378            // this assumes that `[ToByteSlice]` can be copied directly
379            // without calling `to_byte_slice` for each element,
380            // which is correct for all ArrowNativeType implementations.
381            let src = items.as_ptr() as *const u8;
382            let dst = self.data.as_ptr().add(self.len);
383            std::ptr::copy_nonoverlapping(src, dst, additional)
384        }
385        self.len += additional;
386    }
387
388    /// Extends the buffer with a new item, increasing its capacity if needed.
389    /// # Example
390    /// ```
391    /// # use arrow_buffer::buffer::MutableBuffer;
392    /// let mut buffer = MutableBuffer::new(0);
393    /// buffer.push(256u32);
394    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
395    /// ```
396    #[inline]
397    pub fn push<T: ToByteSlice>(&mut self, item: T) {
398        let additional = std::mem::size_of::<T>();
399        self.reserve(additional);
400        unsafe {
401            let src = item.to_byte_slice().as_ptr();
402            let dst = self.data.as_ptr().add(self.len);
403            std::ptr::copy_nonoverlapping(src, dst, additional);
404        }
405        self.len += additional;
406    }
407
408    /// Extends the buffer with a new item, without checking for sufficient capacity
409    /// # Safety
410    /// Caller must ensure that the capacity()-len()>=`size_of<T>`()
411    #[inline]
412    pub unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
413        let additional = std::mem::size_of::<T>();
414        let src = item.to_byte_slice().as_ptr();
415        let dst = self.data.as_ptr().add(self.len);
416        std::ptr::copy_nonoverlapping(src, dst, additional);
417        self.len += additional;
418    }
419
420    /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
421    #[inline]
422    pub fn extend_zeros(&mut self, additional: usize) {
423        self.resize(self.len + additional, 0);
424    }
425
    /// Sets the length of this buffer to `len` without writing to or reallocating the memory.
    ///
    /// # Panics
    /// Panics if `len` is greater than the buffer's capacity.
    ///
    /// # Safety
    /// The caller must ensure that the buffer was properly initialized up to `len`.
    #[inline]
    pub unsafe fn set_len(&mut self, len: usize) {
        // The capacity bound is enforced here; initialization is the caller's obligation.
        assert!(len <= self.capacity());
        self.len = len;
    }
433
434    /// Invokes `f` with values `0..len` collecting the boolean results into a new `MutableBuffer`
435    ///
436    /// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster
437    /// as it eliminates the conditional `Iterator::next`
438    #[inline]
439    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
440        let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
441
442        let chunks = len / 64;
443        let remainder = len % 64;
444        for chunk in 0..chunks {
445            let mut packed = 0;
446            for bit_idx in 0..64 {
447                let i = bit_idx + chunk * 64;
448                packed |= (f(i) as u64) << bit_idx;
449            }
450
451            // SAFETY: Already allocated sufficient capacity
452            unsafe { buffer.push_unchecked(packed) }
453        }
454
455        if remainder != 0 {
456            let mut packed = 0;
457            for bit_idx in 0..remainder {
458                let i = bit_idx + chunks * 64;
459                packed |= (f(i) as u64) << bit_idx;
460            }
461
462            // SAFETY: Already allocated sufficient capacity
463            unsafe { buffer.push_unchecked(packed) }
464        }
465
466        buffer.truncate(bit_util::ceil(len, 8));
467        buffer
468    }
469}
470
471/// Creates a non-null pointer with alignment of [`ALIGNMENT`]
472///
473/// This is similar to [`NonNull::dangling`]
474#[inline]
475pub(crate) fn dangling_ptr() -> NonNull<u8> {
476    // SAFETY: ALIGNMENT is a non-zero usize which is then cast
477    // to a *mut u8. Therefore, `ptr` is not null and the conditions for
478    // calling new_unchecked() are respected.
479    #[cfg(miri)]
480    {
481        // Since miri implies a nightly rust version we can use the unstable strict_provenance feature
482        unsafe { NonNull::new_unchecked(std::ptr::without_provenance_mut(ALIGNMENT)) }
483    }
484    #[cfg(not(miri))]
485    {
486        unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
487    }
488}
489
490impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
491    #[inline]
492    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
493        let iterator = iter.into_iter();
494        self.extend_from_iter(iterator)
495    }
496}
497
498impl<T: ArrowNativeType> From<Vec<T>> for MutableBuffer {
499    fn from(value: Vec<T>) -> Self {
500        // Safety
501        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
502        let data = unsafe { NonNull::new_unchecked(value.as_ptr() as _) };
503        let len = value.len() * mem::size_of::<T>();
504        // Safety
505        // Vec guaranteed to have a valid layout matching that of `Layout::array`
506        // This is based on `RawVec::current_memory`
507        let layout = unsafe { Layout::array::<T>(value.capacity()).unwrap_unchecked() };
508        mem::forget(value);
509        Self { data, len, layout }
510    }
511}
512
513impl MutableBuffer {
514    #[inline]
515    pub(super) fn extend_from_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
516        &mut self,
517        mut iterator: I,
518    ) {
519        let item_size = std::mem::size_of::<T>();
520        let (lower, _) = iterator.size_hint();
521        let additional = lower * item_size;
522        self.reserve(additional);
523
524        // this is necessary because of https://github.com/rust-lang/rust/issues/32155
525        let mut len = SetLenOnDrop::new(&mut self.len);
526        let mut dst = unsafe { self.data.as_ptr().add(len.local_len) };
527        let capacity = self.layout.size();
528
529        while len.local_len + item_size <= capacity {
530            if let Some(item) = iterator.next() {
531                unsafe {
532                    let src = item.to_byte_slice().as_ptr();
533                    std::ptr::copy_nonoverlapping(src, dst, item_size);
534                    dst = dst.add(item_size);
535                }
536                len.local_len += item_size;
537            } else {
538                break;
539            }
540        }
541        drop(len);
542
543        iterator.for_each(|item| self.push(item));
544    }
545
546    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
547    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
548    /// # Example
549    /// ```
550    /// # use arrow_buffer::buffer::MutableBuffer;
551    /// let v = vec![1u32];
552    /// let iter = v.iter().map(|x| x * 2);
553    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
554    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
555    /// ```
556    /// # Safety
557    /// This method assumes that the iterator's size is correct and is undefined behavior
558    /// to use it on an iterator that reports an incorrect length.
559    // This implementation is required for two reasons:
560    // 1. there is no trait `TrustedLen` in stable rust and therefore
561    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
562    // 2. `from_trusted_len_iter` is faster.
563    #[inline]
564    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
565        iterator: I,
566    ) -> Self {
567        let item_size = std::mem::size_of::<T>();
568        let (_, upper) = iterator.size_hint();
569        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
570        let len = upper * item_size;
571
572        let mut buffer = MutableBuffer::new(len);
573
574        let mut dst = buffer.data.as_ptr();
575        for item in iterator {
576            // note how there is no reserve here (compared with `extend_from_iter`)
577            let src = item.to_byte_slice().as_ptr();
578            std::ptr::copy_nonoverlapping(src, dst, item_size);
579            dst = dst.add(item_size);
580        }
581        assert_eq!(
582            dst.offset_from(buffer.data.as_ptr()) as usize,
583            len,
584            "Trusted iterator length was not accurately reported"
585        );
586        buffer.len = len;
587        buffer
588    }
589
590    /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
591    /// # use arrow_buffer::buffer::MutableBuffer;
592    /// # Example
593    /// ```
594    /// # use arrow_buffer::buffer::MutableBuffer;
595    /// let v = vec![false, true, false];
596    /// let iter = v.iter().map(|x| *x || true);
597    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
598    /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
599    /// ```
600    /// # Safety
601    /// This method assumes that the iterator's size is correct and is undefined behavior
602    /// to use it on an iterator that reports an incorrect length.
603    // This implementation is required for two reasons:
604    // 1. there is no trait `TrustedLen` in stable rust and therefore
605    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
606    // 2. `from_trusted_len_iter_bool` is faster.
607    #[inline]
608    pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
609        let (_, upper) = iterator.size_hint();
610        let len = upper.expect("from_trusted_len_iter requires an upper limit");
611
612        Self::collect_bool(len, |_| iterator.next().unwrap())
613    }
614
615    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
616    /// if any of the items of the iterator is an error.
617    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
618    /// # Safety
619    /// This method assumes that the iterator's size is correct and is undefined behavior
620    /// to use it on an iterator that reports an incorrect length.
621    #[inline]
622    pub unsafe fn try_from_trusted_len_iter<
623        E,
624        T: ArrowNativeType,
625        I: Iterator<Item = Result<T, E>>,
626    >(
627        iterator: I,
628    ) -> Result<Self, E> {
629        let item_size = std::mem::size_of::<T>();
630        let (_, upper) = iterator.size_hint();
631        let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
632        let len = upper * item_size;
633
634        let mut buffer = MutableBuffer::new(len);
635
636        let mut dst = buffer.data.as_ptr();
637        for item in iterator {
638            let item = item?;
639            // note how there is no reserve here (compared with `extend_from_iter`)
640            let src = item.to_byte_slice().as_ptr();
641            std::ptr::copy_nonoverlapping(src, dst, item_size);
642            dst = dst.add(item_size);
643        }
644        // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less
645        // generic method to reduce compile time
646        unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) {
647            assert_eq!(
648                dst.offset_from(buffer.data.as_ptr()) as usize,
649                len,
650                "Trusted iterator length was not accurately reported"
651            );
652            buffer.len = len;
653        }
654        finalize_buffer(dst, &mut buffer, len);
655        Ok(buffer)
656    }
657}
658
659impl Default for MutableBuffer {
660    fn default() -> Self {
661        Self::with_capacity(0)
662    }
663}
664
665impl std::ops::Deref for MutableBuffer {
666    type Target = [u8];
667
668    fn deref(&self) -> &[u8] {
669        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) }
670    }
671}
672
673impl std::ops::DerefMut for MutableBuffer {
674    fn deref_mut(&mut self) -> &mut [u8] {
675        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
676    }
677}
678
679impl Drop for MutableBuffer {
680    fn drop(&mut self) {
681        if self.layout.size() != 0 {
682            // Safety: data was allocated with standard allocator with given layout
683            unsafe { std::alloc::dealloc(self.data.as_ptr() as _, self.layout) };
684        }
685    }
686}
687
688impl PartialEq for MutableBuffer {
689    fn eq(&self, other: &MutableBuffer) -> bool {
690        if self.len != other.len {
691            return false;
692        }
693        if self.layout != other.layout {
694            return false;
695        }
696        self.as_slice() == other.as_slice()
697    }
698}
699
// `NonNull<u8>` is neither `Send` nor `Sync`, so these have to be asserted manually.
// NOTE(review): presumably sound because the buffer is the sole owner of its allocation
// and every mutation goes through `&mut self` - confirm before relying on it.
unsafe impl Sync for MutableBuffer {}
unsafe impl Send for MutableBuffer {}
702
/// Guard that tracks a length in a local field and stores it back into the borrowed
/// `usize` when the guard is dropped.
struct SetLenOnDrop<'a> {
    /// Destination that receives `local_len` on drop.
    len: &'a mut usize,
    /// Working copy of the length, updated freely while the guard is alive.
    local_len: usize,
}

impl<'a> SetLenOnDrop<'a> {
    /// Starts tracking from the value currently stored in `len`.
    #[inline]
    fn new(len: &'a mut usize) -> Self {
        let local_len = *len;
        Self { len, local_len }
    }
}

impl Drop for SetLenOnDrop<'_> {
    /// Commits the locally tracked length to the borrowed destination.
    #[inline]
    fn drop(&mut self) {
        let committed = self.local_len;
        *self.len = committed;
    }
}
724
725/// Creating a `MutableBuffer` instance by setting bits according to the boolean values
726impl std::iter::FromIterator<bool> for MutableBuffer {
727    fn from_iter<I>(iter: I) -> Self
728    where
729        I: IntoIterator<Item = bool>,
730    {
731        let mut iterator = iter.into_iter();
732        let mut result = {
733            let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
734            MutableBuffer::new(byte_capacity)
735        };
736
737        loop {
738            let mut exhausted = false;
739            let mut byte_accum: u8 = 0;
740            let mut mask: u8 = 1;
741
742            //collect (up to) 8 bits into a byte
743            while mask != 0 {
744                if let Some(value) = iterator.next() {
745                    byte_accum |= match value {
746                        true => mask,
747                        false => 0,
748                    };
749                    mask <<= 1;
750                } else {
751                    exhausted = true;
752                    break;
753                }
754            }
755
756            // break if the iterator was exhausted before it provided a bool for this byte
757            if exhausted && mask == 1 {
758                break;
759            }
760
761            //ensure we have capacity to write the byte
762            if result.len() == result.capacity() {
763                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
764                let additional_byte_capacity = 1usize.saturating_add(
765                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
766                );
767                result.reserve(additional_byte_capacity)
768            }
769
770            // Soundness: capacity was allocated above
771            unsafe { result.push_unchecked(byte_accum) };
772            if exhausted {
773                break;
774            }
775        }
776        result
777    }
778}
779
780impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
781    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
782        let mut buffer = Self::default();
783        buffer.extend_from_iter(iter.into_iter());
784        buffer
785    }
786}
787
/// Unit tests for [`MutableBuffer`]: construction, appending items and slices,
/// capacity management, conversion into [`Buffer`], equality and null-bit handling.
#[cfg(test)]
mod tests {
    use super::*;

    /// A request for 63 bytes yields a capacity of 64 and an empty buffer.
    #[test]
    fn test_mutable_new() {
        let buf = MutableBuffer::new(63);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());
        assert!(buf.is_empty());
    }

    /// A default buffer has zero capacity and length, yet can still be appended to.
    #[test]
    fn test_mutable_default() {
        let buf = MutableBuffer::default();
        assert_eq!(0, buf.capacity());
        assert_eq!(0, buf.len());
        assert!(buf.is_empty());

        let mut buf = MutableBuffer::default();
        buf.extend_from_slice(b"hello");
        assert_eq!(5, buf.len());
        assert_eq!(b"hello", buf.as_slice());
    }

    /// `extend_from_slice` appends bytes, and `clear` resets the length so the
    /// buffer can be refilled from the start.
    #[test]
    fn test_mutable_extend_from_slice() {
        let mut buf = MutableBuffer::new(100);
        buf.extend_from_slice(b"hello");
        assert_eq!(5, buf.len());
        assert_eq!(b"hello", buf.as_slice());

        buf.extend_from_slice(b" world");
        assert_eq!(11, buf.len());
        assert_eq!(b"hello world", buf.as_slice());

        buf.clear();
        assert_eq!(0, buf.len());
        buf.extend_from_slice(b"hello arrow");
        assert_eq!(11, buf.len());
        assert_eq!(b"hello arrow", buf.as_slice());
    }

    /// `extend` appends each `u32` as four bytes; the expected byte patterns below
    /// are little-endian.
    #[test]
    fn mutable_extend_from_iter() {
        let mut buf = MutableBuffer::new(0);
        buf.extend(vec![1u32, 2]);
        assert_eq!(8, buf.len());
        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());

        buf.extend(vec![3u32, 4]);
        assert_eq!(16, buf.len());
        assert_eq!(
            &[1u8, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
            buf.as_slice()
        );
    }

    /// `extend` with a `u64` works when the write position (offset 1) is not
    /// a multiple of eight.
    #[test]
    fn mutable_extend_from_iter_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.push(1_u8);
        buf.extend([1_u64]);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    /// `extend_from_slice` with a `u64` works when the write position (offset 1)
    /// is not a multiple of eight.
    #[test]
    fn mutable_extend_from_slice_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.extend_from_slice(&[1_u8]);
        buf.extend_from_slice(&[1_u64]);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    /// `push` with a `u64` works when the write position (offset 1) is not
    /// a multiple of eight.
    #[test]
    fn mutable_push_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.push(1_u8);
        buf.push(1_u64);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    /// `push_unchecked` with a `u64` works when the write position (offset 1)
    /// is not a multiple of eight.
    #[test]
    fn mutable_push_unchecked_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        // SAFETY: 1 + 8 = 9 bytes are written in total, which fits within the
        // 16 bytes of capacity requested from `new` above.
        unsafe {
            buf.push_unchecked(1_u8);
            buf.push_unchecked(1_u64);
        }
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    /// `from_trusted_len_iter` copies every item of an exactly-sized iterator.
    #[test]
    fn test_from_trusted_len_iter() {
        let iter = vec![1u32, 2].into_iter();
        // SAFETY: `std::vec::IntoIter` is an exact-size iterator, so the upper bound
        // it reports is accurate (the length contract implied by the name
        // `from_trusted_len_iter`).
        let buf = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
        assert_eq!(8, buf.len());
        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
    }

    /// `reserve` leaves capacity untouched when there is already enough room and
    /// otherwise grows it (64 -> 128 -> 256 in this sequence).
    #[test]
    fn test_mutable_reserve() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());

        // Reserving a smaller capacity should have no effect.
        buf.reserve(10);
        assert_eq!(64, buf.capacity());

        buf.reserve(80);
        assert_eq!(128, buf.capacity());

        buf.reserve(129);
        assert_eq!(256, buf.capacity());
    }

    /// `resize` changes the length in both directions; capacity grows when the new
    /// length does not fit and is not reduced when the length shrinks.
    #[test]
    fn test_mutable_resize() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());

        buf.resize(20, 0);
        assert_eq!(64, buf.capacity());
        assert_eq!(20, buf.len());

        buf.resize(10, 0);
        assert_eq!(64, buf.capacity());
        assert_eq!(10, buf.len());

        buf.resize(100, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(100, buf.len());

        buf.resize(30, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(30, buf.len());

        buf.resize(0, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(0, buf.len());
    }

    /// Converting into an immutable [`Buffer`] keeps length, capacity and contents.
    #[test]
    fn test_mutable_into() {
        let mut buf = MutableBuffer::new(1);
        buf.extend_from_slice(b"aaaa bbbb cccc dddd");
        assert_eq!(19, buf.len());
        assert_eq!(64, buf.capacity());
        assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice());

        let immutable_buf: Buffer = buf.into();
        assert_eq!(19, immutable_buf.len());
        assert_eq!(64, immutable_buf.capacity());
        assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice());
    }

    /// Buffers with different contents compare unequal, and identical contents
    /// compare equal. The final check shows that a `reserve` on one side alone is
    /// enough to make them unequal again (presumably because the allocation size
    /// takes part in the comparison).
    #[test]
    fn test_mutable_equal() {
        let mut buf = MutableBuffer::new(1);
        let mut buf2 = MutableBuffer::new(1);

        buf.extend_from_slice(&[0xaa]);
        buf2.extend_from_slice(&[0xaa, 0xbb]);
        // `assert_ne!` prints both operands on failure, unlike `assert!(a != b)`.
        assert_ne!(buf, buf2);

        buf.extend_from_slice(&[0xbb]);
        assert_eq!(buf, buf2);

        buf2.reserve(65);
        assert_ne!(buf, buf2);
    }

    /// `shrink_to_fit` reduces a 128-byte capacity to something in `64..128` once
    /// only two small values are stored.
    #[test]
    fn test_mutable_shrink_to_fit() {
        let mut buffer = MutableBuffer::new(128);
        assert_eq!(buffer.capacity(), 128);
        buffer.push(1);
        buffer.push(2);

        buffer.shrink_to_fit();
        assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
    }

    /// `set_null_bits` with a zero count is a no-op at every offset up to and
    /// including the capacity; a non-zero count zeroes exactly that byte range.
    #[test]
    fn test_mutable_set_null_bits() {
        let mut buffer = MutableBuffer::new(8).with_bitset(8, true);

        for i in 0..=buffer.capacity() {
            buffer.set_null_bits(i, 0);
            assert_eq!(buffer[..8], [255; 8][..]);
        }

        buffer.set_null_bits(1, 4);
        assert_eq!(buffer[..8], [255, 0, 0, 0, 0, 255, 255, 255][..]);
    }

    /// A range that runs past the end of the buffer must panic rather than write
    /// out of bounds.
    #[test]
    #[should_panic = "out of bounds for buffer of length"]
    fn test_mutable_set_null_bits_oob() {
        let mut buffer = MutableBuffer::new(64);
        buffer.set_null_bits(1, buffer.capacity());
    }

    /// A count of `usize::MAX` (so `start + count` would overflow) must hit the
    /// same bounds panic.
    #[test]
    #[should_panic = "out of bounds for buffer of length"]
    fn test_mutable_set_null_bits_oob_by_overflow() {
        let mut buffer = MutableBuffer::new(0);
        buffer.set_null_bits(1, usize::MAX);
    }

    /// Collecting an iterator of `u16` values produces two bytes per item.
    #[test]
    fn from_iter() {
        let buffer = [1u16, 2, 3, 4].into_iter().collect::<MutableBuffer>();
        assert_eq!(buffer.len(), 4 * mem::size_of::<u16>());
        assert_eq!(buffer.as_slice(), &[1, 0, 2, 0, 3, 0, 4, 0]);
    }

    /// Requesting one byte more than the largest `ALIGNMENT`-multiple not exceeding
    /// `isize::MAX` must panic with a layout error.
    #[test]
    #[should_panic(expected = "failed to create layout for MutableBuffer: LayoutError")]
    fn test_with_capacity_panics_above_max_capacity() {
        let max_capacity = isize::MAX as usize - (isize::MAX as usize % ALIGNMENT);
        let _ = MutableBuffer::with_capacity(max_capacity + 1);
    }
}