arrow_buffer/buffer/
immutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::alloc::{Allocation, Deallocation};
24use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
25use crate::BufferBuilder;
26use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28use super::ops::bitwise_unary_op_helper;
29use super::{MutableBuffer, ScalarBuffer};
30
31/// A contiguous memory region that can be shared with other buffers and across
32/// thread boundaries that stores Arrow data.
33///
34/// `Buffer`s can be sliced and cloned without copying the underlying data and can
35/// be created from memory allocated by non-Rust sources such as C/C++.
36///
37/// # Example: Create a `Buffer` from a `Vec` (without copying)
38/// ```
39/// # use arrow_buffer::Buffer;
40/// let vec: Vec<u32> = vec![1, 2, 3];
41/// let buffer = Buffer::from(vec);
42/// ```
43///
44/// # Example: Convert a `Buffer` to a `Vec` (without copying)
45///
46/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
47/// no other references and the types are aligned correctly.
48/// ```
49/// # use arrow_buffer::Buffer;
50/// # let vec: Vec<u32> = vec![1, 2, 3];
51/// # let buffer = Buffer::from(vec);
52/// // convert the buffer back into a Vec of u32
53/// // note this will fail if the buffer is shared or not aligned correctly
54/// let vec: Vec<u32> = buffer.into_vec().unwrap();
55/// ```
56///
57/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
58///
59/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
60/// regions. You can create a buffer from a `Bytes` instance using the `From`
61/// implementation, also without copying.
62///
63/// ```
64/// # use arrow_buffer::Buffer;
65/// let bytes = bytes::Bytes::from("hello");
66/// let buffer = Buffer::from(bytes);
67///```
#[derive(Clone, Debug)]
pub struct Buffer {
    /// The internal byte buffer; shared (and reference counted) between
    /// clones and slices of this buffer.
    data: Arc<Bytes>,

    /// Pointer into `data`, valid for `length` bytes.
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
84
85impl PartialEq for Buffer {
86    fn eq(&self, other: &Self) -> bool {
87        self.as_slice().eq(other.as_slice())
88    }
89}
90
impl Eq for Buffer {}

// SAFETY: `ptr` only ever points into the allocation owned by `data`
// (an `Arc<Bytes>`), so `Buffer` is as thread-safe as `Bytes` itself —
// hence the `where` bounds tying these impls to `Bytes`.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
95
impl Buffer {
    /// Create a new Buffer from a (internal) `Bytes`
    ///
    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
    /// and is different than [`bytes::Bytes`].
    ///
    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
    pub fn from_bytes(bytes: Bytes) -> Self {
        Self::from(bytes)
    }

    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
    ///
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
    pub fn ptr_offset(&self) -> usize {
        // Safety: `ptr` is always in bounds of `data`.
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
    }

    /// Returns the pointer to the start of the buffer without the offset.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }

    /// Create a [`Buffer`] from the provided [`Vec`] without copying
    #[inline]
    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
        MutableBuffer::from(vec).into()
    }

    /// Initializes a [Buffer] from a slice of items.
    ///
    /// This copies the slice: the exact byte size is reserved up front and
    /// then filled from `items`.
    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
        let slice = items.as_ref();
        let capacity = std::mem::size_of_val(slice);
        let mut buffer = MutableBuffer::with_capacity(capacity);
        buffer.extend_from_slice(slice);
        buffer.into()
    }

    /// Creates a buffer from an existing memory region.
    ///
    /// Ownership of the memory is tracked via reference counting
    /// and the memory will be freed using the `drop` method of
    /// [crate::alloc::Allocation] when the reference count reaches zero.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
    pub unsafe fn from_custom_allocation(
        ptr: NonNull<u8>,
        len: usize,
        owner: Arc<dyn Allocation>,
    ) -> Self {
        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
    }

    /// Auxiliary method to create a new Buffer
    ///
    /// # Safety
    ///
    /// `ptr` must be valid for `len` bytes and consistent with `deallocation`
    /// (see [`Self::from_custom_allocation`]).
    unsafe fn build_with_arguments(
        ptr: NonNull<u8>,
        len: usize,
        deallocation: Deallocation,
    ) -> Self {
        let bytes = Bytes::new(ptr, len, deallocation);
        let ptr = bytes.as_ptr();
        Buffer {
            ptr,
            data: Arc::new(bytes),
            length: len,
        }
    }

    /// Returns the number of bytes in the buffer
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns the capacity of this buffer.
    /// For externally owned buffers, this returns zero
    #[inline]
    pub fn capacity(&self) -> usize {
        self.data.capacity()
    }

    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
    ///
    /// If the buffer is shared, this is a no-op.
    ///
    /// If the memory was allocated with a custom allocator, this is a no-op.
    ///
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
    ///
    /// The memory region will be reallocated using `std::alloc::realloc`.
    pub fn shrink_to_fit(&mut self) {
        let offset = self.ptr_offset();
        let is_empty = self.is_empty();
        let desired_capacity = if is_empty {
            0
        } else {
            // For realloc to work, we cannot free the elements before the offset
            offset + self.len()
        };
        if desired_capacity < self.capacity() {
            // `Arc::get_mut` returns `None` when the buffer is shared,
            // which makes this a no-op as documented
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
                if bytes.try_realloc(desired_capacity).is_ok() {
                    // Realloc complete - update our pointer into `bytes`:
                    self.ptr = if is_empty {
                        bytes.as_ptr()
                    } else {
                        // SAFETY: we kept all elements leading up to the offset
                        unsafe { bytes.as_ptr().add(offset) }
                    }
                } else {
                    // Failure to reallocate is fine; we just failed to free up memory.
                }
            }
        }
    }

    /// Returns true if the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Returns the byte slice stored in this buffer
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `ptr` points into `data` and `length` bytes starting at
        // `ptr` lie within the allocation (struct invariant)
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
    }

    /// Returns the deallocation strategy of the underlying allocation
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the
    /// same memory region to be shared between buffers.
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    pub fn slice(&self, offset: usize) -> Self {
        let mut s = self.clone();
        s.advance(offset);
        s
    }

    /// Increases the offset of this buffer by `offset`
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    #[inline]
    pub fn advance(&mut self, offset: usize) {
        assert!(
            offset <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
            offset,
            self.length
        );
        self.length -= offset;
        // Safety:
        // This cannot go out of bounds as
        // `self.ptr + self.length <= self.data.ptr() + self.data.len()`
        // and `offset <= self.length` was asserted above
        self.ptr = unsafe { self.ptr.add(offset) };
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
    /// with `length` bytes.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the same
    /// memory region to be shared between buffers.
    ///
    /// # Panics
    /// Panics iff `(offset + length)` is larger than the existing length.
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
        // `saturating_add` so `offset + length` cannot wrap around and
        // sneak past the bounds check
        assert!(
            offset.saturating_add(length) <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
            self.length
        );
        // Safety:
        // offset + length <= self.length
        let ptr = unsafe { self.ptr.add(offset) };
        Self {
            data: self.data.clone(),
            ptr,
            length,
        }
    }

    /// Returns a pointer to the start of this buffer.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    /// View buffer as a slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
        // A non-empty prefix/suffix means the buffer is misaligned for `T`
        // or has trailing bytes smaller than `T`
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }

    /// Returns a slice of this buffer starting at a certain bit offset.
    /// If the offset is byte-aligned the returned buffer is a shallow clone,
    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
        if offset % 8 == 0 {
            // Byte aligned: zero-copy slice of ceil(len / 8) bytes
            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
        }

        // Not byte aligned: copy the bit range via an identity bitwise op
        bitwise_unary_op_helper(self, offset, len, |a| a)
    }

    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
    /// in larger chunks and starting at arbitrary bit offsets.
    /// Note that both `offset` and `length` are measured in bits.
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
        BitChunks::new(self.as_slice(), offset, len)
    }

    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
    /// inspected. Note that both `offset` and `length` are measured in bits.
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
    }

    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
    /// Returns `Err` if this is shared or its allocation is from an external source or
    /// it is not allocated with alignment [`ALIGNMENT`]
    ///
    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
        let ptr = self.ptr;
        let length = self.length;
        // `Arc::try_unwrap` returns `Err` when the buffer is shared; either
        // failure path reconstitutes the original `Buffer` for the caller
        Arc::try_unwrap(self.data)
            .and_then(|bytes| {
                // The pointer of underlying buffer should not be offset.
                assert_eq!(ptr, bytes.ptr().as_ptr());
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Converts self into a `Vec`, if possible.
    ///
    /// This can be used to reuse / mutate the underlying data.
    ///
    /// # Errors
    ///
    /// Returns `Err(self)` if
    /// 1. this buffer does not have the same [`Layout`] as the destination Vec
    /// 2. contains a non-zero offset
    /// 3. The buffer is shared
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        let layout = match self.data.deallocation() {
            Deallocation::Standard(l) => l,
            _ => return Err(self), // Custom allocation
        };

        if self.ptr != self.data.as_ptr() {
            return Err(self); // Data is offset
        }

        // The allocation must be exactly the layout `Vec<T>` would have
        // produced for this capacity, otherwise dropping the Vec would
        // deallocate with the wrong layout
        let v_capacity = layout.size() / std::mem::size_of::<T>();
        match Layout::array::<T>(v_capacity) {
            Ok(expected) if layout == &expected => {}
            _ => return Err(self), // Incorrect layout
        }

        let length = self.length;
        let ptr = self.ptr;
        // Note: integer division deliberately truncates a partial trailing element
        let v_len = self.length / std::mem::size_of::<T>();

        Arc::try_unwrap(self.data)
            .map(|bytes| unsafe {
                let ptr = bytes.ptr().as_ptr() as _;
                // The Vec takes over ownership of the allocation, so `bytes`
                // must not run its destructor
                std::mem::forget(bytes);
                // Safety
                // Verified that bytes layout matches that of Vec
                Vec::from_raw_parts(ptr, v_len, v_capacity)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
    /// return false when the arrays are logically equal
    #[inline]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        self.ptr == other.ptr && self.length == other.length
    }
}
418
419/// Note that here we deliberately do not implement
420/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
421/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
422/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
423/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
424///
425/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
426/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
427impl From<&[u8]> for Buffer {
428    fn from(p: &[u8]) -> Self {
429        Self::from_slice_ref(p)
430    }
431}
432
433impl<const N: usize> From<[u8; N]> for Buffer {
434    fn from(p: [u8; N]) -> Self {
435        Self::from_slice_ref(p)
436    }
437}
438
439impl<const N: usize> From<&[u8; N]> for Buffer {
440    fn from(p: &[u8; N]) -> Self {
441        Self::from_slice_ref(p)
442    }
443}
444
445impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
446    fn from(value: Vec<T>) -> Self {
447        Self::from_vec(value)
448    }
449}
450
451impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
452    fn from(value: ScalarBuffer<T>) -> Self {
453        value.into_inner()
454    }
455}
456
457/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
458impl From<Bytes> for Buffer {
459    #[inline]
460    fn from(bytes: Bytes) -> Self {
461        let length = bytes.len();
462        let ptr = bytes.as_ptr();
463        Self {
464            data: Arc::new(bytes),
465            ptr,
466            length,
467        }
468    }
469}
470
471/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
472impl From<bytes::Bytes> for Buffer {
473    fn from(bytes: bytes::Bytes) -> Self {
474        let bytes: Bytes = bytes.into();
475        Self::from(bytes)
476    }
477}
478
479/// Create a `Buffer` instance by storing the boolean values into the buffer
480impl FromIterator<bool> for Buffer {
481    fn from_iter<I>(iter: I) -> Self
482    where
483        I: IntoIterator<Item = bool>,
484    {
485        MutableBuffer::from_iter(iter).into()
486    }
487}
488
489impl std::ops::Deref for Buffer {
490    type Target = [u8];
491
492    fn deref(&self) -> &[u8] {
493        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
494    }
495}
496
497impl From<MutableBuffer> for Buffer {
498    #[inline]
499    fn from(buffer: MutableBuffer) -> Self {
500        buffer.into_buffer()
501    }
502}
503
504impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
505    fn from(mut value: BufferBuilder<T>) -> Self {
506        value.finish()
507    }
508}
509
510impl Buffer {
511    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
512    ///
513    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
514    ///
515    /// # Example
516    /// ```
517    /// # use arrow_buffer::buffer::Buffer;
518    /// let v = vec![1u32];
519    /// let iter = v.iter().map(|x| x * 2);
520    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
521    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
522    /// ```
523    /// # Safety
524    /// This method assumes that the iterator's size is correct and is undefined behavior
525    /// to use it on an iterator that reports an incorrect length.
526    // This implementation is required for two reasons:
527    // 1. there is no trait `TrustedLen` in stable rust and therefore
528    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
529    // 2. `from_trusted_len_iter` is faster.
530    #[inline]
531    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
532        iterator: I,
533    ) -> Self {
534        MutableBuffer::from_trusted_len_iter(iterator).into()
535    }
536
537    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
538    /// if any of the items of the iterator is an error.
539    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
540    /// # Safety
541    /// This method assumes that the iterator's size is correct and is undefined behavior
542    /// to use it on an iterator that reports an incorrect length.
543    #[inline]
544    pub unsafe fn try_from_trusted_len_iter<
545        E,
546        T: ArrowNativeType,
547        I: Iterator<Item = Result<T, E>>,
548    >(
549        iterator: I,
550    ) -> Result<Self, E> {
551        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
552    }
553}
554
555impl<T: ArrowNativeType> FromIterator<T> for Buffer {
556    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
557        let vec = Vec::from_iter(iter);
558        Buffer::from_vec(vec)
559    }
560}
561
562#[cfg(test)]
563mod tests {
564    use crate::i256;
565    use std::panic::{RefUnwindSafe, UnwindSafe};
566    use std::thread;
567
568    use super::*;
569
    #[test]
    fn test_buffer_data_equality() {
        // equality is defined over visible byte contents
        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(buf1, buf2);

        // a slice differs from its source, but two slices with the same
        // logical bytes compare equal regardless of how they were produced
        let buf3 = buf1.slice(2);
        assert_ne!(buf1, buf3);
        let buf4 = buf2.slice_with_length(2, 3);
        assert_eq!(buf3, buf4);

        // Different capacities should still preserve equality
        let mut buf2 = MutableBuffer::new(65);
        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);

        let buf2 = buf2.into();
        assert_eq!(buf1, buf2);

        // unequal because of different elements
        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
        assert_ne!(buf1, buf2);

        // unequal because of different length
        let buf2 = Buffer::from(&[0, 1, 2, 3]);
        assert_ne!(buf1, buf2);
    }
597
    #[test]
    fn test_from_raw_parts() {
        // `From<&[u8]>` copies the bytes into a fresh, non-null allocation
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
    }

    #[test]
    fn test_from_vec() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
    }

    #[test]
    fn test_copy() {
        // moving a Buffer preserves its contents and capacity
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = buf;
        assert_eq!(5, buf2.len());
        assert_eq!(64, buf2.capacity());
        assert!(!buf2.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
    }
623
    #[test]
    fn test_slice() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        let buf2 = buf.slice(2);

        assert_eq!([6, 8, 10], buf2.as_slice());
        assert_eq!(3, buf2.len());
        // slicing advances the pointer into the shared allocation, no copy
        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());

        let buf3 = buf2.slice_with_length(1, 2);
        assert_eq!([8, 10], buf3.as_slice());
        assert_eq!(2, buf3.len());
        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());

        // slicing at exactly the end yields an empty buffer
        let buf4 = buf.slice(5);
        let empty_slice: [u8; 0] = [];
        assert_eq!(empty_slice, buf4.as_slice());
        assert_eq!(0, buf4.len());
        assert!(buf4.is_empty());
        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
    }
645
    #[test]
    fn test_shrink_to_fit() {
        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.capacity(), 64);

        let slice = original.slice_with_length(2, 3);
        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(slice.as_slice(), &[2, 3, 4]);
        assert_eq!(slice.capacity(), 64);

        let mut shrunk = slice;
        shrunk.shrink_to_fit();
        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset

        // Test that we can handle empty slices:
        let empty_slice = shrunk.slice_with_length(1, 0);
        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(empty_slice.as_slice(), &[]);
        assert_eq!(empty_slice.capacity(), 5);

        // an empty buffer shrinks to capacity 0 regardless of offset
        let mut shrunk_empty = empty_slice;
        shrunk_empty.shrink_to_fit();
        assert_eq!(shrunk_empty.as_slice(), &[]);
        assert_eq!(shrunk_empty.capacity(), 0);
    }
673
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_slice_offset_out_of_bound() {
        // offset 6 exceeds length 5, so `advance` must panic
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        buf.slice(6);
    }
680
    #[test]
    fn test_access_concurrently() {
        let buffer = Buffer::from([1, 2, 3, 4, 5]);
        let buffer2 = buffer.clone();
        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());

        // Buffer is Send: move it to another thread and get it back intact
        let buffer_copy = thread::spawn(move || {
            // access buffer in another thread.
            buffer
        })
        .join();

        assert!(buffer_copy.is_ok());
        assert_eq!(buffer2, buffer_copy.ok().unwrap());
    }
696
    // Round-trips a slice through `from_slice_ref` + `typed_data` and asserts
    // the typed view matches the original input.
    macro_rules! check_as_typed_data {
        ($input: expr, $native_t: ty) => {{
            let buffer = Buffer::from_slice_ref($input);
            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
            assert_eq!($input, slice);
        }};
    }

    #[test]
    #[allow(clippy::float_cmp)]
    fn test_as_typed_data() {
        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
    }
719
    #[test]
    fn test_count_bits() {
        // population counts over entire buffers (offset 0, full bit length)
        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
        assert_eq!(
            6,
            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
    }
734
    #[test]
    fn test_count_bits_slice() {
        // population counts observed through byte-sliced buffers: the count
        // only sees bits within the slice, not the dropped prefix bytes
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b00000000])
                .slice(1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111])
                .slice_with_length(1, 1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            3,
            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
                .slice(2)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            6,
            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
                .slice_with_length(1, 2)
                .count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
                .slice(2)
                .count_set_bits_offset(0, 16)
        );
    }
768
    #[test]
    fn test_count_bits_offset_slice() {
        // population counts with non-zero bit offsets and partial lengths,
        // including ranges spanning a byte boundary
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
        );
        assert_eq!(
            5,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
        );
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
        );
        assert_eq!(
            2,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
        );
        assert_eq!(
            4,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
        );
    }
810
    #[test]
    fn test_unwind_safe() {
        // compile-time assertion that Buffer implements the unwind-safety markers
        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
        assert_unwind_safe::<Buffer>()
    }

    #[test]
    fn test_from_foreign_vec() {
        let mut vector = vec![1_i32, 2, 3, 4, 5];
        // The Vec itself acts as the custom Allocation keeping the memory alive
        let buffer = unsafe {
            Buffer::from_custom_allocation(
                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
                vector.len() * std::mem::size_of::<i32>(),
                Arc::new(vector),
            )
        };

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[1, 2, 3, 4, 5]);

        // slicing by one element (4 bytes) keeps the typed view aligned
        let buffer = buffer.slice(std::mem::size_of::<i32>());

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[2, 3, 4, 5]);
    }

    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn slice_overflow() {
        // offset + length uses saturating_add, so this panics rather than wrapping
        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
        buffer.slice_with_length(2, usize::MAX);
    }
843
    #[test]
    fn test_vec_interop() {
        // Exercises `Buffer::from_vec` / `Buffer::into_vec` round trips.
        // `into_vec` should hand back the original allocation (preserving its
        // capacity) when the requested type's layout matches and the buffer is
        // the sole owner; otherwise it returns `Err(buffer)` unchanged.

        // Test empty vec
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        b.into_vec::<i128>().unwrap();

        // Test vec with capacity: spare capacity survives the round trip
        let a: Vec<i128> = Vec::with_capacity(20);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 0);
        assert_eq!(back.capacity(), 20);

        // Test vec with values
        let mut a: Vec<i128> = Vec::with_capacity(3);
        a.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 3);
        assert_eq!(back.capacity(), 3);

        // Test vec with values and spare capacity
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 7);
        assert_eq!(back.capacity(), 20);

        // Test incorrect alignment: an i128 allocation cannot be reinterpreted
        // as a Vec of a type with a different layout
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        let b = b.into_vec::<i32>().unwrap_err();
        b.into_vec::<i8>().unwrap_err();

        // Test convert between types with same alignment
        // This is an implementation quirk, but isn't harmful
        // as ArrowNativeType are trivially transmutable
        let a: Vec<i64> = vec![1, 2, 3, 4];
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<u64>().unwrap();
        assert_eq!(back.len(), 4);
        assert_eq!(back.capacity(), 4);

        // i256 has the same layout as i128 so this is valid
        // (four i128 reinterpret as two i256 — len and capacity halve)
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 2);
        assert_eq!(back.capacity(), 2);

        // Invalid layout: 3 * 16 bytes is not a whole number of i256
        let b: Vec<i128> = vec![1, 2, 3];
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Invalid layout: odd *capacity* (5 i128) cannot be expressed in i256
        // even though the length (4) could
        let mut b: Vec<i128> = Vec::with_capacity(5);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Truncates length
        // This is an implementation quirk, but isn't harmful
        // (3 i128 values become 1 whole i256; capacity 4 becomes 2)
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 1);
        assert_eq!(back.capacity(), 2);

        // Cannot use aligned allocation: a MutableBuffer-backed Buffer was not
        // allocated by Vec and so can never be handed back as one
        let b = Buffer::from(MutableBuffer::new(10));
        let b = b.into_vec::<u8>().unwrap_err();
        b.into_vec::<u64>().unwrap_err();

        // Test slicing: 64 bytes = exactly the first 4 i128 values
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 64);

        // Shared reference fails: `b` still co-owns the allocation
        let slice = slice.into_vec::<i128>().unwrap_err();
        drop(b);

        // Succeeds as no outstanding shared reference
        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4, 7, 8]);
        assert_eq!(back.capacity(), 20);

        // Slicing by non-multiple length truncates
        // (34 bytes contain only 2 complete i128 values)
        let mut a: Vec<i128> = Vec::with_capacity(8);
        a.extend_from_slice(&[1, 4, 7, 3]);

        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 34);
        drop(b);

        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4]);
        assert_eq!(back.capacity(), 8);

        // Offset prevents conversion: a non-zero start offset means the buffer
        // no longer begins at the allocation's start
        let a: Vec<u32> = vec![1, 3, 4, 6];
        let b = Buffer::from_vec(a).slice(2);
        b.into_vec::<u32>().unwrap_err();

        // A non-Vec allocation cannot become a Vec, but can still round-trip
        // back through `into_mutable`
        let b = MutableBuffer::new(16).into_buffer();
        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
        b.into_mutable().unwrap();

        // Vec -> Buffer -> MutableBuffer -> Buffer -> Vec preserves contents
        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
        let b = b.into_mutable().unwrap();
        let b = Buffer::from(b);
        let b = b.into_vec::<u32>().unwrap();
        assert_eq!(b, &[1, 3, 5]);
    }
965
966    #[test]
967    #[should_panic(expected = "capacity overflow")]
968    fn test_from_iter_overflow() {
969        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
970        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
971    }
972
973    #[test]
974    fn bit_slice_length_preserved() {
975        // Create a boring buffer
976        let buf = Buffer::from_iter(std::iter::repeat(true).take(64));
977
978        let assert_preserved = |offset: usize, len: usize| {
979            let new_buf = buf.bit_slice(offset, len);
980            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
981
982            // if the offset is not byte-aligned, we have to create a deep copy to a new buffer
983            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so
984            // checking the offset should always return 0 if so. If the offset IS byte-aligned, we
985            // want to make sure it doesn't unnecessarily create a deep copy.
986            if offset % 8 == 0 {
987                assert_eq!(new_buf.ptr_offset(), offset / 8);
988            } else {
989                assert_eq!(new_buf.ptr_offset(), 0);
990            }
991        };
992
993        // go through every available value for offset
994        for o in 0..=64 {
995            // and go through every length that could accompany that offset - we can't have a
996            // situation where offset + len > 64, because that would go past the end of the buffer,
997            // so we use the map to ensure it's in range.
998            for l in (o..=64).map(|l| l - o) {
999                // and we just want to make sure every one of these keeps its offset and length
1000                // when neeeded
1001                assert_preserved(o, l);
1002            }
1003        }
1004    }
1005}