// arrow_buffer/buffer/scalar.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::alloc::Deallocation;
19use crate::buffer::Buffer;
20use crate::native::ArrowNativeType;
21use crate::{BufferBuilder, MutableBuffer, OffsetBuffer};
22use std::fmt::Formatter;
23use std::marker::PhantomData;
24use std::ops::Deref;
25
/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
///
/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a `Arc<Vec<T>>`,
/// with the following differences:
///
/// - slicing and cloning is O(1).
/// - it supports external allocated memory
///
/// ```
/// # use arrow_buffer::ScalarBuffer;
/// // Zero-copy conversion from Vec
/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
/// assert_eq!(&buffer, &[1, 2, 3]);
///
/// // Zero-copy slicing
/// let sliced = buffer.slice(1, 2);
/// assert_eq!(&sliced, &[2, 3]);
/// ```
#[derive(Clone)]
pub struct ScalarBuffer<T: ArrowNativeType> {
    /// Underlying data buffer; alignment for `T` is validated in `From<Buffer>`
    buffer: Buffer,
    // Zero-sized marker binding the element type `T` to this buffer
    phantom: PhantomData<T>,
}
50
51impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> {
52    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
53        f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish()
54    }
55}
56
57impl<T: ArrowNativeType> ScalarBuffer<T> {
58    /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
59    /// and `length` in units of `T`
60    ///
61    /// # Panics
62    ///
63    /// This method will panic if
64    ///
65    /// * `offset` or `len` would result in overflow
66    /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>`
67    /// * `bytes` is not large enough for the requested slice
68    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
69        let size = std::mem::size_of::<T>();
70        let byte_offset = offset.checked_mul(size).expect("offset overflow");
71        let byte_len = len.checked_mul(size).expect("length overflow");
72        buffer.slice_with_length(byte_offset, byte_len).into()
73    }
74
75    /// Free up unused memory.
76    pub fn shrink_to_fit(&mut self) {
77        self.buffer.shrink_to_fit();
78    }
79
80    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
81    pub fn slice(&self, offset: usize, len: usize) -> Self {
82        Self::new(self.buffer.clone(), offset, len)
83    }
84
85    /// Returns the inner [`Buffer`]
86    pub fn inner(&self) -> &Buffer {
87        &self.buffer
88    }
89
90    /// Returns the inner [`Buffer`], consuming self
91    pub fn into_inner(self) -> Buffer {
92        self.buffer
93    }
94
95    /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons
96    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
97    /// return false when the arrays are logically equal
98    #[inline]
99    pub fn ptr_eq(&self, other: &Self) -> bool {
100        self.buffer.ptr_eq(&other.buffer)
101    }
102}
103
104impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
105    type Target = [T];
106
107    #[inline]
108    fn deref(&self) -> &Self::Target {
109        // SAFETY: Verified alignment in From<Buffer>
110        unsafe {
111            std::slice::from_raw_parts(
112                self.buffer.as_ptr() as *const T,
113                self.buffer.len() / std::mem::size_of::<T>(),
114            )
115        }
116    }
117}
118
119impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
120    #[inline]
121    fn as_ref(&self) -> &[T] {
122        self
123    }
124}
125
126impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> {
127    fn from(value: MutableBuffer) -> Self {
128        Buffer::from(value).into()
129    }
130}
131
132impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
133    fn from(buffer: Buffer) -> Self {
134        let align = std::mem::align_of::<T>();
135        let is_aligned = buffer.as_ptr().align_offset(align) == 0;
136
137        match buffer.deallocation() {
138            Deallocation::Standard(_) => assert!(
139                is_aligned,
140                "Memory pointer is not aligned with the specified scalar type"
141            ),
142            Deallocation::Custom(_, _) =>
143                assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
144        }
145
146        Self {
147            buffer,
148            phantom: Default::default(),
149        }
150    }
151}
152
153impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> {
154    fn from(value: OffsetBuffer<T>) -> Self {
155        value.into_inner()
156    }
157}
158
159impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
160    fn from(value: Vec<T>) -> Self {
161        Self {
162            buffer: Buffer::from_vec(value),
163            phantom: Default::default(),
164        }
165    }
166}
167
168impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
169    fn from(value: ScalarBuffer<T>) -> Self {
170        value
171            .buffer
172            .into_vec()
173            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
174    }
175}
176
177impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
178    fn from(mut value: BufferBuilder<T>) -> Self {
179        let len = value.len();
180        Self::new(value.finish(), 0, len)
181    }
182}
183
184impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> {
185    #[inline]
186    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
187        iter.into_iter().collect::<Vec<_>>().into()
188    }
189}
190
191impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> {
192    type Item = &'a T;
193    type IntoIter = std::slice::Iter<'a, T>;
194
195    fn into_iter(self) -> Self::IntoIter {
196        self.as_ref().iter()
197    }
198}
199
200impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> {
201    fn eq(&self, other: &S) -> bool {
202        self.as_ref().eq(other.as_ref())
203    }
204}
205
206impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] {
207    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
208        self.as_ref().eq(other.as_ref())
209    }
210}
211
212impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] {
213    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
214        self.as_ref().eq(other.as_ref())
215    }
216}
217
218impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
219    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
220        self.as_slice().eq(other.as_ref())
221    }
222}
223
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    // Construction and zero-copy slicing at various offsets and lengths,
    // including empty slices at (and up to) the end of the buffer.
    #[test]
    fn test_basic() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*typed, expected);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*typed, expected[1..]);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0);
        assert!(typed.is_empty());

        // offset == buffer length is valid for a zero-length slice
        let typed = ScalarBuffer::<i32>::new(buffer, 3, 0);
        assert!(typed.is_empty());
    }

    // Debug output should render as a tuple struct wrapping the slice.
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        assert_eq!(format!("{buffer:?}"), "ScalarBuffer([1, 2, 3])");
    }

    // A byte-offset slice of an i32 buffer is misaligned and must be rejected
    // by the alignment check in From<Buffer>.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let buffer = buffer.slice(1);
        ScalarBuffer::<i32>::new(buffer, 0, 2);
    }

    // offset + len past the end of the buffer must panic (bounds check).
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 1, 3);
    }

    // offset alone past the end of the buffer must panic, even with len == 0.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 4, 0);
    }

    // offset * size_of::<i32>() overflows usize -> "offset overflow" panic.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX, 1);
    }

    // Smallest offset whose byte conversion overflows (usize::MAX / 4 + 1).
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX / 4 + 1, 0);
    }

    // len * size_of::<i32>() overflows usize -> "length overflow" panic.
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 0, usize::MAX / 4 + 1);
    }

    // From<BufferBuilder> preserves the builder's contents.
    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let buffer_builder = BufferBuilder::from(input.clone());
        let scalar_buffer = ScalarBuffer::from(buffer_builder);
        assert_eq!(scalar_buffer.as_ref(), input);
    }

    // Vec<T>::from(ScalarBuffer<T>) is zero-copy only when the buffer is
    // uniquely owned, standard-allocated, and unsliced; otherwise it copies.
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // No copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), input.as_slice());
        // Same pointer proves the allocation was reused, not copied
        assert_eq!(vec.as_ptr(), input_ptr);

        // Custom allocation - makes a copy
        let mut input_clone = input.clone();
        let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap();
        let dealloc = Arc::new(());
        let buffer =
            unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) };
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input_ptr.as_ptr());

        // Offset - makes a copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), &input[1..]);
        assert_ne!(vec.as_ptr(), input_ptr);

        // Inner buffer Arc ref count != 0 - makes a copy
        let buffer = Buffer::from_slice_ref(input.as_slice());
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input.as_ptr());
    }
}