arrow_data/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19
20use crate::bit_mask::set_bits;
21use crate::{layout, ArrayData};
22use arrow_buffer::buffer::NullBuffer;
23use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
24use arrow_schema::DataType;
25use std::ffi::c_void;
26
/// ABI-compatible mirror of the `ArrowArray` struct of the Arrow C Data Interface.
///
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
///
/// ```
/// # use arrow_data::ArrayData;
/// # use arrow_data::ffi::FFI_ArrowArray;
/// fn export_array(array: &ArrayData) -> FFI_ArrowArray {
///     FFI_ArrowArray::new(array)
/// }
/// ```
#[derive(Debug)]
#[repr(C)]
pub struct FFI_ArrowArray {
    // Field order is part of the C ABI (`repr(C)`); do not reorder.

    // Length of the array.
    length: i64,
    // Null count of the array.
    null_count: i64,
    // Offset of the array.
    offset: i64,
    // Number of entries behind `buffers`.
    n_buffers: i64,
    // Number of entries behind `children`.
    n_children: i64,
    // Table of `n_buffers` buffer pointers; individual entries may be null.
    buffers: *mut *const c_void,
    // Table of `n_children` pointers to child arrays.
    children: *mut *mut FFI_ArrowArray,
    // Dictionary array, or null when there is none.
    dictionary: *mut FFI_ArrowArray,
    // Callback that frees the array; `None` marks an array that is already released.
    release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowArray)>,
    // When exported, this MUST contain everything that is owned by this array:
    // for example, any buffer pointed to in `buffers` must be here, as well
    // as the `buffers` pointer table itself.
    // In other words, everything in [FFI_ArrowArray] must be owned by
    // `private_data` and may assume that it does not outlive `private_data`.
    private_data: *mut c_void,
}
56
57impl Drop for FFI_ArrowArray {
58    fn drop(&mut self) {
59        match self.release {
60            None => (),
61            Some(release) => unsafe { release(self) },
62        };
63    }
64}
65
66unsafe impl Send for FFI_ArrowArray {}
67unsafe impl Sync for FFI_ArrowArray {}
68
69// callback used to drop [FFI_ArrowArray] when it is exported
70unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
71    if array.is_null() {
72        return;
73    }
74    let array = &mut *array;
75
76    // take ownership of `private_data`, therefore dropping it`
77    let private = Box::from_raw(array.private_data as *mut ArrayPrivateData);
78    for child in private.children.iter() {
79        let _ = Box::from_raw(*child);
80    }
81    if !private.dictionary.is_null() {
82        let _ = Box::from_raw(private.dictionary);
83    }
84
85    array.release = None;
86}
87
88/// Aligns the provided `nulls` to the provided `data_offset`
89///
90/// This is a temporary measure until offset is removed from ArrayData (#1799)
91fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option<Buffer> {
92    let nulls = nulls?;
93    if data_offset == nulls.offset() {
94        // Underlying buffer is already aligned
95        return Some(nulls.buffer().clone());
96    }
97    if data_offset == 0 {
98        return Some(nulls.inner().sliced());
99    }
100    let mut builder = MutableBuffer::new_null(data_offset + nulls.len());
101    set_bits(
102        builder.as_slice_mut(),
103        nulls.validity(),
104        data_offset,
105        nulls.offset(),
106        nulls.len(),
107    );
108    Some(builder.into())
109}
110
111struct ArrayPrivateData {
112    #[allow(dead_code)]
113    buffers: Vec<Option<Buffer>>,
114    buffers_ptr: Box<[*const c_void]>,
115    children: Box<[*mut FFI_ArrowArray]>,
116    dictionary: *mut FFI_ArrowArray,
117}
118
119impl FFI_ArrowArray {
120    /// creates a new `FFI_ArrowArray` from existing data.
121    pub fn new(data: &ArrayData) -> Self {
122        let data_layout = layout(data.data_type());
123
124        let mut buffers = if data_layout.can_contain_null_mask {
125            // * insert the null buffer at the start
126            // * make all others `Option<Buffer>`.
127            std::iter::once(align_nulls(data.offset(), data.nulls()))
128                .chain(data.buffers().iter().map(|b| Some(b.clone())))
129                .collect::<Vec<_>>()
130        } else {
131            data.buffers().iter().map(|b| Some(b.clone())).collect()
132        };
133
134        // `n_buffers` is the number of buffers by the spec.
135        let mut n_buffers = {
136            data_layout.buffers.len() + {
137                // If the layout has a null buffer by Arrow spec.
138                // Note that even the array doesn't have a null buffer because it has
139                // no null value, we still need to count 1 here to follow the spec.
140                usize::from(data_layout.can_contain_null_mask)
141            }
142        } as i64;
143
144        if data_layout.variadic {
145            // Save the lengths of all variadic buffers into a new buffer.
146            // The first buffer is `views`, and the rest are variadic.
147            let mut data_buffers_lengths = Vec::new();
148            for buffer in data.buffers().iter().skip(1) {
149                data_buffers_lengths.push(buffer.len() as i64);
150                n_buffers += 1;
151            }
152
153            buffers.push(Some(ScalarBuffer::from(data_buffers_lengths).into_inner()));
154            n_buffers += 1;
155        }
156
157        let buffers_ptr = buffers
158            .iter()
159            .flat_map(|maybe_buffer| match maybe_buffer {
160                Some(b) => Some(b.as_ptr() as *const c_void),
161                // This is for null buffer. We only put a null pointer for
162                // null buffer if by spec it can contain null mask.
163                None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
164                None => None,
165            })
166            .collect::<Box<[_]>>();
167
168        let empty = vec![];
169        let (child_data, dictionary) = match data.data_type() {
170            DataType::Dictionary(_, _) => (
171                empty.as_slice(),
172                Box::into_raw(Box::new(FFI_ArrowArray::new(&data.child_data()[0]))),
173            ),
174            _ => (data.child_data(), std::ptr::null_mut()),
175        };
176
177        let children = child_data
178            .iter()
179            .map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
180            .collect::<Box<_>>();
181        let n_children = children.len() as i64;
182
183        // As in the IPC format, emit null_count = length for Null type
184        let null_count = match data.data_type() {
185            DataType::Null => data.len(),
186            _ => data.null_count(),
187        };
188
189        // create the private data owning everything.
190        // any other data must be added here, e.g. via a struct, to track lifetime.
191        let mut private_data = Box::new(ArrayPrivateData {
192            buffers,
193            buffers_ptr,
194            children,
195            dictionary,
196        });
197
198        Self {
199            length: data.len() as i64,
200            null_count: null_count as i64,
201            offset: data.offset() as i64,
202            n_buffers,
203            n_children,
204            buffers: private_data.buffers_ptr.as_mut_ptr(),
205            children: private_data.children.as_mut_ptr(),
206            dictionary,
207            release: Some(release_array),
208            private_data: Box::into_raw(private_data) as *mut c_void,
209        }
210    }
211
212    /// Takes ownership of the pointed to [`FFI_ArrowArray`]
213    ///
214    /// This acts to [move] the data out of `array`, setting the release callback to NULL
215    ///
216    /// # Safety
217    ///
218    /// * `array` must be [valid] for reads and writes
219    /// * `array` must be properly aligned
220    /// * `array` must point to a properly initialized value of [`FFI_ArrowArray`]
221    ///
222    /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
223    /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
224    pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
225        std::ptr::replace(array, Self::empty())
226    }
227
228    /// create an empty `FFI_ArrowArray`, which can be used to import data into
229    pub fn empty() -> Self {
230        Self {
231            length: 0,
232            null_count: 0,
233            offset: 0,
234            n_buffers: 0,
235            n_children: 0,
236            buffers: std::ptr::null_mut(),
237            children: std::ptr::null_mut(),
238            dictionary: std::ptr::null_mut(),
239            release: None,
240            private_data: std::ptr::null_mut(),
241        }
242    }
243
244    /// the length of the array
245    #[inline]
246    pub fn len(&self) -> usize {
247        self.length as usize
248    }
249
250    /// whether the array is empty
251    #[inline]
252    pub fn is_empty(&self) -> bool {
253        self.length == 0
254    }
255
256    /// Whether the array has been released
257    #[inline]
258    pub fn is_released(&self) -> bool {
259        self.release.is_none()
260    }
261
262    /// the offset of the array
263    #[inline]
264    pub fn offset(&self) -> usize {
265        self.offset as usize
266    }
267
268    /// the null count of the array
269    #[inline]
270    pub fn null_count(&self) -> usize {
271        self.null_count as usize
272    }
273
274    /// Returns the null count, checking for validity
275    #[inline]
276    pub fn null_count_opt(&self) -> Option<usize> {
277        usize::try_from(self.null_count).ok()
278    }
279
280    /// Set the null count of the array
281    ///
282    /// # Safety
283    /// Null count must match that of null buffer
284    #[inline]
285    pub unsafe fn set_null_count(&mut self, null_count: i64) {
286        self.null_count = null_count;
287    }
288
289    /// Returns the buffer at the provided index
290    ///
291    /// # Panic
292    /// Panics if index exceeds the number of buffers or the buffer is not correctly aligned
293    #[inline]
294    pub fn buffer(&self, index: usize) -> *const u8 {
295        assert!(!self.buffers.is_null());
296        assert!(index < self.num_buffers());
297        // SAFETY:
298        // If buffers is not null must be valid for reads up to num_buffers
299        unsafe { std::ptr::read_unaligned((self.buffers as *mut *const u8).add(index)) }
300    }
301
302    /// Returns the number of buffers
303    #[inline]
304    pub fn num_buffers(&self) -> usize {
305        self.n_buffers as _
306    }
307
308    /// Returns the child at the provided index
309    #[inline]
310    pub fn child(&self, index: usize) -> &FFI_ArrowArray {
311        assert!(!self.children.is_null());
312        assert!(index < self.num_children());
313        // Safety:
314        // If children is not null must be valid for reads up to num_children
315        unsafe {
316            let child = std::ptr::read_unaligned(self.children.add(index));
317            child.as_ref().unwrap()
318        }
319    }
320
321    /// Returns the number of children
322    #[inline]
323    pub fn num_children(&self) -> usize {
324        self.n_children as _
325    }
326
327    /// Returns the dictionary if any
328    #[inline]
329    pub fn dictionary(&self) -> Option<&Self> {
330        // Safety:
331        // If dictionary is not null should be valid for reads of `Self`
332        unsafe { self.dictionary.as_ref() }
333    }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    // More tests located in top-level arrow crate

    /// Exporting a `Null` array is expected to report zero buffers and to
    /// expose an empty buffer pointer table.
    #[test]
    fn null_array_n_buffers() {
        let data = ArrayData::new_null(&DataType::Null, 10);

        let ffi_array = FFI_ArrowArray::new(&data);
        assert_eq!(0, ffi_array.n_buffers);

        // Borrow the private data instead of temporarily re-boxing it. A re-boxed
        // value would be dropped if the assertion below panicked, and `ffi_array`'s
        // release callback would then free the same allocation a second time.
        //
        // SAFETY: `FFI_ArrowArray::new` stores a pointer obtained from a boxed
        // `ArrayPrivateData`, and `ffi_array` is still alive and unreleased here.
        let private_data = unsafe { &*(ffi_array.private_data as *const ArrayPrivateData) };

        assert_eq!(0, private_data.buffers_ptr.len());
    }
}
356}