odbc_api/buffers/
bin_column.rs

1use crate::{
2    buffers::Indicator,
3    columnar_bulk_inserter::BoundInputSlice,
4    error::TooLargeBufferSize,
5    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
6    DataType, Error,
7};
8
9use log::debug;
10use odbc_sys::{CDataType, NULL_DATA};
11use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
12
/// Column buffer for variable sized binary data, intended to be bound to a column of a cursor.
///
/// Every element may hold up to `max_len` bytes. Because the elements are of variable length, an
/// indicator buffer is maintained alongside the values regardless of whether the column is
/// nullable. The same buffer type is therefore used for nullable and non-nullable binary columns.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum number of bytes a single element can hold.
    max_len: usize,
    /// Value storage. The element with index `i` starts at byte offset `i * max_len`.
    values: Vec<u8>,
    /// One entry per element: either `NULL_DATA` or the length of the value with the same index.
    /// The length may exceed `max_len` if the value has been truncated.
    indicators: Vec<isize>,
}
27
28impl BinColumn {
29    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
30    /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
31    /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
32    /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
33    /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
34    /// new.
35    pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
36        let len = element_size * batch_size;
37        let mut values = Vec::new();
38        values
39            .try_reserve_exact(len)
40            .map_err(|_| TooLargeBufferSize {
41                num_elements: batch_size,
42                element_size,
43            })?;
44        values.resize(len, 0);
45        Ok(BinColumn {
46            max_len: element_size,
47            values,
48            indicators: vec![0; batch_size],
49        })
50    }
51
52    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
53    /// have a maximum length of `max_len`.
54    pub fn new(batch_size: usize, element_size: usize) -> Self {
55        let len = element_size * batch_size;
56        let mut values = Vec::new();
57        values.reserve_exact(len);
58        values.resize(len, 0);
59        BinColumn {
60            max_len: element_size,
61            values,
62            indicators: vec![0; batch_size],
63        }
64    }
65
66    /// Return the value for the given row index.
67    ///
68    /// The column buffer does not know how many elements were in the last row group, and therefore
69    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
70    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
71    /// equal to the maximum number of elements in the buffer.
72    pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
73        self.content_length_at(row_index).map(|length| {
74            let offset = row_index * self.max_len;
75            &self.values[offset..offset + length]
76        })
77    }
78
79    /// Indicator value at the specified position. Useful to detect truncation of data.
80    ///
81    /// The column buffer does not know how many elements were in the last row group, and therefore
82    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
83    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
84    /// equal to the maximum number of elements in the buffer.
85    pub fn indicator_at(&self, row_index: usize) -> Indicator {
86        Indicator::from_isize(self.indicators[row_index])
87    }
88
89    /// Length of value at the specified position. This is different from an indicator as it refers
90    /// to the length of the value in the buffer, not to the length of the value in the datasource.
91    /// The two things are different for truncated values.
92    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
93        match self.indicator_at(row_index) {
94            Indicator::Null => None,
95            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
96            Indicator::NoTotal => Some(self.max_len),
97            Indicator::Length(length) => {
98                let length = min(self.max_len, length);
99                Some(length)
100            }
101        }
102    }
103
104    /// `Some` if any value is truncated in the range [0, num_rows).
105    ///
106    /// After fetching data we may want to know if any value has been truncated due to the buffer
107    /// not being able to hold elements of that size. This method checks the indicator buffer
108    /// element wise and reports one indicator which indicates a size large than the maximum element
109    /// size, if it exits.
110    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
111        self.indicators
112            .iter()
113            .copied()
114            .take(num_rows)
115            .find_map(|indicator| {
116                let indicator = Indicator::from_isize(indicator);
117                indicator.is_truncated(self.max_len).then_some(indicator)
118            })
119    }
120
121    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
122    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
123    ///
124    /// # Parameters
125    ///
126    /// * `new_max_len`: New maximum string length without terminating zero.
127    pub fn set_max_len(&mut self, new_max_len: usize) {
128        let batch_size = self.indicators.len();
129        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
130        let new_values = vec![0u8; new_max_len * batch_size];
131        // Set all indicators to NULL
132        self.fill_null(0, batch_size);
133        self.values = new_values;
134        self.max_len = new_max_len;
135    }
136
137    /// Maximum length of elements in bytes.
138    pub fn max_len(&self) -> usize {
139        self.max_len
140    }
141
142    /// View of the first `num_rows` values of a binary column.
143    ///
144    /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
145    /// column buffer does not know how many elements were in the last row group, and therefore can
146    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
147    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
148    /// to the maximum number of elements in the buffer.
149    pub fn view(&self, num_rows: usize) -> BinColumnView<'_> {
150        BinColumnView {
151            num_rows,
152            col: self,
153        }
154    }
155
156    /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
157    /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
158    /// element length.
159    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
160        if let Some(input) = input {
161            self.indicators[index] = input.len().try_into().unwrap();
162            if input.len() > self.max_len {
163                panic!(
164                    "Tried to insert a value into a binary buffer which is larger than the maximum \
165                    allowed element length for the buffer."
166                );
167            }
168            let start = self.max_len * index;
169            let end = start + input.len();
170            let buf = &mut self.values[start..end];
171            buf.copy_from_slice(input);
172        } else {
173            self.indicators[index] = NULL_DATA;
174        }
175    }
176
177    /// Fills the column with NULL, between From and To
178    pub fn fill_null(&mut self, from: usize, to: usize) {
179        for index in from..to {
180            self.indicators[index] = NULL_DATA;
181        }
182    }
183
184    /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
185    /// you find an unexpected large input during insertion.
186    ///
187    /// This is however costly, as not only does the new buffer have to be allocated, but all values
188    /// have to copied from the old to the new buffer.
189    ///
190    /// This method could also be used to reduce the maximum length, which would truncate values in
191    /// the process.
192    ///
193    /// This method does not adjust indicator buffers as these might hold values larger than the
194    /// maximum length.
195    ///
196    /// # Parameters
197    ///
198    /// * `new_max_len`: New maximum element length in bytes.
199    /// * `num_rows`: Number of valid rows currently stored in this buffer.
200    pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
201        debug!(
202            "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
203            num_rows, self.max_len, new_max_len
204        );
205
206        let batch_size = self.indicators.len();
207        // Allocate a new buffer large enough to hold a batch of elements with maximum length.
208        let mut new_values = vec![0; new_max_len * batch_size];
209        // Copy values from old to new buffer.
210        let max_copy_length = min(self.max_len, new_max_len);
211        for ((&indicator, old_value), new_value) in self
212            .indicators
213            .iter()
214            .zip(self.values.chunks_exact_mut(self.max_len))
215            .zip(new_values.chunks_exact_mut(new_max_len))
216            .take(num_rows)
217        {
218            match Indicator::from_isize(indicator) {
219                Indicator::Null => (),
220                Indicator::NoTotal => {
221                    // There is no good choice here in case we are expanding the buffer. Since
222                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
223                    // be padded with 0. I currently cannot think of any use case there it would
224                    // matter.
225                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
226                }
227                Indicator::Length(num_bytes_len) => {
228                    let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
229                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
230                }
231            }
232        }
233        self.values = new_values;
234        self.max_len = new_max_len;
235    }
236
237    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
238    /// length should the input be too large.
239    ///
240    /// # Parameters
241    ///
242    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
243    ///   currently in the buffer.
244    /// * `bytes`: Value to store.
245    pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
246        if let Some(bytes) = bytes {
247            if bytes.len() > self.max_len {
248                let new_max_len = (bytes.len() as f64 * 1.2) as usize;
249                self.resize_max_element_length(new_max_len, index)
250            }
251
252            let offset = index * self.max_len;
253            self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
254            // And of course set the indicator correctly.
255            self.indicators[index] = bytes.len().try_into().unwrap();
256        } else {
257            self.indicators[index] = NULL_DATA;
258        }
259    }
260
261    /// Maximum number of elements this buffer can hold.
262    pub fn capacity(&self) -> usize {
263        self.indicators.len()
264    }
265}
266
267unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
268    type SliceMut = BinColumnSliceMut<'a>;
269
270    unsafe fn as_view_mut(
271        &'a mut self,
272        parameter_index: u16,
273        stmt: StatementRef<'a>,
274    ) -> Self::SliceMut {
275        BinColumnSliceMut {
276            column: self,
277            stmt,
278            parameter_index,
279        }
280    }
281}
282
283/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
284/// values.
285pub struct BinColumnSliceMut<'a> {
286    column: &'a mut BinColumn,
287    // Needed to rebind the column in case of reallocation
288    stmt: StatementRef<'a>,
289    // Also needed to rebind the column in case of reallocation
290    parameter_index: u16,
291}
292
293impl BinColumnSliceMut<'_> {
294    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
295    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
296    /// allowed element length. `element` must be specified without the terminating zero.
297    pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
298        self.column.set_value(row_index, element)
299    }
300
301    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
302    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
303    /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
304    /// one. This makes this an extremly expensive operation.
305    pub fn ensure_max_element_length(
306        &mut self,
307        element_length: usize,
308        num_rows_to_copy: usize,
309    ) -> Result<(), Error> {
310        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
311        // in order to hold it. This invalidates the pointers previously bound to the statement. So
312        // we rebind them.
313        if element_length > self.column.max_len() {
314            self.column
315                .resize_max_element_length(element_length, num_rows_to_copy);
316            unsafe {
317                self.stmt
318                    .bind_input_parameter(self.parameter_index, self.column)
319                    .into_result(&self.stmt)?
320            }
321        }
322        Ok(())
323    }
324}
325
326#[derive(Debug, Clone, Copy)]
327pub struct BinColumnView<'c> {
328    num_rows: usize,
329    col: &'c BinColumn,
330}
331
332impl<'c> BinColumnView<'c> {
333    /// The number of valid elements in the text column.
334    pub fn len(&self) -> usize {
335        self.num_rows
336    }
337
338    /// True if, and only if there are no valid rows in the column buffer.
339    pub fn is_empty(&self) -> bool {
340        self.num_rows == 0
341    }
342
343    /// Slice of text at the specified row index without terminating zero.
344    pub fn get(&self, index: usize) -> Option<&'c [u8]> {
345        self.col.value_at(index)
346    }
347
348    /// Iterator over the valid elements of the text buffer
349    pub fn iter(&self) -> BinColumnIt<'c> {
350        BinColumnIt {
351            pos: 0,
352            num_rows: self.num_rows,
353            col: self.col,
354        }
355    }
356
357    /// Finds an indicator larger than max element in the range [0, num_rows).
358    ///
359    /// After fetching data we may want to know if any value has been truncated due to the buffer
360    /// not being able to hold elements of that size. This method checks the indicator buffer
361    /// element wise.
362    pub fn has_truncated_values(&self) -> Option<Indicator> {
363        self.col.has_truncated_values(self.num_rows)
364    }
365}
366
367/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
368#[derive(Debug)]
369pub struct BinColumnIt<'c> {
370    pos: usize,
371    num_rows: usize,
372    col: &'c BinColumn,
373}
374
375impl<'c> Iterator for BinColumnIt<'c> {
376    type Item = Option<&'c [u8]>;
377
378    fn next(&mut self) -> Option<Self::Item> {
379        if self.pos == self.num_rows {
380            None
381        } else {
382            let ret = Some(self.col.value_at(self.pos));
383            self.pos += 1;
384            ret
385        }
386    }
387
388    fn size_hint(&self) -> (usize, Option<usize>) {
389        let len = self.num_rows - self.pos;
390        (len, Some(len))
391    }
392}
393
394impl ExactSizeIterator for BinColumnIt<'_> {}
395
396unsafe impl CData for BinColumn {
397    fn cdata_type(&self) -> CDataType {
398        CDataType::Binary
399    }
400
401    fn indicator_ptr(&self) -> *const isize {
402        self.indicators.as_ptr()
403    }
404
405    fn value_ptr(&self) -> *const c_void {
406        self.values.as_ptr() as *const c_void
407    }
408
409    fn buffer_length(&self) -> isize {
410        self.max_len.try_into().unwrap()
411    }
412}
413
414impl HasDataType for BinColumn {
415    fn data_type(&self) -> DataType {
416        DataType::Varbinary {
417            length: NonZeroUsize::new(self.max_len),
418        }
419    }
420}
421
422unsafe impl CDataMut for BinColumn {
423    fn mut_indicator_ptr(&mut self) -> *mut isize {
424        self.indicators.as_mut_ptr()
425    }
426
427    fn mut_value_ptr(&mut self) -> *mut c_void {
428        self.values.as_mut_ptr() as *mut c_void
429    }
430}
431
#[cfg(test)]
mod test {
    use super::BinColumn;
    use crate::error::TooLargeBufferSize;

    /// Requesting 10,000 elements of 2 GiB each must be reported as an error which carries the
    /// requested dimensions.
    #[test]
    fn allocating_too_big_a_binary_column() {
        const BATCH_SIZE: usize = 10_000;
        const TWO_GIB: usize = 2_147_483_648;

        let error = BinColumn::try_new(BATCH_SIZE, TWO_GIB).unwrap_err();

        assert!(matches!(
            error,
            TooLargeBufferSize {
                num_elements: BATCH_SIZE,
                element_size: TWO_GIB
            }
        ))
    }
}
451}