odbc_api/buffers/
text_column.rs

1use crate::{
2    columnar_bulk_inserter::BoundInputSlice,
3    error::TooLargeBufferSize,
4    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
5    DataType, Error,
6};
7
8use super::{ColumnBuffer, Indicator};
9
10use log::debug;
11use odbc_sys::{CDataType, NULL_DATA};
12use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
13use widestring::U16Str;
14
/// A column buffer for character data. The actual encoding used may depend on your system locale.
pub type CharColumn = TextColumn<u8>;

/// This buffer uses wide characters which implies UTF-16 encoding. UTF-8 encoding is preferable for
/// most applications, but contrary to its sibling [`crate::buffers::CharColumn`] this buffer type's
/// implied encoding does not depend on the system locale.
pub type WCharColumn = TextColumn<u16>;
22
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of characters up to a maximum string length. Since most SQL types have a string
/// representation this buffer can be bound to a column of almost any type, ODBC driver and driver
/// manager should take care of the conversion. Since elements of this type have variable length, an
/// indicator buffer needs to be bound regardless of whether the column is nullable or not, so
/// nullability does not matter for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    /// Value buffer. Every element occupies `max_str_len + 1` characters (the extra one is room
    /// for the terminating zero); the element for row `i` starts at `i * (max_str_len + 1)`.
    values: Vec<C>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length of the element in value
    /// with the same index. Please note that this value may be larger than `max_str_len` if the
    /// text has been truncated.
    indicators: Vec<isize>,
}
41
42impl<C> TextColumn<C> {
43    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
44    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
45    /// one in order to make space for the null terminating zero at the end of strings. Uses a
46    /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
47    /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
48    /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
49    pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
50    where
51        C: Default + Copy,
52    {
53        // Element size is +1 to account for terminating zero
54        let element_size = max_str_len + 1;
55        let len = element_size * batch_size;
56        let mut values = Vec::new();
57        values
58            .try_reserve_exact(len)
59            .map_err(|_| TooLargeBufferSize {
60                num_elements: batch_size,
61                // We want the element size in bytes
62                element_size: element_size * size_of::<C>(),
63            })?;
64        values.resize(len, C::default());
65        Ok(TextColumn {
66            max_str_len,
67            values,
68            indicators: vec![0; batch_size],
69        })
70    }
71
72    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
73    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
74    /// one in order to make space for the null terminating zero at the end of strings. All
75    /// indicators are set to [`crate::sys::NULL_DATA`] by default.
76    pub fn new(batch_size: usize, max_str_len: usize) -> Self
77    where
78        C: Default + Copy,
79    {
80        // Element size is +1 to account for terminating zero
81        let element_size = max_str_len + 1;
82        let len = element_size * batch_size;
83        let mut values = Vec::new();
84        values.reserve_exact(len);
85        values.resize(len, C::default());
86        TextColumn {
87            max_str_len,
88            values,
89            indicators: vec![NULL_DATA; batch_size],
90        }
91    }
92
93    /// Bytes of string at the specified position. Includes interior nuls, but excludes the
94    /// terminating nul.
95    ///
96    /// The column buffer does not know how many elements were in the last row group, and therefore
97    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
98    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
99    /// equal to the maximum number of elements in the buffer.
100    pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
101        self.content_length_at(row_index).map(|length| {
102            let offset = row_index * (self.max_str_len + 1);
103            &self.values[offset..offset + length]
104        })
105    }
106
107    /// Maximum length of elements
108    pub fn max_len(&self) -> usize {
109        self.max_str_len
110    }
111
112    /// Indicator value at the specified position. Useful to detect truncation of data.
113    ///
114    /// The column buffer does not know how many elements were in the last row group, and therefore
115    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
116    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
117    /// equal to the maximum number of elements in the buffer.
118    pub fn indicator_at(&self, row_index: usize) -> Indicator {
119        Indicator::from_isize(self.indicators[row_index])
120    }
121
122    /// Length of value at the specified position. This is different from an indicator as it refers
123    /// to the length of the value in the buffer, not to the length of the value in the datasource.
124    /// The two things are different for truncated values.
125    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
126        match self.indicator_at(row_index) {
127            Indicator::Null => None,
128            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
129            Indicator::NoTotal => Some(self.max_str_len),
130            Indicator::Length(length_in_bytes) => {
131                let length_in_chars = length_in_bytes / size_of::<C>();
132                let length = min(self.max_str_len, length_in_chars);
133                Some(length)
134            }
135        }
136    }
137
138    /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
139    ///
140    /// After fetching data we may want to know if any value has been truncated due to the buffer
141    /// not being able to hold elements of that size. This method checks the indicator buffer
142    /// element wise.
143    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
144        let max_bin_length = self.max_str_len * size_of::<C>();
145        self.indicators
146            .iter()
147            .copied()
148            .take(num_rows)
149            .find_map(|indicator| {
150                let indicator = Indicator::from_isize(indicator);
151                indicator.is_truncated(max_bin_length).then_some(indicator)
152            })
153    }
154
155    /// Changes the maximum string length the buffer can hold. This operation is useful if you find
156    /// an unexpected large input string during insertion.
157    ///
158    /// This is however costly, as not only does the new buffer have to be allocated, but all values
159    /// have to copied from the old to the new buffer.
160    ///
161    /// This method could also be used to reduce the maximum string length, which would truncate
162    /// strings in the process.
163    ///
164    /// This method does not adjust indicator buffers as these might hold values larger than the
165    /// maximum string length.
166    ///
167    /// # Parameters
168    ///
169    /// * `new_max_str_len`: New maximum string length without terminating zero.
170    /// * `num_rows`: Number of valid rows currently stored in this buffer.
171    pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
172    where
173        C: Default + Copy,
174    {
175        debug!(
176            "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
177            num_rows, self.max_str_len, new_max_str_len
178        );
179
180        let batch_size = self.indicators.len();
181        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
182        let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
183        // Copy values from old to new buffer.
184        let max_copy_length = min(self.max_str_len, new_max_str_len);
185        for ((&indicator, old_value), new_value) in self
186            .indicators
187            .iter()
188            .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
189            .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
190            .take(num_rows)
191        {
192            match Indicator::from_isize(indicator) {
193                Indicator::Null => (),
194                Indicator::NoTotal => {
195                    // There is no good choice here in case we are expanding the buffer. Since
196                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
197                    // be padded with 0. I currently cannot think of any use case there it would
198                    // matter.
199                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
200                }
201                Indicator::Length(num_bytes_len) => {
202                    let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
203                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
204                }
205            }
206        }
207        self.values = new_values;
208        self.max_str_len = new_max_str_len;
209    }
210
211    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
212    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
213    /// allowed element length. `input` must be specified without the terminating zero.
214    pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
215    where
216        C: Default + Copy,
217    {
218        if let Some(input) = input {
219            self.set_mut(index, input.len()).copy_from_slice(input);
220        } else {
221            self.indicators[index] = NULL_DATA;
222        }
223    }
224
225    /// Can be used to set a value at a specific row index without performing a memcopy on an input
226    /// slice and instead provides direct access to the underlying buffer.
227    ///
228    /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
229    /// be more convenient. This method is very useful if you want to `write!` a string value to the
230    /// buffer and the binary (**!**) length of the formatted string is known upfront.
231    ///
232    /// # Example: Write timestamp to text column.
233    ///
234    /// ```
235    /// use odbc_api::buffers::TextColumn;
236    /// use std::io::Write;
237    ///
238    /// /// Writes times formatted as hh::mm::ss.fff
239    /// fn write_time(
240    ///     col: &mut TextColumn<u8>,
241    ///     index: usize,
242    ///     hours: u8,
243    ///     minutes: u8,
244    ///     seconds: u8,
245    ///     milliseconds: u16)
246    /// {
247    ///     write!(
248    ///         col.set_mut(index, 12),
249    ///         "{:02}:{:02}:{:02}.{:03}",
250    ///         hours, minutes, seconds, milliseconds
251    ///     ).unwrap();
252    /// }
253    /// ```
254    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
255    where
256        C: Default,
257    {
258        if length > self.max_str_len {
259            panic!(
260                "Tried to insert a value into a text buffer which is larger than the maximum \
261                allowed string length for the buffer."
262            );
263        }
264        self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
265        let start = (self.max_str_len + 1) * index;
266        let end = start + length;
267        // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
268        // driver would not care about the value in the index buffer and only look for the
269        // terminating zero.
270        self.values[end] = C::default();
271        &mut self.values[start..end]
272    }
273
274    /// Fills the column with NULL, between From and To
275    pub fn fill_null(&mut self, from: usize, to: usize) {
276        for index in from..to {
277            self.indicators[index] = NULL_DATA;
278        }
279    }
280
281    /// Provides access to the raw underlying value buffer. Normal applications should have little
282    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
283    /// from the ODBC in memory representation into other kinds of buffers.
284    ///
285    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
286    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
287    /// terminating zero at the end of each string. For the actual value length call
288    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
289    pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
290        &self.values[..(self.max_str_len + 1) * num_valid_rows]
291    }
292
293    /// The maximum number of rows the TextColumn can hold.
294    pub fn row_capacity(&self) -> usize {
295        self.values.len()
296    }
297}
298
299impl WCharColumn {
300    /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
301    /// the terminating nul.
302    ///
303    /// # Safety
304    ///
305    /// The column buffer does not know how many elements were in the last row group, and therefore
306    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
307    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
308    /// equal to the maximum number of elements in the buffer.
309    pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
310        self.value_at(row_index).map(U16Str::from_slice)
311    }
312}
313
314unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
315where
316    TextColumn<C>: CDataMut + HasDataType,
317{
318    type View<'a> = TextColumnView<'a, C>;
319
320    fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
321        TextColumnView {
322            num_rows: valid_rows,
323            col: self,
324        }
325    }
326
327    fn fill_default(&mut self, from: usize, to: usize) {
328        self.fill_null(from, to)
329    }
330
331    /// Maximum number of text strings this column may hold.
332    fn capacity(&self) -> usize {
333        self.indicators.len()
334    }
335
336    fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
337        let max_bin_length = self.max_str_len * size_of::<C>();
338        self.indicators
339            .iter()
340            .copied()
341            .take(num_rows)
342            .find_map(|indicator| {
343                let indicator = Indicator::from_isize(indicator);
344                indicator.is_truncated(max_bin_length).then_some(indicator)
345            })
346    }
347}
348
/// Allows read only access to the valid part of a text column.
///
/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
/// are actually valid, and have been returned with the last row group of the result set. That
/// number is maintained on the level of the entire column buffer. So a text column view knows the
/// number of valid rows, in addition to holding a reference to the buffer, in order to guarantee
/// that every element accessed through it is valid.
#[derive(Debug, Clone, Copy)]
pub struct TextColumnView<'c, C> {
    /// Number of valid rows in `col`.
    num_rows: usize,
    /// The underlying buffer this view refers to.
    col: &'c TextColumn<C>,
}
362
363impl<'c, C> TextColumnView<'c, C> {
364    /// The number of valid elements in the text column.
365    pub fn len(&self) -> usize {
366        self.num_rows
367    }
368
369    /// True if, and only if there are no valid rows in the column buffer.
370    pub fn is_empty(&self) -> bool {
371        self.num_rows == 0
372    }
373
374    /// Slice of text at the specified row index without terminating zero.
375    pub fn get(&self, index: usize) -> Option<&'c [C]> {
376        self.col.value_at(index)
377    }
378
379    /// Iterator over the valid elements of the text buffer
380    pub fn iter(&self) -> TextColumnIt<'c, C> {
381        TextColumnIt {
382            pos: 0,
383            num_rows: self.num_rows,
384            col: self.col,
385        }
386    }
387
388    /// Length of value at the specified position. This is different from an indicator as it refers
389    /// to the length of the value in the buffer, not to the length of the value in the datasource.
390    /// The two things are different for truncated values.
391    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
392        if row_index >= self.num_rows {
393            panic!("Row index points beyond the range of valid values.")
394        }
395        self.col.content_length_at(row_index)
396    }
397
398    /// Provides access to the raw underlying value buffer. Normal applications should have little
399    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
400    /// from the ODBC in memory representation into other kinds of buffers.
401    ///
402    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
403    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
404    /// terminating zero at the end of each string. For the actual value length call
405    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
406    pub fn raw_value_buffer(&self) -> &'c [C] {
407        self.col.raw_value_buffer(self.num_rows)
408    }
409
410    pub fn max_len(&self) -> usize {
411        self.col.max_len()
412    }
413
414    /// `Some` if any value is truncated.
415    ///
416    /// After fetching data we may want to know if any value has been truncated due to the buffer
417    /// not being able to hold elements of that size. This method checks the indicator buffer
418    /// element wise.
419    pub fn has_truncated_values(&self) -> Option<Indicator> {
420        self.col.has_truncated_values(self.num_rows)
421    }
422}
423
424unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
425    type SliceMut = TextColumnSliceMut<'a, C>;
426
427    unsafe fn as_view_mut(
428        &'a mut self,
429        parameter_index: u16,
430        stmt: StatementRef<'a>,
431    ) -> Self::SliceMut {
432        TextColumnSliceMut {
433            column: self,
434            stmt,
435            parameter_index,
436        }
437    }
438}
439
/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
/// values.
pub struct TextColumnSliceMut<'a, C> {
    // The buffer which is filled through this view.
    column: &'a mut TextColumn<C>,
    // Needed to rebind the column in case of resize
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of resize
    parameter_index: u16,
}
449
450impl<C> TextColumnSliceMut<'_, C>
451where
452    C: Default + Copy,
453{
454    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
455    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
456    /// allowed element length. `element` must be specified without the terminating zero.
457    pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
458        self.column.set_value(row_index, element)
459    }
460
461    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
462    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
463    /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
464    /// one. This makes this an extremely expensive operation.
465    pub fn ensure_max_element_length(
466        &mut self,
467        element_length: usize,
468        num_rows_to_copy: usize,
469    ) -> Result<(), Error>
470    where
471        TextColumn<C>: HasDataType + CData,
472    {
473        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
474        // in order to hold it. This invalidates the pointers previously bound to the statement. So
475        // we rebind them.
476        if element_length > self.column.max_len() {
477            let new_max_str_len = element_length;
478            self.column
479                .resize_max_str(new_max_str_len, num_rows_to_copy);
480            unsafe {
481                self.stmt
482                    .bind_input_parameter(self.parameter_index, self.column)
483                    .into_result(&self.stmt)?
484            }
485        }
486        Ok(())
487    }
488
489    /// Can be used to set a value at a specific row index without performing a memcopy on an input
490    /// slice and instead provides direct access to the underlying buffer.
491    ///
492    /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
493    /// be more convenient. This method is very useful if you want to `write!` a string value to the
494    /// buffer and the binary (**!**) length of the formatted string is known upfront.
495    ///
496    /// # Example: Write timestamp to text column.
497    ///
498    /// ```
499    /// use odbc_api::buffers::TextColumnSliceMut;
500    /// use std::io::Write;
501    ///
502    /// /// Writes times formatted as hh::mm::ss.fff
503    /// fn write_time(
504    ///     col: &mut TextColumnSliceMut<u8>,
505    ///     index: usize,
506    ///     hours: u8,
507    ///     minutes: u8,
508    ///     seconds: u8,
509    ///     milliseconds: u16)
510    /// {
511    ///     write!(
512    ///         col.set_mut(index, 12),
513    ///         "{:02}:{:02}:{:02}.{:03}",
514    ///         hours, minutes, seconds, milliseconds
515    ///     ).unwrap();
516    /// }
517    /// ```
518    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
519        self.column.set_mut(index, length)
520    }
521}
522
/// Iterator over a text column. See [`TextColumnView::iter`]
#[derive(Debug)]
pub struct TextColumnIt<'c, C> {
    /// Index of the row yielded by the next call to `next`.
    pos: usize,
    /// Number of valid rows; iteration stops once `pos` reaches this value.
    num_rows: usize,
    /// The buffer the elements are read from.
    col: &'c TextColumn<C>,
}
530
531impl<'c, C> TextColumnIt<'c, C> {
532    fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
533        if self.pos == self.num_rows {
534            None
535        } else {
536            let ret = Some(self.col.value_at(self.pos));
537            self.pos += 1;
538            ret
539        }
540    }
541}
542
543impl<'c> Iterator for TextColumnIt<'c, u8> {
544    type Item = Option<&'c [u8]>;
545
546    fn next(&mut self) -> Option<Self::Item> {
547        self.next_impl()
548    }
549
550    fn size_hint(&self) -> (usize, Option<usize>) {
551        let len = self.num_rows - self.pos;
552        (len, Some(len))
553    }
554}
555
556impl ExactSizeIterator for TextColumnIt<'_, u8> {}
557
558impl<'c> Iterator for TextColumnIt<'c, u16> {
559    type Item = Option<&'c U16Str>;
560
561    fn next(&mut self) -> Option<Self::Item> {
562        self.next_impl().map(|opt| opt.map(U16Str::from_slice))
563    }
564
565    fn size_hint(&self) -> (usize, Option<usize>) {
566        let len = self.num_rows - self.pos;
567        (len, Some(len))
568    }
569}
570
571impl ExactSizeIterator for TextColumnIt<'_, u16> {}
572
573unsafe impl CData for CharColumn {
574    fn cdata_type(&self) -> CDataType {
575        CDataType::Char
576    }
577
578    fn indicator_ptr(&self) -> *const isize {
579        self.indicators.as_ptr()
580    }
581
582    fn value_ptr(&self) -> *const c_void {
583        self.values.as_ptr() as *const c_void
584    }
585
586    fn buffer_length(&self) -> isize {
587        (self.max_str_len + 1).try_into().unwrap()
588    }
589}
590
591unsafe impl CDataMut for CharColumn {
592    fn mut_indicator_ptr(&mut self) -> *mut isize {
593        self.indicators.as_mut_ptr()
594    }
595
596    fn mut_value_ptr(&mut self) -> *mut c_void {
597        self.values.as_mut_ptr() as *mut c_void
598    }
599}
600
601impl HasDataType for CharColumn {
602    fn data_type(&self) -> DataType {
603        DataType::Varchar {
604            length: NonZeroUsize::new(self.max_str_len),
605        }
606    }
607}
608
609unsafe impl CData for WCharColumn {
610    fn cdata_type(&self) -> CDataType {
611        CDataType::WChar
612    }
613
614    fn indicator_ptr(&self) -> *const isize {
615        self.indicators.as_ptr()
616    }
617
618    fn value_ptr(&self) -> *const c_void {
619        self.values.as_ptr() as *const c_void
620    }
621
622    fn buffer_length(&self) -> isize {
623        ((self.max_str_len + 1) * 2).try_into().unwrap()
624    }
625}
626
627unsafe impl CDataMut for WCharColumn {
628    fn mut_indicator_ptr(&mut self) -> *mut isize {
629        self.indicators.as_mut_ptr()
630    }
631
632    fn mut_value_ptr(&mut self) -> *mut c_void {
633        self.values.as_mut_ptr() as *mut c_void
634    }
635}
636
637impl HasDataType for WCharColumn {
638    fn data_type(&self) -> DataType {
639        DataType::WVarchar {
640            length: NonZeroUsize::new(self.max_str_len),
641        }
642    }
643}