odbc_api/buffers/
text_column.rs

1use crate::{
2    columnar_bulk_inserter::BoundInputSlice, error::TooLargeBufferSize, handles::{CData, CDataMut, HasDataType, Statement, StatementRef, ASSUMED_MAX_LENGTH_OF_W_VARCHAR}, DataType, Error
3};
4
5use super::{ColumnBuffer, Indicator};
6
7use log::debug;
8use odbc_sys::{CDataType, NULL_DATA};
9use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
10use widestring::U16Str;
11
12/// A column buffer for character data. The actual encoding used may depend on your system locale.
13pub type CharColumn = TextColumn<u8>;
14
15/// This buffer uses wide characters which implies UTF-16 encoding. UTF-8 encoding is preferable for
16/// most applications, but contrary to its sibling [`crate::buffers::CharColumn`] this buffer types
17/// implied encoding does not depend on the system locale.
18pub type WCharColumn = TextColumn<u16>;
19
/// Columnar buffer for text, intended to be bound to a column of a cursor. Each element holds a
/// variable number of characters up to a fixed maximum string length. Most SQL types have a
/// string representation, so this buffer can be bound to columns of almost any type; the ODBC
/// driver and driver manager are expected to perform the conversion.
///
/// Because elements have variable length, an indicator buffer is always bound alongside the
/// values. Whether the column is nullable therefore makes no difference for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length in characters, not counting the terminating zero.
    max_str_len: usize,
    /// Character storage. Every element occupies `max_str_len + 1` consecutive characters, the
    /// extra one being reserved for the terminating zero.
    values: Vec<C>,
    /// One entry per element. Each entry is either `NULL_DATA` or the length (in bytes) of the
    /// element in `values` with the same index. Please note that the length may exceed what
    /// `max_str_len` characters can hold if the text has been truncated.
    indicators: Vec<isize>,
}
38
39impl<C> TextColumn<C> {
40    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
41    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
42    /// one in order to make space for the null terminating zero at the end of strings. Uses a
43    /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
44    /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
45    /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
46    pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
47    where
48        C: Default + Copy,
49    {
50        // Element size is +1 to account for terminating zero
51        let element_size = max_str_len + 1;
52        let len = element_size * batch_size;
53        let mut values = Vec::new();
54        values
55            .try_reserve_exact(len)
56            .map_err(|_| TooLargeBufferSize {
57                num_elements: batch_size,
58                // We want the element size in bytes
59                element_size: element_size * size_of::<C>(),
60            })?;
61        values.resize(len, C::default());
62        Ok(TextColumn {
63            max_str_len,
64            values,
65            indicators: vec![0; batch_size],
66        })
67    }
68
69    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
70    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
71    /// one in order to make space for the null terminating zero at the end of strings. All
72    /// indicators are set to [`crate::sys::NULL_DATA`] by default.
73    pub fn new(batch_size: usize, max_str_len: usize) -> Self
74    where
75        C: Default + Copy,
76    {
77        // Element size is +1 to account for terminating zero
78        let element_size = max_str_len + 1;
79        let len = element_size * batch_size;
80        let mut values = Vec::new();
81        values.reserve_exact(len);
82        values.resize(len, C::default());
83        TextColumn {
84            max_str_len,
85            values,
86            indicators: vec![NULL_DATA; batch_size],
87        }
88    }
89
90    /// Bytes of string at the specified position. Includes interior nuls, but excludes the
91    /// terminating nul.
92    ///
93    /// The column buffer does not know how many elements were in the last row group, and therefore
94    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
95    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
96    /// equal to the maximum number of elements in the buffer.
97    pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
98        self.content_length_at(row_index).map(|length| {
99            let offset = row_index * (self.max_str_len + 1);
100            &self.values[offset..offset + length]
101        })
102    }
103
104    /// Maximum length of elements
105    pub fn max_len(&self) -> usize {
106        self.max_str_len
107    }
108
109    /// Indicator value at the specified position. Useful to detect truncation of data.
110    ///
111    /// The column buffer does not know how many elements were in the last row group, and therefore
112    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
113    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
114    /// equal to the maximum number of elements in the buffer.
115    pub fn indicator_at(&self, row_index: usize) -> Indicator {
116        Indicator::from_isize(self.indicators[row_index])
117    }
118
119    /// Length of value at the specified position. This is different from an indicator as it refers
120    /// to the length of the value in the buffer, not to the length of the value in the datasource.
121    /// The two things are different for truncated values.
122    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
123        match self.indicator_at(row_index) {
124            Indicator::Null => None,
125            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
126            Indicator::NoTotal => Some(self.max_str_len),
127            Indicator::Length(length_in_bytes) => {
128                let length_in_chars = length_in_bytes / size_of::<C>();
129                let length = min(self.max_str_len, length_in_chars);
130                Some(length)
131            }
132        }
133    }
134
135    /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
136    ///
137    /// After fetching data we may want to know if any value has been truncated due to the buffer
138    /// not being able to hold elements of that size. This method checks the indicator buffer
139    /// element wise.
140    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
141        let max_bin_length = self.max_str_len * size_of::<C>();
142        self.indicators
143            .iter()
144            .copied()
145            .take(num_rows)
146            .find_map(|indicator| {
147                let indicator = Indicator::from_isize(indicator);
148                indicator.is_truncated(max_bin_length).then_some(indicator)
149            })
150    }
151
152    /// Changes the maximum string length the buffer can hold. This operation is useful if you find
153    /// an unexpected large input string during insertion.
154    ///
155    /// This is however costly, as not only does the new buffer have to be allocated, but all values
156    /// have to copied from the old to the new buffer.
157    ///
158    /// This method could also be used to reduce the maximum string length, which would truncate
159    /// strings in the process.
160    ///
161    /// This method does not adjust indicator buffers as these might hold values larger than the
162    /// maximum string length.
163    ///
164    /// # Parameters
165    ///
166    /// * `new_max_str_len`: New maximum string length without terminating zero.
167    /// * `num_rows`: Number of valid rows currently stored in this buffer.
168    pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
169    where
170        C: Default + Copy,
171    {
172        debug!(
173            "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
174            num_rows, self.max_str_len, new_max_str_len
175        );
176
177        let batch_size = self.indicators.len();
178        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
179        let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
180        // Copy values from old to new buffer.
181        let max_copy_length = min(self.max_str_len, new_max_str_len);
182        for ((&indicator, old_value), new_value) in self
183            .indicators
184            .iter()
185            .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
186            .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
187            .take(num_rows)
188        {
189            match Indicator::from_isize(indicator) {
190                Indicator::Null => (),
191                Indicator::NoTotal => {
192                    // There is no good choice here in case we are expanding the buffer. Since
193                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
194                    // be padded with 0. I currently cannot think of any use case there it would
195                    // matter.
196                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
197                }
198                Indicator::Length(num_bytes_len) => {
199                    let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
200                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
201                }
202            }
203        }
204        self.values = new_values;
205        self.max_str_len = new_max_str_len;
206    }
207
208    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
209    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
210    /// allowed element length. `input` must be specified without the terminating zero.
211    pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
212    where
213        C: Default + Copy,
214    {
215        if let Some(input) = input {
216            self.set_mut(index, input.len()).copy_from_slice(input);
217        } else {
218            self.indicators[index] = NULL_DATA;
219        }
220    }
221
222    /// Can be used to set a value at a specific row index without performing a memcopy on an input
223    /// slice and instead provides direct access to the underlying buffer.
224    ///
225    /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
226    /// be more convenient. This method is very useful if you want to `write!` a string value to the
227    /// buffer and the binary (**!**) length of the formatted string is known upfront.
228    ///
229    /// # Example: Write timestamp to text column.
230    ///
231    /// ```
232    /// use odbc_api::buffers::TextColumn;
233    /// use std::io::Write;
234    ///
235    /// /// Writes times formatted as hh::mm::ss.fff
236    /// fn write_time(
237    ///     col: &mut TextColumn<u8>,
238    ///     index: usize,
239    ///     hours: u8,
240    ///     minutes: u8,
241    ///     seconds: u8,
242    ///     milliseconds: u16)
243    /// {
244    ///     write!(
245    ///         col.set_mut(index, 12),
246    ///         "{:02}:{:02}:{:02}.{:03}",
247    ///         hours, minutes, seconds, milliseconds
248    ///     ).unwrap();
249    /// }
250    /// ```
251    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
252    where
253        C: Default,
254    {
255        if length > self.max_str_len {
256            panic!(
257                "Tried to insert a value into a text buffer which is larger than the maximum \
258                allowed string length for the buffer."
259            );
260        }
261        self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
262        let start = (self.max_str_len + 1) * index;
263        let end = start + length;
264        // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
265        // driver would not care about the value in the index buffer and only look for the
266        // terminating zero.
267        self.values[end] = C::default();
268        &mut self.values[start..end]
269    }
270
271    /// Fills the column with NULL, between From and To
272    pub fn fill_null(&mut self, from: usize, to: usize) {
273        for index in from..to {
274            self.indicators[index] = NULL_DATA;
275        }
276    }
277
278    /// Provides access to the raw underlying value buffer. Normal applications should have little
279    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
280    /// from the ODBC in memory representation into other kinds of buffers.
281    ///
282    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
283    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
284    /// terminating zero at the end of each string. For the actual value length call
285    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
286    pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
287        &self.values[..(self.max_str_len + 1) * num_valid_rows]
288    }
289
290    /// The maximum number of rows the TextColumn can hold.
291    pub fn row_capacity(&self) -> usize {
292        self.values.len()
293    }
294}
295
296impl WCharColumn {
297    /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
298    /// the terminating nul.
299    ///
300    /// # Safety
301    ///
302    /// The column buffer does not know how many elements were in the last row group, and therefore
303    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
304    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
305    /// equal to the maximum number of elements in the buffer.
306    pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
307        self.value_at(row_index).map(U16Str::from_slice)
308    }
309}
310
311unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
312where
313    TextColumn<C>: CDataMut + HasDataType,
314{
315    type View<'a> = TextColumnView<'a, C>;
316
317    fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
318        TextColumnView {
319            num_rows: valid_rows,
320            col: self,
321        }
322    }
323
324    fn fill_default(&mut self, from: usize, to: usize) {
325        self.fill_null(from, to)
326    }
327
328    /// Maximum number of text strings this column may hold.
329    fn capacity(&self) -> usize {
330        self.indicators.len()
331    }
332
333    fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
334        let max_bin_length = self.max_str_len * size_of::<C>();
335        self.indicators
336            .iter()
337            .copied()
338            .take(num_rows)
339            .find_map(|indicator| {
340                let indicator = Indicator::from_isize(indicator);
341                indicator.is_truncated(max_bin_length).then_some(indicator)
342            })
343    }
344}
345
346/// Allows read only access to the valid part of a text column.
347///
348/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
349/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
350/// are actually valid, and have been returned with the last row group of the the result set. That
351/// number is maintained on the level of the entire column buffer. So a text column knows the number
352/// of valid rows, in addition to holding a reference to the buffer, in order to guarantee, that
353/// every element acccessed through it, is valid.
354#[derive(Debug, Clone, Copy)]
355pub struct TextColumnView<'c, C> {
356    num_rows: usize,
357    col: &'c TextColumn<C>,
358}
359
360impl<'c, C> TextColumnView<'c, C> {
361    /// The number of valid elements in the text column.
362    pub fn len(&self) -> usize {
363        self.num_rows
364    }
365
366    /// True if, and only if there are no valid rows in the column buffer.
367    pub fn is_empty(&self) -> bool {
368        self.num_rows == 0
369    }
370
371    /// Slice of text at the specified row index without terminating zero.
372    pub fn get(&self, index: usize) -> Option<&'c [C]> {
373        self.col.value_at(index)
374    }
375
376    /// Iterator over the valid elements of the text buffer
377    pub fn iter(&self) -> TextColumnIt<'c, C> {
378        TextColumnIt {
379            pos: 0,
380            num_rows: self.num_rows,
381            col: self.col,
382        }
383    }
384
385    /// Length of value at the specified position. This is different from an indicator as it refers
386    /// to the length of the value in the buffer, not to the length of the value in the datasource.
387    /// The two things are different for truncated values.
388    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
389        if row_index >= self.num_rows {
390            panic!("Row index points beyond the range of valid values.")
391        }
392        self.col.content_length_at(row_index)
393    }
394
395    /// Provides access to the raw underlying value buffer. Normal applications should have little
396    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
397    /// from the ODBC in memory representation into other kinds of buffers.
398    ///
399    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
400    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
401    /// terminating zero at the end of each string. For the actual value length call
402    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
403    pub fn raw_value_buffer(&self) -> &'c [C] {
404        self.col.raw_value_buffer(self.num_rows)
405    }
406
407    pub fn max_len(&self) -> usize {
408        self.col.max_len()
409    }
410
411    /// `Some` if any value is truncated.
412    ///
413    /// After fetching data we may want to know if any value has been truncated due to the buffer
414    /// not being able to hold elements of that size. This method checks the indicator buffer
415    /// element wise.
416    pub fn has_truncated_values(&self) -> Option<Indicator> {
417        self.col.has_truncated_values(self.num_rows)
418    }
419}
420
421unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
422    type SliceMut = TextColumnSliceMut<'a, C>;
423
424    unsafe fn as_view_mut(
425        &'a mut self,
426        parameter_index: u16,
427        stmt: StatementRef<'a>,
428    ) -> Self::SliceMut {
429        TextColumnSliceMut {
430            column: self,
431            stmt,
432            parameter_index,
433        }
434    }
435}
436
437/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
438/// values.
439pub struct TextColumnSliceMut<'a, C> {
440    column: &'a mut TextColumn<C>,
441    // Needed to rebind the column in case of resize
442    stmt: StatementRef<'a>,
443    // Also needed to rebind the column in case of resize
444    parameter_index: u16,
445}
446
447impl<C> TextColumnSliceMut<'_, C>
448where
449    C: Default + Copy,
450{
451    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
452    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
453    /// allowed element length. `element` must be specified without the terminating zero.
454    pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
455        self.column.set_value(row_index, element)
456    }
457
458    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
459    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
460    /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
461    /// one. This makes this an extremely expensive operation.
462    pub fn ensure_max_element_length(
463        &mut self,
464        element_length: usize,
465        num_rows_to_copy: usize,
466    ) -> Result<(), Error>
467    where
468        TextColumn<C>: HasDataType + CData,
469    {
470        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
471        // in order to hold it. This invalidates the pointers previously bound to the statement. So
472        // we rebind them.
473        if element_length > self.column.max_len() {
474            let new_max_str_len = element_length;
475            self.column
476                .resize_max_str(new_max_str_len, num_rows_to_copy);
477            unsafe {
478                self.stmt
479                    .bind_input_parameter(self.parameter_index, self.column)
480                    .into_result(&self.stmt)?
481            }
482        }
483        Ok(())
484    }
485
486    /// Can be used to set a value at a specific row index without performing a memcopy on an input
487    /// slice and instead provides direct access to the underlying buffer.
488    ///
489    /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
490    /// be more convenient. This method is very useful if you want to `write!` a string value to the
491    /// buffer and the binary (**!**) length of the formatted string is known upfront.
492    ///
493    /// # Example: Write timestamp to text column.
494    ///
495    /// ```
496    /// use odbc_api::buffers::TextColumnSliceMut;
497    /// use std::io::Write;
498    ///
499    /// /// Writes times formatted as hh::mm::ss.fff
500    /// fn write_time(
501    ///     col: &mut TextColumnSliceMut<u8>,
502    ///     index: usize,
503    ///     hours: u8,
504    ///     minutes: u8,
505    ///     seconds: u8,
506    ///     milliseconds: u16)
507    /// {
508    ///     write!(
509    ///         col.set_mut(index, 12),
510    ///         "{:02}:{:02}:{:02}.{:03}",
511    ///         hours, minutes, seconds, milliseconds
512    ///     ).unwrap();
513    /// }
514    /// ```
515    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
516        self.column.set_mut(index, length)
517    }
518}
519
520/// Iterator over a text column. See [`TextColumnView::iter`]
521#[derive(Debug)]
522pub struct TextColumnIt<'c, C> {
523    pos: usize,
524    num_rows: usize,
525    col: &'c TextColumn<C>,
526}
527
528impl<'c, C> TextColumnIt<'c, C> {
529    fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
530        if self.pos == self.num_rows {
531            None
532        } else {
533            let ret = Some(self.col.value_at(self.pos));
534            self.pos += 1;
535            ret
536        }
537    }
538}
539
540impl<'c> Iterator for TextColumnIt<'c, u8> {
541    type Item = Option<&'c [u8]>;
542
543    fn next(&mut self) -> Option<Self::Item> {
544        self.next_impl()
545    }
546
547    fn size_hint(&self) -> (usize, Option<usize>) {
548        let len = self.num_rows - self.pos;
549        (len, Some(len))
550    }
551}
552
553impl ExactSizeIterator for TextColumnIt<'_, u8> {}
554
555impl<'c> Iterator for TextColumnIt<'c, u16> {
556    type Item = Option<&'c U16Str>;
557
558    fn next(&mut self) -> Option<Self::Item> {
559        self.next_impl().map(|opt| opt.map(U16Str::from_slice))
560    }
561
562    fn size_hint(&self) -> (usize, Option<usize>) {
563        let len = self.num_rows - self.pos;
564        (len, Some(len))
565    }
566}
567
568impl ExactSizeIterator for TextColumnIt<'_, u16> {}
569
570unsafe impl CData for CharColumn {
571    fn cdata_type(&self) -> CDataType {
572        CDataType::Char
573    }
574
575    fn indicator_ptr(&self) -> *const isize {
576        self.indicators.as_ptr()
577    }
578
579    fn value_ptr(&self) -> *const c_void {
580        self.values.as_ptr() as *const c_void
581    }
582
583    fn buffer_length(&self) -> isize {
584        (self.max_str_len + 1).try_into().unwrap()
585    }
586}
587
588unsafe impl CDataMut for CharColumn {
589    fn mut_indicator_ptr(&mut self) -> *mut isize {
590        self.indicators.as_mut_ptr()
591    }
592
593    fn mut_value_ptr(&mut self) -> *mut c_void {
594        self.values.as_mut_ptr() as *mut c_void
595    }
596}
597
598impl HasDataType for CharColumn {
599    fn data_type(&self) -> DataType {
600        DataType::Varchar {
601            length: NonZeroUsize::new(self.max_str_len),
602        }
603    }
604}
605
606unsafe impl CData for WCharColumn {
607    fn cdata_type(&self) -> CDataType {
608        CDataType::WChar
609    }
610
611    fn indicator_ptr(&self) -> *const isize {
612        self.indicators.as_ptr()
613    }
614
615    fn value_ptr(&self) -> *const c_void {
616        self.values.as_ptr() as *const c_void
617    }
618
619    fn buffer_length(&self) -> isize {
620        ((self.max_str_len + 1) * 2).try_into().unwrap()
621    }
622}
623
624unsafe impl CDataMut for WCharColumn {
625    fn mut_indicator_ptr(&mut self) -> *mut isize {
626        self.indicators.as_mut_ptr()
627    }
628
629    fn mut_value_ptr(&mut self) -> *mut c_void {
630        self.values.as_mut_ptr() as *mut c_void
631    }
632}
633
634impl HasDataType for WCharColumn {
635    fn data_type(&self) -> DataType {
636
637        if self.max_str_len <= ASSUMED_MAX_LENGTH_OF_W_VARCHAR {
638            DataType::WVarchar {
639                length: NonZeroUsize::new(self.max_str_len),
640            }
641        } else {
642            DataType::WLongVarchar {
643                length: NonZeroUsize::new(self.max_str_len),
644            }
645        }
646    }
647}