odbc_api/buffers/text_column.rs
1use crate::{
2 columnar_bulk_inserter::BoundInputSlice, error::TooLargeBufferSize, handles::{CData, CDataMut, HasDataType, Statement, StatementRef, ASSUMED_MAX_LENGTH_OF_W_VARCHAR}, DataType, Error
3};
4
5use super::{ColumnBuffer, Indicator};
6
7use log::debug;
8use odbc_sys::{CDataType, NULL_DATA};
9use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
10use widestring::U16Str;
11
/// A column buffer for narrow character data, i.e. a [`TextColumn`] whose elements are `u8`. The
/// actual encoding used may depend on your system locale.
pub type CharColumn = TextColumn<u8>;
14
/// A [`TextColumn`] whose elements are `u16`. This buffer uses wide characters which implies
/// UTF-16 encoding. UTF-8 encoding is preferable for most applications, but contrary to its
/// sibling [`crate::buffers::CharColumn`] this buffer type's implied encoding does not depend on
/// the system locale.
pub type WCharColumn = TextColumn<u16>;
19
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of characters up to a maximum string length. Since most SQL types have a string
/// representation this buffer can be bound to a column of almost any type, ODBC driver and driver
/// manager should take care of the conversion. Since elements of this type have variable length,
/// an indicator buffer needs to be bound regardless of whether the column is nullable or not.
/// Nullability therefore does not matter for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    /// Character data of all elements, stored back to back. Every element occupies
    /// `max_str_len + 1` characters (the extra one is reserved for the terminating zero), so the
    /// element with index `i` starts at `i * (max_str_len + 1)`.
    values: Vec<C>,
    /// One entry per element. Entries are either `NULL_DATA` or hold the length in bytes of the
    /// element in `values` with the same index. Please note that this value may be larger than
    /// `max_str_len` if the text has been truncated.
    indicators: Vec<isize>,
}
38
39impl<C> TextColumn<C> {
40 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
41 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
42 /// one in order to make space for the null terminating zero at the end of strings. Uses a
43 /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
44 /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
45 /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
46 pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
47 where
48 C: Default + Copy,
49 {
50 // Element size is +1 to account for terminating zero
51 let element_size = max_str_len + 1;
52 let len = element_size * batch_size;
53 let mut values = Vec::new();
54 values
55 .try_reserve_exact(len)
56 .map_err(|_| TooLargeBufferSize {
57 num_elements: batch_size,
58 // We want the element size in bytes
59 element_size: element_size * size_of::<C>(),
60 })?;
61 values.resize(len, C::default());
62 Ok(TextColumn {
63 max_str_len,
64 values,
65 indicators: vec![0; batch_size],
66 })
67 }
68
69 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
70 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
71 /// one in order to make space for the null terminating zero at the end of strings. All
72 /// indicators are set to [`crate::sys::NULL_DATA`] by default.
73 pub fn new(batch_size: usize, max_str_len: usize) -> Self
74 where
75 C: Default + Copy,
76 {
77 // Element size is +1 to account for terminating zero
78 let element_size = max_str_len + 1;
79 let len = element_size * batch_size;
80 let mut values = Vec::new();
81 values.reserve_exact(len);
82 values.resize(len, C::default());
83 TextColumn {
84 max_str_len,
85 values,
86 indicators: vec![NULL_DATA; batch_size],
87 }
88 }
89
90 /// Bytes of string at the specified position. Includes interior nuls, but excludes the
91 /// terminating nul.
92 ///
93 /// The column buffer does not know how many elements were in the last row group, and therefore
94 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
95 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
96 /// equal to the maximum number of elements in the buffer.
97 pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
98 self.content_length_at(row_index).map(|length| {
99 let offset = row_index * (self.max_str_len + 1);
100 &self.values[offset..offset + length]
101 })
102 }
103
104 /// Maximum length of elements
105 pub fn max_len(&self) -> usize {
106 self.max_str_len
107 }
108
109 /// Indicator value at the specified position. Useful to detect truncation of data.
110 ///
111 /// The column buffer does not know how many elements were in the last row group, and therefore
112 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
113 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
114 /// equal to the maximum number of elements in the buffer.
115 pub fn indicator_at(&self, row_index: usize) -> Indicator {
116 Indicator::from_isize(self.indicators[row_index])
117 }
118
119 /// Length of value at the specified position. This is different from an indicator as it refers
120 /// to the length of the value in the buffer, not to the length of the value in the datasource.
121 /// The two things are different for truncated values.
122 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
123 match self.indicator_at(row_index) {
124 Indicator::Null => None,
125 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
126 Indicator::NoTotal => Some(self.max_str_len),
127 Indicator::Length(length_in_bytes) => {
128 let length_in_chars = length_in_bytes / size_of::<C>();
129 let length = min(self.max_str_len, length_in_chars);
130 Some(length)
131 }
132 }
133 }
134
135 /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
136 ///
137 /// After fetching data we may want to know if any value has been truncated due to the buffer
138 /// not being able to hold elements of that size. This method checks the indicator buffer
139 /// element wise.
140 pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
141 let max_bin_length = self.max_str_len * size_of::<C>();
142 self.indicators
143 .iter()
144 .copied()
145 .take(num_rows)
146 .find_map(|indicator| {
147 let indicator = Indicator::from_isize(indicator);
148 indicator.is_truncated(max_bin_length).then_some(indicator)
149 })
150 }
151
152 /// Changes the maximum string length the buffer can hold. This operation is useful if you find
153 /// an unexpected large input string during insertion.
154 ///
155 /// This is however costly, as not only does the new buffer have to be allocated, but all values
156 /// have to copied from the old to the new buffer.
157 ///
158 /// This method could also be used to reduce the maximum string length, which would truncate
159 /// strings in the process.
160 ///
161 /// This method does not adjust indicator buffers as these might hold values larger than the
162 /// maximum string length.
163 ///
164 /// # Parameters
165 ///
166 /// * `new_max_str_len`: New maximum string length without terminating zero.
167 /// * `num_rows`: Number of valid rows currently stored in this buffer.
168 pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
169 where
170 C: Default + Copy,
171 {
172 debug!(
173 "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
174 num_rows, self.max_str_len, new_max_str_len
175 );
176
177 let batch_size = self.indicators.len();
178 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
179 let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
180 // Copy values from old to new buffer.
181 let max_copy_length = min(self.max_str_len, new_max_str_len);
182 for ((&indicator, old_value), new_value) in self
183 .indicators
184 .iter()
185 .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
186 .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
187 .take(num_rows)
188 {
189 match Indicator::from_isize(indicator) {
190 Indicator::Null => (),
191 Indicator::NoTotal => {
192 // There is no good choice here in case we are expanding the buffer. Since
193 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
194 // be padded with 0. I currently cannot think of any use case there it would
195 // matter.
196 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
197 }
198 Indicator::Length(num_bytes_len) => {
199 let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
200 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
201 }
202 }
203 }
204 self.values = new_values;
205 self.max_str_len = new_max_str_len;
206 }
207
208 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
209 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
210 /// allowed element length. `input` must be specified without the terminating zero.
211 pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
212 where
213 C: Default + Copy,
214 {
215 if let Some(input) = input {
216 self.set_mut(index, input.len()).copy_from_slice(input);
217 } else {
218 self.indicators[index] = NULL_DATA;
219 }
220 }
221
222 /// Can be used to set a value at a specific row index without performing a memcopy on an input
223 /// slice and instead provides direct access to the underlying buffer.
224 ///
225 /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
226 /// be more convenient. This method is very useful if you want to `write!` a string value to the
227 /// buffer and the binary (**!**) length of the formatted string is known upfront.
228 ///
229 /// # Example: Write timestamp to text column.
230 ///
231 /// ```
232 /// use odbc_api::buffers::TextColumn;
233 /// use std::io::Write;
234 ///
235 /// /// Writes times formatted as hh::mm::ss.fff
236 /// fn write_time(
237 /// col: &mut TextColumn<u8>,
238 /// index: usize,
239 /// hours: u8,
240 /// minutes: u8,
241 /// seconds: u8,
242 /// milliseconds: u16)
243 /// {
244 /// write!(
245 /// col.set_mut(index, 12),
246 /// "{:02}:{:02}:{:02}.{:03}",
247 /// hours, minutes, seconds, milliseconds
248 /// ).unwrap();
249 /// }
250 /// ```
251 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
252 where
253 C: Default,
254 {
255 if length > self.max_str_len {
256 panic!(
257 "Tried to insert a value into a text buffer which is larger than the maximum \
258 allowed string length for the buffer."
259 );
260 }
261 self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
262 let start = (self.max_str_len + 1) * index;
263 let end = start + length;
264 // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
265 // driver would not care about the value in the index buffer and only look for the
266 // terminating zero.
267 self.values[end] = C::default();
268 &mut self.values[start..end]
269 }
270
271 /// Fills the column with NULL, between From and To
272 pub fn fill_null(&mut self, from: usize, to: usize) {
273 for index in from..to {
274 self.indicators[index] = NULL_DATA;
275 }
276 }
277
278 /// Provides access to the raw underlying value buffer. Normal applications should have little
279 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
280 /// from the ODBC in memory representation into other kinds of buffers.
281 ///
282 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
283 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
284 /// terminating zero at the end of each string. For the actual value length call
285 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
286 pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
287 &self.values[..(self.max_str_len + 1) * num_valid_rows]
288 }
289
290 /// The maximum number of rows the TextColumn can hold.
291 pub fn row_capacity(&self) -> usize {
292 self.values.len()
293 }
294}
295
296impl WCharColumn {
297 /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
298 /// the terminating nul.
299 ///
300 /// # Safety
301 ///
302 /// The column buffer does not know how many elements were in the last row group, and therefore
303 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
304 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
305 /// equal to the maximum number of elements in the buffer.
306 pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
307 self.value_at(row_index).map(U16Str::from_slice)
308 }
309}
310
311unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
312where
313 TextColumn<C>: CDataMut + HasDataType,
314{
315 type View<'a> = TextColumnView<'a, C>;
316
317 fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
318 TextColumnView {
319 num_rows: valid_rows,
320 col: self,
321 }
322 }
323
324 fn fill_default(&mut self, from: usize, to: usize) {
325 self.fill_null(from, to)
326 }
327
328 /// Maximum number of text strings this column may hold.
329 fn capacity(&self) -> usize {
330 self.indicators.len()
331 }
332
333 fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
334 let max_bin_length = self.max_str_len * size_of::<C>();
335 self.indicators
336 .iter()
337 .copied()
338 .take(num_rows)
339 .find_map(|indicator| {
340 let indicator = Indicator::from_isize(indicator);
341 indicator.is_truncated(max_bin_length).then_some(indicator)
342 })
343 }
344}
345
/// Allows read only access to the valid part of a text column.
///
/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
/// are actually valid, and have been returned with the last row group of the result set. That
/// number is maintained on the level of the entire column buffer. So a text column view knows the
/// number of valid rows, in addition to holding a reference to the buffer, in order to guarantee,
/// that every element accessed through it, is valid.
#[derive(Debug, Clone, Copy)]
pub struct TextColumnView<'c, C> {
    /// Number of valid rows in `col`.
    num_rows: usize,
    /// The buffer this view refers to.
    col: &'c TextColumn<C>,
}
359
360impl<'c, C> TextColumnView<'c, C> {
361 /// The number of valid elements in the text column.
362 pub fn len(&self) -> usize {
363 self.num_rows
364 }
365
366 /// True if, and only if there are no valid rows in the column buffer.
367 pub fn is_empty(&self) -> bool {
368 self.num_rows == 0
369 }
370
371 /// Slice of text at the specified row index without terminating zero.
372 pub fn get(&self, index: usize) -> Option<&'c [C]> {
373 self.col.value_at(index)
374 }
375
376 /// Iterator over the valid elements of the text buffer
377 pub fn iter(&self) -> TextColumnIt<'c, C> {
378 TextColumnIt {
379 pos: 0,
380 num_rows: self.num_rows,
381 col: self.col,
382 }
383 }
384
385 /// Length of value at the specified position. This is different from an indicator as it refers
386 /// to the length of the value in the buffer, not to the length of the value in the datasource.
387 /// The two things are different for truncated values.
388 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
389 if row_index >= self.num_rows {
390 panic!("Row index points beyond the range of valid values.")
391 }
392 self.col.content_length_at(row_index)
393 }
394
395 /// Provides access to the raw underlying value buffer. Normal applications should have little
396 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
397 /// from the ODBC in memory representation into other kinds of buffers.
398 ///
399 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
400 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
401 /// terminating zero at the end of each string. For the actual value length call
402 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
403 pub fn raw_value_buffer(&self) -> &'c [C] {
404 self.col.raw_value_buffer(self.num_rows)
405 }
406
407 pub fn max_len(&self) -> usize {
408 self.col.max_len()
409 }
410
411 /// `Some` if any value is truncated.
412 ///
413 /// After fetching data we may want to know if any value has been truncated due to the buffer
414 /// not being able to hold elements of that size. This method checks the indicator buffer
415 /// element wise.
416 pub fn has_truncated_values(&self) -> Option<Indicator> {
417 self.col.has_truncated_values(self.num_rows)
418 }
419}
420
421unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
422 type SliceMut = TextColumnSliceMut<'a, C>;
423
424 unsafe fn as_view_mut(
425 &'a mut self,
426 parameter_index: u16,
427 stmt: StatementRef<'a>,
428 ) -> Self::SliceMut {
429 TextColumnSliceMut {
430 column: self,
431 stmt,
432 parameter_index,
433 }
434 }
435}
436
/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
/// values.
pub struct TextColumnSliceMut<'a, C> {
    /// The column buffer to be filled with values.
    column: &'a mut TextColumn<C>,
    // Needed to rebind the column in case of resize
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of resize
    parameter_index: u16,
}
446
447impl<C> TextColumnSliceMut<'_, C>
448where
449 C: Default + Copy,
450{
451 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
452 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
453 /// allowed element length. `element` must be specified without the terminating zero.
454 pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
455 self.column.set_value(row_index, element)
456 }
457
458 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
459 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
460 /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
461 /// one. This makes this an extremely expensive operation.
462 pub fn ensure_max_element_length(
463 &mut self,
464 element_length: usize,
465 num_rows_to_copy: usize,
466 ) -> Result<(), Error>
467 where
468 TextColumn<C>: HasDataType + CData,
469 {
470 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
471 // in order to hold it. This invalidates the pointers previously bound to the statement. So
472 // we rebind them.
473 if element_length > self.column.max_len() {
474 let new_max_str_len = element_length;
475 self.column
476 .resize_max_str(new_max_str_len, num_rows_to_copy);
477 unsafe {
478 self.stmt
479 .bind_input_parameter(self.parameter_index, self.column)
480 .into_result(&self.stmt)?
481 }
482 }
483 Ok(())
484 }
485
486 /// Can be used to set a value at a specific row index without performing a memcopy on an input
487 /// slice and instead provides direct access to the underlying buffer.
488 ///
489 /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
490 /// be more convenient. This method is very useful if you want to `write!` a string value to the
491 /// buffer and the binary (**!**) length of the formatted string is known upfront.
492 ///
493 /// # Example: Write timestamp to text column.
494 ///
495 /// ```
496 /// use odbc_api::buffers::TextColumnSliceMut;
497 /// use std::io::Write;
498 ///
499 /// /// Writes times formatted as hh::mm::ss.fff
500 /// fn write_time(
501 /// col: &mut TextColumnSliceMut<u8>,
502 /// index: usize,
503 /// hours: u8,
504 /// minutes: u8,
505 /// seconds: u8,
506 /// milliseconds: u16)
507 /// {
508 /// write!(
509 /// col.set_mut(index, 12),
510 /// "{:02}:{:02}:{:02}.{:03}",
511 /// hours, minutes, seconds, milliseconds
512 /// ).unwrap();
513 /// }
514 /// ```
515 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
516 self.column.set_mut(index, length)
517 }
518}
519
/// Iterator over a text column. See [`TextColumnView::iter`]
#[derive(Debug)]
pub struct TextColumnIt<'c, C> {
    /// Index of the row which is going to be yielded next.
    pos: usize,
    /// Number of valid rows in `col`. Iteration stops once `pos` reaches this value.
    num_rows: usize,
    /// The column buffer iterated over.
    col: &'c TextColumn<C>,
}
527
528impl<'c, C> TextColumnIt<'c, C> {
529 fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
530 if self.pos == self.num_rows {
531 None
532 } else {
533 let ret = Some(self.col.value_at(self.pos));
534 self.pos += 1;
535 ret
536 }
537 }
538}
539
540impl<'c> Iterator for TextColumnIt<'c, u8> {
541 type Item = Option<&'c [u8]>;
542
543 fn next(&mut self) -> Option<Self::Item> {
544 self.next_impl()
545 }
546
547 fn size_hint(&self) -> (usize, Option<usize>) {
548 let len = self.num_rows - self.pos;
549 (len, Some(len))
550 }
551}
552
553impl ExactSizeIterator for TextColumnIt<'_, u8> {}
554
555impl<'c> Iterator for TextColumnIt<'c, u16> {
556 type Item = Option<&'c U16Str>;
557
558 fn next(&mut self) -> Option<Self::Item> {
559 self.next_impl().map(|opt| opt.map(U16Str::from_slice))
560 }
561
562 fn size_hint(&self) -> (usize, Option<usize>) {
563 let len = self.num_rows - self.pos;
564 (len, Some(len))
565 }
566}
567
568impl ExactSizeIterator for TextColumnIt<'_, u16> {}
569
570unsafe impl CData for CharColumn {
571 fn cdata_type(&self) -> CDataType {
572 CDataType::Char
573 }
574
575 fn indicator_ptr(&self) -> *const isize {
576 self.indicators.as_ptr()
577 }
578
579 fn value_ptr(&self) -> *const c_void {
580 self.values.as_ptr() as *const c_void
581 }
582
583 fn buffer_length(&self) -> isize {
584 (self.max_str_len + 1).try_into().unwrap()
585 }
586}
587
588unsafe impl CDataMut for CharColumn {
589 fn mut_indicator_ptr(&mut self) -> *mut isize {
590 self.indicators.as_mut_ptr()
591 }
592
593 fn mut_value_ptr(&mut self) -> *mut c_void {
594 self.values.as_mut_ptr() as *mut c_void
595 }
596}
597
598impl HasDataType for CharColumn {
599 fn data_type(&self) -> DataType {
600 DataType::Varchar {
601 length: NonZeroUsize::new(self.max_str_len),
602 }
603 }
604}
605
606unsafe impl CData for WCharColumn {
607 fn cdata_type(&self) -> CDataType {
608 CDataType::WChar
609 }
610
611 fn indicator_ptr(&self) -> *const isize {
612 self.indicators.as_ptr()
613 }
614
615 fn value_ptr(&self) -> *const c_void {
616 self.values.as_ptr() as *const c_void
617 }
618
619 fn buffer_length(&self) -> isize {
620 ((self.max_str_len + 1) * 2).try_into().unwrap()
621 }
622}
623
624unsafe impl CDataMut for WCharColumn {
625 fn mut_indicator_ptr(&mut self) -> *mut isize {
626 self.indicators.as_mut_ptr()
627 }
628
629 fn mut_value_ptr(&mut self) -> *mut c_void {
630 self.values.as_mut_ptr() as *mut c_void
631 }
632}
633
634impl HasDataType for WCharColumn {
635 fn data_type(&self) -> DataType {
636
637 if self.max_str_len <= ASSUMED_MAX_LENGTH_OF_W_VARCHAR {
638 DataType::WVarchar {
639 length: NonZeroUsize::new(self.max_str_len),
640 }
641 } else {
642 DataType::WLongVarchar {
643 length: NonZeroUsize::new(self.max_str_len),
644 }
645 }
646 }
647}