odbc_api/buffers/text_column.rs
1use crate::{
2 columnar_bulk_inserter::BoundInputSlice,
3 error::TooLargeBufferSize,
4 handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
5 DataType, Error,
6};
7
8use super::{ColumnBuffer, Indicator};
9
10use log::debug;
11use odbc_sys::{CDataType, NULL_DATA};
12use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
13use widestring::U16Str;
14
15/// A column buffer for character data. The actual encoding used may depend on your system locale.
16pub type CharColumn = TextColumn<u8>;
17
18/// This buffer uses wide characters which implies UTF-16 encoding. UTF-8 encoding is preferable for
19/// most applications, but contrary to its sibling [`crate::buffers::CharColumn`] this buffer types
20/// implied encoding does not depend on the system locale.
21pub type WCharColumn = TextColumn<u16>;
22
/// Buffer meant to be bound to one column of a cursor. Every element holds a variable number of
/// characters, up to a fixed maximum string length. Because most SQL types have a string
/// representation, this buffer can be bound to columns of almost any type; the ODBC driver and
/// driver manager are expected to perform the conversion.
///
/// As the elements have variable length, an indicator buffer is always bound alongside the values.
/// Whether the column is nullable therefore makes no difference for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    /// Flat storage for all elements. Each element occupies `max_str_len + 1` characters (the
    /// extra one is reserved for the terminating zero), so element `i` starts at
    /// `i * (max_str_len + 1)`.
    values: Vec<C>,
    /// One entry per element: either `NULL_DATA` or the length of the element stored in `values`
    /// at the same index. Note that the length may exceed `max_str_len` if the text has been
    /// truncated.
    indicators: Vec<isize>,
}
41
42impl<C> TextColumn<C> {
43 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
44 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
45 /// one in order to make space for the null terminating zero at the end of strings. Uses a
46 /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
47 /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
48 /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
49 pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
50 where
51 C: Default + Copy,
52 {
53 // Element size is +1 to account for terminating zero
54 let element_size = max_str_len + 1;
55 let len = element_size * batch_size;
56 let mut values = Vec::new();
57 values
58 .try_reserve_exact(len)
59 .map_err(|_| TooLargeBufferSize {
60 num_elements: batch_size,
61 // We want the element size in bytes
62 element_size: element_size * size_of::<C>(),
63 })?;
64 values.resize(len, C::default());
65 Ok(TextColumn {
66 max_str_len,
67 values,
68 indicators: vec![0; batch_size],
69 })
70 }
71
72 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
73 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
74 /// one in order to make space for the null terminating zero at the end of strings. All
75 /// indicators are set to [`crate::sys::NULL_DATA`] by default.
76 pub fn new(batch_size: usize, max_str_len: usize) -> Self
77 where
78 C: Default + Copy,
79 {
80 // Element size is +1 to account for terminating zero
81 let element_size = max_str_len + 1;
82 let len = element_size * batch_size;
83 let mut values = Vec::new();
84 values.reserve_exact(len);
85 values.resize(len, C::default());
86 TextColumn {
87 max_str_len,
88 values,
89 indicators: vec![NULL_DATA; batch_size],
90 }
91 }
92
93 /// Bytes of string at the specified position. Includes interior nuls, but excludes the
94 /// terminating nul.
95 ///
96 /// The column buffer does not know how many elements were in the last row group, and therefore
97 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
98 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
99 /// equal to the maximum number of elements in the buffer.
100 pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
101 self.content_length_at(row_index).map(|length| {
102 let offset = row_index * (self.max_str_len + 1);
103 &self.values[offset..offset + length]
104 })
105 }
106
107 /// Maximum length of elements
108 pub fn max_len(&self) -> usize {
109 self.max_str_len
110 }
111
112 /// Indicator value at the specified position. Useful to detect truncation of data.
113 ///
114 /// The column buffer does not know how many elements were in the last row group, and therefore
115 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
116 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
117 /// equal to the maximum number of elements in the buffer.
118 pub fn indicator_at(&self, row_index: usize) -> Indicator {
119 Indicator::from_isize(self.indicators[row_index])
120 }
121
122 /// Length of value at the specified position. This is different from an indicator as it refers
123 /// to the length of the value in the buffer, not to the length of the value in the datasource.
124 /// The two things are different for truncated values.
125 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
126 match self.indicator_at(row_index) {
127 Indicator::Null => None,
128 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
129 Indicator::NoTotal => Some(self.max_str_len),
130 Indicator::Length(length_in_bytes) => {
131 let length_in_chars = length_in_bytes / size_of::<C>();
132 let length = min(self.max_str_len, length_in_chars);
133 Some(length)
134 }
135 }
136 }
137
138 /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
139 ///
140 /// After fetching data we may want to know if any value has been truncated due to the buffer
141 /// not being able to hold elements of that size. This method checks the indicator buffer
142 /// element wise.
143 pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
144 let max_bin_length = self.max_str_len * size_of::<C>();
145 self.indicators
146 .iter()
147 .copied()
148 .take(num_rows)
149 .find_map(|indicator| {
150 let indicator = Indicator::from_isize(indicator);
151 indicator.is_truncated(max_bin_length).then_some(indicator)
152 })
153 }
154
155 /// Changes the maximum string length the buffer can hold. This operation is useful if you find
156 /// an unexpected large input string during insertion.
157 ///
158 /// This is however costly, as not only does the new buffer have to be allocated, but all values
159 /// have to copied from the old to the new buffer.
160 ///
161 /// This method could also be used to reduce the maximum string length, which would truncate
162 /// strings in the process.
163 ///
164 /// This method does not adjust indicator buffers as these might hold values larger than the
165 /// maximum string length.
166 ///
167 /// # Parameters
168 ///
169 /// * `new_max_str_len`: New maximum string length without terminating zero.
170 /// * `num_rows`: Number of valid rows currently stored in this buffer.
171 pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
172 where
173 C: Default + Copy,
174 {
175 debug!(
176 "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
177 num_rows, self.max_str_len, new_max_str_len
178 );
179
180 let batch_size = self.indicators.len();
181 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
182 let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
183 // Copy values from old to new buffer.
184 let max_copy_length = min(self.max_str_len, new_max_str_len);
185 for ((&indicator, old_value), new_value) in self
186 .indicators
187 .iter()
188 .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
189 .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
190 .take(num_rows)
191 {
192 match Indicator::from_isize(indicator) {
193 Indicator::Null => (),
194 Indicator::NoTotal => {
195 // There is no good choice here in case we are expanding the buffer. Since
196 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
197 // be padded with 0. I currently cannot think of any use case there it would
198 // matter.
199 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
200 }
201 Indicator::Length(num_bytes_len) => {
202 let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
203 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
204 }
205 }
206 }
207 self.values = new_values;
208 self.max_str_len = new_max_str_len;
209 }
210
211 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
212 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
213 /// allowed element length. `input` must be specified without the terminating zero.
214 pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
215 where
216 C: Default + Copy,
217 {
218 if let Some(input) = input {
219 self.set_mut(index, input.len()).copy_from_slice(input);
220 } else {
221 self.indicators[index] = NULL_DATA;
222 }
223 }
224
225 /// Can be used to set a value at a specific row index without performing a memcopy on an input
226 /// slice and instead provides direct access to the underlying buffer.
227 ///
228 /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
229 /// be more convenient. This method is very useful if you want to `write!` a string value to the
230 /// buffer and the binary (**!**) length of the formatted string is known upfront.
231 ///
232 /// # Example: Write timestamp to text column.
233 ///
234 /// ```
235 /// use odbc_api::buffers::TextColumn;
236 /// use std::io::Write;
237 ///
238 /// /// Writes times formatted as hh::mm::ss.fff
239 /// fn write_time(
240 /// col: &mut TextColumn<u8>,
241 /// index: usize,
242 /// hours: u8,
243 /// minutes: u8,
244 /// seconds: u8,
245 /// milliseconds: u16)
246 /// {
247 /// write!(
248 /// col.set_mut(index, 12),
249 /// "{:02}:{:02}:{:02}.{:03}",
250 /// hours, minutes, seconds, milliseconds
251 /// ).unwrap();
252 /// }
253 /// ```
254 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
255 where
256 C: Default,
257 {
258 if length > self.max_str_len {
259 panic!(
260 "Tried to insert a value into a text buffer which is larger than the maximum \
261 allowed string length for the buffer."
262 );
263 }
264 self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
265 let start = (self.max_str_len + 1) * index;
266 let end = start + length;
267 // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
268 // driver would not care about the value in the index buffer and only look for the
269 // terminating zero.
270 self.values[end] = C::default();
271 &mut self.values[start..end]
272 }
273
274 /// Fills the column with NULL, between From and To
275 pub fn fill_null(&mut self, from: usize, to: usize) {
276 for index in from..to {
277 self.indicators[index] = NULL_DATA;
278 }
279 }
280
281 /// Provides access to the raw underlying value buffer. Normal applications should have little
282 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
283 /// from the ODBC in memory representation into other kinds of buffers.
284 ///
285 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
286 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
287 /// terminating zero at the end of each string. For the actual value length call
288 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
289 pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
290 &self.values[..(self.max_str_len + 1) * num_valid_rows]
291 }
292
293 /// The maximum number of rows the TextColumn can hold.
294 pub fn row_capacity(&self) -> usize {
295 self.values.len()
296 }
297}
298
299impl WCharColumn {
300 /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
301 /// the terminating nul.
302 ///
303 /// # Safety
304 ///
305 /// The column buffer does not know how many elements were in the last row group, and therefore
306 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
307 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
308 /// equal to the maximum number of elements in the buffer.
309 pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
310 self.value_at(row_index).map(U16Str::from_slice)
311 }
312}
313
314unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
315where
316 TextColumn<C>: CDataMut + HasDataType,
317{
318 type View<'a> = TextColumnView<'a, C>;
319
320 fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
321 TextColumnView {
322 num_rows: valid_rows,
323 col: self,
324 }
325 }
326
327 fn fill_default(&mut self, from: usize, to: usize) {
328 self.fill_null(from, to)
329 }
330
331 /// Maximum number of text strings this column may hold.
332 fn capacity(&self) -> usize {
333 self.indicators.len()
334 }
335
336 fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
337 let max_bin_length = self.max_str_len * size_of::<C>();
338 self.indicators
339 .iter()
340 .copied()
341 .take(num_rows)
342 .find_map(|indicator| {
343 let indicator = Indicator::from_isize(indicator);
344 indicator.is_truncated(max_bin_length).then_some(indicator)
345 })
346 }
347}
348
349/// Allows read only access to the valid part of a text column.
350///
351/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
352/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
353/// are actually valid, and have been returned with the last row group of the the result set. That
354/// number is maintained on the level of the entire column buffer. So a text column knows the number
355/// of valid rows, in addition to holding a reference to the buffer, in order to guarantee, that
356/// every element acccessed through it, is valid.
357#[derive(Debug, Clone, Copy)]
358pub struct TextColumnView<'c, C> {
359 num_rows: usize,
360 col: &'c TextColumn<C>,
361}
362
363impl<'c, C> TextColumnView<'c, C> {
364 /// The number of valid elements in the text column.
365 pub fn len(&self) -> usize {
366 self.num_rows
367 }
368
369 /// True if, and only if there are no valid rows in the column buffer.
370 pub fn is_empty(&self) -> bool {
371 self.num_rows == 0
372 }
373
374 /// Slice of text at the specified row index without terminating zero.
375 pub fn get(&self, index: usize) -> Option<&'c [C]> {
376 self.col.value_at(index)
377 }
378
379 /// Iterator over the valid elements of the text buffer
380 pub fn iter(&self) -> TextColumnIt<'c, C> {
381 TextColumnIt {
382 pos: 0,
383 num_rows: self.num_rows,
384 col: self.col,
385 }
386 }
387
388 /// Length of value at the specified position. This is different from an indicator as it refers
389 /// to the length of the value in the buffer, not to the length of the value in the datasource.
390 /// The two things are different for truncated values.
391 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
392 if row_index >= self.num_rows {
393 panic!("Row index points beyond the range of valid values.")
394 }
395 self.col.content_length_at(row_index)
396 }
397
398 /// Provides access to the raw underlying value buffer. Normal applications should have little
399 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
400 /// from the ODBC in memory representation into other kinds of buffers.
401 ///
402 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
403 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
404 /// terminating zero at the end of each string. For the actual value length call
405 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
406 pub fn raw_value_buffer(&self) -> &'c [C] {
407 self.col.raw_value_buffer(self.num_rows)
408 }
409
410 pub fn max_len(&self) -> usize {
411 self.col.max_len()
412 }
413
414 /// `Some` if any value is truncated.
415 ///
416 /// After fetching data we may want to know if any value has been truncated due to the buffer
417 /// not being able to hold elements of that size. This method checks the indicator buffer
418 /// element wise.
419 pub fn has_truncated_values(&self) -> Option<Indicator> {
420 self.col.has_truncated_values(self.num_rows)
421 }
422}
423
424unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
425 type SliceMut = TextColumnSliceMut<'a, C>;
426
427 unsafe fn as_view_mut(
428 &'a mut self,
429 parameter_index: u16,
430 stmt: StatementRef<'a>,
431 ) -> Self::SliceMut {
432 TextColumnSliceMut {
433 column: self,
434 stmt,
435 parameter_index,
436 }
437 }
438}
439
440/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
441/// values.
442pub struct TextColumnSliceMut<'a, C> {
443 column: &'a mut TextColumn<C>,
444 // Needed to rebind the column in case of resize
445 stmt: StatementRef<'a>,
446 // Also needed to rebind the column in case of resize
447 parameter_index: u16,
448}
449
450impl<C> TextColumnSliceMut<'_, C>
451where
452 C: Default + Copy,
453{
454 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
455 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
456 /// allowed element length. `element` must be specified without the terminating zero.
457 pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
458 self.column.set_value(row_index, element)
459 }
460
461 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
462 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
463 /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
464 /// one. This makes this an extremely expensive operation.
465 pub fn ensure_max_element_length(
466 &mut self,
467 element_length: usize,
468 num_rows_to_copy: usize,
469 ) -> Result<(), Error>
470 where
471 TextColumn<C>: HasDataType + CData,
472 {
473 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
474 // in order to hold it. This invalidates the pointers previously bound to the statement. So
475 // we rebind them.
476 if element_length > self.column.max_len() {
477 let new_max_str_len = element_length;
478 self.column
479 .resize_max_str(new_max_str_len, num_rows_to_copy);
480 unsafe {
481 self.stmt
482 .bind_input_parameter(self.parameter_index, self.column)
483 .into_result(&self.stmt)?
484 }
485 }
486 Ok(())
487 }
488
489 /// Can be used to set a value at a specific row index without performing a memcopy on an input
490 /// slice and instead provides direct access to the underlying buffer.
491 ///
492 /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
493 /// be more convenient. This method is very useful if you want to `write!` a string value to the
494 /// buffer and the binary (**!**) length of the formatted string is known upfront.
495 ///
496 /// # Example: Write timestamp to text column.
497 ///
498 /// ```
499 /// use odbc_api::buffers::TextColumnSliceMut;
500 /// use std::io::Write;
501 ///
502 /// /// Writes times formatted as hh::mm::ss.fff
503 /// fn write_time(
504 /// col: &mut TextColumnSliceMut<u8>,
505 /// index: usize,
506 /// hours: u8,
507 /// minutes: u8,
508 /// seconds: u8,
509 /// milliseconds: u16)
510 /// {
511 /// write!(
512 /// col.set_mut(index, 12),
513 /// "{:02}:{:02}:{:02}.{:03}",
514 /// hours, minutes, seconds, milliseconds
515 /// ).unwrap();
516 /// }
517 /// ```
518 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
519 self.column.set_mut(index, length)
520 }
521}
522
523/// Iterator over a text column. See [`TextColumnView::iter`]
524#[derive(Debug)]
525pub struct TextColumnIt<'c, C> {
526 pos: usize,
527 num_rows: usize,
528 col: &'c TextColumn<C>,
529}
530
531impl<'c, C> TextColumnIt<'c, C> {
532 fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
533 if self.pos == self.num_rows {
534 None
535 } else {
536 let ret = Some(self.col.value_at(self.pos));
537 self.pos += 1;
538 ret
539 }
540 }
541}
542
543impl<'c> Iterator for TextColumnIt<'c, u8> {
544 type Item = Option<&'c [u8]>;
545
546 fn next(&mut self) -> Option<Self::Item> {
547 self.next_impl()
548 }
549
550 fn size_hint(&self) -> (usize, Option<usize>) {
551 let len = self.num_rows - self.pos;
552 (len, Some(len))
553 }
554}
555
556impl ExactSizeIterator for TextColumnIt<'_, u8> {}
557
558impl<'c> Iterator for TextColumnIt<'c, u16> {
559 type Item = Option<&'c U16Str>;
560
561 fn next(&mut self) -> Option<Self::Item> {
562 self.next_impl().map(|opt| opt.map(U16Str::from_slice))
563 }
564
565 fn size_hint(&self) -> (usize, Option<usize>) {
566 let len = self.num_rows - self.pos;
567 (len, Some(len))
568 }
569}
570
571impl ExactSizeIterator for TextColumnIt<'_, u16> {}
572
573unsafe impl CData for CharColumn {
574 fn cdata_type(&self) -> CDataType {
575 CDataType::Char
576 }
577
578 fn indicator_ptr(&self) -> *const isize {
579 self.indicators.as_ptr()
580 }
581
582 fn value_ptr(&self) -> *const c_void {
583 self.values.as_ptr() as *const c_void
584 }
585
586 fn buffer_length(&self) -> isize {
587 (self.max_str_len + 1).try_into().unwrap()
588 }
589}
590
591unsafe impl CDataMut for CharColumn {
592 fn mut_indicator_ptr(&mut self) -> *mut isize {
593 self.indicators.as_mut_ptr()
594 }
595
596 fn mut_value_ptr(&mut self) -> *mut c_void {
597 self.values.as_mut_ptr() as *mut c_void
598 }
599}
600
601impl HasDataType for CharColumn {
602 fn data_type(&self) -> DataType {
603 DataType::Varchar {
604 length: NonZeroUsize::new(self.max_str_len),
605 }
606 }
607}
608
609unsafe impl CData for WCharColumn {
610 fn cdata_type(&self) -> CDataType {
611 CDataType::WChar
612 }
613
614 fn indicator_ptr(&self) -> *const isize {
615 self.indicators.as_ptr()
616 }
617
618 fn value_ptr(&self) -> *const c_void {
619 self.values.as_ptr() as *const c_void
620 }
621
622 fn buffer_length(&self) -> isize {
623 ((self.max_str_len + 1) * 2).try_into().unwrap()
624 }
625}
626
627unsafe impl CDataMut for WCharColumn {
628 fn mut_indicator_ptr(&mut self) -> *mut isize {
629 self.indicators.as_mut_ptr()
630 }
631
632 fn mut_value_ptr(&mut self) -> *mut c_void {
633 self.values.as_mut_ptr() as *mut c_void
634 }
635}
636
637impl HasDataType for WCharColumn {
638 fn data_type(&self) -> DataType {
639 DataType::WVarchar {
640 length: NonZeroUsize::new(self.max_str_len),
641 }
642 }
643}