polars_parquet/parquet/encoding/
plain_byte_array.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/// Decodes according to [Plain strings](https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0),
/// prefixes, lengths and values
/// # Implementation
/// This struct does not allocate on the heap.
use crate::parquet::error::ParquetError;

#[derive(Debug)]
pub struct BinaryIter<'a> {
    values: &'a [u8],
    length: Option<usize>,
}

impl<'a> BinaryIter<'a> {
    pub fn new(values: &'a [u8], length: Option<usize>) -> Self {
        Self { values, length }
    }
}

impl<'a> Iterator for BinaryIter<'a> {
    type Item = Result<&'a [u8], ParquetError>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.values.len() < 4 {
            return None;
        }
        if let Some(x) = self.length.as_mut() {
            *x = x.saturating_sub(1)
        }
        let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize;
        self.values = &self.values[4..];
        if length > self.values.len() {
            return Some(Err(ParquetError::oos(
                "A string in plain encoding declares a length that is out of range",
            )));
        }
        let (result, remaining) = self.values.split_at(length);
        self.values = remaining;
        Some(Ok(result))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.length.unwrap_or_default(), self.length)
    }
}