// polars_parquet/parquet/encoding/delta_length_byte_array/mod.rs
mod decoder;
mod encoder;

pub(crate) use decoder::Decoder;
pub(crate) use encoder::encode;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parquet::error::ParquetError;

    /// Encodes five short strings, decodes them back, and checks both the
    /// items yielded by the decoder and its `values` field afterwards.
    #[test]
    fn basic() -> Result<(), ParquetError> {
        let inputs = vec!["aa", "bbb", "a", "aa", "b"];

        let mut encoded = Vec::new();
        encode(inputs.into_iter().map(str::as_bytes), &mut encoded);

        let mut decoder = Decoder::try_new(&encoded)?;

        // `by_ref` borrows the decoder so it can still be inspected once the
        // iteration has finished.
        let decoded = decoder.by_ref().collect::<Result<Vec<_>, _>>()?;
        assert_eq!(
            decoded,
            vec![
                b"aa".as_ref(),
                b"bbb".as_ref(),
                b"a".as_ref(),
                b"aa".as_ref(),
                b"b".as_ref()
            ]
        );

        // The `values` field is expected to equal the bytes of all inputs
        // concatenated in order.
        assert_eq!(decoder.values, b"aabbbaaab".as_ref());
        Ok(())
    }

    /// Round-trips the 136 strings `a0` through `a135` and checks that every
    /// decoded item matches the bytes of the corresponding input.
    #[test]
    fn many_numbers() -> Result<(), ParquetError> {
        let inputs: Vec<String> = (0..136).map(|i| format!("a{}", i)).collect();

        // Snapshot the expected bytes before `inputs` is consumed by `encode`.
        let expected: Vec<Vec<u8>> = inputs.iter().map(|s| s.as_bytes().to_vec()).collect();

        let mut encoded = Vec::new();
        encode(inputs.into_iter(), &mut encoded);

        let mut decoder = Decoder::try_new(&encoded)?;
        let decoded = decoder.by_ref().collect::<Result<Vec<_>, _>>()?;
        assert_eq!(decoded, expected);

        Ok(())
    }
}