// polars_parquet/parquet/encoding/byte_stream_split/mod.rs

// The decoding logic lives in the private `decoder` submodule.
mod decoder;

// Re-export `Decoder` so users of this module can name it without going
// through the private submodule path.
pub use decoder::Decoder;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parquet::error::ParquetError;
    use crate::parquet::types::NativeType;

    /// Encodes three `f32` values with the local [`encode`] helper and checks
    /// that [`Decoder`] yields the same values back, in the same order.
    #[test]
    fn round_trip_f32() -> Result<(), ParquetError> {
        let expected: Vec<f32> = vec![1.0e-2_f32, 2.5_f32, 3.0e2_f32];
        let mut encoded = Vec::new();
        encode(&expected, &mut encoded);

        let mut decoder = Decoder::try_new(&encoded, size_of::<f32>())?;
        // Each item handed to the converter is reassembled into an `f32` from
        // its little-endian bytes.
        let decoded: Vec<_> = decoder
            .iter_converted(|bytes| f32::from_le_bytes(bytes.try_into().unwrap()))
            .collect();

        assert_eq!(expected, decoded);

        Ok(())
    }

    /// Same round trip as `round_trip_f32`, but with 8-byte `f64` values.
    #[test]
    fn round_trip_f64() -> Result<(), ParquetError> {
        let expected: Vec<f64> = vec![1.0e-2_f64, 2.5_f64, 3.0e2_f64];
        let mut encoded = Vec::new();
        encode(&expected, &mut encoded);

        let mut decoder = Decoder::try_new(&encoded, size_of::<f64>())?;
        // Each item handed to the converter is reassembled into an `f64` from
        // its little-endian bytes.
        let decoded: Vec<_> = decoder
            .iter_converted(|bytes| f64::from_le_bytes(bytes.try_into().unwrap()))
            .collect();

        assert_eq!(expected, decoded);

        Ok(())
    }

    /// A 12-byte buffer combined with an element size of 8 must be rejected
    /// (12 is not a whole number of 8-byte elements).
    #[test]
    fn fails_for_invalid_values_size() -> Result<(), ParquetError> {
        let buffer = vec![0; 12];

        assert!(Decoder::try_new(&buffer, 8).is_err());

        Ok(())
    }

    /// An element size of 16 must be rejected even though the 16-byte buffer
    /// holds exactly one such element.
    #[test]
    fn fails_for_invalid_element_size() -> Result<(), ParquetError> {
        let buffer = vec![0; 16];

        assert!(Decoder::try_new(&buffer, 16).is_err());

        Ok(())
    }

    /// Test-only reference encoder.
    ///
    /// Resizes `buffer` to the byte size of `data` and scatters the
    /// little-endian bytes of every value so that byte `n` of value `i` ends
    /// up at index `data.len() * n + i`; i.e. all first bytes are stored
    /// contiguously, followed by all second bytes, and so on.
    fn encode<T: NativeType>(data: &[T], buffer: &mut Vec<u8>) {
        let width = size_of::<T>();
        let count = data.len();
        buffer.resize(size_of_val(data), 0);

        for (row, value) in data.iter().enumerate() {
            let le_bytes = value.to_le_bytes();
            let le_slice: &[u8] = le_bytes.as_ref();
            // Restrict to exactly `width` bytes, mirroring the `0..width`
            // index range of a plain counting loop.
            for (stream, byte) in le_slice[..width].iter().enumerate() {
                buffer[count * stream + row] = *byte;
            }
        }
    }
}