polars_parquet/parquet/encoding/bitpacked/
encode.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
use super::{Unpackable, Unpacked};

/// Encodes (packs) a slice of [`Unpackable`] into bitpacked bytes `packed`, using `num_bits` per value.
///
/// `unpacked` is split into groups of `T::Unpacked::LENGTH` values. Each group is handed to
/// [`Unpackable::pack`] together with the next `ceil8(T::Unpacked::LENGTH * num_bits)`-byte
/// chunk of `packed`. A trailing partial group is zero-padded to a full group before packing.
///
/// This function assumes that the maximum value in `unpacked` fits in `num_bits` bits
/// and saturates higher values.
///
/// Only the first `ceil8(unpacked.len() * num_bits)` bytes of `packed` are populated.
/// In particular, when `num_bits` is 0 nothing is written and `packed` is left untouched.
///
/// Groups and output chunks are paired one-to-one; if `packed` provides fewer chunks than
/// there are groups, the surplus groups are not encoded.
pub fn encode<T: Unpackable>(unpacked: &[T], num_bits: usize, packed: &mut [u8]) {
    // Zero bits per value encode to zero bytes. Without this guard the chunk size computed
    // below would be 0, and `chunks_mut` / `chunks_exact_mut` panic on a zero chunk size.
    if num_bits == 0 {
        return;
    }

    let chunks = unpacked.chunks_exact(T::Unpacked::LENGTH);
    let remainder = chunks.remainder();

    // Number of bytes one full group of `LENGTH` values occupies once packed.
    let packed_size = (T::Unpacked::LENGTH * num_bits + 7) / 8;

    if remainder.is_empty() {
        // Every group is complete, so only complete output chunks are used.
        let packed_chunks = packed.chunks_exact_mut(packed_size);
        chunks.zip(packed_chunks).for_each(|(unpacked, packed)| {
            T::pack(&unpacked.try_into().unwrap(), num_bits, packed);
        });
    } else {
        // Zero-pad the trailing partial group so it can be packed like a full one.
        let mut last_chunk = T::Unpacked::zero();
        last_chunk.as_mut()[..remainder.len()].copy_from_slice(remainder);

        // `chunks_mut` rather than `chunks_exact_mut`: the output slice handed to the final
        // group may be shorter than `packed_size`.
        let packed_chunks = packed.chunks_mut(packed_size);
        chunks
            .chain(std::iter::once(last_chunk.as_ref()))
            .zip(packed_chunks)
            .for_each(|(unpacked, packed)| {
                T::pack(&unpacked.try_into().unwrap(), num_bits, packed);
            });
    }
}

/// Bitpacks a single, possibly incomplete, pack of [`Unpackable`] values into `packed`,
/// spending `num_bits` bits on each value.
///
/// When `unpacked` holds fewer than `T::Unpacked::LENGTH` values it is copied into a
/// zero-initialised pack first, so the missing trailing values are encoded as zeros.
/// Otherwise `unpacked` is converted into a pack directly.
///
/// The maximum value in `unpacked` is assumed to fit in `num_bits` bits; higher values
/// are saturated.
///
/// Only the first `ceil8(unpacked.len() * num_bits)` bytes of `packed` are populated.
///
/// # Panics
///
/// Panics if `unpacked` cannot be converted into `T::Unpacked` (presumably when it holds
/// more than `T::Unpacked::LENGTH` values).
#[inline]
pub fn encode_pack<T: Unpackable>(unpacked: &[T], num_bits: usize, packed: &mut [u8]) {
    let available = unpacked.len();

    // Build exactly one full pack, padding with zeros when the input is short.
    let full_pack: T::Unpacked = if available >= T::Unpacked::LENGTH {
        unpacked.try_into().unwrap()
    } else {
        let mut padded = T::Unpacked::zero();
        padded.as_mut()[..available].copy_from_slice(unpacked);
        padded
    };

    T::pack(&full_pack, num_bits, packed)
}