/// Protobuf message types used by the encodings in this file.
///
/// The module body is pulled in at compile time from `lance.encodings.rs`
/// inside the build's `OUT_DIR` (presumably emitted by the crate's build
/// script from the `.proto` definitions -- confirm against `build.rs`).
pub mod pb {
// The included file is build output rather than hand-maintained source, so
// the lints below are silenced for the whole module instead of being fixed.
#![allow(clippy::all)]
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(unused)]
#![allow(improper_ctypes)]
#![allow(clippy::upper_case_acronyms)]
#![allow(clippy::use_self)]
// Textually splice the generated Rust source into this module.
include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
}
use pb::{
array_encoding::ArrayEncoding as ArrayEncodingEnum,
buffer::BufferType,
nullable::{AllNull, NoNull, Nullability, SomeNull},
page_layout::Layout,
AllNullLayout, ArrayEncoding, Binary, BinaryBlock, BinaryMiniBlock, Bitpack2, Bitpacked,
BitpackedForNonNeg, Dictionary, FixedSizeBinary, FixedSizeList, Flat, Fsst, FsstMiniBlock,
MiniBlockLayout, Nullable, PackedStruct, PackedStructFixedWidthMiniBlock, PageLayout,
RepDefLayer,
};
use crate::{
encodings::physical::block_compress::CompressionConfig, repdef::DefinitionInterpretation,
};
use self::pb::Constant;
/// Field-less type that acts as a namespace for the associated functions in
/// its `impl` block, which assemble `pb` array-encoding and page-layout
/// messages.
pub struct ProtobufUtils {}
impl ProtobufUtils {
/// Builds an `ArrayEncoding` holding the `Constant` variant.
///
/// `value` and `num_values` are stored unchanged in the `Constant` message.
pub fn constant(value: Vec<u8>, num_values: u64) -> ArrayEncoding {
    let constant = Constant { value, num_values };
    let variant = ArrayEncodingEnum::Constant(constant);
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding a `Nullable` message whose nullability
/// is the `AllNulls` variant.
pub fn basic_all_null_encoding() -> ArrayEncoding {
    let nullable = Nullable {
        nullability: Some(Nullability::AllNulls(AllNull {})),
    };
    let variant = ArrayEncodingEnum::Nullable(Box::new(nullable));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding a `Nullable` message whose nullability
/// is the `SomeNulls` variant.
///
/// `validity` and `values` are boxed and stored in the fields of the same
/// name on the `SomeNull` message.
pub fn basic_some_null_encoding(
    validity: ArrayEncoding,
    values: ArrayEncoding,
) -> ArrayEncoding {
    let some_null = SomeNull {
        validity: Some(Box::new(validity)),
        values: Some(Box::new(values)),
    };
    let nullable = Nullable {
        nullability: Some(Nullability::SomeNulls(Box::new(some_null))),
    };
    let variant = ArrayEncodingEnum::Nullable(Box::new(nullable));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding a `Nullable` message whose nullability
/// is the `NoNulls` variant, with `values` boxed as its only payload.
pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
    let no_null = NoNull {
        values: Some(Box::new(values)),
    };
    let nullable = Nullable {
        nullability: Some(Nullability::NoNulls(Box::new(no_null))),
    };
    let variant = ArrayEncodingEnum::Nullable(Box::new(nullable));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `Flat` variant.
///
/// * `bits_per_value` - stored unchanged on the `Flat` message.
/// * `buffer_index` - index recorded in a `pb::Buffer` of type
///   `BufferType::Page`.
/// * `compression` - when present, converted into a `pb::Compression` whose
///   `scheme` is the string form of the config's scheme and whose `level` is
///   copied over; when `None`, the message carries no compression.
pub fn flat_encoding(
    bits_per_value: u64,
    buffer_index: u32,
    compression: Option<CompressionConfig>,
) -> ArrayEncoding {
    let buffer = pb::Buffer {
        buffer_index,
        buffer_type: BufferType::Page as i32,
    };
    let compression = compression.map(|cfg| pb::Compression {
        scheme: cfg.scheme.to_string(),
        level: cfg.level,
    });
    let flat = Flat {
        bits_per_value,
        buffer: Some(buffer),
        compression,
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::Flat(flat)),
    }
}
/// Builds an `ArrayEncoding` holding the `Bitpacked` variant.
///
/// The two bit widths and the `signed` flag are stored unchanged;
/// `buffer_index` is recorded in a `pb::Buffer` of type `BufferType::Page`.
pub fn bitpacked_encoding(
    compressed_bits_per_value: u64,
    uncompressed_bits_per_value: u64,
    buffer_index: u32,
    signed: bool,
) -> ArrayEncoding {
    let buffer = pb::Buffer {
        buffer_index,
        buffer_type: BufferType::Page as i32,
    };
    let bitpacked = Bitpacked {
        compressed_bits_per_value,
        buffer: Some(buffer),
        uncompressed_bits_per_value,
        signed,
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::Bitpacked(bitpacked)),
    }
}
/// Builds an `ArrayEncoding` holding the `BitpackedForNonNeg` variant.
///
/// Both bit widths are stored unchanged; `buffer_index` is recorded in a
/// `pb::Buffer` of type `BufferType::Page`.
pub fn bitpacked_for_non_neg_encoding(
    compressed_bits_per_value: u64,
    uncompressed_bits_per_value: u64,
    buffer_index: u32,
) -> ArrayEncoding {
    let buffer = pb::Buffer {
        buffer_index,
        buffer_type: BufferType::Page as i32,
    };
    let bitpacked = BitpackedForNonNeg {
        compressed_bits_per_value,
        buffer: Some(buffer),
        uncompressed_bits_per_value,
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(bitpacked)),
    }
}
/// Builds an `ArrayEncoding` holding the `Bitpack2` variant, recording only
/// `uncompressed_bits_per_value`.
pub fn bitpack2(uncompressed_bits_per_value: u64) -> ArrayEncoding {
    let bitpack2 = Bitpack2 {
        uncompressed_bits_per_value,
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::Bitpack2(bitpack2)),
    }
}
/// Builds an `ArrayEncoding` holding the `BinaryMiniBlock` variant; the
/// message itself is constructed with no fields.
pub fn binary_miniblock() -> ArrayEncoding {
    let variant = ArrayEncodingEnum::BinaryMiniBlock(BinaryMiniBlock {});
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `BinaryBlock` variant; the message
/// itself is constructed with no fields.
pub fn binary_block() -> ArrayEncoding {
    let variant = ArrayEncodingEnum::BinaryBlock(BinaryBlock {});
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `FsstMiniBlock` variant.
///
/// `data` is boxed into the `binary_mini_block` field and `symbol_table` is
/// stored unchanged.
pub fn fsst_mini_block(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
    let fsst = FsstMiniBlock {
        binary_mini_block: Some(Box::new(data)),
        symbol_table,
    };
    let variant = ArrayEncodingEnum::FsstMiniBlock(Box::new(fsst));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `PackedStruct` variant.
///
/// `child_encodings` becomes the message's `inner` list and
/// `packed_buffer_index` is recorded in a `pb::Buffer` of type
/// `BufferType::Page`.
pub fn packed_struct(
    child_encodings: Vec<ArrayEncoding>,
    packed_buffer_index: u32,
) -> ArrayEncoding {
    let buffer = pb::Buffer {
        buffer_index: packed_buffer_index,
        buffer_type: BufferType::Page as i32,
    };
    let packed = PackedStruct {
        inner: child_encodings,
        buffer: Some(buffer),
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::PackedStruct(packed)),
    }
}
/// Builds an `ArrayEncoding` holding the `PackedStructFixedWidthMiniBlock`
/// variant.
///
/// `data` is boxed into the `flat` field and `bits_per_values` is stored
/// unchanged.
pub fn packed_struct_fixed_width_mini_block(
    data: ArrayEncoding,
    bits_per_values: Vec<u32>,
) -> ArrayEncoding {
    let packed = PackedStructFixedWidthMiniBlock {
        flat: Some(Box::new(data)),
        bits_per_values,
    };
    let variant = ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(Box::new(packed));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `Binary` variant.
///
/// Note the argument order: `indices_encoding` comes first and is boxed into
/// the `indices` field, `bytes_encoding` comes second and is boxed into the
/// `bytes` field. `null_adjustment` is stored unchanged.
pub fn binary(
    indices_encoding: ArrayEncoding,
    bytes_encoding: ArrayEncoding,
    null_adjustment: u64,
) -> ArrayEncoding {
    let binary = Binary {
        bytes: Some(Box::new(bytes_encoding)),
        indices: Some(Box::new(indices_encoding)),
        null_adjustment,
    };
    let variant = ArrayEncodingEnum::Binary(Box::new(binary));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `Dictionary` variant.
///
/// `indices` and `items` are boxed into the fields of the same name, and
/// `num_items` is stored as `num_dictionary_items`.
pub fn dict_encoding(
    indices: ArrayEncoding,
    items: ArrayEncoding,
    num_items: u32,
) -> ArrayEncoding {
    let dictionary = Dictionary {
        indices: Some(Box::new(indices)),
        items: Some(Box::new(items)),
        num_dictionary_items: num_items,
    };
    let variant = ArrayEncodingEnum::Dictionary(Box::new(dictionary));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `FixedSizeBinary` variant.
///
/// `data` is boxed into the `bytes` field and `byte_width` is stored
/// unchanged.
pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
    let fixed = FixedSizeBinary {
        bytes: Some(Box::new(data)),
        byte_width,
    };
    let variant = ArrayEncodingEnum::FixedSizeBinary(Box::new(fixed));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Builds an `ArrayEncoding` holding the `FixedSizeList` variant.
///
/// `data` is boxed into the `items` field. `dimension` is converted with
/// `try_into` to the integer type of the message's `dimension` field.
///
/// # Panics
///
/// Panics if `dimension` does not fit in the message's `dimension` field.
/// The signature cannot report this as an error, so an out-of-range value is
/// treated as a caller bug and reported with an explicit message.
pub fn fixed_size_list(data: ArrayEncoding, dimension: u64) -> ArrayEncoding {
    let dimension = dimension
        .try_into()
        .expect("fixed_size_list dimension does not fit in the protobuf dimension field");
    let list = FixedSizeList {
        dimension,
        items: Some(Box::new(data)),
    };
    ArrayEncoding {
        array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(list))),
    }
}
/// Builds an `ArrayEncoding` holding the `Fsst` variant.
///
/// `data` is boxed into the `binary` field and `symbol_table` is stored
/// unchanged.
pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
    let fsst = Fsst {
        binary: Some(Box::new(data)),
        symbol_table,
    };
    let variant = ArrayEncodingEnum::Fsst(Box::new(fsst));
    ArrayEncoding {
        array_encoding: Some(variant),
    }
}
/// Maps a `DefinitionInterpretation` to the matching `RepDefLayer` variant
/// and returns that variant's `i32` value.
fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
    // Pick the protobuf variant first, then perform the integer cast once.
    let layer = match def {
        DefinitionInterpretation::AllValidItem => RepDefLayer::RepdefAllValidItem,
        DefinitionInterpretation::AllValidList => RepDefLayer::RepdefAllValidList,
        DefinitionInterpretation::NullableItem => RepDefLayer::RepdefNullableItem,
        DefinitionInterpretation::NullableList => RepDefLayer::RepdefNullableList,
        DefinitionInterpretation::EmptyableList => RepDefLayer::RepdefEmptyableList,
        DefinitionInterpretation::NullableAndEmptyableList => {
            RepDefLayer::RepdefNullAndEmptyList
        }
    };
    layer as i32
}
/// Converts a raw `RepDefLayer` integer back into a
/// `DefinitionInterpretation`.
///
/// # Panics
///
/// * If `layer` is not a known `RepDefLayer` value. The panic message
///   includes the offending integer so the bad input can be identified.
/// * If `layer` decodes to `RepDefLayer::RepdefUnspecified`, which has no
///   `DefinitionInterpretation` counterpart.
pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
    // The signature has no error channel, so an unknown value still panics,
    // but with the raw value attached instead of a bare `unwrap` message.
    let decoded = RepDefLayer::try_from(layer)
        .unwrap_or_else(|_| panic!("Unknown repdef layer value: {}", layer));
    match decoded {
        RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
        RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
        RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
        RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
        RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
        RepDefLayer::RepdefNullAndEmptyList => {
            DefinitionInterpretation::NullableAndEmptyableList
        }
        RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
    }
}
/// Builds a `PageLayout` holding the `MiniBlockLayout` variant.
///
/// * `rep_encoding`, `def_encoding`, `value_encoding` - stored as the
///   `rep_compression`, `def_compression` and `value_compression` fields.
/// * `repetition_index_depth`, `num_items` - stored unchanged.
/// * `dictionary_encoding` - stored as the optional `dictionary` field.
/// * `def_meaning` - each entry is converted to its `RepDefLayer` integer
///   and collected, in order, into `layers`.
///
/// # Panics
///
/// Panics if `def_meaning` is empty.
pub fn miniblock_layout(
    rep_encoding: ArrayEncoding,
    def_encoding: ArrayEncoding,
    value_encoding: ArrayEncoding,
    repetition_index_depth: u32,
    dictionary_encoding: Option<ArrayEncoding>,
    def_meaning: &[DefinitionInterpretation],
    num_items: u64,
) -> PageLayout {
    assert!(!def_meaning.is_empty());
    let mini_block = MiniBlockLayout {
        def_compression: Some(def_encoding),
        rep_compression: Some(rep_encoding),
        value_compression: Some(value_encoding),
        repetition_index_depth,
        dictionary: dictionary_encoding,
        layers: def_meaning
            .iter()
            .copied()
            .map(Self::def_inter_to_repdef_layer)
            .collect(),
        num_items,
    };
    PageLayout {
        layout: Some(Layout::MiniBlockLayout(mini_block)),
    }
}
/// Builds a `PageLayout` holding the `FullZipLayout` variant.
///
/// `bits_rep` and `bits_def` are widened from `u8` to `u32`,
/// `value_encoding` is stored as `value_compression`, and each entry of
/// `def_meaning` is converted to its `RepDefLayer` integer and collected, in
/// order, into `layers`.
pub fn full_zip_layout(
    bits_rep: u8,
    bits_def: u8,
    value_encoding: ArrayEncoding,
    def_meaning: &[DefinitionInterpretation],
) -> PageLayout {
    let full_zip = pb::FullZipLayout {
        bits_rep: u32::from(bits_rep),
        bits_def: u32::from(bits_def),
        value_compression: Some(value_encoding),
        layers: def_meaning
            .iter()
            .copied()
            .map(Self::def_inter_to_repdef_layer)
            .collect(),
    };
    PageLayout {
        layout: Some(Layout::FullZipLayout(full_zip)),
    }
}
/// Builds a `PageLayout` holding the `AllNullLayout` variant, with each entry
/// of `def_meaning` converted to its `RepDefLayer` integer and collected, in
/// order, into `layers`.
pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> PageLayout {
    let all_null = AllNullLayout {
        layers: def_meaning
            .iter()
            .copied()
            .map(Self::def_inter_to_repdef_layer)
            .collect(),
    };
    PageLayout {
        layout: Some(Layout::AllNullLayout(all_null)),
    }
}
/// Builds an all-null `PageLayout` whose only layer is
/// `DefinitionInterpretation::NullableItem`.
pub fn simple_all_null_layout() -> PageLayout {
    let def_meaning = [DefinitionInterpretation::NullableItem];
    Self::all_null_layout(&def_meaning)
}
}