/// Protobuf types for encodings, textually included from the build output directory.
pub mod pb {
    // The lints below are silenced for everything inside this module.
    // NOTE(review): presumably because the included file is generated code — confirm
    // against the build script.
    #![allow(clippy::all)]
    #![allow(non_upper_case_globals)]
    #![allow(non_camel_case_types)]
    #![allow(non_snake_case)]
    #![allow(unused)]
    #![allow(improper_ctypes)]
    #![allow(clippy::upper_case_acronyms)]
    #![allow(clippy::use_self)]
    // Pulls in `lance.encodings.rs` from the directory named by the compile-time
    // `OUT_DIR` environment variable.
    include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
}
16
17use pb::{
18 array_encoding::ArrayEncoding as ArrayEncodingEnum,
19 buffer::BufferType,
20 full_zip_layout,
21 nullable::{AllNull, NoNull, Nullability, SomeNull},
22 page_layout::Layout,
23 AllNullLayout, ArrayEncoding, Binary, Bitpack2, Bitpacked, BitpackedForNonNeg, Dictionary,
24 FixedSizeBinary, FixedSizeList, Flat, Fsst, MiniBlockLayout, Nullable, PackedStruct,
25 PackedStructFixedWidthMiniBlock, PageLayout, RepDefLayer, Variable,
26};
27
28use crate::{
29 encodings::physical::block_compress::CompressionConfig, repdef::DefinitionInterpretation,
30};
31
32use self::pb::Constant;
33
/// Field-less namespace type whose associated functions build the protobuf
/// `ArrayEncoding` and `PageLayout` messages defined in [`pb`].
pub struct ProtobufUtils {}
36
37impl ProtobufUtils {
38 pub fn constant(value: Vec<u8>, num_values: u64) -> ArrayEncoding {
39 ArrayEncoding {
40 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
41 value: value.into(),
42 num_values,
43 })),
44 }
45 }
46
47 pub fn basic_all_null_encoding() -> ArrayEncoding {
48 ArrayEncoding {
49 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
50 nullability: Some(Nullability::AllNulls(AllNull {})),
51 }))),
52 }
53 }
54
55 pub fn basic_some_null_encoding(
56 validity: ArrayEncoding,
57 values: ArrayEncoding,
58 ) -> ArrayEncoding {
59 ArrayEncoding {
60 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
61 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
62 validity: Some(Box::new(validity)),
63 values: Some(Box::new(values)),
64 }))),
65 }))),
66 }
67 }
68
69 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
70 ArrayEncoding {
71 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
72 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
73 values: Some(Box::new(values)),
74 }))),
75 }))),
76 }
77 }
78
79 pub fn flat_encoding(
80 bits_per_value: u64,
81 buffer_index: u32,
82 compression: Option<CompressionConfig>,
83 ) -> ArrayEncoding {
84 ArrayEncoding {
85 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
86 bits_per_value,
87 buffer: Some(pb::Buffer {
88 buffer_index,
89 buffer_type: BufferType::Page as i32,
90 }),
91 compression: compression.map(|compression_config| pb::Compression {
92 scheme: compression_config.scheme.to_string(),
93 level: compression_config.level,
94 }),
95 })),
96 }
97 }
98
99 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding) -> ArrayEncoding {
100 ArrayEncoding {
101 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
102 dimension: dimension.try_into().unwrap(),
103 items: Some(Box::new(items)),
104 }))),
105 }
106 }
107
108 pub fn bitpacked_encoding(
109 compressed_bits_per_value: u64,
110 uncompressed_bits_per_value: u64,
111 buffer_index: u32,
112 signed: bool,
113 ) -> ArrayEncoding {
114 ArrayEncoding {
115 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
116 compressed_bits_per_value,
117 buffer: Some(pb::Buffer {
118 buffer_index,
119 buffer_type: BufferType::Page as i32,
120 }),
121 uncompressed_bits_per_value,
122 signed,
123 })),
124 }
125 }
126
127 pub fn bitpacked_for_non_neg_encoding(
128 compressed_bits_per_value: u64,
129 uncompressed_bits_per_value: u64,
130 buffer_index: u32,
131 ) -> ArrayEncoding {
132 ArrayEncoding {
133 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
134 compressed_bits_per_value,
135 buffer: Some(pb::Buffer {
136 buffer_index,
137 buffer_type: BufferType::Page as i32,
138 }),
139 uncompressed_bits_per_value,
140 })),
141 }
142 }
143 pub fn bitpack2(uncompressed_bits_per_value: u64) -> ArrayEncoding {
144 ArrayEncoding {
145 array_encoding: Some(ArrayEncodingEnum::Bitpack2(Bitpack2 {
146 uncompressed_bits_per_value,
147 })),
148 }
149 }
150
151 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
152 ArrayEncoding {
153 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
154 bits_per_offset: bits_per_offset as u32,
155 })),
156 }
157 }
158
159 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
163 ArrayEncoding {
164 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
165 binary: Some(Box::new(data)),
166 symbol_table: symbol_table.into(),
167 }))),
168 }
169 }
170
171 pub fn packed_struct(
172 child_encodings: Vec<ArrayEncoding>,
173 packed_buffer_index: u32,
174 ) -> ArrayEncoding {
175 ArrayEncoding {
176 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
177 inner: child_encodings,
178 buffer: Some(pb::Buffer {
179 buffer_index: packed_buffer_index,
180 buffer_type: BufferType::Page as i32,
181 }),
182 })),
183 }
184 }
185
186 pub fn packed_struct_fixed_width_mini_block(
187 data: ArrayEncoding,
188 bits_per_values: Vec<u32>,
189 ) -> ArrayEncoding {
190 ArrayEncoding {
191 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
192 Box::new(PackedStructFixedWidthMiniBlock {
193 flat: Some(Box::new(data)),
194 bits_per_values,
195 }),
196 )),
197 }
198 }
199
200 pub fn binary(
201 indices_encoding: ArrayEncoding,
202 bytes_encoding: ArrayEncoding,
203 null_adjustment: u64,
204 ) -> ArrayEncoding {
205 ArrayEncoding {
206 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
207 bytes: Some(Box::new(bytes_encoding)),
208 indices: Some(Box::new(indices_encoding)),
209 null_adjustment,
210 }))),
211 }
212 }
213
214 pub fn dict_encoding(
215 indices: ArrayEncoding,
216 items: ArrayEncoding,
217 num_items: u32,
218 ) -> ArrayEncoding {
219 ArrayEncoding {
220 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
221 indices: Some(Box::new(indices)),
222 items: Some(Box::new(items)),
223 num_dictionary_items: num_items,
224 }))),
225 }
226 }
227
228 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
229 ArrayEncoding {
230 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
231 FixedSizeBinary {
232 bytes: Some(Box::new(data)),
233 byte_width,
234 },
235 ))),
236 }
237 }
238
239 pub fn fixed_size_list(data: ArrayEncoding, dimension: u64) -> ArrayEncoding {
240 ArrayEncoding {
241 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
242 dimension: dimension.try_into().unwrap(),
243 items: Some(Box::new(data)),
244 }))),
245 }
246 }
247
248 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
249 match def {
250 DefinitionInterpretation::AllValidItem => RepDefLayer::RepdefAllValidItem as i32,
251 DefinitionInterpretation::AllValidList => RepDefLayer::RepdefAllValidList as i32,
252 DefinitionInterpretation::NullableItem => RepDefLayer::RepdefNullableItem as i32,
253 DefinitionInterpretation::NullableList => RepDefLayer::RepdefNullableList as i32,
254 DefinitionInterpretation::EmptyableList => RepDefLayer::RepdefEmptyableList as i32,
255 DefinitionInterpretation::NullableAndEmptyableList => {
256 RepDefLayer::RepdefNullAndEmptyList as i32
257 }
258 }
259 }
260
261 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
262 let layer = RepDefLayer::try_from(layer).unwrap();
263 match layer {
264 RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
265 RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
266 RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
267 RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
268 RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
269 RepDefLayer::RepdefNullAndEmptyList => {
270 DefinitionInterpretation::NullableAndEmptyableList
271 }
272 RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
273 }
274 }
275
276 pub fn miniblock_layout(
277 rep_encoding: ArrayEncoding,
278 def_encoding: ArrayEncoding,
279 value_encoding: ArrayEncoding,
280 repetition_index_depth: u32,
281 dictionary_encoding: Option<ArrayEncoding>,
282 def_meaning: &[DefinitionInterpretation],
283 num_items: u64,
284 ) -> PageLayout {
285 assert!(!def_meaning.is_empty());
286 PageLayout {
287 layout: Some(Layout::MiniBlockLayout(MiniBlockLayout {
288 def_compression: Some(def_encoding),
289 rep_compression: Some(rep_encoding),
290 value_compression: Some(value_encoding),
291 repetition_index_depth,
292 dictionary: dictionary_encoding,
293 layers: def_meaning
294 .iter()
295 .map(|&def| Self::def_inter_to_repdef_layer(def))
296 .collect(),
297 num_items,
298 })),
299 }
300 }
301
302 fn full_zip_layout(
303 bits_rep: u8,
304 bits_def: u8,
305 details: full_zip_layout::Details,
306 value_encoding: ArrayEncoding,
307 def_meaning: &[DefinitionInterpretation],
308 num_items: u32,
309 num_visible_items: u32,
310 ) -> PageLayout {
311 PageLayout {
312 layout: Some(Layout::FullZipLayout(pb::FullZipLayout {
313 bits_rep: bits_rep as u32,
314 bits_def: bits_def as u32,
315 details: Some(details),
316 value_compression: Some(value_encoding),
317 num_items,
318 num_visible_items,
319 layers: def_meaning
320 .iter()
321 .map(|&def| Self::def_inter_to_repdef_layer(def))
322 .collect(),
323 })),
324 }
325 }
326
327 pub fn fixed_full_zip_layout(
328 bits_rep: u8,
329 bits_def: u8,
330 bits_per_value: u32,
331 value_encoding: ArrayEncoding,
332 def_meaning: &[DefinitionInterpretation],
333 num_items: u32,
334 num_visible_items: u32,
335 ) -> PageLayout {
336 Self::full_zip_layout(
337 bits_rep,
338 bits_def,
339 full_zip_layout::Details::BitsPerValue(bits_per_value),
340 value_encoding,
341 def_meaning,
342 num_items,
343 num_visible_items,
344 )
345 }
346
347 pub fn variable_full_zip_layout(
348 bits_rep: u8,
349 bits_def: u8,
350 bits_per_offset: u32,
351 value_encoding: ArrayEncoding,
352 def_meaning: &[DefinitionInterpretation],
353 num_items: u32,
354 num_visible_items: u32,
355 ) -> PageLayout {
356 Self::full_zip_layout(
357 bits_rep,
358 bits_def,
359 full_zip_layout::Details::BitsPerOffset(bits_per_offset),
360 value_encoding,
361 def_meaning,
362 num_items,
363 num_visible_items,
364 )
365 }
366
367 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> PageLayout {
368 PageLayout {
369 layout: Some(Layout::AllNullLayout(AllNullLayout {
370 layers: def_meaning
371 .iter()
372 .map(|&def| Self::def_inter_to_repdef_layer(def))
373 .collect(),
374 })),
375 }
376 }
377
378 pub fn simple_all_null_layout() -> PageLayout {
379 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
380 }
381}