lance_encoding/encodings/physical/
bitpack_fastlanes.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::sync::Arc;
5
6use arrow::datatypes::{
7    Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
8};
9use arrow_array::{Array, PrimitiveArray};
10use arrow_schema::DataType;
11use byteorder::{ByteOrder, LittleEndian};
12use bytes::Bytes;
13use futures::future::{BoxFuture, FutureExt};
14use log::trace;
15use snafu::location;
16
17use lance_arrow::DataTypeExt;
18use lance_core::{Error, Result};
19
20use crate::buffer::LanceBuffer;
21use crate::compression_algo::fastlanes::BitPacking;
22use crate::data::BlockInfo;
23use crate::data::{DataBlock, FixedWidthDataBlock, NullableDataBlock};
24use crate::decoder::{MiniBlockDecompressor, PageScheduler, PrimitivePageDecoder};
25use crate::encoder::{
26    ArrayEncoder, EncodedArray, MiniBlockChunk, MiniBlockCompressed, MiniBlockCompressor,
27};
28use crate::format::{pb, ProtobufUtils};
29use crate::statistics::{GetStat, Stat};
30use arrow::array::ArrayRef;
31use bytemuck::cast_slice;
32const LOG_ELEMS_PER_CHUNK: u8 = 10;
33const ELEMS_PER_CHUNK: u64 = 1 << LOG_ELEMS_PER_CHUNK;
34
35// Compute the compressed_bit_width for a given array of integers
36// todo: compute all statistics before encoding
37// todo: see how to use rust macro to rewrite this function
38pub fn compute_compressed_bit_width_for_non_neg(arrays: &[ArrayRef]) -> u64 {
39    debug_assert!(!arrays.is_empty());
40
41    let res;
42
43    match arrays[0].data_type() {
44        DataType::UInt8 => {
45            let mut global_max: u8 = 0;
46            for array in arrays {
47                let primitive_array = array
48                    .as_any()
49                    .downcast_ref::<PrimitiveArray<UInt8Type>>()
50                    .unwrap();
51                let array_max = arrow::compute::bit_or(primitive_array);
52                global_max = global_max.max(array_max.unwrap_or(0));
53            }
54            let num_bits =
55                arrays[0].data_type().byte_width() as u64 * 8 - global_max.leading_zeros() as u64;
56            // we will have constant encoding later
57            if num_bits == 0 {
58                res = 1;
59            } else {
60                res = num_bits;
61            }
62        }
63
64        DataType::Int8 => {
65            let mut global_max_width: u64 = 0;
66            for array in arrays {
67                let primitive_array = array
68                    .as_any()
69                    .downcast_ref::<PrimitiveArray<Int8Type>>()
70                    .unwrap();
71                let array_max_width = arrow::compute::bit_or(primitive_array).unwrap_or(0);
72                global_max_width = global_max_width.max(8 - array_max_width.leading_zeros() as u64);
73            }
74            if global_max_width == 0 {
75                res = 1;
76            } else {
77                res = global_max_width;
78            }
79        }
80
81        DataType::UInt16 => {
82            let mut global_max: u16 = 0;
83            for array in arrays {
84                let primitive_array = array
85                    .as_any()
86                    .downcast_ref::<PrimitiveArray<UInt16Type>>()
87                    .unwrap();
88                let array_max = arrow::compute::bit_or(primitive_array).unwrap_or(0);
89                global_max = global_max.max(array_max);
90            }
91            let num_bits =
92                arrays[0].data_type().byte_width() as u64 * 8 - global_max.leading_zeros() as u64;
93            if num_bits == 0 {
94                res = 1;
95            } else {
96                res = num_bits;
97            }
98        }
99
100        DataType::Int16 => {
101            let mut global_max_width: u64 = 0;
102            for array in arrays {
103                let primitive_array = array
104                    .as_any()
105                    .downcast_ref::<PrimitiveArray<Int16Type>>()
106                    .unwrap();
107                let array_max_width = arrow::compute::bit_or(primitive_array).unwrap_or(0);
108                global_max_width =
109                    global_max_width.max(16 - array_max_width.leading_zeros() as u64);
110            }
111            if global_max_width == 0 {
112                res = 1;
113            } else {
114                res = global_max_width;
115            }
116        }
117
118        DataType::UInt32 => {
119            let mut global_max: u32 = 0;
120            for array in arrays {
121                let primitive_array = array
122                    .as_any()
123                    .downcast_ref::<PrimitiveArray<UInt32Type>>()
124                    .unwrap();
125                let array_max = arrow::compute::bit_or(primitive_array).unwrap_or(0);
126                global_max = global_max.max(array_max);
127            }
128            let num_bits =
129                arrays[0].data_type().byte_width() as u64 * 8 - global_max.leading_zeros() as u64;
130            if num_bits == 0 {
131                res = 1;
132            } else {
133                res = num_bits;
134            }
135        }
136
137        DataType::Int32 => {
138            let mut global_max_width: u64 = 0;
139            for array in arrays {
140                let primitive_array = array
141                    .as_any()
142                    .downcast_ref::<PrimitiveArray<Int32Type>>()
143                    .unwrap();
144                let array_max_width = arrow::compute::bit_or(primitive_array).unwrap_or(0);
145                global_max_width =
146                    global_max_width.max(32 - array_max_width.leading_zeros() as u64);
147            }
148            if global_max_width == 0 {
149                res = 1;
150            } else {
151                res = global_max_width;
152            }
153        }
154
155        DataType::UInt64 => {
156            let mut global_max: u64 = 0;
157            for array in arrays {
158                let primitive_array = array
159                    .as_any()
160                    .downcast_ref::<PrimitiveArray<UInt64Type>>()
161                    .unwrap();
162                let array_max = arrow::compute::bit_or(primitive_array).unwrap_or(0);
163                global_max = global_max.max(array_max);
164            }
165            let num_bits =
166                arrays[0].data_type().byte_width() as u64 * 8 - global_max.leading_zeros() as u64;
167            if num_bits == 0 {
168                res = 1;
169            } else {
170                res = num_bits;
171            }
172        }
173
174        DataType::Int64 => {
175            let mut global_max_width: u64 = 0;
176            for array in arrays {
177                let primitive_array = array
178                    .as_any()
179                    .downcast_ref::<PrimitiveArray<Int64Type>>()
180                    .unwrap();
181                let array_max_width = arrow::compute::bit_or(primitive_array).unwrap_or(0);
182                global_max_width =
183                    global_max_width.max(64 - array_max_width.leading_zeros() as u64);
184            }
185            if global_max_width == 0 {
186                res = 1;
187            } else {
188                res = global_max_width;
189            }
190        }
191        _ => {
192            panic!("BitpackedForNonNegArrayEncoder only supports data types of UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64");
193        }
194    };
195    res
196}
197
198// Bitpack integers using fastlanes algorithm, the input is sliced into chunks of 1024 integers, and bitpacked
199// chunk by chunk. when the input is not a multiple of 1024, the last chunk is padded with zeros, this is fine because
200// we also know the number of rows we have.
201// Here self is a borrow of BitpackedForNonNegArrayEncoder, unpacked is a mutable borrow of FixedWidthDataBlock,
202// data_type can be  one of u8, u16, u32, or u64.
203// buffer_index is a mutable borrow of u32, indicating the buffer index of the output EncodedArray.
204// It outputs an fastlanes bitpacked EncodedArray
macro_rules! encode_fixed_width {
    ($self:expr, $unpacked:expr, $data_type:ty, $buffer_index:expr) => {{
        // Total chunk count (including a possibly partial trailing chunk) vs. the
        // number of chunks completely filled with ELEMS_PER_CHUNK input values.
        let num_chunks = $unpacked.num_values.div_ceil(ELEMS_PER_CHUNK);
        let num_full_chunks = $unpacked.num_values / ELEMS_PER_CHUNK;
        let uncompressed_bit_width = std::mem::size_of::<$data_type>() as u64 * 8;

        // the output vector type is the same as the input type, for example, when input is u16, output is Vec<u16>
        // Number of output elements (words of $data_type) one packed chunk occupies.
        let packed_chunk_size = 1024 * $self.compressed_bit_width as usize / uncompressed_bit_width as usize;

        let input_slice = $unpacked.data.borrow_to_typed_slice::<$data_type>();
        let input = input_slice.as_ref();

        // Reserve the whole output up front so the unsafe set_len calls below
        // always stay within the initial allocation.
        let mut output = Vec::with_capacity(num_chunks as usize * packed_chunk_size);

        // Loop over all but the last chunk.
        (0..num_full_chunks).for_each(|i| {
            let start_elem = (i * ELEMS_PER_CHUNK) as usize;

            let output_len = output.len();
            // SAFETY: capacity for num_chunks * packed_chunk_size elements was
            // reserved above, so set_len stays within the allocation; unchecked_pack
            // is expected to initialize the packed_chunk_size elements it is handed
            // -- TODO(review): confirm BitPacking::unchecked_pack always writes the
            // full output slice.
            unsafe {
                output.set_len(output_len + packed_chunk_size);
                BitPacking::unchecked_pack(
                    $self.compressed_bit_width,
                    &input[start_elem..][..ELEMS_PER_CHUNK as usize],
                    &mut output[output_len..][..packed_chunk_size],
                );
            }
        });

        if num_chunks != num_full_chunks {
            // Pad the trailing partial chunk with zeros up to a full 1024 elements.
            // This is safe because num_values is stored alongside the packed data,
            // so the padding is never surfaced to readers.
            let last_chunk_elem_num = $unpacked.num_values % ELEMS_PER_CHUNK;
            let mut last_chunk = vec![0 as $data_type; ELEMS_PER_CHUNK as usize];
            last_chunk[..last_chunk_elem_num as usize].clone_from_slice(
                &input[$unpacked.num_values as usize - last_chunk_elem_num as usize..],
            );

            let output_len = output.len();
            // SAFETY: same reservation/initialization argument as in the loop above.
            unsafe {
                output.set_len(output_len + packed_chunk_size);
                BitPacking::unchecked_pack(
                    $self.compressed_bit_width,
                    &last_chunk,
                    &mut output[output_len..][..packed_chunk_size],
                );
            }
        }

        // Claim the next buffer index for the packed values buffer.
        let bitpacked_for_non_neg_buffer_index = *$buffer_index;
        *$buffer_index += 1;

        let encoding = ProtobufUtils::bitpacked_for_non_neg_encoding(
            $self.compressed_bit_width as u64,
            uncompressed_bit_width,
            bitpacked_for_non_neg_buffer_index,
        );
        let packed = DataBlock::FixedWidth(FixedWidthDataBlock {
            bits_per_value: $self.compressed_bit_width as u64,
            data: LanceBuffer::reinterpret_vec(output),
            num_values: $unpacked.num_values,
            block_info: BlockInfo::new(),
        });

        Result::Ok(EncodedArray {
            data: packed,
            encoding,
        })
    }};
}
273
/// Encoder that bit-packs integer arrays with the fastlanes algorithm; intended
/// for non-negative values whose significant bits fit in `compressed_bit_width`.
#[derive(Debug)]
pub struct BitpackedForNonNegArrayEncoder {
    // Number of bits each value occupies after packing.
    pub compressed_bit_width: usize,
    // Arrow data type of the values before packing.
    pub original_data_type: DataType,
}
279
280impl BitpackedForNonNegArrayEncoder {
281    pub fn new(compressed_bit_width: usize, data_type: DataType) -> Self {
282        Self {
283            compressed_bit_width,
284            original_data_type: data_type,
285        }
286    }
287}
288
289impl ArrayEncoder for BitpackedForNonNegArrayEncoder {
290    fn encode(
291        &self,
292        data: DataBlock,
293        data_type: &DataType,
294        buffer_index: &mut u32,
295    ) -> Result<EncodedArray> {
296        match data {
297            DataBlock::AllNull(_) => {
298                let encoding = ProtobufUtils::basic_all_null_encoding();
299                Ok(EncodedArray { data, encoding })
300            }
301            DataBlock::FixedWidth(mut unpacked) => {
302                match data_type {
303                    DataType::UInt8 | DataType::Int8 => encode_fixed_width!(self, unpacked, u8, buffer_index),
304                    DataType::UInt16 | DataType::Int16 => encode_fixed_width!(self, unpacked, u16, buffer_index),
305                    DataType::UInt32 | DataType::Int32 => encode_fixed_width!(self, unpacked, u32, buffer_index),
306                    DataType::UInt64 | DataType::Int64 => encode_fixed_width!(self, unpacked, u64, buffer_index),
307                    _ => unreachable!("BitpackedForNonNegArrayEncoder only supports data types of UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64"),
308                }
309            }
310            DataBlock::Nullable(nullable) => {
311                let validity_buffer_index = *buffer_index;
312                *buffer_index += 1;
313
314                let validity_desc = ProtobufUtils::flat_encoding(
315                    1,
316                    validity_buffer_index,
317                    /*compression=*/ None,
318                );
319                let encoded_values: EncodedArray;
320                match *nullable.data {
321                    DataBlock::FixedWidth(mut unpacked) => {
322                        match data_type {
323                            DataType::UInt8 | DataType::Int8 => encoded_values = encode_fixed_width!(self, unpacked, u8, buffer_index)?,
324                            DataType::UInt16 | DataType::Int16 => encoded_values = encode_fixed_width!(self, unpacked, u16, buffer_index)?,
325                            DataType::UInt32 | DataType::Int32 => encoded_values = encode_fixed_width!(self, unpacked, u32, buffer_index)?,
326                            DataType::UInt64 | DataType::Int64 => encoded_values = encode_fixed_width!(self, unpacked, u64, buffer_index)?,
327                            _ => unreachable!("BitpackedForNonNegArrayEncoder only supports data types of UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64"),
328                        }
329                    }
330                    _ => {
331                        return Err(Error::InvalidInput {
332                            source: "Bitpacking only supports fixed width data blocks or a nullable data block with fixed width data block inside or a all null data block".into(),
333                            location: location!(),
334                        });
335                    }
336                }
337                let encoding =
338                    ProtobufUtils::basic_some_null_encoding(validity_desc, encoded_values.encoding);
339                let encoded = DataBlock::Nullable(NullableDataBlock {
340                    data: Box::new(encoded_values.data),
341                    nulls: nullable.nulls,
342                    block_info: BlockInfo::new(),
343                });
344                Ok(EncodedArray {
345                    data: encoded,
346                    encoding,
347                })
348            }
349            _ => {
350                Err(Error::InvalidInput {
351                    source: "Bitpacking only supports fixed width data blocks or a nullable data block with fixed width data block inside or a all null data block".into(),
352                    location: location!(),
353                })
354            }
355        }
356    }
357}
358
/// Schedules I/O for pages written with the non-negative fastlanes bitpacked
/// encoding and eagerly decompresses the fetched bytes into a page decoder.
#[derive(Debug)]
pub struct BitpackedForNonNegScheduler {
    // bits per packed value
    compressed_bit_width: u64,
    // bits per value before packing (8, 16, 32, or 64)
    uncompressed_bits_per_value: u64,
    // byte offset added to every computed chunk range (start of the packed buffer)
    buffer_offset: u64,
}
365
366impl BitpackedForNonNegScheduler {
367    pub fn new(
368        compressed_bit_width: u64,
369        uncompressed_bits_per_value: u64,
370        buffer_offset: u64,
371    ) -> Self {
372        Self {
373            compressed_bit_width,
374            uncompressed_bits_per_value,
375            buffer_offset,
376        }
377    }
378
379    fn locate_chunk_start(&self, relative_row_num: u64) -> u64 {
380        let chunk_size = ELEMS_PER_CHUNK * self.compressed_bit_width / 8;
381        self.buffer_offset + (relative_row_num / ELEMS_PER_CHUNK * chunk_size)
382    }
383
384    fn locate_chunk_end(&self, relative_row_num: u64) -> u64 {
385        let chunk_size = ELEMS_PER_CHUNK * self.compressed_bit_width / 8;
386        self.buffer_offset + (relative_row_num / ELEMS_PER_CHUNK * chunk_size) + chunk_size
387    }
388}
389
impl PageScheduler for BitpackedForNonNegScheduler {
    /// Turns the requested row ranges into chunk-aligned byte ranges, coalesces
    /// adjacent ones, submits a single I/O request, and returns a future that
    /// eagerly decompresses the fetched bytes into a `BitpackedForNonNegPageDecoder`.
    ///
    /// NOTE(review): the coalescing below assumes `ranges` is sorted ascending and
    /// non-overlapping -- confirm the caller guarantees this invariant.
    fn schedule_ranges(
        &self,
        ranges: &[std::ops::Range<u64>],
        scheduler: &Arc<dyn crate::EncodingsIo>,
        top_level_row: u64,
    ) -> BoxFuture<'static, Result<Box<dyn PrimitivePageDecoder>>> {
        assert!(!ranges.is_empty());

        let mut byte_ranges = vec![];

        // Maps each byte range to the row ranges it serves; every byte range has
        // at least one row range associated with it.
        let mut bytes_idx_to_range_indices = vec![];
        let first_byte_range = std::ops::Range {
            start: self.locate_chunk_start(ranges[0].start),
            end: self.locate_chunk_end(ranges[0].end - 1),
        }; // the ranges are half-open, so the last covered row is `end - 1`
        byte_ranges.push(first_byte_range);
        bytes_idx_to_range_indices.push(vec![ranges[0].clone()]);

        for (i, range) in ranges.iter().enumerate().skip(1) {
            let this_start = self.locate_chunk_start(range.start);
            let this_end = self.locate_chunk_end(range.end - 1);

            // When the current range starts in the same chunk the previous range
            // ended in, coalesce the two byte ranges into one request; otherwise
            // start a new byte range.
            if this_start == self.locate_chunk_start(ranges[i - 1].end - 1) {
                byte_ranges.last_mut().unwrap().end = this_end;
                bytes_idx_to_range_indices
                    .last_mut()
                    .unwrap()
                    .push(range.clone());
            } else {
                byte_ranges.push(this_start..this_end);
                bytes_idx_to_range_indices.push(vec![range.clone()]);
            }
        }

        trace!(
            "Scheduling I/O for {} ranges spread across byte range {}..{}",
            byte_ranges.len(),
            byte_ranges[0].start,
            byte_ranges.last().unwrap().end
        );

        let bytes = scheduler.submit_request(byte_ranges.clone(), top_level_row);

        // copy the necessary data from `self` to move into the async block
        let compressed_bit_width = self.compressed_bit_width;
        let uncompressed_bits_per_value = self.uncompressed_bits_per_value;
        let num_rows = ranges.iter().map(|range| range.end - range.start).sum();

        async move {
            let bytes = bytes.await?;
            // Decompress every requested row up front; the decoder then only
            // needs to slice this buffer.
            let decompressed_output = bitpacked_for_non_neg_decode(
                compressed_bit_width,
                uncompressed_bits_per_value,
                &bytes,
                &bytes_idx_to_range_indices,
                num_rows,
            );
            Ok(Box::new(BitpackedForNonNegPageDecoder {
                uncompressed_bits_per_value,
                decompressed_buf: decompressed_output,
            }) as Box<dyn PrimitivePageDecoder>)
        }
        .boxed()
    }
}
459
/// Serves decode requests for a page that was bitpacked with the non-negative
/// fastlanes encoding; the requested rows are already fully decompressed.
#[derive(Debug)]
struct BitpackedForNonNegPageDecoder {
    // number of bits in the uncompressed value. E.g. this will be 32 for DataType::UInt32
    uncompressed_bits_per_value: u64,

    // all requested rows, already unpacked back to their uncompressed width
    decompressed_buf: LanceBuffer,
}
467
468impl PrimitivePageDecoder for BitpackedForNonNegPageDecoder {
469    fn decode(&self, rows_to_skip: u64, num_rows: u64) -> Result<DataBlock> {
470        if ![8, 16, 32, 64].contains(&self.uncompressed_bits_per_value) {
471            return Err(Error::InvalidInput {
472                source: "BitpackedForNonNegPageDecoder should only has uncompressed_bits_per_value of 8, 16, 32, or 64".into(),
473                location: location!(),
474            });
475        }
476
477        let elem_size_in_bytes = self.uncompressed_bits_per_value / 8;
478
479        Ok(DataBlock::FixedWidth(FixedWidthDataBlock {
480            data: self.decompressed_buf.slice_with_length(
481                (rows_to_skip * elem_size_in_bytes) as usize,
482                (num_rows * elem_size_in_bytes) as usize,
483            ),
484            bits_per_value: self.uncompressed_bits_per_value,
485            num_values: num_rows,
486            block_info: BlockInfo::new(),
487        }))
488    }
489}
490
// Decodes fastlanes-bitpacked bytes back into `$uncompressed_type` values.
// `$data` holds one `Bytes` per coalesced byte range and
// `$bytes_idx_to_range_indices[i]` lists the (half-open, ascending) row ranges
// served by `$data[i]`; only the rows inside those ranges are appended to the
// output, which is returned as a `LanceBuffer`.
macro_rules! bitpacked_decode {
    ($uncompressed_type:ty, $compressed_bit_width:expr, $data:expr, $bytes_idx_to_range_indices:expr, $num_rows:expr) => {{
        let mut decompressed: Vec<$uncompressed_type> = Vec::with_capacity($num_rows as usize);
        let packed_chunk_size_in_byte: usize = (ELEMS_PER_CHUNK * $compressed_bit_width) as usize / 8;
        // Scratch buffer for one fully unpacked chunk, reused across all chunks.
        let mut decompress_chunk_buf = vec![0 as $uncompressed_type; ELEMS_PER_CHUNK as usize];

        for (i, bytes) in $data.iter().enumerate() {
            let mut ranges_idx = 0;
            let mut curr_range_start = $bytes_idx_to_range_indices[i][0].start;
            let mut chunk_num = 0;

            // Walk the buffer one packed chunk at a time.
            while chunk_num * packed_chunk_size_in_byte < bytes.len() {
                // Copy for memory alignment
                // NOTE(review): a fresh Vec<u8> only guarantees 1-byte alignment,
                // and bytemuck::cast_slice panics if the allocation is misaligned
                // for $uncompressed_type -- confirm this copy reliably produces a
                // suitably aligned buffer.
                let chunk_in_u8: Vec<u8> = bytes[chunk_num * packed_chunk_size_in_byte..]
                    [..packed_chunk_size_in_byte]
                    .to_vec();
                chunk_num += 1;
                let chunk = cast_slice(&chunk_in_u8);
                // SAFETY: `chunk` is exactly one packed chunk and the scratch buffer
                // holds ELEMS_PER_CHUNK output elements.
                unsafe {
                    BitPacking::unchecked_unpack(
                        $compressed_bit_width as usize,
                        chunk,
                        &mut decompress_chunk_buf,
                    );
                }

                // Copy the requested rows out of this unpacked chunk, advancing
                // through the row ranges that overlap it.
                loop {
                    // Case 1: All the elements after (curr_range_start % ELEMS_PER_CHUNK) inside this chunk are needed.
                    let elems_after_curr_range_start_in_this_chunk =
                        ELEMS_PER_CHUNK - curr_range_start % ELEMS_PER_CHUNK;
                    if curr_range_start + elems_after_curr_range_start_in_this_chunk
                        <= $bytes_idx_to_range_indices[i][ranges_idx].end
                    {
                        decompressed.extend_from_slice(
                            &decompress_chunk_buf[(curr_range_start % ELEMS_PER_CHUNK) as usize..],
                        );
                        curr_range_start += elems_after_curr_range_start_in_this_chunk;
                        // The current range continues into the next chunk.
                        break;
                    } else {
                        // Case 2: Only part of the elements after (curr_range_start % ELEMS_PER_CHUNK) inside this chunk are needed.
                        let elems_this_range_needed_in_this_chunk =
                            ($bytes_idx_to_range_indices[i][ranges_idx].end - curr_range_start)
                                .min(ELEMS_PER_CHUNK - curr_range_start % ELEMS_PER_CHUNK);
                        decompressed.extend_from_slice(
                            &decompress_chunk_buf[(curr_range_start % ELEMS_PER_CHUNK) as usize..]
                                [..elems_this_range_needed_in_this_chunk as usize],
                        );
                        if curr_range_start + elems_this_range_needed_in_this_chunk
                            == $bytes_idx_to_range_indices[i][ranges_idx].end
                        {
                            // Finished this row range; move to the next one served
                            // by the same byte buffer, if any.
                            ranges_idx += 1;
                            if ranges_idx == $bytes_idx_to_range_indices[i].len() {
                                break;
                            }
                            curr_range_start = $bytes_idx_to_range_indices[i][ranges_idx].start;
                        } else {
                            curr_range_start += elems_this_range_needed_in_this_chunk;
                        }
                    }
                }
            }
        }

        LanceBuffer::reinterpret_vec(decompressed)
    }};
}
557
558fn bitpacked_for_non_neg_decode(
559    compressed_bit_width: u64,
560    uncompressed_bits_per_value: u64,
561    data: &[Bytes],
562    bytes_idx_to_range_indices: &[Vec<std::ops::Range<u64>>],
563    num_rows: u64,
564) -> LanceBuffer {
565    match uncompressed_bits_per_value {
566        8 => bitpacked_decode!(
567            u8,
568            compressed_bit_width,
569            data,
570            bytes_idx_to_range_indices,
571            num_rows
572        ),
573        16 => bitpacked_decode!(
574            u16,
575            compressed_bit_width,
576            data,
577            bytes_idx_to_range_indices,
578            num_rows
579        ),
580        32 => bitpacked_decode!(
581            u32,
582            compressed_bit_width,
583            data,
584            bytes_idx_to_range_indices,
585            num_rows
586        ),
587        64 => bitpacked_decode!(
588            u64,
589            compressed_bit_width,
590            data,
591            bytes_idx_to_range_indices,
592            num_rows
593        ),
594        _ => unreachable!(
595            "bitpacked_for_non_neg_decode only supports 8, 16, 32, 64 uncompressed_bits_per_value"
596        ),
597    }
598}
599
600#[cfg(test)]
601mod tests {
602    // use super::*;
603    // use arrow::array::{
604    //     Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array,
605    //     UInt8Array,
606    // };
607    // use arrow::datatypes::DataType;
608
609    // #[test_log::test(tokio::test)]
610    // async fn test_compute_compressed_bit_width_for_non_neg() {}
611
612    // use std::collections::HashMap;
613
614    // use lance_datagen::RowCount;
615
616    // use crate::testing::{check_round_trip_encoding_of_data, TestCases};
617    // use crate::version::LanceFileVersion;
618
619    // async fn check_round_trip_bitpacked(array: Arc<dyn Array>) {
620    //     let test_cases = TestCases::default().with_file_version(LanceFileVersion::V2_1);
621    //     check_round_trip_encoding_of_data(vec![array], &test_cases, HashMap::new()).await;
622    // }
623
624    // #[test_log::test(tokio::test)]
625    // async fn test_bitpack_fastlanes_u8() {
626    //     let values: Vec<u8> = vec![5; 1024];
627    //     let array = UInt8Array::from(values);
628    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
629    //     check_round_trip_bitpacked(array).await;
630
631    //     let values: Vec<u8> = vec![66; 1000];
632    //     let array = UInt8Array::from(values);
633    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
634
635    //     check_round_trip_bitpacked(array).await;
636
637    //     let values: Vec<u8> = vec![77; 2000];
638    //     let array = UInt8Array::from(values);
639    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
640
641    //     check_round_trip_bitpacked(array).await;
642
643    //     let values: Vec<u8> = vec![0; 10000];
644    //     let array = UInt8Array::from(values);
645    //     let arr = Arc::new(array) as ArrayRef;
646    //     check_round_trip_bitpacked(arr).await;
647
648    //     let values: Vec<u8> = vec![88; 10000];
649    //     let array = UInt8Array::from(values);
650    //     let arr = Arc::new(array) as ArrayRef;
651    //     check_round_trip_bitpacked(arr).await;
652
653    //     let arr = lance_datagen::gen()
654    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
655    //         .into_batch_rows(RowCount::from(1))
656    //         .unwrap()
657    //         .column(0)
658    //         .clone();
659    //     check_round_trip_bitpacked(arr).await;
660
661    //     let arr = lance_datagen::gen()
662    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
663    //         .into_batch_rows(RowCount::from(20))
664    //         .unwrap()
665    //         .column(0)
666    //         .clone();
667    //     check_round_trip_bitpacked(arr).await;
668
669    //     let arr = lance_datagen::gen()
670    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
671    //         .into_batch_rows(RowCount::from(50))
672    //         .unwrap()
673    //         .column(0)
674    //         .clone();
675    //     check_round_trip_bitpacked(arr).await;
676
677    //     let arr = lance_datagen::gen()
678    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
679    //         .into_batch_rows(RowCount::from(100))
680    //         .unwrap()
681    //         .column(0)
682    //         .clone();
683    //     check_round_trip_bitpacked(arr).await;
684
685    //     let arr = lance_datagen::gen()
686    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
687    //         .into_batch_rows(RowCount::from(1000))
688    //         .unwrap()
689    //         .column(0)
690    //         .clone();
691    //     check_round_trip_bitpacked(arr).await;
692
693    //     let arr = lance_datagen::gen()
694    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
695    //         .into_batch_rows(RowCount::from(1024))
696    //         .unwrap()
697    //         .column(0)
698    //         .clone();
699    //     check_round_trip_bitpacked(arr).await;
700
701    //     let arr = lance_datagen::gen()
702    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
703    //         .into_batch_rows(RowCount::from(2000))
704    //         .unwrap()
705    //         .column(0)
706    //         .clone();
707    //     check_round_trip_bitpacked(arr).await;
708
709    //     let arr = lance_datagen::gen()
710    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt8))
711    //         .into_batch_rows(RowCount::from(3000))
712    //         .unwrap()
713    //         .column(0)
714    //         .clone();
715    //     check_round_trip_bitpacked(arr).await;
716    // }
717
718    // #[test_log::test(tokio::test)]
719    // async fn test_bitpack_fastlanes_u16() {
720    //     let values: Vec<u16> = vec![5; 1024];
721    //     let array = UInt16Array::from(values);
722    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
723    //     check_round_trip_bitpacked(array).await;
724
725    //     let values: Vec<u16> = vec![66; 1000];
726    //     let array = UInt16Array::from(values);
727    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
728
729    //     check_round_trip_bitpacked(array).await;
730
731    //     let values: Vec<u16> = vec![77; 2000];
732    //     let array = UInt16Array::from(values);
733    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
734
735    //     check_round_trip_bitpacked(array).await;
736
737    //     let values: Vec<u16> = vec![0; 10000];
738    //     let array = UInt16Array::from(values);
739    //     let arr = Arc::new(array) as ArrayRef;
740    //     check_round_trip_bitpacked(arr).await;
741
742    //     let values: Vec<u16> = vec![88; 10000];
743    //     let array = UInt16Array::from(values);
744    //     let arr = Arc::new(array) as ArrayRef;
745    //     check_round_trip_bitpacked(arr).await;
746
747    //     let values: Vec<u16> = vec![300; 100];
748    //     let array = UInt16Array::from(values);
749    //     let arr = Arc::new(array) as ArrayRef;
750    //     check_round_trip_bitpacked(arr).await;
751
752    //     let values: Vec<u16> = vec![800; 100];
753    //     let array = UInt16Array::from(values);
754    //     let arr = Arc::new(array) as ArrayRef;
755    //     check_round_trip_bitpacked(arr).await;
756
757    //     let arr = lance_datagen::gen()
758    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
759    //         .into_batch_rows(RowCount::from(1))
760    //         .unwrap()
761    //         .column(0)
762    //         .clone();
763    //     check_round_trip_bitpacked(arr).await;
764
765    //     let arr = lance_datagen::gen()
766    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
767    //         .into_batch_rows(RowCount::from(20))
768    //         .unwrap()
769    //         .column(0)
770    //         .clone();
771    //     check_round_trip_bitpacked(arr).await;
772
773    //     let arr = lance_datagen::gen()
774    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
775    //         .into_batch_rows(RowCount::from(100))
776    //         .unwrap()
777    //         .column(0)
778    //         .clone();
779    //     check_round_trip_bitpacked(arr).await;
780
781    //     let arr = lance_datagen::gen()
782    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
783    //         .into_batch_rows(RowCount::from(1000))
784    //         .unwrap()
785    //         .column(0)
786    //         .clone();
787    //     check_round_trip_bitpacked(arr).await;
788
789    //     let arr = lance_datagen::gen()
790    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
791    //         .into_batch_rows(RowCount::from(1024))
792    //         .unwrap()
793    //         .column(0)
794    //         .clone();
795    //     check_round_trip_bitpacked(arr).await;
796
797    //     let arr = lance_datagen::gen()
798    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
799    //         .into_batch_rows(RowCount::from(2000))
800    //         .unwrap()
801    //         .column(0)
802    //         .clone();
803    //     check_round_trip_bitpacked(arr).await;
804
805    //     let arr = lance_datagen::gen()
806    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt16))
807    //         .into_batch_rows(RowCount::from(3000))
808    //         .unwrap()
809    //         .column(0)
810    //         .clone();
811    //     check_round_trip_bitpacked(arr).await;
812    // }
813
814    // #[test_log::test(tokio::test)]
815    // async fn test_bitpack_fastlanes_u32() {
816    //     let values: Vec<u32> = vec![5; 1024];
817    //     let array = UInt32Array::from(values);
818    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
819    //     check_round_trip_bitpacked(array).await;
820
821    //     let values: Vec<u32> = vec![7; 2000];
822    //     let array = UInt32Array::from(values);
823    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
824    //     check_round_trip_bitpacked(array).await;
825
826    //     let values: Vec<u32> = vec![66; 1000];
827    //     let array = UInt32Array::from(values);
828    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
829    //     check_round_trip_bitpacked(array).await;
830
831    //     let values: Vec<u32> = vec![666; 1000];
832    //     let array = UInt32Array::from(values);
833    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
834    //     check_round_trip_bitpacked(array).await;
835
836    //     let values: Vec<u32> = vec![77; 2000];
837    //     let array = UInt32Array::from(values);
838    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
839    //     check_round_trip_bitpacked(array).await;
840
841    //     let values: Vec<u32> = vec![0; 10000];
842    //     let array = UInt32Array::from(values);
843    //     let arr = Arc::new(array) as ArrayRef;
844    //     check_round_trip_bitpacked(arr).await;
845
846    //     let values: Vec<u32> = vec![1; 10000];
847    //     let array = UInt32Array::from(values);
848    //     let arr = Arc::new(array) as ArrayRef;
849    //     check_round_trip_bitpacked(arr).await;
850
851    //     let values: Vec<u32> = vec![88; 10000];
852    //     let array = UInt32Array::from(values);
853    //     let arr = Arc::new(array) as ArrayRef;
854    //     check_round_trip_bitpacked(arr).await;
855
856    //     let values: Vec<u32> = vec![300; 100];
857    //     let array = UInt32Array::from(values);
858    //     let arr = Arc::new(array) as ArrayRef;
859    //     check_round_trip_bitpacked(arr).await;
860
861    //     let values: Vec<u32> = vec![3000; 100];
862    //     let array = UInt32Array::from(values);
863    //     let arr = Arc::new(array) as ArrayRef;
864    //     check_round_trip_bitpacked(arr).await;
865
866    //     let values: Vec<u32> = vec![800; 100];
867    //     let array = UInt32Array::from(values);
868    //     let arr = Arc::new(array) as ArrayRef;
869    //     check_round_trip_bitpacked(arr).await;
870
871    //     let values: Vec<u32> = vec![8000; 100];
872    //     let array = UInt32Array::from(values);
873    //     let arr = Arc::new(array) as ArrayRef;
874    //     check_round_trip_bitpacked(arr).await;
875
876    //     let values: Vec<u32> = vec![65536; 100];
877    //     let array = UInt32Array::from(values);
878    //     let arr = Arc::new(array) as ArrayRef;
879    //     check_round_trip_bitpacked(arr).await;
880
881    //     let values: Vec<u32> = vec![655360; 100];
882    //     let array = UInt32Array::from(values);
883    //     let arr = Arc::new(array) as ArrayRef;
884    //     check_round_trip_bitpacked(arr).await;
885
886    //     let arr = lance_datagen::gen()
887    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
888    //         .into_batch_rows(RowCount::from(1))
889    //         .unwrap()
890    //         .column(0)
891    //         .clone();
892    //     check_round_trip_bitpacked(arr).await;
893
894    //     let arr = lance_datagen::gen()
895    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
896    //         .into_batch_rows(RowCount::from(20))
897    //         .unwrap()
898    //         .column(0)
899    //         .clone();
900    //     check_round_trip_bitpacked(arr).await;
901
902    //     let arr = lance_datagen::gen()
903    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
904    //         .into_batch_rows(RowCount::from(50))
905    //         .unwrap()
906    //         .column(0)
907    //         .clone();
908    //     check_round_trip_bitpacked(arr).await;
909
910    //     let arr = lance_datagen::gen()
911    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
912    //         .into_batch_rows(RowCount::from(100))
913    //         .unwrap()
914    //         .column(0)
915    //         .clone();
916    //     check_round_trip_bitpacked(arr).await;
917
918    //     let arr = lance_datagen::gen()
919    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
920    //         .into_batch_rows(RowCount::from(1000))
921    //         .unwrap()
922    //         .column(0)
923    //         .clone();
924    //     check_round_trip_bitpacked(arr).await;
925
926    //     let arr = lance_datagen::gen()
927    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
928    //         .into_batch_rows(RowCount::from(1024))
929    //         .unwrap()
930    //         .column(0)
931    //         .clone();
932    //     check_round_trip_bitpacked(arr).await;
933
934    //     let arr = lance_datagen::gen()
935    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
936    //         .into_batch_rows(RowCount::from(2000))
937    //         .unwrap()
938    //         .column(0)
939    //         .clone();
940    //     check_round_trip_bitpacked(arr).await;
941
942    //     let arr = lance_datagen::gen()
943    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt32))
944    //         .into_batch_rows(RowCount::from(3000))
945    //         .unwrap()
946    //         .column(0)
947    //         .clone();
948    //     check_round_trip_bitpacked(arr).await;
949    // }
950
951    // #[test_log::test(tokio::test)]
952    // async fn test_bitpack_fastlanes_u64() {
953    //     let values: Vec<u64> = vec![5; 1024];
954    //     let array = UInt64Array::from(values);
955    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
956    //     check_round_trip_bitpacked(array).await;
957
958    //     let values: Vec<u64> = vec![7; 2000];
959    //     let array = UInt64Array::from(values);
960    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
961    //     check_round_trip_bitpacked(array).await;
962
963    //     let values: Vec<u64> = vec![66; 1000];
964    //     let array = UInt64Array::from(values);
965    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
966    //     check_round_trip_bitpacked(array).await;
967
968    //     let values: Vec<u64> = vec![666; 1000];
969    //     let array = UInt64Array::from(values);
970    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
971    //     check_round_trip_bitpacked(array).await;
972
973    //     let values: Vec<u64> = vec![77; 2000];
974    //     let array = UInt64Array::from(values);
975    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
976    //     check_round_trip_bitpacked(array).await;
977
978    //     let values: Vec<u64> = vec![0; 10000];
979    //     let array = UInt64Array::from(values);
980    //     let arr = Arc::new(array) as ArrayRef;
981    //     check_round_trip_bitpacked(arr).await;
982
983    //     let values: Vec<u64> = vec![1; 10000];
984    //     let array = UInt64Array::from(values);
985    //     let arr = Arc::new(array) as ArrayRef;
986    //     check_round_trip_bitpacked(arr).await;
987
988    //     let values: Vec<u64> = vec![88; 10000];
989    //     let array = UInt64Array::from(values);
990    //     let arr = Arc::new(array) as ArrayRef;
991    //     check_round_trip_bitpacked(arr).await;
992
993    //     let values: Vec<u64> = vec![300; 100];
994    //     let array = UInt64Array::from(values);
995    //     let arr = Arc::new(array) as ArrayRef;
996    //     check_round_trip_bitpacked(arr).await;
997
998    //     let values: Vec<u64> = vec![3000; 100];
999    //     let array = UInt64Array::from(values);
1000    //     let arr = Arc::new(array) as ArrayRef;
1001    //     check_round_trip_bitpacked(arr).await;
1002
1003    //     let values: Vec<u64> = vec![800; 100];
1004    //     let array = UInt64Array::from(values);
1005    //     let arr = Arc::new(array) as ArrayRef;
1006    //     check_round_trip_bitpacked(arr).await;
1007
1008    //     let values: Vec<u64> = vec![8000; 100];
1009    //     let array = UInt64Array::from(values);
1010    //     let arr = Arc::new(array) as ArrayRef;
1011    //     check_round_trip_bitpacked(arr).await;
1012
1013    //     let values: Vec<u64> = vec![65536; 100];
1014    //     let array = UInt64Array::from(values);
1015    //     let arr = Arc::new(array) as ArrayRef;
1016    //     check_round_trip_bitpacked(arr).await;
1017
1018    //     let values: Vec<u64> = vec![655360; 100];
1019    //     let array = UInt64Array::from(values);
1020    //     let arr = Arc::new(array) as ArrayRef;
1021    //     check_round_trip_bitpacked(arr).await;
1022
1023    //     let arr = lance_datagen::gen()
1024    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1025    //         .into_batch_rows(RowCount::from(1))
1026    //         .unwrap()
1027    //         .column(0)
1028    //         .clone();
1029    //     check_round_trip_bitpacked(arr).await;
1030
1031    //     let arr = lance_datagen::gen()
1032    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1033    //         .into_batch_rows(RowCount::from(20))
1034    //         .unwrap()
1035    //         .column(0)
1036    //         .clone();
1037    //     check_round_trip_bitpacked(arr).await;
1038
1039    //     let arr = lance_datagen::gen()
1040    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1041    //         .into_batch_rows(RowCount::from(50))
1042    //         .unwrap()
1043    //         .column(0)
1044    //         .clone();
1045    //     check_round_trip_bitpacked(arr).await;
1046
1047    //     let arr = lance_datagen::gen()
1048    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1049    //         .into_batch_rows(RowCount::from(100))
1050    //         .unwrap()
1051    //         .column(0)
1052    //         .clone();
1053    //     check_round_trip_bitpacked(arr).await;
1054
1055    //     let arr = lance_datagen::gen()
1056    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1057    //         .into_batch_rows(RowCount::from(1000))
1058    //         .unwrap()
1059    //         .column(0)
1060    //         .clone();
1061    //     check_round_trip_bitpacked(arr).await;
1062
1063    //     let arr = lance_datagen::gen()
1064    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1065    //         .into_batch_rows(RowCount::from(1024))
1066    //         .unwrap()
1067    //         .column(0)
1068    //         .clone();
1069    //     check_round_trip_bitpacked(arr).await;
1070
1071    //     let arr = lance_datagen::gen()
1072    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1073    //         .into_batch_rows(RowCount::from(2000))
1074    //         .unwrap()
1075    //         .column(0)
1076    //         .clone();
1077    //     check_round_trip_bitpacked(arr).await;
1078
1079    //     let arr = lance_datagen::gen()
1080    //         .anon_col(lance_datagen::array::rand_type(&DataType::UInt64))
1081    //         .into_batch_rows(RowCount::from(3000))
1082    //         .unwrap()
1083    //         .column(0)
1084    //         .clone();
1085    //     check_round_trip_bitpacked(arr).await;
1086    // }
1087
1088    // #[test_log::test(tokio::test)]
1089    // async fn test_bitpack_fastlanes_i8() {
1090    //     let values: Vec<i8> = vec![-5; 1024];
1091    //     let array = Int8Array::from(values);
1092    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1093    //     check_round_trip_bitpacked(array).await;
1094
1095    //     let values: Vec<i8> = vec![66; 1000];
1096    //     let array = Int8Array::from(values);
1097    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1098
1099    //     check_round_trip_bitpacked(array).await;
1100
1101    //     let values: Vec<i8> = vec![77; 2000];
1102    //     let array = Int8Array::from(values);
1103    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1104
1105    //     check_round_trip_bitpacked(array).await;
1106
1107    //     let values: Vec<i8> = vec![0; 10000];
1108    //     let array = Int8Array::from(values);
1109    //     let arr = Arc::new(array) as ArrayRef;
1110    //     check_round_trip_bitpacked(arr).await;
1111
1112    //     let values: Vec<i8> = vec![88; 10000];
1113    //     let array = Int8Array::from(values);
1114    //     let arr = Arc::new(array) as ArrayRef;
1115    //     check_round_trip_bitpacked(arr).await;
1116
1117    //     let values: Vec<i8> = vec![-88; 10000];
1118    //     let array = Int8Array::from(values);
1119    //     let arr = Arc::new(array) as ArrayRef;
1120    //     check_round_trip_bitpacked(arr).await;
1121
1122    //     let arr = lance_datagen::gen()
1123    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1124    //         .into_batch_rows(RowCount::from(1))
1125    //         .unwrap()
1126    //         .column(0)
1127    //         .clone();
1128    //     check_round_trip_bitpacked(arr).await;
1129
1130    //     let arr = lance_datagen::gen()
1131    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1132    //         .into_batch_rows(RowCount::from(20))
1133    //         .unwrap()
1134    //         .column(0)
1135    //         .clone();
1136    //     check_round_trip_bitpacked(arr).await;
1137
1138    //     let arr = lance_datagen::gen()
1139    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1140    //         .into_batch_rows(RowCount::from(50))
1141    //         .unwrap()
1142    //         .column(0)
1143    //         .clone();
1144    //     check_round_trip_bitpacked(arr).await;
1145
1146    //     let arr = lance_datagen::gen()
1147    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1148    //         .into_batch_rows(RowCount::from(100))
1149    //         .unwrap()
1150    //         .column(0)
1151    //         .clone();
1152    //     check_round_trip_bitpacked(arr).await;
1153
1154    //     let arr = lance_datagen::gen()
1155    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1156    //         .into_batch_rows(RowCount::from(1000))
1157    //         .unwrap()
1158    //         .column(0)
1159    //         .clone();
1160    //     check_round_trip_bitpacked(arr).await;
1161
1162    //     let arr = lance_datagen::gen()
1163    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1164    //         .into_batch_rows(RowCount::from(1024))
1165    //         .unwrap()
1166    //         .column(0)
1167    //         .clone();
1168    //     check_round_trip_bitpacked(arr).await;
1169
1170    //     let arr = lance_datagen::gen()
1171    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1172    //         .into_batch_rows(RowCount::from(2000))
1173    //         .unwrap()
1174    //         .column(0)
1175    //         .clone();
1176    //     check_round_trip_bitpacked(arr).await;
1177
1178    //     let arr = lance_datagen::gen()
1179    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int8))
1180    //         .into_batch_rows(RowCount::from(3000))
1181    //         .unwrap()
1182    //         .column(0)
1183    //         .clone();
1184    //     check_round_trip_bitpacked(arr).await;
1185    // }
1186
1187    // #[test_log::test(tokio::test)]
1188    // async fn test_bitpack_fastlanes_i16() {
1189    //     let values: Vec<i16> = vec![-5; 1024];
1190    //     let array = Int16Array::from(values);
1191    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1192    //     check_round_trip_bitpacked(array).await;
1193
1194    //     let values: Vec<i16> = vec![66; 1000];
1195    //     let array = Int16Array::from(values);
1196    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1197
1198    //     check_round_trip_bitpacked(array).await;
1199
1200    //     let values: Vec<i16> = vec![77; 2000];
1201    //     let array = Int16Array::from(values);
1202    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1203
1204    //     check_round_trip_bitpacked(array).await;
1205
1206    //     let values: Vec<i16> = vec![0; 10000];
1207    //     let array = Int16Array::from(values);
1208    //     let arr = Arc::new(array) as ArrayRef;
1209    //     check_round_trip_bitpacked(arr).await;
1210
1211    //     let values: Vec<i16> = vec![88; 10000];
1212    //     let array = Int16Array::from(values);
1213    //     let arr = Arc::new(array) as ArrayRef;
1214    //     check_round_trip_bitpacked(arr).await;
1215
1216    //     let values: Vec<i16> = vec![300; 100];
1217    //     let array = Int16Array::from(values);
1218    //     let arr = Arc::new(array) as ArrayRef;
1219    //     check_round_trip_bitpacked(arr).await;
1220
1221    //     let values: Vec<i16> = vec![800; 100];
1222    //     let array = Int16Array::from(values);
1223    //     let arr = Arc::new(array) as ArrayRef;
1224    //     check_round_trip_bitpacked(arr).await;
1225
1226    //     let arr = lance_datagen::gen()
1227    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1228    //         .into_batch_rows(RowCount::from(1))
1229    //         .unwrap()
1230    //         .column(0)
1231    //         .clone();
1232    //     check_round_trip_bitpacked(arr).await;
1233
1234    //     let arr = lance_datagen::gen()
1235    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1236    //         .into_batch_rows(RowCount::from(20))
1237    //         .unwrap()
1238    //         .column(0)
1239    //         .clone();
1240    //     check_round_trip_bitpacked(arr).await;
1241
1242    //     let arr = lance_datagen::gen()
1243    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1244    //         .into_batch_rows(RowCount::from(50))
1245    //         .unwrap()
1246    //         .column(0)
1247    //         .clone();
1248    //     check_round_trip_bitpacked(arr).await;
1249
1250    //     let arr = lance_datagen::gen()
1251    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1252    //         .into_batch_rows(RowCount::from(100))
1253    //         .unwrap()
1254    //         .column(0)
1255    //         .clone();
1256    //     check_round_trip_bitpacked(arr).await;
1257
1258    //     let arr = lance_datagen::gen()
1259    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1260    //         .into_batch_rows(RowCount::from(1000))
1261    //         .unwrap()
1262    //         .column(0)
1263    //         .clone();
1264    //     check_round_trip_bitpacked(arr).await;
1265
1266    //     let arr = lance_datagen::gen()
1267    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1268    //         .into_batch_rows(RowCount::from(1024))
1269    //         .unwrap()
1270    //         .column(0)
1271    //         .clone();
1272    //     check_round_trip_bitpacked(arr).await;
1273
1274    //     let arr = lance_datagen::gen()
1275    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1276    //         .into_batch_rows(RowCount::from(2000))
1277    //         .unwrap()
1278    //         .column(0)
1279    //         .clone();
1280    //     check_round_trip_bitpacked(arr).await;
1281
1282    //     let arr = lance_datagen::gen()
1283    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int16))
1284    //         .into_batch_rows(RowCount::from(3000))
1285    //         .unwrap()
1286    //         .column(0)
1287    //         .clone();
1288    //     check_round_trip_bitpacked(arr).await;
1289    // }
1290
1291    // #[test_log::test(tokio::test)]
1292    // async fn test_bitpack_fastlanes_i32() {
1293    //     let values: Vec<i32> = vec![-5; 1024];
1294    //     let array = Int32Array::from(values);
1295    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1296    //     check_round_trip_bitpacked(array).await;
1297
1298    //     let values: Vec<i32> = vec![66; 1000];
1299    //     let array = Int32Array::from(values);
1300    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1301    //     check_round_trip_bitpacked(array).await;
1302
1303    //     let values: Vec<i32> = vec![-66; 1000];
1304    //     let array = Int32Array::from(values);
1305    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1306    //     check_round_trip_bitpacked(array).await;
1307
1308    //     let values: Vec<i32> = vec![77; 2000];
1309    //     let array = Int32Array::from(values);
1310    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1311    //     check_round_trip_bitpacked(array).await;
1312
1313    //     let values: Vec<i32> = vec![-77; 2000];
1314    //     let array = Int32Array::from(values);
1315    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1316    //     check_round_trip_bitpacked(array).await;
1317
1318    //     let values: Vec<i32> = vec![0; 10000];
1319    //     let array = Int32Array::from(values);
1320    //     let arr = Arc::new(array) as ArrayRef;
1321    //     check_round_trip_bitpacked(arr).await;
1322
1323    //     let values: Vec<i32> = vec![88; 10000];
1324    //     let array = Int32Array::from(values);
1325    //     let arr = Arc::new(array) as ArrayRef;
1326    //     check_round_trip_bitpacked(arr).await;
1327
1328    //     let values: Vec<i32> = vec![-88; 10000];
1329    //     let array = Int32Array::from(values);
1330    //     let arr = Arc::new(array) as ArrayRef;
1331    //     check_round_trip_bitpacked(arr).await;
1332
1333    //     let values: Vec<i32> = vec![300; 100];
1334    //     let array = Int32Array::from(values);
1335    //     let arr = Arc::new(array) as ArrayRef;
1336    //     check_round_trip_bitpacked(arr).await;
1337
1338    //     let values: Vec<i32> = vec![-300; 100];
1339    //     let array = Int32Array::from(values);
1340    //     let arr = Arc::new(array) as ArrayRef;
1341    //     check_round_trip_bitpacked(arr).await;
1342
1343    //     let values: Vec<i32> = vec![800; 100];
1344    //     let array = Int32Array::from(values);
1345    //     let arr = Arc::new(array) as ArrayRef;
1346    //     check_round_trip_bitpacked(arr).await;
1347
1348    //     let values: Vec<i32> = vec![-800; 100];
1349    //     let array = Int32Array::from(values);
1350    //     let arr = Arc::new(array) as ArrayRef;
1351    //     check_round_trip_bitpacked(arr).await;
1352
1353    //     let values: Vec<i32> = vec![65536; 100];
1354    //     let array = Int32Array::from(values);
1355    //     let arr = Arc::new(array) as ArrayRef;
1356    //     check_round_trip_bitpacked(arr).await;
1357
1358    //     let values: Vec<i32> = vec![-65536; 100];
1359    //     let array = Int32Array::from(values);
1360    //     let arr = Arc::new(array) as ArrayRef;
1361    //     check_round_trip_bitpacked(arr).await;
1362
1363    //     let arr = lance_datagen::gen()
1364    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1365    //         .into_batch_rows(RowCount::from(1))
1366    //         .unwrap()
1367    //         .column(0)
1368    //         .clone();
1369    //     check_round_trip_bitpacked(arr).await;
1370
1371    //     let arr = lance_datagen::gen()
1372    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1373    //         .into_batch_rows(RowCount::from(20))
1374    //         .unwrap()
1375    //         .column(0)
1376    //         .clone();
1377    //     check_round_trip_bitpacked(arr).await;
1378
1379    //     let arr = lance_datagen::gen()
1380    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1381    //         .into_batch_rows(RowCount::from(50))
1382    //         .unwrap()
1383    //         .column(0)
1384    //         .clone();
1385    //     check_round_trip_bitpacked(arr).await;
1386
1387    //     let arr = lance_datagen::gen()
1388    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1389    //         .into_batch_rows(RowCount::from(100))
1390    //         .unwrap()
1391    //         .column(0)
1392    //         .clone();
1393    //     check_round_trip_bitpacked(arr).await;
1394
1395    //     let arr = lance_datagen::gen()
1396    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1397    //         .into_batch_rows(RowCount::from(1000))
1398    //         .unwrap()
1399    //         .column(0)
1400    //         .clone();
1401    //     check_round_trip_bitpacked(arr).await;
1402
1403    //     let arr = lance_datagen::gen()
1404    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1405    //         .into_batch_rows(RowCount::from(1024))
1406    //         .unwrap()
1407    //         .column(0)
1408    //         .clone();
1409    //     check_round_trip_bitpacked(arr).await;
1410
1411    //     let arr = lance_datagen::gen()
1412    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1413    //         .into_batch_rows(RowCount::from(2000))
1414    //         .unwrap()
1415    //         .column(0)
1416    //         .clone();
1417    //     check_round_trip_bitpacked(arr).await;
1418
1419    //     let arr = lance_datagen::gen()
1420    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int32))
1421    //         .into_batch_rows(RowCount::from(3000))
1422    //         .unwrap()
1423    //         .column(0)
1424    //         .clone();
1425    //     check_round_trip_bitpacked(arr).await;
1426    // }
1427
1428    // #[test_log::test(tokio::test)]
1429    // async fn test_bitpack_fastlanes_i64() {
1430    //     let values: Vec<i64> = vec![-5; 1024];
1431    //     let array = Int64Array::from(values);
1432    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1433    //     check_round_trip_bitpacked(array).await;
1434
1435    //     let values: Vec<i64> = vec![66; 1000];
1436    //     let array = Int64Array::from(values);
1437    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1438    //     check_round_trip_bitpacked(array).await;
1439
1440    //     let values: Vec<i64> = vec![-66; 1000];
1441    //     let array = Int64Array::from(values);
1442    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1443    //     check_round_trip_bitpacked(array).await;
1444
1445    //     let values: Vec<i64> = vec![77; 2000];
1446    //     let array = Int64Array::from(values);
1447    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1448    //     check_round_trip_bitpacked(array).await;
1449
1450    //     let values: Vec<i64> = vec![-77; 2000];
1451    //     let array = Int64Array::from(values);
1452    //     let array: Arc<dyn arrow_array::Array> = Arc::new(array);
1453    //     check_round_trip_bitpacked(array).await;
1454
1455    //     let values: Vec<i64> = vec![0; 10000];
1456    //     let array = Int64Array::from(values);
1457    //     let arr = Arc::new(array) as ArrayRef;
1458    //     check_round_trip_bitpacked(arr).await;
1459
1460    //     let values: Vec<i64> = vec![88; 10000];
1461    //     let array = Int64Array::from(values);
1462    //     let arr = Arc::new(array) as ArrayRef;
1463    //     check_round_trip_bitpacked(arr).await;
1464
1465    //     let values: Vec<i64> = vec![-88; 10000];
1466    //     let array = Int64Array::from(values);
1467    //     let arr = Arc::new(array) as ArrayRef;
1468    //     check_round_trip_bitpacked(arr).await;
1469
1470    //     let values: Vec<i64> = vec![300; 100];
1471    //     let array = Int64Array::from(values);
1472    //     let arr = Arc::new(array) as ArrayRef;
1473    //     check_round_trip_bitpacked(arr).await;
1474
1475    //     let values: Vec<i64> = vec![-300; 100];
1476    //     let array = Int64Array::from(values);
1477    //     let arr = Arc::new(array) as ArrayRef;
1478    //     check_round_trip_bitpacked(arr).await;
1479
1480    //     let values: Vec<i64> = vec![800; 100];
1481    //     let array = Int64Array::from(values);
1482    //     let arr = Arc::new(array) as ArrayRef;
1483    //     check_round_trip_bitpacked(arr).await;
1484
1485    //     let values: Vec<i64> = vec![-800; 100];
1486    //     let array = Int64Array::from(values);
1487    //     let arr = Arc::new(array) as ArrayRef;
1488    //     check_round_trip_bitpacked(arr).await;
1489
1490    //     let values: Vec<i64> = vec![65536; 100];
1491    //     let array = Int64Array::from(values);
1492    //     let arr = Arc::new(array) as ArrayRef;
1493    //     check_round_trip_bitpacked(arr).await;
1494
1495    //     let values: Vec<i64> = vec![-65536; 100];
1496    //     let array = Int64Array::from(values);
1497    //     let arr = Arc::new(array) as ArrayRef;
1498    //     check_round_trip_bitpacked(arr).await;
1499
1500    //     let arr = lance_datagen::gen()
1501    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1502    //         .into_batch_rows(RowCount::from(1))
1503    //         .unwrap()
1504    //         .column(0)
1505    //         .clone();
1506    //     check_round_trip_bitpacked(arr).await;
1507
1508    //     let arr = lance_datagen::gen()
1509    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1510    //         .into_batch_rows(RowCount::from(20))
1511    //         .unwrap()
1512    //         .column(0)
1513    //         .clone();
1514    //     check_round_trip_bitpacked(arr).await;
1515
1516    //     let arr = lance_datagen::gen()
1517    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1518    //         .into_batch_rows(RowCount::from(50))
1519    //         .unwrap()
1520    //         .column(0)
1521    //         .clone();
1522    //     check_round_trip_bitpacked(arr).await;
1523
1524    //     let arr = lance_datagen::gen()
1525    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1526    //         .into_batch_rows(RowCount::from(100))
1527    //         .unwrap()
1528    //         .column(0)
1529    //         .clone();
1530    //     check_round_trip_bitpacked(arr).await;
1531
1532    //     let arr = lance_datagen::gen()
1533    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1534    //         .into_batch_rows(RowCount::from(1000))
1535    //         .unwrap()
1536    //         .column(0)
1537    //         .clone();
1538    //     check_round_trip_bitpacked(arr).await;
1539
1540    //     let arr = lance_datagen::gen()
1541    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1542    //         .into_batch_rows(RowCount::from(1024))
1543    //         .unwrap()
1544    //         .column(0)
1545    //         .clone();
1546    //     check_round_trip_bitpacked(arr).await;
1547
1548    //     let arr = lance_datagen::gen()
1549    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1550    //         .into_batch_rows(RowCount::from(2000))
1551    //         .unwrap()
1552    //         .column(0)
1553    //         .clone();
1554    //     check_round_trip_bitpacked(arr).await;
1555
1556    //     let arr = lance_datagen::gen()
1557    //         .anon_col(lance_datagen::array::rand_type(&DataType::Int64))
1558    //         .into_batch_rows(RowCount::from(3000))
1559    //         .unwrap()
1560    //         .column(0)
1561    //         .clone();
1562    //     check_round_trip_bitpacked(arr).await;
1563    // }
1564}
1565
// This macro chunks the FixedWidth DataBlock, bitpacks them with 1024 values per chunk,
// it puts the bit-width parameter in front of each chunk,
// and the bit-width parameter has the same bit-width as the uncompressed DataBlock
// for example, if the input DataBlock has `bits_per_value` of `16`, there will be 2 bytes(16 bits)
// in front of each chunk storing the `bit-width` parameter.
//
// `$data` is a `FixedWidthDataBlock` and `$data_type` the matching unsigned
// integer type (u8/u16/u32/u64). Produces a `(MiniBlockCompressed, ArrayEncoding)`
// pair; all sizes below are counted in `$data_type` words, not bytes.
macro_rules! chunk_data_impl {
    ($data:expr, $data_type:ty) => {{
        let data_buffer = $data.data.borrow_to_typed_slice::<$data_type>();
        let data_buffer = data_buffer.as_ref();

        // One `BitWidth` statistic entry per 1024-value chunk; the last entry
        // covers the (possibly partial) trailing chunk.
        // NOTE(review): the loop below does `len() - 1` and would underflow if
        // the statistic were empty — presumably `expect_stat` guarantees at
        // least one entry for a non-empty block; confirm against the stats code.
        let bit_widths = $data.expect_stat(Stat::BitWidth);
        let bit_widths_array = bit_widths
            .as_any()
            .downcast_ref::<PrimitiveArray<UInt64Type>>()
            .unwrap();

        // Per-chunk packed size in words: 1024 values * bit_width bits, divided
        // by the word width. `total_size` adds one extra word per chunk for the
        // bit-width header stored in front of the packed data.
        let (packed_chunk_sizes, total_size) = bit_widths_array
            .values()
            .iter()
            .map(|&bit_width| {
                let chunk_size = ((1024 * bit_width) / $data.bits_per_value) as usize;
                (chunk_size, chunk_size + 1)
            })
            .fold(
                (Vec::with_capacity(bit_widths_array.len()), 0),
                |(mut sizes, total), (size, inc)| {
                    sizes.push(size);
                    (sizes, total + inc)
                },
            );

        // `output` holds [bit_width, packed words...] for every chunk, back to back.
        let mut output: Vec<$data_type> = Vec::with_capacity(total_size);
        let mut chunks = Vec::with_capacity(bit_widths_array.len());

        // All chunks except the last are guaranteed full (1024 values).
        for i in 0..bit_widths_array.len() - 1 {
            let start_elem = i * ELEMS_PER_CHUNK as usize;
            let bit_width = bit_widths_array.value(i) as $data_type;
            output.push(bit_width);
            let output_len = output.len();
            // SAFETY: `output` was allocated with capacity `total_size`, which
            // accounts for every chunk's header word plus `packed_chunk_sizes[i]`
            // packed words, and `unchecked_pack` fully initializes the
            // `packed_chunk_sizes[i]` words exposed by `set_len`.
            unsafe {
                output.set_len(output_len + packed_chunk_sizes[i]);
                BitPacking::unchecked_pack(
                    bit_width as usize,
                    &data_buffer[start_elem..][..ELEMS_PER_CHUNK as usize],
                    &mut output[output_len..][..packed_chunk_sizes[i]],
                );
            }
            chunks.push(MiniBlockChunk {
                // Chunk byte size = header word + packed words.
                num_bytes: ((1 + packed_chunk_sizes[i]) * std::mem::size_of::<$data_type>()) as u16,
                log_num_values: LOG_ELEMS_PER_CHUNK,
            });
        }

        // Handle the last chunk
        // The trailing chunk may be partial; it is zero-padded to a full 1024
        // values before packing because the kernel only packs whole chunks.
        let last_chunk_elem_num = if $data.num_values % ELEMS_PER_CHUNK == 0 {
            1024
        } else {
            $data.num_values % ELEMS_PER_CHUNK
        };
        let mut last_chunk = vec![0; ELEMS_PER_CHUNK as usize];
        last_chunk[..last_chunk_elem_num as usize].clone_from_slice(
            &data_buffer[$data.num_values as usize - last_chunk_elem_num as usize..],
        );
        let bit_width = bit_widths_array.value(bit_widths_array.len() - 1) as $data_type;
        output.push(bit_width);
        let output_len = output.len();
        // SAFETY: same invariant as the loop above — capacity covers the final
        // chunk's packed words and `unchecked_pack` initializes all of them.
        unsafe {
            output.set_len(output_len + packed_chunk_sizes[bit_widths_array.len() - 1]);
            BitPacking::unchecked_pack(
                bit_width as usize,
                &last_chunk,
                &mut output[output_len..][..packed_chunk_sizes[bit_widths_array.len() - 1]],
            );
        }
        chunks.push(MiniBlockChunk {
            num_bytes: ((1 + packed_chunk_sizes[bit_widths_array.len() - 1])
                * std::mem::size_of::<$data_type>()) as u16,
            // `log_num_values: 0` marks this as the trailing chunk whose value
            // count is derived from the block's total `num_values`.
            log_num_values: 0,
        });

        (
            MiniBlockCompressed {
                data: LanceBuffer::reinterpret_vec(output),
                chunks,
                num_values: $data.num_values,
            },
            ProtobufUtils::bitpack2($data.bits_per_value),
        )
    }};
}
1656
/// Encoder that bitpacks fixed-width integer data into mini-block chunks of
/// `ELEMS_PER_CHUNK` (1024) values, each chunk prefixed with its bit-width.
#[derive(Debug, Default)]
pub struct BitpackMiniBlockEncoder {}

impl BitpackMiniBlockEncoder {
    /// Splits `data` into 1024-value chunks and bitpacks each chunk at the
    /// bit width recorded in the block's `Stat::BitWidth` statistic.
    ///
    /// Returns the compressed chunks together with the protobuf encoding
    /// description (`bitpack2`). Only whole-byte value widths (8/16/32/64 bits)
    /// are supported; any other width is a programming error.
    fn chunk_data(
        &self,
        mut data: FixedWidthDataBlock,
    ) -> (MiniBlockCompressed, crate::format::pb::ArrayEncoding) {
        assert!(data.bits_per_value % 8 == 0);
        match data.bits_per_value {
            8 => chunk_data_impl!(data, u8),
            16 => chunk_data_impl!(data, u16),
            32 => chunk_data_impl!(data, u32),
            64 => chunk_data_impl!(data, u64),
            _ => unreachable!(),
        }
    }
}
1675
1676impl MiniBlockCompressor for BitpackMiniBlockEncoder {
1677    fn compress(
1678        &self,
1679        chunk: DataBlock,
1680    ) -> Result<(MiniBlockCompressed, crate::format::pb::ArrayEncoding)> {
1681        match chunk {
1682            DataBlock::FixedWidth(fixed_width) => Ok(self.chunk_data(fixed_width)),
1683            _ => Err(Error::InvalidInput {
1684                source: format!(
1685                    "Cannot compress a data block of type {} with BitpackMiniBlockEncoder",
1686                    chunk.name()
1687                )
1688                .into(),
1689                location: location!(),
1690            }),
1691        }
1692    }
1693}
1694
/// A decompressor for bitpacked mini-block chunks produced by the bitpack
/// mini-block encoder: each chunk on disk is a bit-width word (as wide as the
/// uncompressed value type) followed by a packed run of up to 1024 values.
#[derive(Debug)]
pub struct BitpackMiniBlockDecompressor {
    // Bit width of values after decompression (8/16/32/64), from the
    // `Bitpack2` protobuf description.
    uncompressed_bit_width: u64,
}
1701
1702impl BitpackMiniBlockDecompressor {
1703    pub fn new(description: &pb::Bitpack2) -> Self {
1704        Self {
1705            uncompressed_bit_width: description.uncompressed_bits_per_value,
1706        }
1707    }
1708}
1709
1710impl MiniBlockDecompressor for BitpackMiniBlockDecompressor {
1711    fn decompress(&self, data: LanceBuffer, num_values: u64) -> Result<DataBlock> {
1712        assert!(data.len() >= 8);
1713        assert!(num_values <= ELEMS_PER_CHUNK);
1714
1715        // This macro decompresses a chunk(1024 values) of bitpacked values.
1716        macro_rules! decompress_impl {
1717            ($type:ty) => {{
1718                let uncompressed_bit_width = std::mem::size_of::<$type>() * 8;
1719                let mut decompressed = vec![0 as $type; ELEMS_PER_CHUNK as usize];
1720
1721                // Copy for memory alignment
1722                let chunk_in_u8: Vec<u8> = data.to_vec();
1723                let bit_width_bytes = &chunk_in_u8[..std::mem::size_of::<$type>()];
1724                let bit_width_value = LittleEndian::read_uint(bit_width_bytes, std::mem::size_of::<$type>());
1725                let chunk = cast_slice(&chunk_in_u8[std::mem::size_of::<$type>()..]);
1726
1727                // The bit-packed chunk should have number of bytes (bit_width_value * ELEMS_PER_CHUNK / 8)
1728                assert!(chunk.len() * std::mem::size_of::<$type>() == (bit_width_value * ELEMS_PER_CHUNK as u64) as usize / 8);
1729
1730                unsafe {
1731                    BitPacking::unchecked_unpack(
1732                        bit_width_value as usize,
1733                        chunk,
1734                        &mut decompressed,
1735                    );
1736                }
1737
1738                decompressed.shrink_to(num_values as usize);
1739                Ok(DataBlock::FixedWidth(FixedWidthDataBlock {
1740                    data: LanceBuffer::reinterpret_vec(decompressed),
1741                    bits_per_value: uncompressed_bit_width as u64,
1742                    num_values,
1743                    block_info: BlockInfo::new(),
1744                }))
1745            }};
1746        }
1747
1748        match self.uncompressed_bit_width {
1749            8 => decompress_impl!(u8),
1750            16 => decompress_impl!(u16),
1751            32 => decompress_impl!(u32),
1752            64 => decompress_impl!(u64),
1753            _ => todo!(),
1754        }
1755    }
1756}
1757
#[cfg(test)]
mod test {
    use std::{collections::HashMap, sync::Arc};

    use arrow_array::{Array, Int64Array, Int8Array};
    use arrow_schema::DataType;

    use crate::{
        testing::{check_round_trip_encoding_of_data, TestCases},
        version::LanceFileVersion,
    };

    #[test_log::test(tokio::test)]
    async fn test_miniblock_bitpack() {
        let test_cases = TestCases::default().with_file_version(LanceFileVersion::V2_1);

        // Int8 round trips, including negatives and a single-value trailing chunk.
        let int8_arrays: Vec<Arc<dyn Array>> = vec![
            Arc::new(Int8Array::from(vec![100; 1024])),
            Arc::new(Int8Array::from(vec![1; 1024])),
            Arc::new(Int8Array::from(vec![16; 1024])),
            Arc::new(Int8Array::from(vec![-1; 1024])),
            Arc::new(Int8Array::from(vec![5; 1])),
        ];
        check_round_trip_encoding_of_data(int8_arrays, &test_cases, HashMap::new()).await;

        // Wider integer types: build the fixtures once as Int64, then cast to
        // each target type before round-tripping.
        for data_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let int64_arrays = [
                Int64Array::from(vec![3; 1024]),
                Int64Array::from(vec![8; 1024]),
                Int64Array::from(vec![16; 1024]),
                Int64Array::from(vec![100; 1024]),
                Int64Array::from(vec![512; 1024]),
                Int64Array::from(vec![1000; 1024]),
                Int64Array::from(vec![2000; 1024]),
                Int64Array::from(vec![-1; 10]),
            ];

            let arrays: Vec<_> = int64_arrays
                .iter()
                .map(|int64_array| arrow_cast::cast(int64_array, &data_type).unwrap())
                .collect();
            check_round_trip_encoding_of_data(arrays, &test_cases, HashMap::new()).await;
        }
    }
}
1805}