lance_encoding/lib.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::ops::Range;
5
6use bytes::Bytes;
7use futures::{future::BoxFuture, FutureExt, TryFutureExt};
8
9use lance_core::Result;
10
11pub mod buffer;
12pub mod compression_algo;
13pub mod data;
14pub mod decoder;
15pub mod encoder;
16pub mod encodings;
17pub mod format;
18pub mod repdef;
19pub mod statistics;
20#[cfg(test)]
21pub mod testing;
22pub mod utils;
23pub mod version;
24
25// We can definitely add support for big-endian machines someday. However, it's not a priority and
26// would involve extensive testing (probably through emulation) to ensure that the encodings are
27// correct.
28#[cfg(not(target_endian = "little"))]
29compile_error!("Lance encodings only support little-endian systems.");
30
31/// A trait for an I/O service
32///
33/// This represents the I/O API that the encoders and decoders need in order to operate.
34/// We specify this as a trait so that lance-encodings does not need to depend on lance-io
35///
36/// In general, it is assumed that this trait will be implemented by some kind of "file reader"
37/// or "file scheduler". The encodings here are all limited to accessing a single file.
38pub trait EncodingsIo: std::fmt::Debug + Send + Sync {
39 /// Submit an I/O request
40 ///
41 /// The response must contain a `Bytes` object for each range requested even if the underlying
42 /// I/O was coalesced into fewer actual requests.
43 ///
44 /// # Arguments
45 ///
46 /// * `ranges` - the byte ranges to request
47 /// * `priority` - the priority of the request
48 ///
49 /// Priority should be set to the lowest row number that this request is delivering data for.
50 /// This is important in cases where indirect I/O causes high priority requests to be submitted
51 /// after low priority requests. We want to fulfill the indirect I/O more quickly so that we
52 /// can decode as quickly as possible.
53 ///
54 /// The implementation should be able to handle empty ranges, and should return an empty
55 /// byte buffer for each empty range.
56 fn submit_request(
57 &self,
58 range: Vec<Range<u64>>,
59 priority: u64,
60 ) -> BoxFuture<'static, Result<Vec<Bytes>>>;
61
62 /// Submit an I/O request with a single range
63 ///
64 /// This is just a utitliy function that wraps [`EncodingsIo::submit_request`] for the common
65 /// case of a single range request.
66 fn submit_single(
67 &self,
68 range: std::ops::Range<u64>,
69 priority: u64,
70 ) -> BoxFuture<'static, lance_core::Result<bytes::Bytes>> {
71 self.submit_request(vec![range], priority)
72 .map_ok(|mut v| v.pop().unwrap())
73 .boxed()
74 }
75}
76
77/// An implementation of EncodingsIo that serves data from an in-memory buffer
78#[derive(Debug)]
79pub struct BufferScheduler {
80 data: Bytes,
81}
82
83impl BufferScheduler {
84 pub fn new(data: Bytes) -> Self {
85 Self { data }
86 }
87
88 fn satisfy_request(&self, req: Range<u64>) -> Bytes {
89 self.data.slice(req.start as usize..req.end as usize)
90 }
91}
92
93impl EncodingsIo for BufferScheduler {
94 fn submit_request(
95 &self,
96 ranges: Vec<Range<u64>>,
97 _priority: u64,
98 ) -> BoxFuture<'static, Result<Vec<Bytes>>> {
99 std::future::ready(Ok(ranges
100 .into_iter()
101 .map(|range| self.satisfy_request(range))
102 .collect::<Vec<_>>()))
103 .boxed()
104 }
105}