pub trait VectorIndex:
Send
+ Sync
+ Debug
+ Index {
Show 15 methods
// Required methods
fn search<'life0, 'life1, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn find_partitions(&self, query: &Query) -> Result<UInt32Array>;
fn search_in_partition<'life0, 'life1, 'async_trait>(
&'life0 self,
partition_id: usize,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn is_loadable(&self) -> bool;
fn use_residual(&self) -> bool;
fn check_can_remap(&self) -> Result<()>;
fn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn row_ids(&self) -> Box<dyn Iterator<Item = &u64> + '_>;
fn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn metric_type(&self) -> DistanceType;
fn ivf_model(&self) -> IvfModel;
fn quantizer(&self) -> Quantizer;
fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
// Provided methods
fn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait { ... }
fn remap_to<'life0, 'async_trait>(
self: Arc<Self>,
_store: ObjectStore,
_mapping: &'life0 HashMap<u64, Option<u64>>,
_column: String,
_index_dir: Path,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait { ... }
}
Expand description
Vector Index for (Approximate) Nearest Neighbor (ANN) Search. It is always an IVF index; any other index type without partitioning is treated as an IVF index with a single partition.
Required Methods§
Sourcefn search<'life0, 'life1, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn search<'life0, 'life1, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Search the vector index for nearest neighbors.
Returns a RecordBatch with the following schema:
use arrow_schema::{Schema, Field, DataType};
Schema::new(vec![
Field::new("_rowid", DataType::UInt64, true),
Field::new("_distance", DataType::Float32, false),
]);
The pre_filter argument is used to filter out row ids that are known to be irrelevant to the query. For example, it removes deleted rows.
WARNINGS:
- Only f32 is supported for now. Support for f64/f16 will be added later.
fn find_partitions(&self, query: &Query) -> Result<UInt32Array>
fn search_in_partition<'life0, 'life1, 'async_trait>(
&'life0 self,
partition_id: usize,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Sourcefn is_loadable(&self) -> bool
fn is_loadable(&self) -> bool
Whether the index is loadable by IVF, i.e. whether it can be a sub-index that IVF loads on demand.
Sourcefn use_residual(&self) -> bool
fn use_residual(&self) -> bool
Whether to use residual vectors for search.
Sourcefn check_can_remap(&self) -> Result<()>
fn check_can_remap(&self) -> Result<()>
Returns Ok if the index can be remapped; otherwise returns an error explaining why not.
Sourcefn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Load the index from the reader on-demand.
Sourcefn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Remap the index according to mapping.
Each item in mapping describes an old row id -> new row id pair. If an old row id maps to None, that row id has been deleted and can be removed from the index.
If an old row id is not in the mapping, it should be left alone.
Sourcefn metric_type(&self) -> DistanceType
fn metric_type(&self) -> DistanceType
The metric type of this vector index.
fn ivf_model(&self) -> IvfModel
fn quantizer(&self) -> Quantizer
Sourcefn sub_index_type(&self) -> (SubIndexType, QuantizationType)
fn sub_index_type(&self) -> (SubIndexType, QuantizationType)
The index type of this vector index, returned as a (sub-index type, quantization type) pair.
Provided Methods§
Sourcefn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Load the partition from the reader on-demand.
Sourcefn remap_to<'life0, 'async_trait>(
self: Arc<Self>,
_store: ObjectStore,
_mapping: &'life0 HashMap<u64, Option<u64>>,
_column: String,
_index_dir: Path,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn remap_to<'life0, 'async_trait>(
self: Arc<Self>,
_store: ObjectStore,
_mapping: &'life0 HashMap<u64, Option<u64>>,
_column: String,
_index_dir: Path,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Remap the index according to mapping and write the remapped index to index_dir.
This is available only for v3 indexes.