Trait VectorIndex

Source
pub trait VectorIndex:
    Send
    + Sync
    + Debug
    + Index {
Show 15 methods // Required methods fn search<'life0, 'life1, 'async_trait>( &'life0 self, query: &'life1 Query, pre_filter: Arc<dyn PreFilter>, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait; fn find_partitions(&self, query: &Query) -> Result<UInt32Array>; fn search_in_partition<'life0, 'life1, 'async_trait>( &'life0 self, partition_id: usize, query: &'life1 Query, pre_filter: Arc<dyn PreFilter>, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait; fn is_loadable(&self) -> bool; fn use_residual(&self) -> bool; fn check_can_remap(&self) -> Result<()>; fn load<'life0, 'async_trait>( &'life0 self, reader: Arc<dyn Reader>, offset: usize, length: usize, ) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait; fn row_ids(&self) -> Box<dyn Iterator<Item = &u64> + '_>; fn remap<'life0, 'life1, 'async_trait>( &'life0 mut self, mapping: &'life1 HashMap<u64, Option<u64>>, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait; fn metric_type(&self) -> DistanceType; fn ivf_model(&self) -> IvfModel; fn quantizer(&self) -> Quantizer; fn sub_index_type(&self) -> (SubIndexType, QuantizationType); // Provided methods fn load_partition<'life0, 'async_trait>( &'life0 self, reader: Arc<dyn Reader>, offset: usize, length: usize, _partition_id: usize, ) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait { ... 
} fn remap_to<'life0, 'async_trait>( self: Arc<Self>, _store: ObjectStore, _mapping: &'life0 HashMap<u64, Option<u64>>, _column: String, _index_dir: Path, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait { ... }
}
Expand description

Vector Index for (Approximate) Nearest Neighbor (ANN) Search. It is always an IVF index; any other index type without partitioning is treated as an IVF index with one partition.

Required Methods§

Source

fn search<'life0, 'life1, 'async_trait>( &'life0 self, query: &'life1 Query, pre_filter: Arc<dyn PreFilter>, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Search the vector for nearest neighbors.

It returns a RecordBatch with Schema of:

use arrow_schema::{Schema, Field, DataType};

Schema::new(vec![
  Field::new("_rowid", DataType::UInt64, true),
  Field::new("_distance", DataType::Float32, false),
]);

The pre_filter argument is used to filter out row ids that we know are not relevant to the query. For example, it removes deleted rows.

WARNINGS:

  • Only supports f32 now. Will add f64/f16 later.
Source

fn find_partitions(&self, query: &Query) -> Result<UInt32Array>

Source

fn search_in_partition<'life0, 'life1, 'async_trait>( &'life0 self, partition_id: usize, query: &'life1 Query, pre_filter: Arc<dyn PreFilter>, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Source

fn is_loadable(&self) -> bool

Whether the index is loadable by IVF, i.e. whether it can be a sub-index that is loaded on demand by IVF.

Source

fn use_residual(&self) -> bool

Use residual vector to search.

Source

fn check_can_remap(&self) -> Result<()>

If the index can be remapped, return Ok. Otherwise, return an error explaining why not.

Source

fn load<'life0, 'async_trait>( &'life0 self, reader: Arc<dyn Reader>, offset: usize, length: usize, ) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Load the index from the reader on-demand.

Source

fn row_ids(&self) -> Box<dyn Iterator<Item = &u64> + '_>

Return the IDs of rows in the index.

Source

fn remap<'life0, 'life1, 'async_trait>( &'life0 mut self, mapping: &'life1 HashMap<u64, Option<u64>>, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Remap the index according to mapping

Each item in mapping describes an old row id -> new row id pair. If old row id -> None then that row id has been deleted and can be removed from the index.

If an old row id is not in the mapping then it should be left alone.

Source

fn metric_type(&self) -> DistanceType

The metric type of this vector index.

Source

fn ivf_model(&self) -> IvfModel

Source

fn quantizer(&self) -> Quantizer

Source

fn sub_index_type(&self) -> (SubIndexType, QuantizationType)

The index type of this vector index, as a (sub-index type, quantization type) pair.

Provided Methods§

Source

fn load_partition<'life0, 'async_trait>( &'life0 self, reader: Arc<dyn Reader>, offset: usize, length: usize, _partition_id: usize, ) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Load the partition from the reader on-demand.

Source

fn remap_to<'life0, 'async_trait>( self: Arc<Self>, _store: ObjectStore, _mapping: &'life0 HashMap<u64, Option<u64>>, _column: String, _index_dir: Path, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Remap the index according to mapping

Write the remapped index to the index_dir. This is available only for v3 indices.

Implementors§

Source§

impl<Q: Quantization + Send + Sync + 'static> VectorIndex for HNSWIndex<Q>