surrealdb_core/idx/trees/hnsw/
index.rs1use crate::err::Error;
2use crate::idx::planner::checker::HnswConditionChecker;
3use crate::idx::planner::iterators::KnnIteratorResult;
4use crate::idx::trees::hnsw::docs::{HnswDocs, VecDocs};
5use crate::idx::trees::hnsw::elements::HnswElements;
6use crate::idx::trees::hnsw::flavor::HnswFlavor;
7use crate::idx::trees::hnsw::{ElementId, HnswSearch};
8use crate::idx::trees::knn::{KnnResult, KnnResultBuilder};
9use crate::idx::trees::vector::{SharedVector, Vector};
10use crate::idx::IndexKeyBase;
11use crate::kvs::Transaction;
12use crate::sql::index::{HnswParams, VectorType};
13use crate::sql::{Id, Number, Value};
14#[cfg(debug_assertions)]
15use ahash::HashMap;
16use reblessive::tree::Stk;
17use std::collections::VecDeque;
18
19pub struct HnswIndex {
20 dim: usize,
21 vector_type: VectorType,
22 hnsw: HnswFlavor,
23 docs: HnswDocs,
24 vec_docs: VecDocs,
25}
26
27pub(super) struct HnswCheckedSearchContext<'a> {
28 elements: &'a HnswElements,
29 docs: &'a HnswDocs,
30 vec_docs: &'a VecDocs,
31 pt: &'a SharedVector,
32 ef: usize,
33}
34
35impl<'a> HnswCheckedSearchContext<'a> {
36 pub(super) fn new(
37 elements: &'a HnswElements,
38 docs: &'a HnswDocs,
39 vec_docs: &'a VecDocs,
40 pt: &'a SharedVector,
41 ef: usize,
42 ) -> Self {
43 Self {
44 elements,
45 docs,
46 vec_docs,
47 pt,
48 ef,
49 }
50 }
51
52 pub(super) fn pt(&self) -> &SharedVector {
53 self.pt
54 }
55
56 pub(super) fn ef(&self) -> usize {
57 self.ef
58 }
59
60 pub(super) fn docs(&self) -> &HnswDocs {
61 self.docs
62 }
63
64 pub(super) fn vec_docs(&self) -> &VecDocs {
65 self.vec_docs
66 }
67
68 pub(super) fn elements(&self) -> &HnswElements {
69 self.elements
70 }
71}
72
73impl HnswIndex {
74 pub async fn new(
75 tx: &Transaction,
76 ikb: IndexKeyBase,
77 tb: String,
78 p: &HnswParams,
79 ) -> Result<Self, Error> {
80 Ok(Self {
81 dim: p.dimension as usize,
82 vector_type: p.vector_type,
83 hnsw: HnswFlavor::new(ikb.clone(), p)?,
84 docs: HnswDocs::new(tx, tb, ikb.clone()).await?,
85 vec_docs: VecDocs::new(ikb),
86 })
87 }
88
89 pub async fn index_document(
90 &mut self,
91 tx: &Transaction,
92 id: &Id,
93 content: &[Value],
94 ) -> Result<(), Error> {
95 self.hnsw.check_state(tx).await?;
97 let doc_id = self.docs.resolve(tx, id).await?;
99 for value in content.iter().filter(|v| v.is_some()) {
101 let vector = Vector::try_from_value(self.vector_type, self.dim, value)?;
103 vector.check_dimension(self.dim)?;
104 self.vec_docs.insert(tx, vector, doc_id, &mut self.hnsw).await?;
106 }
107 self.docs.finish(tx).await?;
108 Ok(())
109 }
110
111 pub(crate) async fn remove_document(
112 &mut self,
113 tx: &Transaction,
114 id: Id,
115 content: &[Value],
116 ) -> Result<(), Error> {
117 if let Some(doc_id) = self.docs.remove(tx, id).await? {
118 self.hnsw.check_state(tx).await?;
120 for v in content.iter().filter(|v| v.is_some()) {
121 let vector = Vector::try_from_value(self.vector_type, self.dim, v)?;
123 vector.check_dimension(self.dim)?;
124 self.vec_docs.remove(tx, &vector, doc_id, &mut self.hnsw).await?;
126 }
127 self.docs.finish(tx).await?;
128 }
129 Ok(())
130 }
131
132 pub async fn check_state(&mut self, tx: &Transaction) -> Result<(), Error> {
134 self.hnsw.check_state(tx).await
135 }
136
137 pub async fn knn_search(
138 &self,
139 tx: &Transaction,
140 stk: &mut Stk,
141 pt: &[Number],
142 k: usize,
143 ef: usize,
144 mut chk: HnswConditionChecker<'_>,
145 ) -> Result<VecDeque<KnnIteratorResult>, Error> {
146 let vector: SharedVector = Vector::try_from_vector(self.vector_type, pt)?.into();
148 vector.check_dimension(self.dim)?;
149 let search = HnswSearch::new(vector, k, ef);
150 let result = self.search(tx, stk, &search, &mut chk).await?;
152 let res = chk.convert_result(tx, &self.docs, result.docs).await?;
153 Ok(res)
154 }
155
156 pub(super) async fn search(
157 &self,
158 tx: &Transaction,
159 stk: &mut Stk,
160 search: &HnswSearch,
161 chk: &mut HnswConditionChecker<'_>,
162 ) -> Result<KnnResult, Error> {
163 let neighbors = match chk {
165 HnswConditionChecker::Hnsw(_) => self.hnsw.knn_search(tx, search).await?,
166 HnswConditionChecker::HnswCondition(_) => {
167 self.hnsw
168 .knn_search_checked(tx, stk, search, &self.docs, &self.vec_docs, chk)
169 .await?
170 }
171 };
172 self.build_result(tx, neighbors, search.k, chk).await
173 }
174
175 async fn build_result(
176 &self,
177 tx: &Transaction,
178 neighbors: Vec<(f64, ElementId)>,
179 n: usize,
180 chk: &mut HnswConditionChecker<'_>,
181 ) -> Result<KnnResult, Error> {
182 let mut builder = KnnResultBuilder::new(n);
183 for (e_dist, e_id) in neighbors {
184 if builder.check_add(e_dist) {
185 if let Some(v) = self.hnsw.get_vector(tx, &e_id).await? {
186 if let Some(docs) = self.vec_docs.get_docs(tx, &v).await? {
187 let evicted_docs = builder.add(e_dist, docs);
188 chk.expires(evicted_docs);
189 }
190 }
191 }
192 }
193 Ok(builder.build(
194 #[cfg(debug_assertions)]
195 HashMap::default(),
196 ))
197 }
198
199 #[cfg(test)]
200 pub(super) fn check_hnsw_properties(&self, expected_count: usize) {
201 self.hnsw.check_hnsw_properties(expected_count)
202 }
203}