surrealdb_core/idx/trees/hnsw/
index.rs

1use crate::err::Error;
2use crate::idx::planner::checker::HnswConditionChecker;
3use crate::idx::planner::iterators::KnnIteratorResult;
4use crate::idx::trees::hnsw::docs::{HnswDocs, VecDocs};
5use crate::idx::trees::hnsw::elements::HnswElements;
6use crate::idx::trees::hnsw::flavor::HnswFlavor;
7use crate::idx::trees::hnsw::{ElementId, HnswSearch};
8use crate::idx::trees::knn::{KnnResult, KnnResultBuilder};
9use crate::idx::trees::vector::{SharedVector, Vector};
10use crate::idx::IndexKeyBase;
11use crate::kvs::Transaction;
12use crate::sql::index::{HnswParams, VectorType};
13use crate::sql::{Id, Number, Value};
14#[cfg(debug_assertions)]
15use ahash::HashMap;
16use reblessive::tree::Stk;
17use std::collections::VecDeque;
18
19pub struct HnswIndex {
20	dim: usize,
21	vector_type: VectorType,
22	hnsw: HnswFlavor,
23	docs: HnswDocs,
24	vec_docs: VecDocs,
25}
26
27pub(super) struct HnswCheckedSearchContext<'a> {
28	elements: &'a HnswElements,
29	docs: &'a HnswDocs,
30	vec_docs: &'a VecDocs,
31	pt: &'a SharedVector,
32	ef: usize,
33}
34
35impl<'a> HnswCheckedSearchContext<'a> {
36	pub(super) fn new(
37		elements: &'a HnswElements,
38		docs: &'a HnswDocs,
39		vec_docs: &'a VecDocs,
40		pt: &'a SharedVector,
41		ef: usize,
42	) -> Self {
43		Self {
44			elements,
45			docs,
46			vec_docs,
47			pt,
48			ef,
49		}
50	}
51
52	pub(super) fn pt(&self) -> &SharedVector {
53		self.pt
54	}
55
56	pub(super) fn ef(&self) -> usize {
57		self.ef
58	}
59
60	pub(super) fn docs(&self) -> &HnswDocs {
61		self.docs
62	}
63
64	pub(super) fn vec_docs(&self) -> &VecDocs {
65		self.vec_docs
66	}
67
68	pub(super) fn elements(&self) -> &HnswElements {
69		self.elements
70	}
71}
72
73impl HnswIndex {
74	pub async fn new(
75		tx: &Transaction,
76		ikb: IndexKeyBase,
77		tb: String,
78		p: &HnswParams,
79	) -> Result<Self, Error> {
80		Ok(Self {
81			dim: p.dimension as usize,
82			vector_type: p.vector_type,
83			hnsw: HnswFlavor::new(ikb.clone(), p)?,
84			docs: HnswDocs::new(tx, tb, ikb.clone()).await?,
85			vec_docs: VecDocs::new(ikb),
86		})
87	}
88
89	pub async fn index_document(
90		&mut self,
91		tx: &Transaction,
92		id: &Id,
93		content: &[Value],
94	) -> Result<(), Error> {
95		// Ensure the layers are up-to-date
96		self.hnsw.check_state(tx).await?;
97		// Resolve the doc_id
98		let doc_id = self.docs.resolve(tx, id).await?;
99		// Index the values
100		for value in content.iter().filter(|v| v.is_some()) {
101			// Extract the vector
102			let vector = Vector::try_from_value(self.vector_type, self.dim, value)?;
103			vector.check_dimension(self.dim)?;
104			// Insert the vector
105			self.vec_docs.insert(tx, vector, doc_id, &mut self.hnsw).await?;
106		}
107		self.docs.finish(tx).await?;
108		Ok(())
109	}
110
111	pub(crate) async fn remove_document(
112		&mut self,
113		tx: &Transaction,
114		id: Id,
115		content: &[Value],
116	) -> Result<(), Error> {
117		if let Some(doc_id) = self.docs.remove(tx, id).await? {
118			// Ensure the layers are up-to-date
119			self.hnsw.check_state(tx).await?;
120			for v in content.iter().filter(|v| v.is_some()) {
121				// Extract the vector
122				let vector = Vector::try_from_value(self.vector_type, self.dim, v)?;
123				vector.check_dimension(self.dim)?;
124				// Remove the vector
125				self.vec_docs.remove(tx, &vector, doc_id, &mut self.hnsw).await?;
126			}
127			self.docs.finish(tx).await?;
128		}
129		Ok(())
130	}
131
132	// Ensure the layers are up-to-date
133	pub async fn check_state(&mut self, tx: &Transaction) -> Result<(), Error> {
134		self.hnsw.check_state(tx).await
135	}
136
137	pub async fn knn_search(
138		&self,
139		tx: &Transaction,
140		stk: &mut Stk,
141		pt: &[Number],
142		k: usize,
143		ef: usize,
144		mut chk: HnswConditionChecker<'_>,
145	) -> Result<VecDeque<KnnIteratorResult>, Error> {
146		// Extract the vector
147		let vector: SharedVector = Vector::try_from_vector(self.vector_type, pt)?.into();
148		vector.check_dimension(self.dim)?;
149		let search = HnswSearch::new(vector, k, ef);
150		// Do the search
151		let result = self.search(tx, stk, &search, &mut chk).await?;
152		let res = chk.convert_result(tx, &self.docs, result.docs).await?;
153		Ok(res)
154	}
155
156	pub(super) async fn search(
157		&self,
158		tx: &Transaction,
159		stk: &mut Stk,
160		search: &HnswSearch,
161		chk: &mut HnswConditionChecker<'_>,
162	) -> Result<KnnResult, Error> {
163		// Do the search
164		let neighbors = match chk {
165			HnswConditionChecker::Hnsw(_) => self.hnsw.knn_search(tx, search).await?,
166			HnswConditionChecker::HnswCondition(_) => {
167				self.hnsw
168					.knn_search_checked(tx, stk, search, &self.docs, &self.vec_docs, chk)
169					.await?
170			}
171		};
172		self.build_result(tx, neighbors, search.k, chk).await
173	}
174
175	async fn build_result(
176		&self,
177		tx: &Transaction,
178		neighbors: Vec<(f64, ElementId)>,
179		n: usize,
180		chk: &mut HnswConditionChecker<'_>,
181	) -> Result<KnnResult, Error> {
182		let mut builder = KnnResultBuilder::new(n);
183		for (e_dist, e_id) in neighbors {
184			if builder.check_add(e_dist) {
185				if let Some(v) = self.hnsw.get_vector(tx, &e_id).await? {
186					if let Some(docs) = self.vec_docs.get_docs(tx, &v).await? {
187						let evicted_docs = builder.add(e_dist, docs);
188						chk.expires(evicted_docs);
189					}
190				}
191			}
192		}
193		Ok(builder.build(
194			#[cfg(debug_assertions)]
195			HashMap::default(),
196		))
197	}
198
199	#[cfg(test)]
200	pub(super) fn check_hnsw_properties(&self, expected_count: usize) {
201		self.hnsw.check_hnsw_properties(expected_count)
202	}
203}