surrealdb_core/sql/
index.rs

1use crate::err::Error;
2use crate::fnc::util::math::vector::{
3	ChebyshevDistance, CosineDistance, EuclideanDistance, HammingDistance, JaccardSimilarity,
4	ManhattanDistance, MinkowskiDistance, PearsonSimilarity,
5};
6use crate::sql::ident::Ident;
7use crate::sql::scoring::Scoring;
8use crate::sql::statements::info::InfoStructure;
9use crate::sql::{Number, Value};
10use revision::revisioned;
11use serde::{Deserialize, Serialize};
12use std::fmt;
13use std::fmt::{Display, Formatter};
14
15#[revisioned(revision = 2)]
16#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
17#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
18#[non_exhaustive]
19pub enum Index {
20	/// (Basic) non unique
21	#[default]
22	Idx,
23	/// Unique index
24	Uniq,
25	/// Index with Full-Text search capabilities
26	Search(SearchParams),
27	/// M-Tree index for distance based metrics
28	MTree(MTreeParams),
29	/// HNSW index for distance based metrics
30	#[revision(start = 2)]
31	Hnsw(HnswParams),
32}
33
34#[revisioned(revision = 2)]
35#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
36#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
37#[non_exhaustive]
38pub struct SearchParams {
39	pub az: Ident,
40	pub hl: bool,
41	pub sc: Scoring,
42	pub doc_ids_order: u32,
43	pub doc_lengths_order: u32,
44	pub postings_order: u32,
45	pub terms_order: u32,
46	#[revision(start = 2)]
47	pub doc_ids_cache: u32,
48	#[revision(start = 2)]
49	pub doc_lengths_cache: u32,
50	#[revision(start = 2)]
51	pub postings_cache: u32,
52	#[revision(start = 2)]
53	pub terms_cache: u32,
54}
55
56#[revisioned(revision = 2)]
57#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
58#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
59#[non_exhaustive]
60pub struct MTreeParams {
61	pub dimension: u16,
62	#[revision(start = 1, end = 2, convert_fn = "convert_old_distance")]
63	pub _distance: Distance1, // TODO remove once 1.0 && 1.1 are EOL
64	#[revision(start = 2)]
65	pub distance: Distance,
66	pub vector_type: VectorType,
67	pub capacity: u16,
68	pub doc_ids_order: u32,
69	#[revision(start = 2)]
70	pub doc_ids_cache: u32,
71	#[revision(start = 2)]
72	pub mtree_cache: u32,
73}
74
75impl MTreeParams {
76	pub fn new(
77		dimension: u16,
78		distance: Distance,
79		vector_type: VectorType,
80		capacity: u16,
81		doc_ids_order: u32,
82		doc_ids_cache: u32,
83		mtree_cache: u32,
84	) -> Self {
85		Self {
86			dimension,
87			distance,
88			vector_type,
89			capacity,
90			doc_ids_order,
91			doc_ids_cache,
92			mtree_cache,
93		}
94	}
95
96	fn convert_old_distance(
97		&mut self,
98		_revision: u16,
99		d1: Distance1,
100	) -> Result<(), revision::Error> {
101		self.distance = match d1 {
102			Distance1::Euclidean => Distance::Euclidean,
103			Distance1::Manhattan => Distance::Manhattan,
104			Distance1::Cosine => Distance::Cosine,
105			Distance1::Hamming => Distance::Hamming,
106			Distance1::Minkowski(n) => Distance::Minkowski(n),
107		};
108		Ok(())
109	}
110}
111
112#[revisioned(revision = 1)]
113#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
114#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
115#[non_exhaustive]
116pub enum Distance1 {
117	#[default]
118	Euclidean,
119	Manhattan,
120	Cosine,
121	Hamming,
122	Minkowski(Number),
123}
124
125#[revisioned(revision = 1)]
126#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
127#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
128#[non_exhaustive]
129pub struct HnswParams {
130	pub dimension: u16,
131	pub distance: Distance,
132	pub vector_type: VectorType,
133	pub m: u8,
134	pub m0: u8,
135	pub ef_construction: u16,
136	pub extend_candidates: bool,
137	pub keep_pruned_connections: bool,
138	pub ml: Number,
139}
140
141impl HnswParams {
142	#[allow(clippy::too_many_arguments)]
143	pub fn new(
144		dimension: u16,
145		distance: Distance,
146		vector_type: VectorType,
147		m: u8,
148		m0: u8,
149		ml: Number,
150		ef_construction: u16,
151		extend_candidates: bool,
152		keep_pruned_connections: bool,
153	) -> Self {
154		Self {
155			dimension,
156			distance,
157			vector_type,
158			m,
159			m0,
160			ef_construction,
161			ml,
162			extend_candidates,
163			keep_pruned_connections,
164		}
165	}
166}
167
168#[revisioned(revision = 1)]
169#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
170#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
171#[non_exhaustive]
172pub enum Distance {
173	Chebyshev,
174	Cosine,
175	#[default]
176	Euclidean,
177	Hamming,
178	Jaccard,
179	Manhattan,
180	Minkowski(Number),
181	Pearson,
182}
183
184impl Distance {
185	pub(crate) fn compute(&self, v1: &Vec<Number>, v2: &Vec<Number>) -> Result<Number, Error> {
186		match self {
187			Self::Cosine => v1.cosine_distance(v2),
188			Self::Chebyshev => v1.chebyshev_distance(v2),
189			Self::Euclidean => v1.euclidean_distance(v2),
190			Self::Hamming => v1.hamming_distance(v2),
191			Self::Jaccard => v1.jaccard_similarity(v2),
192			Self::Manhattan => v1.manhattan_distance(v2),
193			Self::Minkowski(r) => v1.minkowski_distance(v2, r),
194			Self::Pearson => v1.pearson_similarity(v2),
195		}
196	}
197}
198
199impl Display for Distance {
200	fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
201		match self {
202			Self::Chebyshev => f.write_str("CHEBYSHEV"),
203			Self::Cosine => f.write_str("COSINE"),
204			Self::Euclidean => f.write_str("EUCLIDEAN"),
205			Self::Hamming => f.write_str("HAMMING"),
206			Self::Jaccard => f.write_str("JACCARD"),
207			Self::Manhattan => f.write_str("MANHATTAN"),
208			Self::Minkowski(order) => write!(f, "MINKOWSKI {}", order),
209			Self::Pearson => f.write_str("PEARSON"),
210		}
211	}
212}
213
214#[revisioned(revision = 1)]
215#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
216#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
217#[non_exhaustive]
218pub enum VectorType {
219	#[default]
220	F64,
221	F32,
222	I64,
223	I32,
224	I16,
225}
226
227impl Display for VectorType {
228	fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
229		match self {
230			Self::F64 => f.write_str("F64"),
231			Self::F32 => f.write_str("F32"),
232			Self::I64 => f.write_str("I64"),
233			Self::I32 => f.write_str("I32"),
234			Self::I16 => f.write_str("I16"),
235		}
236	}
237}
238
239impl Display for Index {
240	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
241		match self {
242			Self::Idx => Ok(()),
243			Self::Uniq => f.write_str("UNIQUE"),
244			Self::Search(p) => {
245				write!(
246					f,
247					"SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {} DOC_IDS_CACHE {} DOC_LENGTHS_CACHE {} POSTINGS_CACHE {} TERMS_CACHE {}",
248					p.az,
249					p.sc,
250					p.doc_ids_order,
251					p.doc_lengths_order,
252					p.postings_order,
253					p.terms_order,
254					p.doc_ids_cache,
255					p.doc_lengths_cache,
256					p.postings_cache,
257					p.terms_cache
258				)?;
259				if p.hl {
260					f.write_str(" HIGHLIGHTS")?
261				}
262				Ok(())
263			}
264			Self::MTree(p) => {
265				write!(
266					f,
267					"MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {} DOC_IDS_CACHE {} MTREE_CACHE {}",
268					p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order, p.doc_ids_cache, p.mtree_cache
269				)
270			}
271			Self::Hnsw(p) => {
272				write!(
273					f,
274					"HNSW DIMENSION {} DIST {} TYPE {} EFC {} M {} M0 {} LM {}",
275					p.dimension, p.distance, p.vector_type, p.ef_construction, p.m, p.m0, p.ml
276				)?;
277				if p.extend_candidates {
278					f.write_str(" EXTEND_CANDIDATES")?
279				}
280				if p.keep_pruned_connections {
281					f.write_str(" KEEP_PRUNED_CONNECTIONS")?
282				}
283				Ok(())
284			}
285		}
286	}
287}
288
289impl InfoStructure for Index {
290	fn structure(self) -> Value {
291		self.to_string().into()
292	}
293}