1use crate::err::Error;
2use crate::fnc::util::math::vector::{
3 ChebyshevDistance, CosineDistance, EuclideanDistance, HammingDistance, JaccardSimilarity,
4 ManhattanDistance, MinkowskiDistance, PearsonSimilarity,
5};
6use crate::sql::ident::Ident;
7use crate::sql::scoring::Scoring;
8use crate::sql::statements::info::InfoStructure;
9use crate::sql::{Number, Value};
10use revision::revisioned;
11use serde::{Deserialize, Serialize};
12use std::fmt;
13use std::fmt::{Display, Formatter};
14
/// The kind of an index together with any kind-specific parameters.
///
/// The `Display` implementation in this file renders each variant as the
/// keyword sequence noted on it. The type is at revision 2, which introduced
/// the [`Index::Hnsw`] variant.
#[revisioned(revision = 2)]
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum Index {
    #[default]
    /// The default variant; it carries no parameters and renders as an empty string.
    Idx,
    /// Carries no parameters; renders as `UNIQUE`.
    Uniq,
    /// Renders with the `SEARCH ANALYZER` prefix followed by its [`SearchParams`].
    Search(SearchParams),
    /// Renders with the `MTREE` prefix followed by its [`MTreeParams`].
    MTree(MTreeParams),
    #[revision(start = 2)]
    /// Renders with the `HNSW` prefix followed by its [`HnswParams`].
    /// Only exists from revision 2 onwards.
    Hnsw(HnswParams),
}
33
/// Parameters carried by [`Index::Search`].
///
/// The four `*_cache` fields were introduced in revision 2 of this type.
#[revisioned(revision = 2)]
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub struct SearchParams {
    /// Identifier rendered directly after `SEARCH ANALYZER`.
    pub az: Ident,
    /// When `true`, the rendered definition ends with ` HIGHLIGHTS`.
    pub hl: bool,
    /// Scoring configuration, rendered right after the analyzer identifier.
    pub sc: Scoring,
    /// Rendered after `DOC_IDS_ORDER`.
    // NOTE(review): presumably the order of the backing tree structure — confirm.
    pub doc_ids_order: u32,
    /// Rendered after `DOC_LENGTHS_ORDER`.
    pub doc_lengths_order: u32,
    /// Rendered after `POSTINGS_ORDER`.
    pub postings_order: u32,
    /// Rendered after `TERMS_ORDER`.
    pub terms_order: u32,
    #[revision(start = 2)]
    /// Rendered after `DOC_IDS_CACHE`.
    // NOTE(review): the unit of the cache values is not visible here — confirm.
    pub doc_ids_cache: u32,
    #[revision(start = 2)]
    /// Rendered after `DOC_LENGTHS_CACHE`.
    pub doc_lengths_cache: u32,
    #[revision(start = 2)]
    /// Rendered after `POSTINGS_CACHE`.
    pub postings_cache: u32,
    #[revision(start = 2)]
    /// Rendered after `TERMS_CACHE`.
    pub terms_cache: u32,
}
55
/// Parameters carried by [`Index::MTree`].
///
/// Revision 2 replaced the `Distance1`-typed `_distance` field with the
/// `Distance`-typed `distance` field and added the two `*_cache` fields.
#[revisioned(revision = 2)]
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub struct MTreeParams {
    /// Rendered after `MTREE DIMENSION`.
    pub dimension: u16,
    // Revision-1-only field; `convert_old_distance` maps its value onto `distance`.
    #[revision(start = 1, end = 2, convert_fn = "convert_old_distance")]
    pub _distance: Distance1,
    #[revision(start = 2)]
    /// Distance function, rendered after `DIST`.
    pub distance: Distance,
    /// Rendered after `TYPE`.
    pub vector_type: VectorType,
    /// Rendered after `CAPACITY`.
    pub capacity: u16,
    /// Rendered after `DOC_IDS_ORDER`.
    pub doc_ids_order: u32,
    #[revision(start = 2)]
    /// Rendered after `DOC_IDS_CACHE`.
    pub doc_ids_cache: u32,
    #[revision(start = 2)]
    /// Rendered after `MTREE_CACHE`.
    pub mtree_cache: u32,
}
74
75impl MTreeParams {
76 pub fn new(
77 dimension: u16,
78 distance: Distance,
79 vector_type: VectorType,
80 capacity: u16,
81 doc_ids_order: u32,
82 doc_ids_cache: u32,
83 mtree_cache: u32,
84 ) -> Self {
85 Self {
86 dimension,
87 distance,
88 vector_type,
89 capacity,
90 doc_ids_order,
91 doc_ids_cache,
92 mtree_cache,
93 }
94 }
95
96 fn convert_old_distance(
97 &mut self,
98 _revision: u16,
99 d1: Distance1,
100 ) -> Result<(), revision::Error> {
101 self.distance = match d1 {
102 Distance1::Euclidean => Distance::Euclidean,
103 Distance1::Manhattan => Distance::Manhattan,
104 Distance1::Cosine => Distance::Cosine,
105 Distance1::Hamming => Distance::Hamming,
106 Distance1::Minkowski(n) => Distance::Minkowski(n),
107 };
108 Ok(())
109 }
110}
111
/// Revision-1 form of the distance setting.
///
/// It is the type of the retired `_distance` field on `MTreeParams`;
/// `MTreeParams::convert_old_distance` maps each variant onto the same-named
/// variant of `Distance`.
#[revisioned(revision = 1)]
#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum Distance1 {
    #[default]
    /// The default variant.
    Euclidean,
    Manhattan,
    Cosine,
    Hamming,
    /// Carries a `Number` that is passed through unchanged to `Distance::Minkowski`.
    Minkowski(Number),
}
124
/// Parameters carried by [`Index::Hnsw`].
///
/// The keywords mentioned on the fields are the ones used by the `Display`
/// implementation for `Index` in this file.
#[revisioned(revision = 1)]
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub struct HnswParams {
    /// Rendered after `HNSW DIMENSION`.
    pub dimension: u16,
    /// Distance function, rendered after `DIST`.
    pub distance: Distance,
    /// Rendered after `TYPE`.
    pub vector_type: VectorType,
    /// Rendered after `M`.
    pub m: u8,
    /// Rendered after `M0`.
    pub m0: u8,
    /// Rendered after `EFC`.
    pub ef_construction: u16,
    /// When `true`, ` EXTEND_CANDIDATES` is appended to the rendered definition.
    pub extend_candidates: bool,
    /// When `true`, ` KEEP_PRUNED_CONNECTIONS` is appended to the rendered definition.
    pub keep_pruned_connections: bool,
    /// Rendered after `LM`.
    pub ml: Number,
}
140
141impl HnswParams {
142 #[allow(clippy::too_many_arguments)]
143 pub fn new(
144 dimension: u16,
145 distance: Distance,
146 vector_type: VectorType,
147 m: u8,
148 m0: u8,
149 ml: Number,
150 ef_construction: u16,
151 extend_candidates: bool,
152 keep_pruned_connections: bool,
153 ) -> Self {
154 Self {
155 dimension,
156 distance,
157 vector_type,
158 m,
159 m0,
160 ef_construction,
161 ml,
162 extend_candidates,
163 keep_pruned_connections,
164 }
165 }
166}
167
/// Distance or similarity function selectable for the `MTree` and `Hnsw` index parameters.
///
/// `Distance::compute` dispatches each variant to the matching vector
/// routine, and `Display` renders the variant name in upper case.
#[revisioned(revision = 1)]
#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum Distance {
    Chebyshev,
    Cosine,
    #[default]
    /// The default variant.
    Euclidean,
    Hamming,
    /// Computed through `jaccard_similarity`.
    Jaccard,
    Manhattan,
    /// Carries the `Number` handed to `minkowski_distance` and rendered after `MINKOWSKI`.
    Minkowski(Number),
    /// Computed through `pearson_similarity`.
    Pearson,
}
183
184impl Distance {
185 pub(crate) fn compute(&self, v1: &Vec<Number>, v2: &Vec<Number>) -> Result<Number, Error> {
186 match self {
187 Self::Cosine => v1.cosine_distance(v2),
188 Self::Chebyshev => v1.chebyshev_distance(v2),
189 Self::Euclidean => v1.euclidean_distance(v2),
190 Self::Hamming => v1.hamming_distance(v2),
191 Self::Jaccard => v1.jaccard_similarity(v2),
192 Self::Manhattan => v1.manhattan_distance(v2),
193 Self::Minkowski(r) => v1.minkowski_distance(v2, r),
194 Self::Pearson => v1.pearson_similarity(v2),
195 }
196 }
197}
198
199impl Display for Distance {
200 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
201 match self {
202 Self::Chebyshev => f.write_str("CHEBYSHEV"),
203 Self::Cosine => f.write_str("COSINE"),
204 Self::Euclidean => f.write_str("EUCLIDEAN"),
205 Self::Hamming => f.write_str("HAMMING"),
206 Self::Jaccard => f.write_str("JACCARD"),
207 Self::Manhattan => f.write_str("MANHATTAN"),
208 Self::Minkowski(order) => write!(f, "MINKOWSKI {}", order),
209 Self::Pearson => f.write_str("PEARSON"),
210 }
211 }
212}
213
/// Vector type setting used by `MTreeParams` and `HnswParams`.
///
/// `Display` renders each variant as its own name (for example `F64`).
// NOTE(review): presumably the numeric element type used to store vectors — confirm.
#[revisioned(revision = 1)]
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum VectorType {
    #[default]
    /// The default variant.
    F64,
    F32,
    I64,
    I32,
    I16,
}
226
227impl Display for VectorType {
228 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
229 match self {
230 Self::F64 => f.write_str("F64"),
231 Self::F32 => f.write_str("F32"),
232 Self::I64 => f.write_str("I64"),
233 Self::I32 => f.write_str("I32"),
234 Self::I16 => f.write_str("I16"),
235 }
236 }
237}
238
239impl Display for Index {
240 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
241 match self {
242 Self::Idx => Ok(()),
243 Self::Uniq => f.write_str("UNIQUE"),
244 Self::Search(p) => {
245 write!(
246 f,
247 "SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {} DOC_IDS_CACHE {} DOC_LENGTHS_CACHE {} POSTINGS_CACHE {} TERMS_CACHE {}",
248 p.az,
249 p.sc,
250 p.doc_ids_order,
251 p.doc_lengths_order,
252 p.postings_order,
253 p.terms_order,
254 p.doc_ids_cache,
255 p.doc_lengths_cache,
256 p.postings_cache,
257 p.terms_cache
258 )?;
259 if p.hl {
260 f.write_str(" HIGHLIGHTS")?
261 }
262 Ok(())
263 }
264 Self::MTree(p) => {
265 write!(
266 f,
267 "MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {} DOC_IDS_CACHE {} MTREE_CACHE {}",
268 p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order, p.doc_ids_cache, p.mtree_cache
269 )
270 }
271 Self::Hnsw(p) => {
272 write!(
273 f,
274 "HNSW DIMENSION {} DIST {} TYPE {} EFC {} M {} M0 {} LM {}",
275 p.dimension, p.distance, p.vector_type, p.ef_construction, p.m, p.m0, p.ml
276 )?;
277 if p.extend_candidates {
278 f.write_str(" EXTEND_CANDIDATES")?
279 }
280 if p.keep_pruned_connections {
281 f.write_str(" KEEP_PRUNED_CONNECTIONS")?
282 }
283 Ok(())
284 }
285 }
286 }
287}
288
289impl InfoStructure for Index {
290 fn structure(self) -> Value {
291 self.to_string().into()
292 }
293}