surrealdb_core/idx/trees/
bkeys.rs

1use crate::err::Error;
2use crate::idx::trees::btree::Payload;
3use crate::kvs::Key;
4use fst::{IntoStreamer, Map, MapBuilder, Streamer};
5use radix_trie::{SubTrie, Trie, TrieCommon};
6use serde::ser;
7use std::collections::VecDeque;
8use std::fmt::{Debug, Display, Formatter};
9use std::io;
10use std::io::Cursor;
11
12pub trait BKeys: Default + Debug + Display + Sized {
13	fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error>;
14	fn len(&self) -> u32;
15	fn is_empty(&self) -> bool;
16	fn get(&self, key: &Key) -> Option<Payload>;
17	// It is okay to return a owned Vec rather than an iterator,
18	// because BKeys are intended to be stored as Node in the BTree.
19	// The size of the Node should be small, therefore one instance of
20	// BKeys would never be store a large volume of keys.
21	fn collect_with_prefix(&self, prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error>;
22	fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload>;
23	fn append(&mut self, keys: Self);
24	fn remove(&mut self, key: &Key) -> Option<Payload>;
25	fn split_keys(self) -> Result<SplitKeys<Self>, Error>;
26	fn get_key(&self, idx: usize) -> Option<Key>;
27	fn get_child_idx(&self, searched_key: &Key) -> usize;
28	fn get_first_key(&self) -> Option<(Key, Payload)>;
29	fn get_last_key(&self) -> Option<(Key, Payload)>;
30	fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error>;
31	fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error>;
32	fn compile(&mut self) {}
33}
34
35#[non_exhaustive]
36pub struct SplitKeys<BK>
37where
38	BK: BKeys,
39{
40	pub(in crate::idx) left: BK,
41	pub(in crate::idx) right: BK,
42	pub(in crate::idx) median_idx: usize,
43	pub(in crate::idx) median_key: Key,
44	pub(in crate::idx) median_payload: Payload,
45}
46
47#[derive(Debug, Clone)]
48#[non_exhaustive]
49pub struct FstKeys {
50	i: Inner,
51}
52
53#[derive(Debug, Clone)]
54enum Inner {
55	Map(Map<Vec<u8>>),
56	Trie(TrieKeys),
57}
58
59impl FstKeys {
60	fn edit(&mut self) {
61		if let Inner::Map(m) = &self.i {
62			let t: TrieKeys = m.into();
63			self.i = Inner::Trie(t);
64		}
65	}
66}
67
68impl Default for FstKeys {
69	fn default() -> Self {
70		Self {
71			i: Inner::Trie(TrieKeys::default()),
72		}
73	}
74}
75
76impl BKeys for FstKeys {
77	fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error> {
78		let i = Inner::Trie(TrieKeys::with_key_val(key, payload)?);
79		Ok(Self {
80			i,
81		})
82	}
83
84	fn len(&self) -> u32 {
85		match &self.i {
86			Inner::Map(m) => m.len() as u32,
87			Inner::Trie(t) => t.len(),
88		}
89	}
90
91	fn is_empty(&self) -> bool {
92		match &self.i {
93			Inner::Map(m) => m.is_empty(),
94			Inner::Trie(t) => t.is_empty(),
95		}
96	}
97
98	fn get(&self, key: &Key) -> Option<Payload> {
99		match &self.i {
100			Inner::Map(m) => m.get(key),
101			Inner::Trie(t) => t.get(key),
102		}
103	}
104
105	fn collect_with_prefix(&self, _prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
106		Err(fail!("BKeys/FSTKeys::collect_with_prefix"))
107	}
108
109	fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
110		self.edit();
111		if let Inner::Trie(t) = &mut self.i {
112			return t.insert(key, payload);
113		}
114		unreachable!()
115	}
116
117	fn append(&mut self, keys: Self) {
118		if keys.is_empty() {
119			return;
120		}
121		self.edit();
122		match keys.i {
123			Inner::Map(other) => {
124				let mut s = other.stream();
125				while let Some((key, payload)) = s.next() {
126					self.insert(key.to_vec(), payload);
127				}
128			}
129			Inner::Trie(other) => {
130				if let Inner::Trie(t) = &mut self.i {
131					t.append(other)
132				}
133			}
134		}
135	}
136
137	fn remove(&mut self, key: &Key) -> Option<Payload> {
138		self.edit();
139		if let Inner::Trie(t) = &mut self.i {
140			t.remove(key)
141		} else {
142			None
143		}
144	}
145
146	fn split_keys(mut self) -> Result<SplitKeys<Self>, Error> {
147		self.edit();
148		if let Inner::Trie(t) = self.i {
149			let s = t.split_keys()?;
150			Ok(SplitKeys {
151				left: Self {
152					i: Inner::Trie(s.left),
153				},
154				right: Self {
155					i: Inner::Trie(s.right),
156				},
157				median_idx: s.median_idx,
158				median_key: s.median_key,
159				median_payload: s.median_payload,
160			})
161		} else {
162			Err(fail!("BKeys/FSTKeys::split_keys"))
163		}
164	}
165
166	fn get_key(&self, mut idx: usize) -> Option<Key> {
167		match &self.i {
168			Inner::Map(m) => {
169				let mut s = m.keys().into_stream();
170				while let Some(key) = s.next() {
171					if idx == 0 {
172						return Some(key.to_vec());
173					}
174					idx -= 1;
175				}
176				None
177			}
178			Inner::Trie(t) => t.get_key(idx),
179		}
180	}
181
182	fn get_child_idx(&self, searched_key: &Key) -> usize {
183		match &self.i {
184			Inner::Map(m) => {
185				let searched_key = searched_key.as_slice();
186				let mut s = m.keys().into_stream();
187				let mut child_idx = 0;
188				while let Some(key) = s.next() {
189					if searched_key.le(key) {
190						break;
191					}
192					child_idx += 1;
193				}
194				child_idx
195			}
196			Inner::Trie(t) => t.get_child_idx(searched_key),
197		}
198	}
199
200	fn get_first_key(&self) -> Option<(Key, Payload)> {
201		match &self.i {
202			Inner::Map(m) => m.stream().next().map(|(k, p)| (k.to_vec(), p)),
203			Inner::Trie(t) => t.get_first_key(),
204		}
205	}
206
207	fn get_last_key(&self) -> Option<(Key, Payload)> {
208		match &self.i {
209			Inner::Map(m) => {
210				let mut last = None;
211				let mut s = m.stream();
212				while let Some((k, p)) = s.next() {
213					last = Some((k.to_vec(), p));
214				}
215				last
216			}
217			Inner::Trie(t) => t.get_last_key(),
218		}
219	}
220
221	fn compile(&mut self) {
222		if let Inner::Trie(t) = &self.i {
223			let mut builder = MapBuilder::memory();
224			for (key, payload) in t.keys.iter() {
225				builder.insert(key, *payload).unwrap();
226			}
227			let m = Map::new(builder.into_inner().unwrap()).unwrap();
228			self.i = Inner::Map(m);
229		}
230	}
231
232	fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error> {
233		let bytes: Vec<u8> = bincode::deserialize_from(c)?;
234		Ok(Self::try_from(bytes)?)
235	}
236
237	fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error> {
238		if let Inner::Map(m) = &self.i {
239			let b = m.as_fst().as_bytes();
240			bincode::serialize_into(c, b)?;
241			Ok(())
242		} else {
243			Err(Error::Bincode(ser::Error::custom(
244				"bkeys.to_map() should be called prior serializing",
245			)))
246		}
247	}
248}
249
250impl TryFrom<MapBuilder<Vec<u8>>> for FstKeys {
251	type Error = fst::Error;
252	fn try_from(builder: MapBuilder<Vec<u8>>) -> Result<Self, Self::Error> {
253		Self::try_from(builder.into_inner()?)
254	}
255}
256
257impl TryFrom<Vec<u8>> for FstKeys {
258	type Error = fst::Error;
259	fn try_from(bytes: Vec<u8>) -> Result<Self, Self::Error> {
260		let map = Map::new(bytes)?;
261		Ok(Self {
262			i: Inner::Map(map),
263		})
264	}
265}
266
267impl Display for FstKeys {
268	fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
269		match &self.i {
270			Inner::Map(m) => {
271				let mut s = m.stream();
272				let mut start = true;
273				while let Some((key, val)) = s.next() {
274					let key = String::from_utf8_lossy(key);
275					if start {
276						start = false;
277					} else {
278						f.write_str(", ")?;
279					}
280					write!(f, "{}=>{}", key, val)?;
281				}
282				Ok(())
283			}
284			Inner::Trie(t) => write!(f, "{}", t),
285		}
286	}
287}
288
289#[derive(Default, Debug, Clone)]
290#[non_exhaustive]
291pub struct TrieKeys {
292	keys: Trie<Key, Payload>,
293}
294
295impl Display for TrieKeys {
296	fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
297		let mut start = true;
298		for (key, val) in self.keys.iter() {
299			let key = String::from_utf8_lossy(key);
300			if start {
301				start = false;
302			} else {
303				f.write_str(", ")?;
304			}
305			write!(f, "{}=>{}", key, val)?;
306		}
307		Ok(())
308	}
309}
310
311impl From<&Map<Vec<u8>>> for TrieKeys {
312	fn from(m: &Map<Vec<u8>>) -> Self {
313		let mut keys = TrieKeys::default();
314		let mut s = m.stream();
315		while let Some((key, payload)) = s.next() {
316			keys.insert(key.to_vec(), payload);
317		}
318		keys
319	}
320}
321
322impl BKeys for TrieKeys {
323	fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error> {
324		let mut trie_keys = Self {
325			keys: Trie::default(),
326		};
327		trie_keys.insert(key, payload);
328		Ok(trie_keys)
329	}
330
331	fn len(&self) -> u32 {
332		self.keys.len() as u32
333	}
334
335	fn is_empty(&self) -> bool {
336		self.keys.is_empty()
337	}
338
339	fn get(&self, key: &Key) -> Option<Payload> {
340		self.keys.get(key).copied()
341	}
342
343	fn collect_with_prefix(&self, prefix: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
344		let mut i = KeysIterator::new(prefix, &self.keys);
345		let mut r = VecDeque::new();
346		while let Some((k, p)) = i.next() {
347			r.push_back((k.clone(), p))
348		}
349		Ok(r)
350	}
351
352	fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
353		self.keys.insert(key, payload)
354	}
355
356	fn append(&mut self, keys: Self) {
357		for (k, p) in keys.keys.iter() {
358			self.insert(k.clone(), *p);
359		}
360	}
361
362	fn remove(&mut self, key: &Key) -> Option<Payload> {
363		self.keys.remove(key)
364	}
365
366	fn split_keys(self) -> Result<SplitKeys<Self>, Error> {
367		let median_idx = self.keys.len() / 2;
368		let mut s = self.keys.iter();
369		let mut left = Trie::default();
370		let mut n = median_idx;
371		while n > 0 {
372			if let Some((key, payload)) = s.next() {
373				left.insert(key.clone(), *payload);
374			}
375			n -= 1;
376		}
377		let (median_key, median_payload) = if let Some((k, v)) = s.next() {
378			(k.clone(), *v)
379		} else {
380			return Err(fail!("BKeys/TrieKeys::split_keys"));
381		};
382		let mut right = Trie::default();
383		for (key, val) in s {
384			right.insert(key.clone(), *val);
385		}
386		Ok(SplitKeys {
387			left: Self::from(left),
388			right: Self::from(right),
389			median_idx,
390			median_key,
391			median_payload,
392		})
393	}
394
395	fn get_key(&self, mut idx: usize) -> Option<Key> {
396		for key in self.keys.keys() {
397			if idx == 0 {
398				return Some(key.clone());
399			}
400			idx -= 1;
401		}
402		None
403	}
404
405	fn get_child_idx(&self, searched_key: &Key) -> usize {
406		let mut child_idx = 0;
407		for key in self.keys.keys() {
408			if searched_key.le(key) {
409				break;
410			}
411			child_idx += 1;
412		}
413		child_idx
414	}
415
416	fn get_first_key(&self) -> Option<(Key, Payload)> {
417		self.keys.iter().next().map(|(k, p)| (k.clone(), *p))
418	}
419
420	fn get_last_key(&self) -> Option<(Key, Payload)> {
421		self.keys.iter().last().map(|(k, p)| (k.clone(), *p))
422	}
423
424	fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error> {
425		let compressed: Vec<u8> = bincode::deserialize_from(c)?;
426		let mut uncompressed: Vec<u8> = Vec::new();
427		{
428			let mut rdr = snap::read::FrameDecoder::new(compressed.as_slice());
429			io::copy(&mut rdr, &mut uncompressed)?;
430		}
431		let keys: Trie<Vec<u8>, u64> = bincode::deserialize_from(uncompressed.as_slice())?;
432		Ok(Self {
433			keys,
434		})
435	}
436
437	fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error> {
438		let mut uncompressed: Vec<u8> = Vec::new();
439		bincode::serialize_into(&mut uncompressed, &self.keys)?;
440		let mut compressed: Vec<u8> = Vec::new();
441		{
442			let mut wtr = snap::write::FrameEncoder::new(&mut compressed);
443			io::copy(&mut uncompressed.as_slice(), &mut wtr)?;
444		}
445		bincode::serialize_into(c, &compressed)?;
446		Ok(())
447	}
448}
449
450impl From<Trie<Key, Payload>> for TrieKeys {
451	fn from(keys: Trie<Key, Payload>) -> Self {
452		Self {
453			keys,
454		}
455	}
456}
457
458struct KeysIterator<'a> {
459	prefix: &'a Key,
460	node_queue: VecDeque<SubTrie<'a, Key, Payload>>,
461	current_node: Option<SubTrie<'a, Key, Payload>>,
462}
463
464impl<'a> KeysIterator<'a> {
465	fn new(prefix: &'a Key, keys: &'a Trie<Key, Payload>) -> Self {
466		let start_node = keys.get_raw_descendant(prefix);
467		Self {
468			prefix,
469			node_queue: VecDeque::new(),
470			current_node: start_node,
471		}
472	}
473
474	fn next(&mut self) -> Option<(&Key, Payload)> {
475		loop {
476			if let Some(node) = self.current_node.take() {
477				for children in node.children() {
478					self.node_queue.push_front(children);
479				}
480				if let Some(value) = node.value() {
481					if let Some(node_key) = node.key() {
482						if node_key.starts_with(self.prefix) {
483							return Some((node_key, *value));
484						}
485					}
486				}
487			} else {
488				self.current_node = self.node_queue.pop_front();
489				self.current_node.as_ref()?;
490			}
491		}
492	}
493}
494
495#[cfg(test)]
496mod tests {
497	use crate::idx::trees::bkeys::{BKeys, FstKeys, TrieKeys};
498	use crate::idx::trees::btree::Payload;
499	use crate::kvs::Key;
500	use std::collections::{HashMap, HashSet, VecDeque};
501	use std::io::Cursor;
502
503	fn test_keys_serde<BK: BKeys>(expected_size: usize) {
504		let key: Key = "a".as_bytes().into();
505		let mut keys = BK::with_key_val(key.clone(), 130).unwrap();
506		keys.compile();
507		// Serialize
508		let mut cur: Cursor<Vec<u8>> = Cursor::new(Vec::new());
509		keys.write_to(&mut cur).unwrap();
510		let buf = cur.into_inner();
511		assert_eq!(buf.len(), expected_size);
512		// Deserialize
513		let mut cur = Cursor::new(buf);
514		let keys = BK::read_from(&mut cur).unwrap();
515		assert_eq!(keys.get(&key), Some(130));
516	}
517
518	#[test]
519	fn test_fst_keys_serde() {
520		test_keys_serde::<FstKeys>(48);
521	}
522
523	#[test]
524	fn test_trie_keys_serde() {
525		test_keys_serde::<TrieKeys>(44);
526	}
527
528	fn test_keys_additions<BK: BKeys>(mut keys: BK) {
529		let terms = [
530			"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "the", "fast",
531			"fox", "jumped", "over", "the", "lazy", "dog",
532		];
533		let mut i = 1;
534		assert_eq!(keys.len(), 0);
535		let mut term_set = HashSet::new();
536		for term in terms {
537			term_set.insert(term.to_string());
538			let key: Key = term.into();
539			keys.insert(key.clone(), i);
540			keys.compile();
541			assert_eq!(keys.get(&key), Some(i));
542			assert_eq!(keys.len() as usize, term_set.len());
543			i += 1;
544		}
545	}
546
547	#[test]
548	fn test_fst_keys_additions() {
549		test_keys_additions(FstKeys::default())
550	}
551
552	#[test]
553	fn test_trie_keys_additions() {
554		test_keys_additions(TrieKeys::default())
555	}
556
557	fn test_keys_deletions<BK: BKeys>(mut keys: BK) {
558		assert_eq!(keys.remove(&"dummy".into()), None);
559		assert_eq!(keys.len(), 0);
560		keys.insert("foo".into(), 1);
561		keys.insert("bar".into(), 2);
562		assert_eq!(keys.len(), 2);
563		assert_eq!(keys.remove(&"bar".into()), Some(2));
564		assert_eq!(keys.len(), 1);
565		assert_eq!(keys.remove(&"bar".into()), None);
566		assert_eq!(keys.len(), 1);
567		assert_eq!(keys.remove(&"foo".into()), Some(1));
568		assert_eq!(keys.len(), 0);
569		assert_eq!(keys.remove(&"foo".into()), None);
570		assert_eq!(keys.len(), 0);
571	}
572
573	#[test]
574	fn test_fst_keys_deletions() {
575		test_keys_deletions(FstKeys::default())
576	}
577
578	#[test]
579	fn test_trie_keys_deletions() {
580		test_keys_deletions(TrieKeys::default())
581	}
582
583	fn check_keys(r: VecDeque<(Key, Payload)>, e: Vec<(Key, Payload)>) {
584		let mut map = HashMap::new();
585		for (k, p) in r {
586			map.insert(k, p);
587		}
588		assert_eq!(map.len(), e.len());
589		for (k, p) in e {
590			assert_eq!(map.get(&k), Some(&p));
591		}
592	}
593
594	#[tokio::test]
595	async fn test_tries_keys_collect_with_prefix() {
596		let mut keys = TrieKeys::default();
597		keys.insert("apple".into(), 1);
598		keys.insert("applicant".into(), 2);
599		keys.insert("application".into(), 3);
600		keys.insert("applicative".into(), 4);
601		keys.insert("banana".into(), 5);
602		keys.insert("blueberry".into(), 6);
603		keys.insert("the".into(), 7);
604		keys.insert("these".into(), 11);
605		keys.insert("theses".into(), 12);
606		keys.insert("their".into(), 8);
607		keys.insert("theirs".into(), 9);
608		keys.insert("there".into(), 10);
609
610		{
611			let r = keys.collect_with_prefix(&"appli".into()).unwrap();
612			check_keys(
613				r,
614				vec![("applicant".into(), 2), ("application".into(), 3), ("applicative".into(), 4)],
615			);
616		}
617
618		{
619			let r = keys.collect_with_prefix(&"the".into()).unwrap();
620			check_keys(
621				r,
622				vec![
623					("the".into(), 7),
624					("their".into(), 8),
625					("theirs".into(), 9),
626					("there".into(), 10),
627					("these".into(), 11),
628					("theses".into(), 12),
629				],
630			);
631		}
632
633		{
634			let r = keys.collect_with_prefix(&"blue".into()).unwrap();
635			check_keys(r, vec![("blueberry".into(), 6)]);
636		}
637
638		{
639			let r = keys.collect_with_prefix(&"apple".into()).unwrap();
640			check_keys(r, vec![("apple".into(), 1)]);
641		}
642
643		{
644			let r = keys.collect_with_prefix(&"zz".into()).unwrap();
645			check_keys(r, vec![]);
646		}
647	}
648
649	fn test_keys_split<BK: BKeys>(mut keys: BK) {
650		keys.insert("a".into(), 1);
651		keys.insert("b".into(), 2);
652		keys.insert("c".into(), 3);
653		keys.insert("d".into(), 4);
654		keys.insert("e".into(), 5);
655		keys.compile();
656		let r = keys.split_keys().unwrap();
657		assert_eq!(r.median_payload, 3);
658		let c: Key = "c".into();
659		assert_eq!(r.median_key, c);
660		assert_eq!(r.median_idx, 2);
661		assert_eq!(r.left.len(), 2);
662		assert_eq!(r.left.get(&"a".into()), Some(1));
663		assert_eq!(r.left.get(&"b".into()), Some(2));
664		assert_eq!(r.right.len(), 2);
665		assert_eq!(r.right.get(&"d".into()), Some(4));
666		assert_eq!(r.right.get(&"e".into()), Some(5));
667	}
668
669	#[test]
670	fn test_fst_keys_split() {
671		test_keys_split(FstKeys::default());
672	}
673
674	#[test]
675	fn test_trie_keys_split() {
676		test_keys_split(TrieKeys::default());
677	}
678}