1use crate::err::Error;
2use crate::idx::trees::btree::Payload;
3use crate::kvs::Key;
4use fst::{IntoStreamer, Map, MapBuilder, Streamer};
5use radix_trie::{SubTrie, Trie, TrieCommon};
6use serde::ser;
7use std::collections::VecDeque;
8use std::fmt::{Debug, Display, Formatter};
9use std::io;
10use std::io::Cursor;
11
12pub trait BKeys: Default + Debug + Display + Sized {
13 fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error>;
14 fn len(&self) -> u32;
15 fn is_empty(&self) -> bool;
16 fn get(&self, key: &Key) -> Option<Payload>;
17 fn collect_with_prefix(&self, prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error>;
22 fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload>;
23 fn append(&mut self, keys: Self);
24 fn remove(&mut self, key: &Key) -> Option<Payload>;
25 fn split_keys(self) -> Result<SplitKeys<Self>, Error>;
26 fn get_key(&self, idx: usize) -> Option<Key>;
27 fn get_child_idx(&self, searched_key: &Key) -> usize;
28 fn get_first_key(&self) -> Option<(Key, Payload)>;
29 fn get_last_key(&self) -> Option<(Key, Payload)>;
30 fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error>;
31 fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error>;
32 fn compile(&mut self) {}
33}
34
35#[non_exhaustive]
36pub struct SplitKeys<BK>
37where
38 BK: BKeys,
39{
40 pub(in crate::idx) left: BK,
41 pub(in crate::idx) right: BK,
42 pub(in crate::idx) median_idx: usize,
43 pub(in crate::idx) median_key: Key,
44 pub(in crate::idx) median_payload: Payload,
45}
46
47#[derive(Debug, Clone)]
48#[non_exhaustive]
49pub struct FstKeys {
50 i: Inner,
51}
52
53#[derive(Debug, Clone)]
54enum Inner {
55 Map(Map<Vec<u8>>),
56 Trie(TrieKeys),
57}
58
59impl FstKeys {
60 fn edit(&mut self) {
61 if let Inner::Map(m) = &self.i {
62 let t: TrieKeys = m.into();
63 self.i = Inner::Trie(t);
64 }
65 }
66}
67
68impl Default for FstKeys {
69 fn default() -> Self {
70 Self {
71 i: Inner::Trie(TrieKeys::default()),
72 }
73 }
74}
75
76impl BKeys for FstKeys {
77 fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error> {
78 let i = Inner::Trie(TrieKeys::with_key_val(key, payload)?);
79 Ok(Self {
80 i,
81 })
82 }
83
84 fn len(&self) -> u32 {
85 match &self.i {
86 Inner::Map(m) => m.len() as u32,
87 Inner::Trie(t) => t.len(),
88 }
89 }
90
91 fn is_empty(&self) -> bool {
92 match &self.i {
93 Inner::Map(m) => m.is_empty(),
94 Inner::Trie(t) => t.is_empty(),
95 }
96 }
97
98 fn get(&self, key: &Key) -> Option<Payload> {
99 match &self.i {
100 Inner::Map(m) => m.get(key),
101 Inner::Trie(t) => t.get(key),
102 }
103 }
104
105 fn collect_with_prefix(&self, _prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
106 Err(fail!("BKeys/FSTKeys::collect_with_prefix"))
107 }
108
109 fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
110 self.edit();
111 if let Inner::Trie(t) = &mut self.i {
112 return t.insert(key, payload);
113 }
114 unreachable!()
115 }
116
117 fn append(&mut self, keys: Self) {
118 if keys.is_empty() {
119 return;
120 }
121 self.edit();
122 match keys.i {
123 Inner::Map(other) => {
124 let mut s = other.stream();
125 while let Some((key, payload)) = s.next() {
126 self.insert(key.to_vec(), payload);
127 }
128 }
129 Inner::Trie(other) => {
130 if let Inner::Trie(t) = &mut self.i {
131 t.append(other)
132 }
133 }
134 }
135 }
136
137 fn remove(&mut self, key: &Key) -> Option<Payload> {
138 self.edit();
139 if let Inner::Trie(t) = &mut self.i {
140 t.remove(key)
141 } else {
142 None
143 }
144 }
145
146 fn split_keys(mut self) -> Result<SplitKeys<Self>, Error> {
147 self.edit();
148 if let Inner::Trie(t) = self.i {
149 let s = t.split_keys()?;
150 Ok(SplitKeys {
151 left: Self {
152 i: Inner::Trie(s.left),
153 },
154 right: Self {
155 i: Inner::Trie(s.right),
156 },
157 median_idx: s.median_idx,
158 median_key: s.median_key,
159 median_payload: s.median_payload,
160 })
161 } else {
162 Err(fail!("BKeys/FSTKeys::split_keys"))
163 }
164 }
165
166 fn get_key(&self, mut idx: usize) -> Option<Key> {
167 match &self.i {
168 Inner::Map(m) => {
169 let mut s = m.keys().into_stream();
170 while let Some(key) = s.next() {
171 if idx == 0 {
172 return Some(key.to_vec());
173 }
174 idx -= 1;
175 }
176 None
177 }
178 Inner::Trie(t) => t.get_key(idx),
179 }
180 }
181
182 fn get_child_idx(&self, searched_key: &Key) -> usize {
183 match &self.i {
184 Inner::Map(m) => {
185 let searched_key = searched_key.as_slice();
186 let mut s = m.keys().into_stream();
187 let mut child_idx = 0;
188 while let Some(key) = s.next() {
189 if searched_key.le(key) {
190 break;
191 }
192 child_idx += 1;
193 }
194 child_idx
195 }
196 Inner::Trie(t) => t.get_child_idx(searched_key),
197 }
198 }
199
200 fn get_first_key(&self) -> Option<(Key, Payload)> {
201 match &self.i {
202 Inner::Map(m) => m.stream().next().map(|(k, p)| (k.to_vec(), p)),
203 Inner::Trie(t) => t.get_first_key(),
204 }
205 }
206
207 fn get_last_key(&self) -> Option<(Key, Payload)> {
208 match &self.i {
209 Inner::Map(m) => {
210 let mut last = None;
211 let mut s = m.stream();
212 while let Some((k, p)) = s.next() {
213 last = Some((k.to_vec(), p));
214 }
215 last
216 }
217 Inner::Trie(t) => t.get_last_key(),
218 }
219 }
220
221 fn compile(&mut self) {
222 if let Inner::Trie(t) = &self.i {
223 let mut builder = MapBuilder::memory();
224 for (key, payload) in t.keys.iter() {
225 builder.insert(key, *payload).unwrap();
226 }
227 let m = Map::new(builder.into_inner().unwrap()).unwrap();
228 self.i = Inner::Map(m);
229 }
230 }
231
232 fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error> {
233 let bytes: Vec<u8> = bincode::deserialize_from(c)?;
234 Ok(Self::try_from(bytes)?)
235 }
236
237 fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error> {
238 if let Inner::Map(m) = &self.i {
239 let b = m.as_fst().as_bytes();
240 bincode::serialize_into(c, b)?;
241 Ok(())
242 } else {
243 Err(Error::Bincode(ser::Error::custom(
244 "bkeys.to_map() should be called prior serializing",
245 )))
246 }
247 }
248}
249
250impl TryFrom<MapBuilder<Vec<u8>>> for FstKeys {
251 type Error = fst::Error;
252 fn try_from(builder: MapBuilder<Vec<u8>>) -> Result<Self, Self::Error> {
253 Self::try_from(builder.into_inner()?)
254 }
255}
256
257impl TryFrom<Vec<u8>> for FstKeys {
258 type Error = fst::Error;
259 fn try_from(bytes: Vec<u8>) -> Result<Self, Self::Error> {
260 let map = Map::new(bytes)?;
261 Ok(Self {
262 i: Inner::Map(map),
263 })
264 }
265}
266
267impl Display for FstKeys {
268 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
269 match &self.i {
270 Inner::Map(m) => {
271 let mut s = m.stream();
272 let mut start = true;
273 while let Some((key, val)) = s.next() {
274 let key = String::from_utf8_lossy(key);
275 if start {
276 start = false;
277 } else {
278 f.write_str(", ")?;
279 }
280 write!(f, "{}=>{}", key, val)?;
281 }
282 Ok(())
283 }
284 Inner::Trie(t) => write!(f, "{}", t),
285 }
286 }
287}
288
289#[derive(Default, Debug, Clone)]
290#[non_exhaustive]
291pub struct TrieKeys {
292 keys: Trie<Key, Payload>,
293}
294
295impl Display for TrieKeys {
296 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
297 let mut start = true;
298 for (key, val) in self.keys.iter() {
299 let key = String::from_utf8_lossy(key);
300 if start {
301 start = false;
302 } else {
303 f.write_str(", ")?;
304 }
305 write!(f, "{}=>{}", key, val)?;
306 }
307 Ok(())
308 }
309}
310
311impl From<&Map<Vec<u8>>> for TrieKeys {
312 fn from(m: &Map<Vec<u8>>) -> Self {
313 let mut keys = TrieKeys::default();
314 let mut s = m.stream();
315 while let Some((key, payload)) = s.next() {
316 keys.insert(key.to_vec(), payload);
317 }
318 keys
319 }
320}
321
322impl BKeys for TrieKeys {
323 fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error> {
324 let mut trie_keys = Self {
325 keys: Trie::default(),
326 };
327 trie_keys.insert(key, payload);
328 Ok(trie_keys)
329 }
330
331 fn len(&self) -> u32 {
332 self.keys.len() as u32
333 }
334
335 fn is_empty(&self) -> bool {
336 self.keys.is_empty()
337 }
338
339 fn get(&self, key: &Key) -> Option<Payload> {
340 self.keys.get(key).copied()
341 }
342
343 fn collect_with_prefix(&self, prefix: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
344 let mut i = KeysIterator::new(prefix, &self.keys);
345 let mut r = VecDeque::new();
346 while let Some((k, p)) = i.next() {
347 r.push_back((k.clone(), p))
348 }
349 Ok(r)
350 }
351
352 fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
353 self.keys.insert(key, payload)
354 }
355
356 fn append(&mut self, keys: Self) {
357 for (k, p) in keys.keys.iter() {
358 self.insert(k.clone(), *p);
359 }
360 }
361
362 fn remove(&mut self, key: &Key) -> Option<Payload> {
363 self.keys.remove(key)
364 }
365
366 fn split_keys(self) -> Result<SplitKeys<Self>, Error> {
367 let median_idx = self.keys.len() / 2;
368 let mut s = self.keys.iter();
369 let mut left = Trie::default();
370 let mut n = median_idx;
371 while n > 0 {
372 if let Some((key, payload)) = s.next() {
373 left.insert(key.clone(), *payload);
374 }
375 n -= 1;
376 }
377 let (median_key, median_payload) = if let Some((k, v)) = s.next() {
378 (k.clone(), *v)
379 } else {
380 return Err(fail!("BKeys/TrieKeys::split_keys"));
381 };
382 let mut right = Trie::default();
383 for (key, val) in s {
384 right.insert(key.clone(), *val);
385 }
386 Ok(SplitKeys {
387 left: Self::from(left),
388 right: Self::from(right),
389 median_idx,
390 median_key,
391 median_payload,
392 })
393 }
394
395 fn get_key(&self, mut idx: usize) -> Option<Key> {
396 for key in self.keys.keys() {
397 if idx == 0 {
398 return Some(key.clone());
399 }
400 idx -= 1;
401 }
402 None
403 }
404
405 fn get_child_idx(&self, searched_key: &Key) -> usize {
406 let mut child_idx = 0;
407 for key in self.keys.keys() {
408 if searched_key.le(key) {
409 break;
410 }
411 child_idx += 1;
412 }
413 child_idx
414 }
415
416 fn get_first_key(&self) -> Option<(Key, Payload)> {
417 self.keys.iter().next().map(|(k, p)| (k.clone(), *p))
418 }
419
420 fn get_last_key(&self) -> Option<(Key, Payload)> {
421 self.keys.iter().last().map(|(k, p)| (k.clone(), *p))
422 }
423
424 fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error> {
425 let compressed: Vec<u8> = bincode::deserialize_from(c)?;
426 let mut uncompressed: Vec<u8> = Vec::new();
427 {
428 let mut rdr = snap::read::FrameDecoder::new(compressed.as_slice());
429 io::copy(&mut rdr, &mut uncompressed)?;
430 }
431 let keys: Trie<Vec<u8>, u64> = bincode::deserialize_from(uncompressed.as_slice())?;
432 Ok(Self {
433 keys,
434 })
435 }
436
437 fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error> {
438 let mut uncompressed: Vec<u8> = Vec::new();
439 bincode::serialize_into(&mut uncompressed, &self.keys)?;
440 let mut compressed: Vec<u8> = Vec::new();
441 {
442 let mut wtr = snap::write::FrameEncoder::new(&mut compressed);
443 io::copy(&mut uncompressed.as_slice(), &mut wtr)?;
444 }
445 bincode::serialize_into(c, &compressed)?;
446 Ok(())
447 }
448}
449
450impl From<Trie<Key, Payload>> for TrieKeys {
451 fn from(keys: Trie<Key, Payload>) -> Self {
452 Self {
453 keys,
454 }
455 }
456}
457
458struct KeysIterator<'a> {
459 prefix: &'a Key,
460 node_queue: VecDeque<SubTrie<'a, Key, Payload>>,
461 current_node: Option<SubTrie<'a, Key, Payload>>,
462}
463
464impl<'a> KeysIterator<'a> {
465 fn new(prefix: &'a Key, keys: &'a Trie<Key, Payload>) -> Self {
466 let start_node = keys.get_raw_descendant(prefix);
467 Self {
468 prefix,
469 node_queue: VecDeque::new(),
470 current_node: start_node,
471 }
472 }
473
474 fn next(&mut self) -> Option<(&Key, Payload)> {
475 loop {
476 if let Some(node) = self.current_node.take() {
477 for children in node.children() {
478 self.node_queue.push_front(children);
479 }
480 if let Some(value) = node.value() {
481 if let Some(node_key) = node.key() {
482 if node_key.starts_with(self.prefix) {
483 return Some((node_key, *value));
484 }
485 }
486 }
487 } else {
488 self.current_node = self.node_queue.pop_front();
489 self.current_node.as_ref()?;
490 }
491 }
492 }
493}
494
495#[cfg(test)]
496mod tests {
497 use crate::idx::trees::bkeys::{BKeys, FstKeys, TrieKeys};
498 use crate::idx::trees::btree::Payload;
499 use crate::kvs::Key;
500 use std::collections::{HashMap, HashSet, VecDeque};
501 use std::io::Cursor;
502
503 fn test_keys_serde<BK: BKeys>(expected_size: usize) {
504 let key: Key = "a".as_bytes().into();
505 let mut keys = BK::with_key_val(key.clone(), 130).unwrap();
506 keys.compile();
507 let mut cur: Cursor<Vec<u8>> = Cursor::new(Vec::new());
509 keys.write_to(&mut cur).unwrap();
510 let buf = cur.into_inner();
511 assert_eq!(buf.len(), expected_size);
512 let mut cur = Cursor::new(buf);
514 let keys = BK::read_from(&mut cur).unwrap();
515 assert_eq!(keys.get(&key), Some(130));
516 }
517
518 #[test]
519 fn test_fst_keys_serde() {
520 test_keys_serde::<FstKeys>(48);
521 }
522
523 #[test]
524 fn test_trie_keys_serde() {
525 test_keys_serde::<TrieKeys>(44);
526 }
527
528 fn test_keys_additions<BK: BKeys>(mut keys: BK) {
529 let terms = [
530 "the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "the", "fast",
531 "fox", "jumped", "over", "the", "lazy", "dog",
532 ];
533 let mut i = 1;
534 assert_eq!(keys.len(), 0);
535 let mut term_set = HashSet::new();
536 for term in terms {
537 term_set.insert(term.to_string());
538 let key: Key = term.into();
539 keys.insert(key.clone(), i);
540 keys.compile();
541 assert_eq!(keys.get(&key), Some(i));
542 assert_eq!(keys.len() as usize, term_set.len());
543 i += 1;
544 }
545 }
546
547 #[test]
548 fn test_fst_keys_additions() {
549 test_keys_additions(FstKeys::default())
550 }
551
552 #[test]
553 fn test_trie_keys_additions() {
554 test_keys_additions(TrieKeys::default())
555 }
556
557 fn test_keys_deletions<BK: BKeys>(mut keys: BK) {
558 assert_eq!(keys.remove(&"dummy".into()), None);
559 assert_eq!(keys.len(), 0);
560 keys.insert("foo".into(), 1);
561 keys.insert("bar".into(), 2);
562 assert_eq!(keys.len(), 2);
563 assert_eq!(keys.remove(&"bar".into()), Some(2));
564 assert_eq!(keys.len(), 1);
565 assert_eq!(keys.remove(&"bar".into()), None);
566 assert_eq!(keys.len(), 1);
567 assert_eq!(keys.remove(&"foo".into()), Some(1));
568 assert_eq!(keys.len(), 0);
569 assert_eq!(keys.remove(&"foo".into()), None);
570 assert_eq!(keys.len(), 0);
571 }
572
573 #[test]
574 fn test_fst_keys_deletions() {
575 test_keys_deletions(FstKeys::default())
576 }
577
578 #[test]
579 fn test_trie_keys_deletions() {
580 test_keys_deletions(TrieKeys::default())
581 }
582
583 fn check_keys(r: VecDeque<(Key, Payload)>, e: Vec<(Key, Payload)>) {
584 let mut map = HashMap::new();
585 for (k, p) in r {
586 map.insert(k, p);
587 }
588 assert_eq!(map.len(), e.len());
589 for (k, p) in e {
590 assert_eq!(map.get(&k), Some(&p));
591 }
592 }
593
594 #[tokio::test]
595 async fn test_tries_keys_collect_with_prefix() {
596 let mut keys = TrieKeys::default();
597 keys.insert("apple".into(), 1);
598 keys.insert("applicant".into(), 2);
599 keys.insert("application".into(), 3);
600 keys.insert("applicative".into(), 4);
601 keys.insert("banana".into(), 5);
602 keys.insert("blueberry".into(), 6);
603 keys.insert("the".into(), 7);
604 keys.insert("these".into(), 11);
605 keys.insert("theses".into(), 12);
606 keys.insert("their".into(), 8);
607 keys.insert("theirs".into(), 9);
608 keys.insert("there".into(), 10);
609
610 {
611 let r = keys.collect_with_prefix(&"appli".into()).unwrap();
612 check_keys(
613 r,
614 vec![("applicant".into(), 2), ("application".into(), 3), ("applicative".into(), 4)],
615 );
616 }
617
618 {
619 let r = keys.collect_with_prefix(&"the".into()).unwrap();
620 check_keys(
621 r,
622 vec![
623 ("the".into(), 7),
624 ("their".into(), 8),
625 ("theirs".into(), 9),
626 ("there".into(), 10),
627 ("these".into(), 11),
628 ("theses".into(), 12),
629 ],
630 );
631 }
632
633 {
634 let r = keys.collect_with_prefix(&"blue".into()).unwrap();
635 check_keys(r, vec![("blueberry".into(), 6)]);
636 }
637
638 {
639 let r = keys.collect_with_prefix(&"apple".into()).unwrap();
640 check_keys(r, vec![("apple".into(), 1)]);
641 }
642
643 {
644 let r = keys.collect_with_prefix(&"zz".into()).unwrap();
645 check_keys(r, vec![]);
646 }
647 }
648
649 fn test_keys_split<BK: BKeys>(mut keys: BK) {
650 keys.insert("a".into(), 1);
651 keys.insert("b".into(), 2);
652 keys.insert("c".into(), 3);
653 keys.insert("d".into(), 4);
654 keys.insert("e".into(), 5);
655 keys.compile();
656 let r = keys.split_keys().unwrap();
657 assert_eq!(r.median_payload, 3);
658 let c: Key = "c".into();
659 assert_eq!(r.median_key, c);
660 assert_eq!(r.median_idx, 2);
661 assert_eq!(r.left.len(), 2);
662 assert_eq!(r.left.get(&"a".into()), Some(1));
663 assert_eq!(r.left.get(&"b".into()), Some(2));
664 assert_eq!(r.right.len(), 2);
665 assert_eq!(r.right.get(&"d".into()), Some(4));
666 assert_eq!(r.right.get(&"e".into()), Some(5));
667 }
668
669 #[test]
670 fn test_fst_keys_split() {
671 test_keys_split(FstKeys::default());
672 }
673
674 #[test]
675 fn test_trie_keys_split() {
676 test_keys_split(TrieKeys::default());
677 }
678}