sanakirja_core/btree/
mod.rs

1//! An implementation of B trees. The core operations on B trees
2//! (lookup, iterate, put and del) are generic in the actual
3//! implementation of nodes, via the [`BTreePage`] and
4//! [`BTreeMutPage`]. This allows for a simpler code for the
5//! high-level functions, as well as specialised, high-performance
6//! implementations for the nodes.
7//!
8//! Two different implementations are supplied: one in [`page`] for
9//! types with a size known at compile-time, which yields denser
10//! leaves, and hence shallower trees (if the values are really using
11//! the space). The other one, in [`page_unsized`], is for
12//! dynamic-sized types, or types whose representation is dynamic, for
13//! example enums that are `Sized` in Rust, but whose cases vary
14//! widely in size.
15use crate::*;
16#[doc(hidden)]
17pub mod cursor;
18pub use cursor::{iter, rev_iter, Cursor, Iter, RevIter};
19pub mod del;
20pub use del::{del, del_no_decr};
21pub mod put;
22pub use put::put;
23pub mod page;
24pub mod page_unsized;
25
26pub trait BTreePage<K: ?Sized, V: ?Sized>: Sized {
27    type Cursor: Clone + Copy + core::fmt::Debug;
28
29    /// Whether this cursor is at the end of the page.
30    fn is_empty(c: &Self::Cursor) -> bool;
31
32    /// Whether this cursor is strictly before the first element.
33    fn is_init(c: &Self::Cursor) -> bool;
34
35    /// Returns a cursor set before the first element of the page
36    /// (i.e. set to -1).
37    fn cursor_before(p: &CowPage) -> Self::Cursor;
38
39    /// Returns a cursor set to the first element of the page
40    /// (i.e. 0). If the page is empty, the returned cursor might be
41    /// empty.
42    fn cursor_first(p: &CowPage) -> Self::Cursor {
43        let mut c = Self::cursor_before(p);
44        Self::move_next(&mut c);
45        c
46    }
47
48    /// Returns a cursor set after the last element of the page
49    /// (i.e. to element n)
50    fn cursor_after(p: &CowPage) -> Self::Cursor;
51
52    /// Returns a cursor set to the last element of the page. If the
53    /// cursor is empty, this is the same as `cursor_before`.
54    fn cursor_last(p: &CowPage) -> Self::Cursor {
55        let mut c = Self::cursor_after(p);
56        Self::move_prev(&mut c);
57        c
58    }
59
60    /// Return the element currently pointed to by the cursor (if the
61    /// cursor is not before or after the elements of the page), and
62    /// move the cursor to the next element.
63    fn next<'b, T: LoadPage>(
64        txn: &T,
65        p: Page<'b>,
66        c: &mut Self::Cursor,
67    ) -> Option<(&'b K, &'b V, u64)> {
68        if let Some((k, v, r)) = Self::current(txn, p, c) {
69            Self::move_next(c);
70            Some((k, v, r))
71        } else {
72            None
73        }
74    }
75
76    /// Move the cursor to the previous element, and return that
77    /// element. Note that this is not the symmetric of `next`.
78    fn prev<'b, T: LoadPage>(
79        txn: &T,
80        p: Page<'b>,
81        c: &mut Self::Cursor,
82    ) -> Option<(&'b K, &'b V, u64)> {
83        if Self::move_prev(c) {
84            Self::current(txn, p, c)
85        } else {
86            None
87        }
88    }
89
90    /// Move the cursor to the next position. Returns whether the
91    /// cursor was actually moved (i.e. `true` if and only if the
92    /// cursor isn't already after the last element).
93    fn move_next(c: &mut Self::Cursor) -> bool;
94
95    /// Move the cursor to the previous position. Returns whether the
96    /// cursor was actually moved (i.e. `true` if and only if the
97    /// cursor isn't strictly before the page).
98    fn move_prev(c: &mut Self::Cursor) -> bool;
99
100    /// Returns the current element, if the cursor is pointing at one.
101    fn current<'a, T: LoadPage>(
102        txn: &T,
103        p: Page<'a>,
104        c: &Self::Cursor,
105    ) -> Option<(&'a K, &'a V, u64)>;
106
107    /// Returns the left child of the entry pointed to by the cursor.
108    fn left_child(p: Page, c: &Self::Cursor) -> u64;
109
110    /// Returns the right child of the entry pointed to by the cursor.
111    fn right_child(p: Page, c: &Self::Cursor) -> u64;
112
113    /// Sets the cursor to the last element less than or equal to `k0`
114    /// if `v0.is_none()`, and to `(k0, v0)` if `v0.is_some()`.  If a
115    /// match is found (on `k0` only if `v0.is_none()`, on `(k0, v0)`
116    /// else), return the match along with the right child.
117    ///
118    /// Else (in the `Err` case of the `Result`), return the position
119    /// at which `(k0, v0)` can be inserted.
120    fn set_cursor<'a, T: LoadPage>(
121        txn: &'a T,
122        page: Page,
123        c: &mut Self::Cursor,
124        k0: &K,
125        v0: Option<&V>,
126    ) -> Result<(&'a K, &'a V, u64), usize>;
127
128    /// Splits the cursor into two cursors: the elements strictly
129    /// before the current position, and the elements greater than or
130    /// equal the current element.
131    fn split_at(c: &Self::Cursor) -> (Self::Cursor, Self::Cursor);
132}
133
134pub struct PageIterator<'a, T: LoadPage, K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
135    cursor: P::Cursor,
136    txn: &'a T,
137    page: Page<'a>,
138}
139
140impl<'a, T: LoadPage, K: ?Sized + 'a, V: ?Sized + 'a, P: BTreePage<K, V>> Iterator
141    for PageIterator<'a, T, K, V, P>
142{
143    type Item = (&'a K, &'a V, u64);
144    fn next(&mut self) -> Option<Self::Item> {
145        P::next(self.txn, self.page, &mut self.cursor)
146    }
147}
148
149pub trait BTreeMutPage<K: ?Sized, V: ?Sized>: BTreePage<K, V> + core::fmt::Debug {
150    /// Initialise a page.
151    fn init(page: &mut MutPage);
152
153    /// Add an entry to the page, possibly splitting the page in the
154    /// process.
155    ///
156    /// Makes the assumption that `k1v1.is_some()` implies
157    /// `replace`. When `k1v1.is_some()`, we insert both `(k0, v0)`
158    /// (as a replacement), followed by `(k1, v1)`. This is only ever
159    /// used when deleting, and when the right child of a deleted
160    /// entry (in an internal node) has split while we were looking
161    /// for a replacement for the deleted entry.
162    ///
163    /// Since we only look for replacements in the right child, the
164    /// left child of the insertion isn't modified, in which case `l`
165    /// and `r` are interpreted as the left and right child of the new
166    /// entry, `k1v1`.
167    unsafe fn put<'a, T: AllocPage>(
168        txn: &mut T,
169        page: CowPage,
170        mutable: bool,
171        replace: bool,
172        c: &Self::Cursor,
173        k0: &'a K,
174        v0: &'a V,
175        k1v1: Option<(&'a K, &'a V)>,
176        l: u64,
177        r: u64,
178    ) -> Result<crate::btree::put::Put<'a, K, V>, T::Error>;
179
180    /// Add an entry to `page`, at position `c`. Does not check
181    /// whether there is enough space to do so. This method is mostly
182    /// useful for cloning pages.
183    #[allow(unused_variables)]
184    unsafe fn put_mut<T: AllocPage>(
185        txn: &mut T,
186        page: &mut MutPage,
187        c: &mut Self::Cursor,
188        k0: &K,
189        v0: &V,
190        r: u64,
191    );
192
193    #[allow(unused_variables)]
194    unsafe fn set_left_child(page: &mut MutPage, c: &Self::Cursor, l: u64);
195
196    /// Update the left child of the position pointed to by the
197    /// cursor.
198    unsafe fn update_left_child<T: AllocPage>(
199        txn: &mut T,
200        page: CowPage,
201        mutable: bool,
202        c: &Self::Cursor,
203        r: u64,
204    ) -> Result<crate::btree::put::Ok, T::Error>;
205
206    type Saved;
207
208    /// Save a leaf entry as a replacement, when we delete at an
209    /// internal node. This can be a pointer to the leaf if the leaf
210    /// isn't mutated, or the actual value, or something else.
211    fn save_deleted_leaf_entry(k: &K, v: &V) -> Self::Saved;
212
213    /// Recover the saved value.
214    unsafe fn from_saved<'a>(s: &Self::Saved) -> (&'a K, &'a V);
215
216    /// Delete an entry on the page, returning the resuting page along
217    /// with the offset of the freed page (or 0 if no page was freed,
218    /// i.e. if `page` is mutable).
219    unsafe fn del<T: AllocPage>(
220        txn: &mut T,
221        page: CowPage,
222        mutable: bool,
223        c: &Self::Cursor,
224        l: u64,
225    ) -> Result<(MutPage, u64), T::Error>;
226
227    /// Merge, rebalance or update a concatenation.
228    unsafe fn merge_or_rebalance<'a, 'b, T: AllocPage>(
229        txn: &mut T,
230        m: del::Concat<'a, K, V, Self>,
231    ) -> Result<del::Op<'a, T, K, V>, T::Error>;
232}
233
234/// A database, which is essentially just a page offset along with markers.
235#[derive(Debug)]
236pub struct Db_<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
237    pub db: core::num::NonZeroU64,
238    pub k: core::marker::PhantomData<K>,
239    pub v: core::marker::PhantomData<V>,
240    pub p: core::marker::PhantomData<P>,
241}
242
243/// A database of sized values.
244pub type Db<K, V> = Db_<K, V, page::Page<K, V>>;
245
246#[cfg(feature = "typeids")]
247impl<K: Storable + TypeId, V: Storable + TypeId, P: BTreePage<K, V> + core::fmt::Debug> TypeId
248    for Db_<K, V, P>
249{
250    fn type_id() -> [u8; 32] {
251        let mut h = sha2::Sha256::new();
252        h.update("sanakirja-core::Db".as_bytes());
253        h.update(&K::type_id());
254        h.update(&V::type_id());
255        h.finalize().into()
256    }
257}
258
259impl<K: Storable, V: Storable, P: BTreePage<K, V> + core::fmt::Debug> Storable for Db_<K, V, P> {
260    type PageReferences = core::iter::Once<u64>;
261    fn page_references(&self) -> Self::PageReferences {
262        core::iter::once(self.db.into())
263    }
264
265    unsafe fn drop<T: AllocPage>(&self, t: &mut T) -> Result<(), T::Error> {
266        drop_(t, self)
267    }
268}
269
270impl<K: Storable, V: Storable, P: BTreePage<K, V> + core::fmt::Debug> Storable
271    for Option<Db_<K, V, P>>
272{
273    type PageReferences = core::iter::Once<u64>;
274    fn page_references(&self) -> Self::PageReferences {
275        if let Some(ref db) = self {
276            core::iter::once(db.db.into())
277        } else {
278            let mut it = core::iter::once(0);
279            it.next();
280            it
281        }
282    }
283
284    unsafe fn drop<T: AllocPage>(&self, t: &mut T) -> Result<(), T::Error> {
285        if let Some(ref db) = self {
286            drop_(t, db)
287        } else {
288            Ok(())
289        }
290    }
291}
292
293/// A database of unsized values.
294pub type UDb<K, V> = Db_<K, V, page_unsized::Page<K, V>>;
295
296impl<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> Db_<K, V, P> {
297    /// Load a database from a page offset.
298    pub unsafe fn from_page(db: u64) -> Self {
299        Db_ {
300            db: core::num::NonZeroU64::new_unchecked(db),
301            k: core::marker::PhantomData,
302            v: core::marker::PhantomData,
303            p: core::marker::PhantomData,
304        }
305    }
306}
307
308/// Create a database with an arbitrary page implementation.
309pub unsafe fn create_db_<T: AllocPage, K: ?Sized, V: ?Sized, P: BTreeMutPage<K, V>>(
310    txn: &mut T,
311) -> Result<Db_<K, V, P>, T::Error> {
312    let mut page = txn.alloc_page()?;
313    P::init(&mut page);
314    Ok(Db_ {
315        db: core::num::NonZeroU64::new_unchecked(page.0.offset),
316        k: core::marker::PhantomData,
317        v: core::marker::PhantomData,
318        p: core::marker::PhantomData,
319    })
320}
321
322/// Create a database for sized keys and values.
323pub unsafe fn create_db<T: AllocPage, K: Storable, V: Storable>(
324    txn: &mut T,
325) -> Result<Db_<K, V, page::Page<K, V>>, T::Error> {
326    create_db_(txn)
327}
328
329/// Fork a database.
330pub fn fork_db<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreeMutPage<K, V>>(
331    txn: &mut T,
332    db: &Db_<K, V, P>,
333) -> Result<Db_<K, V, P>, T::Error> {
334    txn.incr_rc(db.db.into())?;
335    Ok(Db_ {
336        db: db.db,
337        k: core::marker::PhantomData,
338        v: core::marker::PhantomData,
339        p: core::marker::PhantomData,
340    })
341}
342
343/// Get the first entry of a database greater than or equal to `k` (or
344/// to `(k, v)` if `v.is_some()`), and return `true` if this entry is
345/// shared with another structure (i.e. the RC of at least one page
346/// along a path to the entry is at least 2).
347pub fn get_shared<
348    'a,
349    T: LoadPage,
350    K: Storable + ?Sized,
351    V: Storable + ?Sized,
352    P: BTreePage<K, V>,
353>(
354    txn: &'a T,
355    db: &Db_<K, V, P>,
356    k: &K,
357    v: Option<&V>,
358) -> Result<Option<(&'a K, &'a V, bool)>, T::Error> {
359    // Set the "cursor stack" by setting a skip list cursor in
360    // each page from the root to the appropriate leaf.
361    let mut last_match = None;
362    let mut page = unsafe { txn.load_page(db.db.into())? };
363    let mut is_shared = txn.rc(db.db.into())? >= 2;
364    // This is a for loop, to allow the compiler to unroll (maybe).
365    for _ in 0..cursor::N_CURSORS {
366        let mut cursor = P::cursor_before(&page);
367        if let Ok((kk, vv, _)) = P::set_cursor(txn, page.as_page(), &mut cursor, k, v) {
368            if v.is_some() {
369                return Ok(Some((kk, vv, is_shared)));
370            }
371            last_match = Some((kk, vv, is_shared));
372        } else if let Some((k, v, _)) = P::current(txn, page.as_page(), &cursor) {
373            // Here, Rust says that `k` and `v` have the same lifetime
374            // as `page`, but we actually know that they're alive for
375            // as long as `txn` doesn't change, so we can safely
376            // extend their lifetimes:
377            unsafe {
378                last_match = Some((core::mem::transmute(k), core::mem::transmute(v), is_shared))
379            }
380        }
381        // The cursor is set to the first element greater than or
382        // equal to the (k, v) we're looking for, so we need to
383        // explore the left subtree.
384        let next_page = P::left_child(page.as_page(), &cursor);
385        if next_page > 0 {
386            page = unsafe { txn.load_page(next_page)? };
387            is_shared |= txn.rc(db.db.into())? >= 2;
388        } else {
389            break;
390        }
391    }
392    Ok(last_match)
393}
394
395/// Get the first entry of a database greater than or equal to `k` (or
396/// to `(k, v)` if `v.is_some()`), and return `true` if this entry is
397/// shared with another structure (i.e. the RC of at least one page
398/// along a path to the entry is at least 2).
399pub fn get<'a, T: LoadPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
400    txn: &'a T,
401    db: &Db_<K, V, P>,
402    k: &K,
403    v: Option<&V>,
404) -> Result<Option<(&'a K, &'a V)>, T::Error> {
405    // Unfortunately, since the above function `get_shared` uses this
406    // one in order to get the RC of pages, we can't really refactor,
407    // so this is mostly a copy of the other function.
408
409    // Set the "cursor stack" by setting a skip list cursor in
410    // each page from the root to the appropriate leaf.
411    let mut last_match = None;
412    let mut page = unsafe { txn.load_page(db.db.into())? };
413    // This is a for loop, to allow the compiler to unroll (maybe).
414    for _ in 0..cursor::N_CURSORS {
415        let mut cursor = P::cursor_before(&page);
416        if let Ok((kk, vv, _)) = P::set_cursor(txn, page.as_page(), &mut cursor, k, v) {
417            if v.is_some() {
418                return Ok(Some((kk, vv)));
419            }
420            last_match = Some((kk, vv));
421        } else if let Some((k, v, _)) = P::current(txn, page.as_page(), &cursor) {
422            // Here, Rust says that `k` and `v` have the same lifetime
423            // as `page`, but we actually know that they're alive for
424            // as long as `txn` doesn't change, so we can safely
425            // extend their lifetimes:
426            unsafe { last_match = Some((core::mem::transmute(k), core::mem::transmute(v))) }
427        }
428        // The cursor is set to the first element greater than or
429        // equal to the (k, v) we're looking for, so we need to
430        // explore the left subtree.
431        let next_page = P::left_child(page.as_page(), &cursor);
432        if next_page > 0 {
433            page = unsafe { txn.load_page(next_page)? };
434        } else {
435            break;
436        }
437    }
438    Ok(last_match)
439}
440
441/// Drop a database recursively, dropping all referenced keys and
442/// values that aren't shared with other databases.
443pub unsafe fn drop<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
444    txn: &mut T,
445    db: Db_<K, V, P>,
446) -> Result<(), T::Error> {
447    drop_(txn, &db)
448}
449
450unsafe fn drop_<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
451    txn: &mut T,
452    db: &Db_<K, V, P>,
453) -> Result<(), T::Error> {
454    // In order to make this function tail-recursive, we simulate a
455    // stack with the following:
456    use core::mem::MaybeUninit;
457    let mut stack: [MaybeUninit<cursor::PageCursor<K, V, P>>; cursor::N_CURSORS] = [
458        MaybeUninit::uninit(),
459        MaybeUninit::uninit(),
460        MaybeUninit::uninit(),
461        MaybeUninit::uninit(),
462        MaybeUninit::uninit(),
463        MaybeUninit::uninit(),
464        MaybeUninit::uninit(),
465        MaybeUninit::uninit(),
466        MaybeUninit::uninit(),
467        MaybeUninit::uninit(),
468        MaybeUninit::uninit(),
469        MaybeUninit::uninit(),
470        MaybeUninit::uninit(),
471        MaybeUninit::uninit(),
472        MaybeUninit::uninit(),
473        MaybeUninit::uninit(),
474        MaybeUninit::uninit(),
475        MaybeUninit::uninit(),
476        MaybeUninit::uninit(),
477        MaybeUninit::uninit(),
478        MaybeUninit::uninit(),
479        MaybeUninit::uninit(),
480        MaybeUninit::uninit(),
481        MaybeUninit::uninit(),
482        MaybeUninit::uninit(),
483        MaybeUninit::uninit(),
484        MaybeUninit::uninit(),
485        MaybeUninit::uninit(),
486        MaybeUninit::uninit(),
487        MaybeUninit::uninit(),
488        MaybeUninit::uninit(),
489        MaybeUninit::uninit(),
490        MaybeUninit::uninit(),
491    ];
492    let mut ptr = 0;
493
494    // Push the root page of `db` onto the stack.
495    let page = txn.load_page(db.db.into())?;
496    stack[0] = MaybeUninit::new(cursor::PageCursor {
497        cursor: P::cursor_before(&page),
498        page,
499    });
500
501    // Then perform a DFS:
502    loop {
503        // Look at the top element of the stack.
504        let cur = unsafe { &mut *stack[ptr].as_mut_ptr() };
505        // If it needs to be dropped (i.e. if its RC is <= 1), iterate
506        // its cursor and drop all its referenced pages.
507        let rc = txn.rc(cur.page.offset)?;
508        if rc <= 1 {
509            // if there's a current element in the cursor (i.e. we
510            // aren't before or after the elements), decrease its RC.
511            if let Some((k, v, _)) = P::current(txn, cur.page.as_page(), &cur.cursor) {
512                k.drop(txn)?;
513                v.drop(txn)?;
514            }
515            // In all cases, move next and push the left child onto
516            // the stack. This works because pushed cursors are
517            // initially set to before the page's elements.
518            if P::move_next(&mut cur.cursor) {
519                let r = P::left_child(cur.page.as_page(), &cur.cursor);
520                if r > 0 {
521                    ptr += 1;
522                    let page = txn.load_page(r)?;
523                    stack[ptr] = MaybeUninit::new(cursor::PageCursor {
524                        cursor: P::cursor_before(&page),
525                        page,
526                    })
527                }
528                continue;
529            }
530        }
531        // Here, either rc > 1 (in which case the only thing we need
532        // to do in this iteration is to decrement the RC), or else
533        // `P::move_next` returned `false`, meaning that the cursor is
534        // after the last element (in which case we are done with this
535        // page, and also need to decrement its RC, in order to free
536        // it).
537        if cur.page.is_dirty() {
538            txn.decr_rc_owned(cur.page.offset)?;
539        } else {
540            txn.decr_rc(cur.page.offset)?;
541        }
542        // If this was the bottom element of the stack, stop, else, pop.
543        if ptr == 0 {
544            break;
545        } else {
546            ptr -= 1;
547        }
548    }
549    Ok(())
550}