lance_index/
prefilter.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use std::sync::Arc;

use async_trait::async_trait;
use lance_core::utils::mask::RowIdMask;
use lance_core::Result;

/// A trait to be implemented by anything supplying a prefilter row id mask
///
/// This trait is for internal use only and has no stability guarantees.
#[async_trait]
pub trait FilterLoader: Send + 'static {
    async fn load(self: Box<Self>) -> Result<RowIdMask>;
}

/// Filter out row ids that we know are not relevant to the query.
///
/// This could be both rows that are deleted or a prefilter
/// that should be applied to the search
///
/// <section class="warning">
/// Internal use only. No API stability guarantees.
/// </section>
#[async_trait]
pub trait PreFilter: Send + Sync {
    /// Waits for the prefilter to be fully loaded
    ///
    /// The prefilter loads in the background while the rest of the index
    /// search is running.  When you are ready to use the prefilter you
    /// must first call this method to ensure it is fully loaded.  This
    /// allows `filter_row_ids` to be a synchronous method.
    async fn wait_for_ready(&self) -> Result<()>;

    /// If the filter is empty.
    fn is_empty(&self) -> bool;

    /// Get the row id mask for this prefilter
    ///
    /// This method must be called after `wait_for_ready`
    fn mask(&self) -> Arc<RowIdMask>;

    /// Check whether a slice of row ids should be included in a query.
    ///
    /// Returns a vector of indices into the input slice that should be included,
    /// also known as a selection vector.
    ///
    /// This method must be called after `wait_for_ready`
    fn filter_row_ids<'a>(&self, row_ids: Box<dyn Iterator<Item = &'a u64> + 'a>) -> Vec<u64>;
}

/// A prefilter that does nothing
pub struct NoFilter;

#[async_trait]
impl PreFilter for NoFilter {
    async fn wait_for_ready(&self) -> Result<()> {
        Ok(())
    }

    fn is_empty(&self) -> bool {
        true
    }

    fn mask(&self) -> Arc<RowIdMask> {
        Arc::new(RowIdMask::all_rows())
    }

    fn filter_row_ids<'a>(&self, row_ids: Box<dyn Iterator<Item = &'a u64> + 'a>) -> Vec<u64> {
        row_ids.enumerate().map(|(i, _)| i as u64).collect()
    }
}