1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//! # `column_index`
//!
//! `column_index` provides rank and select operations to associate positions when not all
//! documents have exactly one element.

mod merge;
mod multivalued_index;
mod optional_index;
mod serialize;

use std::ops::Range;

pub use merge::merge_column_index;
pub use optional_index::{OptionalIndex, Set};
pub use serialize::{open_column_index, serialize_column_index, SerializableColumnIndex};

use crate::column_index::multivalued_index::MultiValueIndex;
use crate::{Cardinality, DocId, RowId};

#[derive(Clone, Debug)]
pub enum ColumnIndex {
    Empty {
        num_docs: u32,
    },
    Full,
    Optional(OptionalIndex),
    /// In addition, at index num_rows, an extra value is added
    /// containing the overal number of values.
    Multivalued(MultiValueIndex),
}

impl From<OptionalIndex> for ColumnIndex {
    fn from(optional_index: OptionalIndex) -> ColumnIndex {
        ColumnIndex::Optional(optional_index)
    }
}

impl From<MultiValueIndex> for ColumnIndex {
    fn from(multi_value_index: MultiValueIndex) -> ColumnIndex {
        ColumnIndex::Multivalued(multi_value_index)
    }
}

impl ColumnIndex {
    /// Returns the cardinality of the column index.
    ///
    /// By convention, if the column contains no docs, we consider that it is
    /// full.
    #[inline]
    pub fn get_cardinality(&self) -> Cardinality {
        match self {
            ColumnIndex::Empty { num_docs: 0 } | ColumnIndex::Full => Cardinality::Full,
            ColumnIndex::Empty { .. } => Cardinality::Optional,
            ColumnIndex::Optional(_) => Cardinality::Optional,
            ColumnIndex::Multivalued(_) => Cardinality::Multivalued,
        }
    }

    /// Returns true if and only if there are at least one value associated to the row.
    pub fn has_value(&self, doc_id: DocId) -> bool {
        match self {
            ColumnIndex::Empty { .. } => false,
            ColumnIndex::Full => true,
            ColumnIndex::Optional(optional_index) => optional_index.contains(doc_id),
            ColumnIndex::Multivalued(multivalued_index) => {
                !multivalued_index.range(doc_id).is_empty()
            }
        }
    }

    pub fn value_row_ids(&self, doc_id: DocId) -> Range<RowId> {
        match self {
            ColumnIndex::Empty { .. } => 0..0,
            ColumnIndex::Full => doc_id..doc_id + 1,
            ColumnIndex::Optional(optional_index) => {
                if let Some(val) = optional_index.rank_if_exists(doc_id) {
                    val..val + 1
                } else {
                    0..0
                }
            }
            ColumnIndex::Multivalued(multivalued_index) => multivalued_index.range(doc_id),
        }
    }

    /// Translates a block of docis to row_ids.
    ///
    /// returns the row_ids and the matching docids on the same index
    /// e.g.
    /// DocId In:  [0, 5, 6]
    /// DocId Out: [0, 0, 6, 6]
    /// RowId Out: [0, 1, 2, 3]
    #[inline]
    pub fn docids_to_rowids(
        &self,
        doc_ids: &[DocId],
        doc_ids_out: &mut Vec<DocId>,
        row_ids: &mut Vec<RowId>,
    ) {
        match self {
            ColumnIndex::Empty { .. } => {}
            ColumnIndex::Full => {
                doc_ids_out.extend_from_slice(doc_ids);
                row_ids.extend_from_slice(doc_ids);
            }
            ColumnIndex::Optional(optional_index) => {
                for doc_id in doc_ids {
                    if let Some(row_id) = optional_index.rank_if_exists(*doc_id) {
                        doc_ids_out.push(*doc_id);
                        row_ids.push(row_id);
                    }
                }
            }
            ColumnIndex::Multivalued(multivalued_index) => {
                for doc_id in doc_ids {
                    for row_id in multivalued_index.range(*doc_id) {
                        doc_ids_out.push(*doc_id);
                        row_ids.push(row_id);
                    }
                }
            }
        }
    }

    pub fn docid_range_to_rowids(&self, doc_id_range: Range<DocId>) -> Range<RowId> {
        match self {
            ColumnIndex::Empty { .. } => 0..0,
            ColumnIndex::Full => doc_id_range,
            ColumnIndex::Optional(optional_index) => {
                let row_start = optional_index.rank(doc_id_range.start);
                let row_end = optional_index.rank(doc_id_range.end);
                row_start..row_end
            }
            ColumnIndex::Multivalued(multivalued_index) => {
                let end_docid = doc_id_range.end.min(multivalued_index.num_docs() - 1) + 1;
                let start_docid = doc_id_range.start.min(end_docid);

                let row_start = multivalued_index.start_index_column.get_val(start_docid);
                let row_end = multivalued_index.start_index_column.get_val(end_docid);

                row_start..row_end
            }
        }
    }

    pub fn select_batch_in_place(&self, doc_id_start: DocId, rank_ids: &mut Vec<RowId>) {
        match self {
            ColumnIndex::Empty { .. } => {
                rank_ids.clear();
            }
            ColumnIndex::Full => {
                // No need to do anything:
                // value_idx and row_idx are the same.
            }
            ColumnIndex::Optional(optional_index) => {
                optional_index.select_batch(&mut rank_ids[..]);
            }
            ColumnIndex::Multivalued(multivalued_index) => {
                multivalued_index.select_batch_in_place(doc_id_start, rank_ids)
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::{Cardinality, ColumnIndex};

    #[test]
    fn test_column_index_get_cardinality() {
        assert_eq!(
            ColumnIndex::Empty { num_docs: 0 }.get_cardinality(),
            Cardinality::Full
        );
        assert_eq!(ColumnIndex::Full.get_cardinality(), Cardinality::Full);
        assert_eq!(
            ColumnIndex::Empty { num_docs: 1 }.get_cardinality(),
            Cardinality::Optional
        );
    }
}