// oca_rs/repositories/oca_bundle_fts_repo.rs

use std::str::FromStr;

use said::SelfAddressingIdentifier;

use crate::facade::Connection;

/// One row of the `oca_bundle_fts` full-text-search table.
///
/// Each field corresponds to the column of the same name written by
/// `OCABundleFTSRepo::insert`.
#[derive(Debug)]
pub struct OCABundleFTSRecord {
    /// Bundle name; searchable via the `name` column.
    pub name: String,
    /// Bundle description; searchable via the `description` column.
    pub description: String,
    /// Language code of `name`/`description`; the constructor fills it with
    /// the ISO 639-3 code returned by `isolang::Language::to_639_3`.
    pub language_code: String,
    /// Identifier (SAID) of the bundle, kept in its string form.
    pub oca_bundle_said: String,
}

impl OCABundleFTSRecord {
    pub fn new(
        oca_bundle_said: String,
        name: String,
        description: String,
        language: isolang::Language,
    ) -> Self {
        Self {
            name,
            description,
            language_code: isolang::Language::to_639_3(&language).to_string(),
            oca_bundle_said,
        }
    }
}

/// Repository over the `oca_bundle_fts` FTS5 virtual table, used to index and
/// search bundle names and descriptions.
pub struct OCABundleFTSRepo {
    /// Database handle every statement of this repository is executed on.
    connection: Connection,
}

impl OCABundleFTSRepo {
    pub fn new(connection: Connection) -> Self {
        let create_table_query = r#"
        CREATE VIRTUAL TABLE IF NOT EXISTS oca_bundle_fts
        USING FTS5(
            name,
            description,
            language_code,
            oca_bundle_said UNINDEXED,
            tokenize="trigram"
        )"#;
        connection.execute(create_table_query, ()).unwrap();

        Self { connection }
    }

    pub fn insert(&self, model: OCABundleFTSRecord) {
        let query = r#"
        INSERT INTO oca_bundle_fts
        (rowid, name, description, language_code, oca_bundle_said)
        VALUES (
            (
                SELECT rowid FROM oca_bundle_fts
                WHERE oca_bundle_said = ?4 AND language_code = ?3
                LIMIT 1
            ), ?1, ?2, ?3, ?4
        )"#;
        let _ = self.connection.execute(
            query,
            [
                &model.name,
                &model.description,
                &model.language_code,
                &model.oca_bundle_said,
            ],
        );
    }

    /// Full-text search over the `name` and `description` columns of
    /// `oca_bundle_fts`.
    ///
    /// # Parameters
    ///
    /// * `language` – when given, the FTS5 expression is written as
    ///   "match AND language" OR "match NOT language". Together with the
    ///   weight of `100.0` on `language_code` in `bm25` this presumably ranks
    ///   hits in the requested language first (confirm against FTS5 docs).
    /// * `meta_query` – text to look for; inserted verbatim into the FTS5
    ///   MATCH expression.
    /// * `limit` – maximum number of records per page.
    /// * `page` – 1-based page number; `0` is treated like `1`.
    ///
    /// # Returns
    ///
    /// The records of the requested page (one per bundle SAID, ordered by
    /// rank) plus the `total` reported by the query and the `page` that was
    /// passed in.
    ///
    /// # Panics
    ///
    /// Panics if the connection mutex is poisoned, if the statement cannot be
    /// prepared or executed (for example because `meta_query` is not a valid
    /// FTS5 expression), or if a stored `oca_bundle_said` is not a valid SAID.
    pub fn search(
        &self,
        language: Option<isolang::Language>,
        meta_query: String,
        limit: usize,
        page: usize,
    ) -> SearchResult {
        // Pages are 1-based. Saturating arithmetic keeps `page == 0` (and
        // absurdly large inputs) from underflowing/overflowing.
        let offset = page.saturating_sub(1).saturating_mul(limit);

        // NOTE(review): `meta_query` is spliced into the FTS5 expression
        // unescaped, so callers can inject FTS5 operators or trigger syntax
        // errors. Quoting it would change matching semantics, so it is only
        // flagged here.
        let query = match language {
            Some(lang) => {
                let lang_code = lang.to_639_3();
                format!("({{name description}}:{meta_query:} AND language_code:{lang_code:}) OR ({{name description}}:{meta_query:} NOT language_code:{lang_code:})")
            }
            None => format!("{{name description}}:{meta_query:}"),
        };

        // The `count` sub-query is LEFT JOINed with the paged `results`, so a
        // row carrying `total` comes back even when the requested page is
        // empty (its result columns are then NULL).
        let sql_query = r#"
        SELECT results.*, count.total
        FROM
        (
            SELECT COUNT(*) OVER() AS total
            FROM (
                SELECT *
                FROM oca_bundle_fts
                WHERE oca_bundle_fts MATCH ?1
            ) AS inner_query
            GROUP BY oca_bundle_said
        ) AS count
        LEFT JOIN
        (
            SELECT *, COUNT(*) OVER()
            FROM (
                SELECT *,
                    bm25(oca_bundle_fts, 1.0, 1.0, 100.0) as rank,
                    snippet(oca_bundle_fts, -1, '<mark>', '</mark>', '...', 64)
                FROM oca_bundle_fts
                WHERE oca_bundle_fts MATCH ?1
                ORDER BY rank
            ) AS subquery
            GROUP BY oca_bundle_said
            ORDER BY rank
            LIMIT ?2 OFFSET ?3
        ) AS results
        ON true
        GROUP BY oca_bundle_said
        ORDER BY rank
        "#;

        /// One row of the result set; result columns are optional because of
        /// the LEFT JOIN.
        struct Record {
            pub name: Option<String>,
            pub description: Option<String>,
            pub oca_bundle_said: Option<String>,
            pub rank: Option<f32>,
            pub total: i32,
            pub snippet: Option<String>,
        }

        impl Record {
            /// Determines which field the snippet was taken from.
            ///
            /// Name and description are laid out as `meta_overlay:<text>`
            /// lines, and the label before the colon of the first line that
            /// the snippet matches is returned. Returns an empty string when
            /// nothing matches or a needed column is missing.
            fn get_scope(&self) -> String {
                let (snippet, name, description) = match (
                    self.snippet.as_deref(),
                    self.name.as_deref(),
                    self.description.as_deref(),
                ) {
                    (Some(snippet), Some(name), Some(description)) => (snippet, name, description),
                    _ => return String::new(),
                };

                let plain = snippet.replace("<mark>", "").replace("</mark>", "");

                // Escape every piece so that regex metacharacters in the
                // indexed text are matched literally instead of breaking (or
                // silently changing) the pattern.
                let mut parts: Vec<String> = plain.split("...").map(regex::escape).collect();

                // A leading/trailing "..." is the snippet's ellipsis marker:
                // the original text continues there, so allow any prefix or
                // suffix on that side.
                if parts.first().map_or(false, |part| part.is_empty()) {
                    parts.remove(0);
                    if let Some(first) = parts.first_mut() {
                        first.insert_str(0, ".*");
                    }
                }
                if parts.last().map_or(false, |part| part.is_empty()) {
                    parts.pop();
                    if let Some(last) = parts.last_mut() {
                        last.push_str(".*");
                    }
                }

                // Any remaining "..." was part of the text itself.
                let pattern = format!("(?m)^([^:]+):{}$", parts.join(r"\.\.\."));
                let re = match regex::Regex::new(&pattern) {
                    Ok(re) => re,
                    Err(_) => return String::new(),
                };

                let hay = format!("meta_overlay:{name}\nmeta_overlay:{description}\n");
                re.captures(&hay)
                    .and_then(|caps| caps.get(1))
                    .map(|label| label.as_str().to_string())
                    .unwrap_or_default()
            }
        }

        let connection = self.connection.connection.lock().unwrap();
        let mut statement = connection.prepare(sql_query).unwrap();

        // Column layout: 0 name, 1 description, 2 language_code,
        // 3 oca_bundle_said, 4 rank, 5 snippet, 6 window count, 7 total.
        let rows = statement
            .query_map(
                [query, limit.to_string(), offset.to_string()],
                |row| {
                    Ok(Record {
                        name: row.get(0)?,
                        description: row.get(1)?,
                        oca_bundle_said: row.get(3)?,
                        rank: row.get(4)?,
                        total: row.get(7)?,
                        snippet: row.get(5)?,
                    })
                },
            )
            .unwrap();

        let mut records = vec![];
        let mut total: usize = 0;

        for row in rows {
            let record = row.unwrap();
            if total == 0 {
                // A negative count would be a query bug; fall back to zero.
                total = usize::try_from(record.total).unwrap_or(0);
            }

            // Rows without a SAID only carry the total (empty page).
            let said = match record.oca_bundle_said.as_deref() {
                Some(said) => said,
                None => continue,
            };
            // TODO: report an invalid stored SAID as an error instead of panicking.
            let oca_bundle_said = SelfAddressingIdentifier::from_str(said).unwrap();
            let scope = record.get_scope();

            records.push(SearchRecord {
                oca_bundle_said,
                metadata: SearchRecordMetadata {
                    phrase: record.snippet.unwrap_or_default(),
                    scope,
                    // The rank is made non-negative for the public score.
                    score: record.rank.map_or(0.0, f32::abs),
                },
            });
        }

        SearchResult {
            records,
            metadata: SearchMetadata { total, page },
        }
    }
}

/// Outcome of `OCABundleFTSRepo::search`: one page of hits plus paging
/// information.
#[derive(Debug)]
pub struct SearchResult {
    /// Hits of the requested page, in the order returned by the query.
    pub records: Vec<SearchRecord>,
    /// Total count and page number belonging to this result.
    pub metadata: SearchMetadata,
}

/// A single search hit.
#[derive(Debug)]
pub struct SearchRecord {
    /// Identifier of the matching bundle, parsed from the stored string.
    pub oca_bundle_said: SelfAddressingIdentifier,
    /// Details about how the bundle matched the query.
    pub metadata: SearchRecordMetadata,
}

/// Match details attached to a `SearchRecord`.
#[derive(Debug)]
pub struct SearchRecordMetadata {
    /// Snippet produced by the search query, with the matched text wrapped in
    /// `<mark>`/`</mark>` and `...` used as the ellipsis text.
    pub phrase: String,
    /// Label of the field the snippet was found in, or an empty string when it
    /// could not be determined.
    pub scope: String,
    /// Absolute value of the `bm25` rank computed by the search query.
    pub score: f32,
}

/// Paging information of a `SearchResult`.
#[derive(Debug)]
pub struct SearchMetadata {
    /// Value of the `total` column reported by the search query (a count over
    /// the matches grouped by bundle SAID).
    pub total: usize,
    /// Page number exactly as it was passed to `search`.
    pub page: usize,
}