oca_rs/repositories/
oca_bundle_fts_repo.rs

1use std::str::FromStr;
2
3use said::SelfAddressingIdentifier;
4
5use crate::facade::Connection;
6
/// One entry of the `oca_bundle_fts` full-text-search table: the searchable
/// metadata of an OCA bundle in a single language.
#[derive(Debug)]
pub struct OCABundleFTSRecord {
    /// Bundle name; written to the `name` column.
    pub name: String,
    /// Bundle description; written to the `description` column.
    pub description: String,
    /// Language of the entry as a code string (`new` stores the ISO 639-3 code).
    pub language_code: String,
    /// Identifier (SAID) of the bundle this entry belongs to, in string form.
    pub oca_bundle_said: String,
}
14
15impl OCABundleFTSRecord {
16    pub fn new(
17        oca_bundle_said: String,
18        name: String,
19        description: String,
20        language: isolang::Language,
21    ) -> Self {
22        Self {
23            name,
24            description,
25            language_code: isolang::Language::to_639_3(&language).to_string(),
26            oca_bundle_said,
27        }
28    }
29}
30
31pub struct OCABundleFTSRepo {
32    connection: Connection,
33}
34
35impl OCABundleFTSRepo {
36    pub fn new(connection: Connection) -> Self {
37        let create_table_query = r#"
38        CREATE VIRTUAL TABLE IF NOT EXISTS oca_bundle_fts
39        USING FTS5(
40            name,
41            description,
42            language_code,
43            oca_bundle_said UNINDEXED,
44            tokenize="trigram"
45        )"#;
46        connection.execute(create_table_query, ()).unwrap();
47
48        Self { connection }
49    }
50
51    pub fn insert(&self, model: OCABundleFTSRecord) {
52        let query = r#"
53        INSERT INTO oca_bundle_fts
54        (rowid, name, description, language_code, oca_bundle_said)
55        VALUES (
56            (
57                SELECT rowid FROM oca_bundle_fts
58                WHERE oca_bundle_said = ?4 AND language_code = ?3
59                LIMIT 1
60            ), ?1, ?2, ?3, ?4
61        )"#;
62        let _ = self.connection.execute(
63            query,
64            [
65                &model.name,
66                &model.description,
67                &model.language_code,
68                &model.oca_bundle_said,
69            ],
70        );
71    }
72
73    pub fn search(
74        &self,
75        language: Option<isolang::Language>,
76        meta_query: String,
77        limit: usize,
78        page: usize,
79    ) -> SearchResult {
80        let offset = (page - 1) * limit;
81        let query = match language {
82            Some(lang) => {
83                let lang_code = isolang::Language::to_639_3(&lang).to_string();
84                format!("({{name description}}:{meta_query:} AND language_code:{lang_code:}) OR ({{name description}}:{meta_query:} NOT language_code:{lang_code:})")
85            }
86            None => format!("{{name description}}:{meta_query:}"),
87        };
88
89        let sql_query = r#"
90        SELECT results.*, count.total
91        FROM
92        (
93            SELECT COUNT(*) OVER() AS total
94            FROM (
95                SELECT *
96                FROM oca_bundle_fts
97                WHERE oca_bundle_fts MATCH ?1
98            ) AS inner_query
99            GROUP BY oca_bundle_said
100        ) AS count
101        LEFT JOIN
102        (
103            SELECT *, COUNT(*) OVER()
104            FROM (
105                SELECT *,
106                    bm25(oca_bundle_fts, 1.0, 1.0, 100.0) as rank,
107                    snippet(oca_bundle_fts, -1, '<mark>', '</mark>', '...', 64)
108                FROM oca_bundle_fts
109                WHERE oca_bundle_fts MATCH ?1
110                ORDER BY rank
111            ) AS subquery
112            GROUP BY oca_bundle_said
113            ORDER BY rank
114            LIMIT ?2 OFFSET ?3
115        ) AS results
116        ON true
117        GROUP BY oca_bundle_said
118        ORDER BY rank
119        "#;
120
121        struct Record {
122            pub name: Option<String>,
123            pub description: Option<String>,
124            pub oca_bundle_said: Option<String>,
125            pub rank: Option<f32>,
126            pub total: i32,
127            pub snippet: Option<String>,
128        }
129
130        impl Record {
131            fn get_scope(&self) -> String {
132                let mut snippet_regex = self.snippet.clone().unwrap();
133                snippet_regex = snippet_regex.replace("<mark>", "");
134                snippet_regex = snippet_regex.replace("</mark>", "");
135                let mut v: Vec<String> =
136                    snippet_regex.split("...").map(|x| x.to_string()).collect();
137                if v.first().unwrap().is_empty() {
138                    v.remove(0);
139                    if let Some(x) = v.first_mut() {
140                        *x = format!(".*{}", x);
141                    }
142                }
143                if v.last().unwrap().is_empty() {
144                    v.pop();
145                    if let Some(x) = v.last_mut() {
146                        *x = format!("{}.*", x);
147                    }
148                }
149                snippet_regex = v.join("...");
150                let re = regex::Regex::new(&format!("(?m)^([^:]+):{snippet_regex:}$")).unwrap();
151                let hay = format!(
152                    "\
153meta_overlay:{}
154meta_overlay:{}
155",
156                    self.name.clone().unwrap(),
157                    self.description.clone().unwrap()
158                );
159                let mut scope = String::new();
160                if let Some((_, [s])) = re.captures_iter(&hay).map(|c| c.extract()).next() {
161                    scope = s.to_string();
162                }
163                scope
164            }
165        }
166
167        let connection = self.connection.connection.lock().unwrap();
168        let mut statement = connection.prepare(sql_query).unwrap();
169
170        let rows = statement
171            .query_map(
172                [query.clone(), limit.to_string(), offset.to_string()],
173                |row| {
174                    Ok(Record {
175                        name: row.get(0).unwrap(),
176                        description: row.get(1).unwrap(),
177                        oca_bundle_said: row.get(3).unwrap(),
178                        rank: row.get(4).unwrap(),
179                        total: row.get(7).unwrap(),
180                        snippet: row.get(5).unwrap(),
181                    })
182                },
183            )
184            .unwrap();
185
186        let mut records = vec![];
187        let mut total: usize = 0;
188
189        for row in rows {
190            let record = row.unwrap();
191            if total == 0 {
192                total = record.total as usize;
193            }
194            if record.oca_bundle_said.is_none() {
195                continue;
196            }
197            let metdata = SearchRecordMetadata {
198                phrase: record.snippet.clone().unwrap(),
199                scope: record.get_scope().clone(),
200                score: record.rank.unwrap().abs(),
201            };
202
203            records.push(SearchRecord {
204                oca_bundle_said: SelfAddressingIdentifier::from_str(
205                    &record.oca_bundle_said.unwrap(),
206                )
207                .unwrap(), //TODO
208                metadata: metdata,
209            });
210        }
211
212        SearchResult {
213            records,
214            metadata: SearchMetadata { total, page },
215        }
216    }
217}
218
219#[derive(Debug)]
220pub struct SearchResult {
221    pub records: Vec<SearchRecord>,
222    pub metadata: SearchMetadata,
223}
224
225#[derive(Debug)]
226pub struct SearchRecord {
227    pub oca_bundle_said: SelfAddressingIdentifier,
228    pub metadata: SearchRecordMetadata,
229}
230
/// Details describing why and how well a bundle matched a search.
#[derive(Debug)]
pub struct SearchRecordMetadata {
    /// Matched text fragment; the search fills it with the FTS snippet, in
    /// which matches are wrapped in `<mark>`…`</mark>`.
    pub phrase: String,
    /// Label of the place where the phrase was found; empty when the search
    /// could not determine it.
    pub scope: String,
    /// Relevance score; the search stores the absolute value of the rank.
    pub score: f32,
}
237
/// Paging information attached to a [`SearchResult`].
#[derive(Debug)]
pub struct SearchMetadata {
    /// Total number of hits reported by the search query.
    pub total: usize,
    /// Page number the caller asked for.
    pub page: usize,
}