elasticlunr/lang/
common.rs

1use crate::pipeline::PipelineFn;
2use regex::Regex;
3use std::collections::HashSet;
4
/// Pipeline stage that holds a named set of stop words.
///
/// The `PipelineFn` implementation for this type drops any token that is an
/// exact member of `stop_words` and lets every other token through.
#[derive(Debug, Clone)]
pub struct StopWordFilter {
    /// Name reported by this pipeline stage.
    name: String,
    /// Owned copies of the words to reject; lookups are exact string matches.
    stop_words: HashSet<String>,
}
10
11impl StopWordFilter {
12    pub fn new(name: &str, stop_words: &[&str]) -> Self {
13        Self {
14            name: name.into(),
15            stop_words: stop_words.iter().map(|s| s.to_string()).collect(),
16        }
17    }
18}
19
20impl PipelineFn for StopWordFilter {
21    fn name(&self) -> String {
22        self.name.clone()
23    }
24
25    fn filter(&self, token: String) -> Option<String> {
26        if self.stop_words.contains(&token) {
27            None
28        } else {
29            Some(token)
30        }
31    }
32}
33
34#[derive(Clone)]
35pub struct RegexTrimmer {
36    name: String,
37    trimmer: Regex,
38}
39
40impl RegexTrimmer {
41    pub fn new(name: &str, word_chars: &str) -> Self {
42        let name = name.into();
43        let trimmer = Regex::new(&format!("^[^{0}]+|[^{0}]+$", word_chars)).unwrap();
44        Self { name, trimmer }
45    }
46}
47
48impl PipelineFn for RegexTrimmer {
49    fn name(&self) -> String {
50        self.name.clone()
51    }
52
53    fn filter(&self, token: String) -> Option<String> {
54        let result = self.trimmer.replace_all(&token, "");
55        if result.is_empty() {
56            None
57        } else if result == token {
58            Some(token)
59        } else {
60            Some(result.into())
61        }
62    }
63}
64
/// Pipeline stage that pairs a name with a `rust_stemmers::Stemmer`.
///
/// Only compiled when the `rust-stemmers` feature is enabled.
#[cfg(feature = "rust-stemmers")]
pub struct RustStemmer {
    /// Name reported by this pipeline stage.
    name: String,
    /// Stemmer applied to each token by the `PipelineFn` implementation.
    stemmer: rust_stemmers::Stemmer,
}
70
#[cfg(feature = "rust-stemmers")]
impl RustStemmer {
    /// Builds a stage called `name` around a stemmer created for `algo`.
    pub fn new(name: &str, algo: rust_stemmers::Algorithm) -> Self {
        let stemmer = rust_stemmers::Stemmer::create(algo);
        let name = name.to_owned();
        Self { name, stemmer }
    }
}
80
#[cfg(feature = "rust-stemmers")]
impl PipelineFn for RustStemmer {
    /// Returns an owned copy of the name this stage was created with.
    fn name(&self) -> String {
        self.name.to_owned()
    }

    /// Runs the wrapped stemmer over `token`.
    ///
    /// Returns `None` when the stem is empty, the original `String` when the
    /// stem equals the input, and a newly owned `String` holding the stem
    /// otherwise.
    fn filter(&self, token: String) -> Option<String> {
        let stemmed = self.stemmer.stem(&token);
        if stemmed.is_empty() {
            return None;
        }
        // Hand back the caller's allocation when stemming changed nothing.
        Some(if stemmed == token {
            token
        } else {
            stemmed.into_owned()
        })
    }
}