cedar_policy_core/
fuzzy_match.rs

1/*
2 * Copyright Cedar Contributors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! This module provides the fuzzy matching utility used to make suggestions
18//! when encountering unknown values in entities, functions, etc.
19
20/// Fuzzy string matching using the Levenshtein distance algorithm
21pub fn fuzzy_search(key: &str, lst: &[impl AsRef<str>]) -> Option<String> {
22    fuzzy_search_limited(key, lst, None)
23}
24
25/// Fuzzy string matching using the Levenshtein distance algorithm, with an
26/// option to limit matching up to some distance.
27pub fn fuzzy_search_limited(
28    key: &str,
29    lst: &[impl AsRef<str>],
30    max_distance: Option<usize>,
31) -> Option<String> {
32    if key.is_empty() || lst.is_empty() {
33        None
34    } else {
35        let t = lst.iter().fold((usize::MAX, ""), |acc, word| {
36            let e = levenshtein_distance(key, word.as_ref());
37            if e < acc.0 {
38                (e, word.as_ref())
39            } else {
40                acc
41            }
42        });
43
44        if let Some(threshold) = max_distance {
45            (t.0 <= threshold).then_some(t.1.to_owned())
46        } else {
47            Some(t.1.to_owned())
48        }
49    }
50}
51
52fn levenshtein_distance(word1: &str, word2: &str) -> usize {
53    let w1 = word1.chars().collect::<Vec<_>>();
54    let w2 = word2.chars().collect::<Vec<_>>();
55    let word1_length = w1.len() + 1;
56    let word2_length = w2.len() + 1;
57    let mut matrix = vec![vec![0; word1_length]; word2_length];
58
59    // The access at 0 is safe because `word2_length` is at least 1.
60    // The access at `i` is safe because it stops before `word1_length`.
61    // PANIC SAFETY: See above.
62    #[allow(clippy::indexing_slicing)]
63    for i in 1..word1_length {
64        matrix[0][i] = i;
65    }
66    // PANIC SAFETY: Similar to above, but fixing the column index instead.
67    #[allow(clippy::indexing_slicing)]
68    #[allow(clippy::needless_range_loop)]
69    for j in 1..word2_length {
70        matrix[j][0] = j;
71    }
72
73    // `i` and `j` start at 1, so the accesses at `i - 1` and `j - 1` are safe.
74    // `i` and `j` stop before the length of the array in their respective
75    // dimensions, so accesses at `i` and `j` are safe.
76    // PANIC SAFETY: See above.
77    #[allow(clippy::indexing_slicing)]
78    for j in 1..word2_length {
79        for i in 1..word1_length {
80            let x: usize = if w1[i - 1] == w2[j - 1] {
81                matrix[j - 1][i - 1]
82            } else {
83                1 + std::cmp::min(
84                    std::cmp::min(matrix[j][i - 1], matrix[j - 1][i]),
85                    matrix[j - 1][i - 1],
86                )
87            };
88            matrix[j][i] = x;
89        }
90    }
91    // PANIC SAFETY: Accesses at one less than length in both dimensions. The length in both dimensions is non-zero.
92    #[allow(clippy::indexing_slicing)]
93    matrix[word2_length - 1][word1_length - 1]
94}
95
96#[cfg(test)]
97mod test {
98    use super::*;
99
100    ///the key differs by 1 letter from a word in words
101    #[test]
102    fn test_match1() {
103        let word1 = "user::Alice";
104        let words = vec!["User::Alice", "user::alice", "user", "alice"];
105        let x = fuzzy_search(word1, &words);
106        assert_eq!(x, Some("User::Alice".to_owned()));
107    }
108
109    ///the key differs by 1 letter from a word in words
110    #[test]
111    fn test_match2() {
112        let word1 = "princpal";
113        let words = vec![
114            "principal",
115            "Principal",
116            "principality",
117            "prince",
118            "principle",
119        ];
120        let x = fuzzy_search(word1, &words);
121        assert_eq!(x, Some("principal".to_owned()));
122    }
123
124    ///the word1 differs by two letters from a word in words
125    #[test]
126    fn test_match3() {
127        let word1 = "prncpal";
128        let words = vec![
129            "principal",
130            "Principal",
131            "principality",
132            "prince",
133            "principle",
134        ];
135        let x = fuzzy_search(word1, &words);
136        assert_eq!(x, Some("principal".to_owned()));
137    }
138
139    ///the word1 contains special characters like "
140    #[test]
141    fn test_match4() {
142        let word1 = "user::\"Alice\"";
143        let words = vec!["User::\"Alice\"", "user::\"alice\"", "user", "alice"];
144        let x = fuzzy_search(word1, &words);
145        assert_eq!(x, Some("User::\"Alice\"".to_owned()));
146    }
147
148    ///the word1 is the empty string
149    #[test]
150    fn test_match5() {
151        let word1 = "";
152        let words = vec!["User::\"Alice\"", "user::\"alice\"", "user", "alice"];
153        let x = fuzzy_search(word1, &words);
154        assert_eq!(x, None); //Some("user".to_owned()));
155    }
156
157    ///the words list contains duplicates
158    #[test]
159    fn test_match6() {
160        let word1 = "prncpal";
161        let words = vec![
162            "principal",
163            "Principal",
164            "principality",
165            "principal",
166            "prince",
167            "principle",
168            "principal",
169        ];
170        let x = fuzzy_search(word1, &words);
171        assert_eq!(x, Some("principal".to_owned()));
172    }
173
174    ///the word1 differs by a word in words only due to a special character (eg: ' instead of ")
175    #[test]
176    fn test_match7() {
177        let word1 = "User::\"Alice\"";
178        let words = vec!["User::\'Alice\'", "user::\"alice\"", "user", "alice"];
179        let x = fuzzy_search(word1, &words);
180        assert_eq!(x, Some("User::\'Alice\'".to_owned()));
181    }
182
183    #[test]
184    fn test_match_empty() {
185        let word1 = "user::Alice";
186        let words: Vec<&str> = Vec::new();
187        let x = fuzzy_search(word1, &words);
188        assert_eq!(x, None);
189    }
190}