gix_glob/wildmatch.rs
1use bitflags::bitflags;
2bitflags! {
3 /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
4 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
5 #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
6 pub struct Mode: u8 {
7 /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
8 const NO_MATCH_SLASH_LITERAL = 1 << 0;
9 /// Match case insensitively for ascii characters only.
10 const IGNORE_CASE = 1 << 1;
11 }
12}
13
14pub(crate) mod function {
15 use bstr::{BStr, ByteSlice};
16
17 use crate::wildmatch::Mode;
18
19 #[derive(Eq, PartialEq)]
20 enum Result {
21 Match,
22 NoMatch,
23 AbortAll,
24 AbortToStarStar,
25 RecursionLimitReached,
26 }
27
28 const STAR: u8 = b'*';
29 const BACKSLASH: u8 = b'\\';
30 const SLASH: u8 = b'/';
31 const BRACKET_OPEN: u8 = b'[';
32 const BRACKET_CLOSE: u8 = b']';
33 const COLON: u8 = b':';
34
35 const NEGATE_CLASS: u8 = b'!';
36 /// Setting this limit to something reasonable means less compute time spent on unnecessarily complex patterns, or malicious ones.
37 const RECURSION_LIMIT: usize = 64;
38
39 fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode, depth: usize) -> Result {
40 if depth == RECURSION_LIMIT {
41 return RecursionLimitReached;
42 }
43 use self::Result::*;
44 let possibly_lowercase = |c: &u8| {
45 if mode.contains(Mode::IGNORE_CASE) {
46 c.to_ascii_lowercase()
47 } else {
48 *c
49 }
50 };
51 let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable();
52 let mut t = text.iter().map(possibly_lowercase).enumerate();
53
54 while let Some((mut p_idx, mut p_ch)) = p.next() {
55 let (mut t_idx, mut t_ch) = match t.next() {
56 Some(c) => c,
57 None if p_ch != STAR => return AbortAll,
58 None => (text.len(), 0),
59 };
60
61 if p_ch == BACKSLASH {
62 match p.next() {
63 Some((_p_idx, p_ch)) => {
64 if p_ch != t_ch {
65 return NoMatch;
66 } else {
67 continue;
68 }
69 }
70 None => return NoMatch,
71 };
72 }
73 match p_ch {
74 b'?' => {
75 if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
76 return NoMatch;
77 } else {
78 continue;
79 }
80 }
81 STAR => {
82 let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL);
83 match p.next() {
84 Some((next_p_idx, next_p_ch)) => {
85 let next;
86 if next_p_ch == STAR {
87 let leading_slash_idx = p_idx.checked_sub(1);
88 while p.next_if(|(_, c)| *c == STAR).is_some() {}
89 next = p.next();
90 if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) {
91 match_slash = true;
92 } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH)
93 && next.map_or(true, |(_, c)| {
94 c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH))
95 })
96 {
97 if next.map_or(NoMatch, |(idx, _)| {
98 match_recursive(
99 pattern[idx + 1..].as_bstr(),
100 text[t_idx..].as_bstr(),
101 mode,
102 depth + 1,
103 )
104 }) == Match
105 {
106 return Match;
107 }
108 match_slash = true;
109 } else {
110 match_slash = false;
111 }
112 } else {
113 next = Some((next_p_idx, next_p_ch));
114 }
115
116 match next {
117 None => {
118 return if !match_slash && text[t_idx..].contains(&SLASH) {
119 NoMatch
120 } else {
121 Match
122 };
123 }
124 Some((next_p_idx, next_p_ch)) => {
125 p_idx = next_p_idx;
126 p_ch = next_p_ch;
127 if !match_slash && p_ch == SLASH {
128 match text[t_idx..].find_byte(SLASH) {
129 Some(distance_to_slash) => {
130 for _ in t.by_ref().take(distance_to_slash) {}
131 continue;
132 }
133 None => return NoMatch,
134 }
135 }
136 }
137 }
138 }
139 None => {
140 return if !match_slash && text[t_idx..].contains(&SLASH) {
141 NoMatch
142 } else {
143 Match
144 }
145 }
146 }
147
148 return loop {
149 if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) {
150 loop {
151 if (!match_slash && t_ch == SLASH) || t_ch == p_ch {
152 break;
153 }
154 match t.next() {
155 Some(t) => {
156 t_idx = t.0;
157 t_ch = t.1;
158 }
159 None => break,
160 };
161 }
162 if t_ch != p_ch {
163 return NoMatch;
164 }
165 }
166 let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode, depth + 1);
167 if res != NoMatch {
168 if !match_slash || res != AbortToStarStar {
169 return res;
170 }
171 } else if !match_slash && t_ch == SLASH {
172 return AbortToStarStar;
173 }
174 match t.next() {
175 Some(t) => {
176 t_idx = t.0;
177 t_ch = t.1;
178 }
179 None => break AbortAll,
180 };
181 };
182 }
183 BRACKET_OPEN => {
184 match p.next() {
185 Some(t) => {
186 p_idx = t.0;
187 p_ch = t.1;
188 }
189 None => return AbortAll,
190 };
191
192 if p_ch == b'^' {
193 p_ch = NEGATE_CLASS;
194 }
195 let negated = p_ch == NEGATE_CLASS;
196 let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) };
197 let mut prev_p_ch = 0;
198 let mut matched = false;
199 let mut p_idx_ofs = 0;
200 loop {
201 let Some((mut p_idx, mut p_ch)) = next else {
202 return AbortAll;
203 };
204 p_idx += p_idx_ofs;
205 match p_ch {
206 BACKSLASH => match p.next() {
207 Some((_, p_ch)) => {
208 if p_ch == t_ch {
209 matched = true;
210 } else {
211 prev_p_ch = p_ch;
212 }
213 }
214 None => return AbortAll,
215 },
216 b'-' if prev_p_ch != 0
217 && p.peek().is_some()
218 && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) =>
219 {
220 p_ch = p.next().expect("peeked").1;
221 if p_ch == BACKSLASH {
222 p_ch = match p.next() {
223 Some(t) => t.1,
224 None => return AbortAll,
225 };
226 }
227 if t_ch <= p_ch && t_ch >= prev_p_ch {
228 matched = true;
229 } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() {
230 let t_ch_upper = t_ch.to_ascii_uppercase();
231 if (t_ch_upper <= p_ch.to_ascii_uppercase()
232 && t_ch_upper >= prev_p_ch.to_ascii_uppercase())
233 || (t_ch_upper <= prev_p_ch.to_ascii_uppercase()
234 && t_ch_upper >= p_ch.to_ascii_uppercase())
235 {
236 matched = true;
237 }
238 }
239 prev_p_ch = 0;
240 }
241 BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => {
242 p.next();
243 while p.peek().is_some_and(|t| t.1 != BRACKET_CLOSE) {
244 p.next();
245 }
246 let closing_bracket_idx = match p.next() {
247 Some((idx, _)) => idx,
248 None => return AbortAll,
249 };
250 const BRACKET__COLON__BRACKET: usize = 3;
251 if closing_bracket_idx.saturating_sub(p_idx) < BRACKET__COLON__BRACKET
252 || pattern[closing_bracket_idx - 1] != COLON
253 {
254 if t_ch == BRACKET_OPEN {
255 matched = true;
256 }
257 if p_idx > pattern.len() {
258 return AbortAll;
259 }
260 p = pattern[p_idx..].iter().map(possibly_lowercase).enumerate().peekable();
261 p_idx_ofs += p_idx;
262 } else {
263 let class = &pattern.as_bytes()[p_idx + 2..closing_bracket_idx - 1];
264 match class {
265 b"alnum" => {
266 if t_ch.is_ascii_alphanumeric() {
267 matched = true;
268 }
269 }
270 b"alpha" => {
271 if t_ch.is_ascii_alphabetic() {
272 matched = true;
273 }
274 }
275 b"blank" => {
276 if t_ch.is_ascii_whitespace() {
277 matched = true;
278 }
279 }
280 b"cntrl" => {
281 if t_ch.is_ascii_control() {
282 matched = true;
283 }
284 }
285 b"digit" => {
286 if t_ch.is_ascii_digit() {
287 matched = true;
288 }
289 }
290
291 b"graph" => {
292 if t_ch.is_ascii_graphic() {
293 matched = true;
294 }
295 }
296 b"lower" => {
297 if t_ch.is_ascii_lowercase() {
298 matched = true;
299 }
300 }
301 b"print" => {
302 if (0x20u8..=0x7e).contains(&t_ch) {
303 matched = true;
304 }
305 }
306 b"punct" => {
307 if t_ch.is_ascii_punctuation() {
308 matched = true;
309 }
310 }
311 b"space" => {
312 if t_ch == b' ' {
313 matched = true;
314 }
315 }
316 b"upper" => {
317 if t_ch.is_ascii_uppercase()
318 || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase()
319 {
320 matched = true;
321 }
322 }
323 b"xdigit" => {
324 if t_ch.is_ascii_hexdigit() {
325 matched = true;
326 }
327 }
328 _ => return AbortAll,
329 };
330 prev_p_ch = 0;
331 }
332 }
333 _ => {
334 prev_p_ch = p_ch;
335 if p_ch == t_ch {
336 matched = true;
337 }
338 }
339 };
340 next = p.next();
341 if let Some((_, BRACKET_CLOSE)) = next {
342 break;
343 }
344 }
345 if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
346 return NoMatch;
347 }
348 continue;
349 }
350 non_glob_ch => {
351 if non_glob_ch != t_ch {
352 return NoMatch;
353 } else {
354 continue;
355 }
356 }
357 }
358 }
359 t.next().map_or(Match, |_| NoMatch)
360 }
361
362 /// Employ pattern matching to see if `value` matches `pattern`.
363 ///
364 /// `mode` can be used to adjust the way the matching is performed.
365 pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool {
366 let res = match_recursive(pattern, value, mode, 0);
367 if res == Result::RecursionLimitReached {
368 gix_features::trace::error!("Recursion limit of {} reached for pattern '{pattern}'", RECURSION_LIMIT);
369 }
370 res == Result::Match
371 }
372}