gix_glob/
wildmatch.rs

1use bitflags::bitflags;
2bitflags! {
3    /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
4    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
5    #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
6    pub struct Mode: u8 {
7        /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
8        const NO_MATCH_SLASH_LITERAL = 1 << 0;
9        /// Match case insensitively for ascii characters only.
10        const IGNORE_CASE = 1 << 1;
11    }
12}
13
14pub(crate) mod function {
15    use bstr::{BStr, ByteSlice};
16
17    use crate::wildmatch::Mode;
18
19    #[derive(Eq, PartialEq)]
20    enum Result {
21        Match,
22        NoMatch,
23        AbortAll,
24        AbortToStarStar,
25        RecursionLimitReached,
26    }
27
28    const STAR: u8 = b'*';
29    const BACKSLASH: u8 = b'\\';
30    const SLASH: u8 = b'/';
31    const BRACKET_OPEN: u8 = b'[';
32    const BRACKET_CLOSE: u8 = b']';
33    const COLON: u8 = b':';
34
35    const NEGATE_CLASS: u8 = b'!';
36    /// Setting this limit to something reasonable means less compute time spent on unnecessarily complex patterns, or malicious ones.
37    const RECURSION_LIMIT: usize = 64;
38
39    fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode, depth: usize) -> Result {
40        if depth == RECURSION_LIMIT {
41            return RecursionLimitReached;
42        }
43        use self::Result::*;
44        let possibly_lowercase = |c: &u8| {
45            if mode.contains(Mode::IGNORE_CASE) {
46                c.to_ascii_lowercase()
47            } else {
48                *c
49            }
50        };
51        let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable();
52        let mut t = text.iter().map(possibly_lowercase).enumerate();
53
54        while let Some((mut p_idx, mut p_ch)) = p.next() {
55            let (mut t_idx, mut t_ch) = match t.next() {
56                Some(c) => c,
57                None if p_ch != STAR => return AbortAll,
58                None => (text.len(), 0),
59            };
60
61            if p_ch == BACKSLASH {
62                match p.next() {
63                    Some((_p_idx, p_ch)) => {
64                        if p_ch != t_ch {
65                            return NoMatch;
66                        } else {
67                            continue;
68                        }
69                    }
70                    None => return NoMatch,
71                };
72            }
73            match p_ch {
74                b'?' => {
75                    if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
76                        return NoMatch;
77                    } else {
78                        continue;
79                    }
80                }
81                STAR => {
82                    let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL);
83                    match p.next() {
84                        Some((next_p_idx, next_p_ch)) => {
85                            let next;
86                            if next_p_ch == STAR {
87                                let leading_slash_idx = p_idx.checked_sub(1);
88                                while p.next_if(|(_, c)| *c == STAR).is_some() {}
89                                next = p.next();
90                                if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) {
91                                    match_slash = true;
92                                } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH)
93                                    && next.map_or(true, |(_, c)| {
94                                        c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH))
95                                    })
96                                {
97                                    if next.map_or(NoMatch, |(idx, _)| {
98                                        match_recursive(
99                                            pattern[idx + 1..].as_bstr(),
100                                            text[t_idx..].as_bstr(),
101                                            mode,
102                                            depth + 1,
103                                        )
104                                    }) == Match
105                                    {
106                                        return Match;
107                                    }
108                                    match_slash = true;
109                                } else {
110                                    match_slash = false;
111                                }
112                            } else {
113                                next = Some((next_p_idx, next_p_ch));
114                            }
115
116                            match next {
117                                None => {
118                                    return if !match_slash && text[t_idx..].contains(&SLASH) {
119                                        NoMatch
120                                    } else {
121                                        Match
122                                    };
123                                }
124                                Some((next_p_idx, next_p_ch)) => {
125                                    p_idx = next_p_idx;
126                                    p_ch = next_p_ch;
127                                    if !match_slash && p_ch == SLASH {
128                                        match text[t_idx..].find_byte(SLASH) {
129                                            Some(distance_to_slash) => {
130                                                for _ in t.by_ref().take(distance_to_slash) {}
131                                                continue;
132                                            }
133                                            None => return NoMatch,
134                                        }
135                                    }
136                                }
137                            }
138                        }
139                        None => {
140                            return if !match_slash && text[t_idx..].contains(&SLASH) {
141                                NoMatch
142                            } else {
143                                Match
144                            }
145                        }
146                    }
147
148                    return loop {
149                        if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) {
150                            loop {
151                                if (!match_slash && t_ch == SLASH) || t_ch == p_ch {
152                                    break;
153                                }
154                                match t.next() {
155                                    Some(t) => {
156                                        t_idx = t.0;
157                                        t_ch = t.1;
158                                    }
159                                    None => break,
160                                };
161                            }
162                            if t_ch != p_ch {
163                                return NoMatch;
164                            }
165                        }
166                        let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode, depth + 1);
167                        if res != NoMatch {
168                            if !match_slash || res != AbortToStarStar {
169                                return res;
170                            }
171                        } else if !match_slash && t_ch == SLASH {
172                            return AbortToStarStar;
173                        }
174                        match t.next() {
175                            Some(t) => {
176                                t_idx = t.0;
177                                t_ch = t.1;
178                            }
179                            None => break AbortAll,
180                        };
181                    };
182                }
183                BRACKET_OPEN => {
184                    match p.next() {
185                        Some(t) => {
186                            p_idx = t.0;
187                            p_ch = t.1;
188                        }
189                        None => return AbortAll,
190                    };
191
192                    if p_ch == b'^' {
193                        p_ch = NEGATE_CLASS;
194                    }
195                    let negated = p_ch == NEGATE_CLASS;
196                    let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) };
197                    let mut prev_p_ch = 0;
198                    let mut matched = false;
199                    let mut p_idx_ofs = 0;
200                    loop {
201                        let Some((mut p_idx, mut p_ch)) = next else {
202                            return AbortAll;
203                        };
204                        p_idx += p_idx_ofs;
205                        match p_ch {
206                            BACKSLASH => match p.next() {
207                                Some((_, p_ch)) => {
208                                    if p_ch == t_ch {
209                                        matched = true;
210                                    } else {
211                                        prev_p_ch = p_ch;
212                                    }
213                                }
214                                None => return AbortAll,
215                            },
216                            b'-' if prev_p_ch != 0
217                                && p.peek().is_some()
218                                && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) =>
219                            {
220                                p_ch = p.next().expect("peeked").1;
221                                if p_ch == BACKSLASH {
222                                    p_ch = match p.next() {
223                                        Some(t) => t.1,
224                                        None => return AbortAll,
225                                    };
226                                }
227                                if t_ch <= p_ch && t_ch >= prev_p_ch {
228                                    matched = true;
229                                } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() {
230                                    let t_ch_upper = t_ch.to_ascii_uppercase();
231                                    if (t_ch_upper <= p_ch.to_ascii_uppercase()
232                                        && t_ch_upper >= prev_p_ch.to_ascii_uppercase())
233                                        || (t_ch_upper <= prev_p_ch.to_ascii_uppercase()
234                                            && t_ch_upper >= p_ch.to_ascii_uppercase())
235                                    {
236                                        matched = true;
237                                    }
238                                }
239                                prev_p_ch = 0;
240                            }
241                            BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => {
242                                p.next();
243                                while p.peek().is_some_and(|t| t.1 != BRACKET_CLOSE) {
244                                    p.next();
245                                }
246                                let closing_bracket_idx = match p.next() {
247                                    Some((idx, _)) => idx,
248                                    None => return AbortAll,
249                                };
250                                const BRACKET__COLON__BRACKET: usize = 3;
251                                if closing_bracket_idx.saturating_sub(p_idx) < BRACKET__COLON__BRACKET
252                                    || pattern[closing_bracket_idx - 1] != COLON
253                                {
254                                    if t_ch == BRACKET_OPEN {
255                                        matched = true;
256                                    }
257                                    if p_idx > pattern.len() {
258                                        return AbortAll;
259                                    }
260                                    p = pattern[p_idx..].iter().map(possibly_lowercase).enumerate().peekable();
261                                    p_idx_ofs += p_idx;
262                                } else {
263                                    let class = &pattern.as_bytes()[p_idx + 2..closing_bracket_idx - 1];
264                                    match class {
265                                        b"alnum" => {
266                                            if t_ch.is_ascii_alphanumeric() {
267                                                matched = true;
268                                            }
269                                        }
270                                        b"alpha" => {
271                                            if t_ch.is_ascii_alphabetic() {
272                                                matched = true;
273                                            }
274                                        }
275                                        b"blank" => {
276                                            if t_ch.is_ascii_whitespace() {
277                                                matched = true;
278                                            }
279                                        }
280                                        b"cntrl" => {
281                                            if t_ch.is_ascii_control() {
282                                                matched = true;
283                                            }
284                                        }
285                                        b"digit" => {
286                                            if t_ch.is_ascii_digit() {
287                                                matched = true;
288                                            }
289                                        }
290
291                                        b"graph" => {
292                                            if t_ch.is_ascii_graphic() {
293                                                matched = true;
294                                            }
295                                        }
296                                        b"lower" => {
297                                            if t_ch.is_ascii_lowercase() {
298                                                matched = true;
299                                            }
300                                        }
301                                        b"print" => {
302                                            if (0x20u8..=0x7e).contains(&t_ch) {
303                                                matched = true;
304                                            }
305                                        }
306                                        b"punct" => {
307                                            if t_ch.is_ascii_punctuation() {
308                                                matched = true;
309                                            }
310                                        }
311                                        b"space" => {
312                                            if t_ch == b' ' {
313                                                matched = true;
314                                            }
315                                        }
316                                        b"upper" => {
317                                            if t_ch.is_ascii_uppercase()
318                                                || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase()
319                                            {
320                                                matched = true;
321                                            }
322                                        }
323                                        b"xdigit" => {
324                                            if t_ch.is_ascii_hexdigit() {
325                                                matched = true;
326                                            }
327                                        }
328                                        _ => return AbortAll,
329                                    };
330                                    prev_p_ch = 0;
331                                }
332                            }
333                            _ => {
334                                prev_p_ch = p_ch;
335                                if p_ch == t_ch {
336                                    matched = true;
337                                }
338                            }
339                        };
340                        next = p.next();
341                        if let Some((_, BRACKET_CLOSE)) = next {
342                            break;
343                        }
344                    }
345                    if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
346                        return NoMatch;
347                    }
348                    continue;
349                }
350                non_glob_ch => {
351                    if non_glob_ch != t_ch {
352                        return NoMatch;
353                    } else {
354                        continue;
355                    }
356                }
357            }
358        }
359        t.next().map_or(Match, |_| NoMatch)
360    }
361
362    /// Employ pattern matching to see if `value` matches `pattern`.
363    ///
364    /// `mode` can be used to adjust the way the matching is performed.
365    pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool {
366        let res = match_recursive(pattern, value, mode, 0);
367        if res == Result::RecursionLimitReached {
368            gix_features::trace::error!("Recursion limit of {} reached for pattern '{pattern}'", RECURSION_LIMIT);
369        }
370        res == Result::Match
371    }
372}