gix_glob/
pattern.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
use std::fmt;

use bitflags::bitflags;
use bstr::{BStr, ByteSlice};

use crate::{pattern, wildmatch, Pattern};

bitflags! {
    /// Flags describing properties of a [`Pattern`].
    ///
    /// They exist to speed up matching, to signal that the match result has to be negated, and to
    /// carry rules that only apply when paths are matched.
    ///
    /// Usually these flags are derived from the pattern text while parsing it, so users rarely have to
    /// set them by hand.
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Mode: u32 {
        /// The pattern has no sub-directory component: once a trailing slash is removed, no slash remains.
        const NO_SUB_DIR = 0b00001;
        /// The pattern has the form `*literal`, hence it matches everything that ends with `literal`.
        const ENDS_WITH = 0b00010;
        /// The pattern can only match directories, never files.
        const MUST_BE_DIR = 0b00100;
        /// The result of a match is to be negated. Matching itself doesn't do that, which is why callers have
        /// to check for this flag and apply it themselves.
        const NEGATIVE = 0b01000;
        /// The pattern began with a slash, which anchors it to the beginning of the matched path.
        const ABSOLUTE = 0b10000;
    }
}

/// Controls whether the case of characters matters when matching a path.
///
/// It is passed to [`Pattern::matches_repo_relative_path()`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Case {
    /// Characters only match if their case is the same. This is the default.
    #[default]
    Sensitive,
    /// Ascii characters match regardless of their case.
    Fold,
}

/// Instantiation
impl Pattern {
    /// Create a pattern by parsing `text`, with `None` being returned if `text` was empty.
    ///
    /// A leading `!` or `\\!` is interpreted by the parser here, see
    /// [`from_bytes_without_negation()`](Self::from_bytes_without_negation) to turn that off.
    pub fn from_bytes(text: &[u8]) -> Option<Self> {
        let (text, mode, first_wildcard_pos) = crate::parse::pattern(text, true)?;
        Some(Self {
            text: text.into(),
            mode,
            first_wildcard_pos,
        })
    }

    /// Create a pattern by parsing `text` without support for a leading `!` or `\\!`, with `None` being
    /// returned if `text` was empty.
    ///
    /// That way `text` is guaranteed to stay entirely unaltered, at the cost of losing the built-in
    /// support for negation.
    pub fn from_bytes_without_negation(text: &[u8]) -> Option<Self> {
        let (text, mode, first_wildcard_pos) = crate::parse::pattern(text, false)?;
        Some(Self {
            text: text.into(),
            mode,
            first_wildcard_pos,
        })
    }
}

/// Access
impl Pattern {
    /// Return `true` if the result of a match has to be negated, i.e. if [`Mode::NEGATIVE`] is set.
    pub fn is_negative(&self) -> bool {
        self.mode.contains(pattern::Mode::NEGATIVE)
    }

    /// Match `path`, which is relative to the repository root and in which slashes (and only slashes)
    /// are taken literally.
    ///
    /// * `basename_start_pos` is the index at which the basename of `path` starts, or `None` if `path`
    ///   contains no slash. It allows matching only the basename where the pattern permits it.
    /// * `is_dir` tells if `path` is a directory. `None` is treated like `Some(false)`, which means that
    ///   patterns that must match a directory never match in that case.
    /// * `case` configures if the case of ascii characters is folded.
    /// * `mode` controls how [`crate::wildmatch()`] operates.
    ///
    /// In debug builds, a `basename_start_pos` that doesn't fit `path` or a `path` with a leading slash
    /// trigger an assertion.
    pub fn matches_repo_relative_path(
        &self,
        path: &BStr,
        basename_start_pos: Option<usize>,
        is_dir: Option<bool>,
        case: Case,
        mode: wildmatch::Mode,
    ) -> bool {
        // Without knowing what `path` is, we assume it's not a directory.
        let path_is_dir = is_dir.unwrap_or(false);
        if self.mode.contains(pattern::Mode::MUST_BE_DIR) && !path_is_dir {
            return false;
        }

        let case_flag = match case {
            Case::Sensitive => wildmatch::Mode::empty(),
            Case::Fold => wildmatch::Mode::IGNORE_CASE,
        };
        let flags = mode | case_flag;

        #[cfg(debug_assertions)]
        {
            // The position is typically cached by the caller, so assure it's still matching `path`.
            if let Some(cached_pos) = basename_start_pos {
                debug_assert_eq!(
                    Some(cached_pos),
                    path.rfind_byte(b'/').map(|slash_pos| slash_pos + 1),
                    "BUG: invalid cached basename_start_pos provided"
                );
            }
        }
        debug_assert!(path.first() != Some(&b'/'), "input path must be relative");

        // Patterns without any directory component that aren't anchored may match in any directory,
        // so it's enough to look at the basename of `path`.
        let match_basename_only =
            self.mode.contains(pattern::Mode::NO_SUB_DIR) && !self.mode.contains(pattern::Mode::ABSOLUTE);
        let candidate = if match_basename_only {
            &path[basename_start_pos.unwrap_or_default()..]
        } else {
            path
        };
        self.matches(candidate, flags)
    }

    /// Return `true` if `value` matches this pattern when using the given `mode`.
    ///
    /// With `mode`, `value` can be identified as path in which the slash character isn't matched by
    /// wildcards, and the case of characters can be ignored. Note that case folding is limited to ASCII.
    ///
    /// Simple patterns are handled by shortcuts, and only if these aren't applicable or don't rule out a
    /// match [wildmatch()][crate::wildmatch()] is used.
    pub fn matches(&self, value: &BStr, mode: wildmatch::Mode) -> bool {
        let ignore_case = mode.contains(wildmatch::Mode::IGNORE_CASE);

        let pos = match self.first_wildcard_pos {
            Some(pos) => pos,
            // There is no wildcard at all, which makes this a plain comparison.
            None => {
                return if ignore_case {
                    self.text.eq_ignore_ascii_case(value)
                } else {
                    self.text == value
                };
            }
        };

        // "*literal" case, overrides starts-with. It's not applicable if slashes must be matched literally
        // and `value` contains one.
        let use_suffix_shortcut = self.mode.contains(pattern::Mode::ENDS_WITH)
            && !(mode.contains(wildmatch::Mode::NO_MATCH_SLASH_LITERAL) && value.contains(&b'/'));
        if use_suffix_shortcut {
            let suffix = &self.text[pos + 1..];
            return if ignore_case {
                // A `value` shorter than the suffix can't possibly end with it.
                match value.len().checked_sub(suffix.len()) {
                    Some(start) => suffix.eq_ignore_ascii_case(&value[start..]),
                    None => false,
                }
            } else {
                value.ends_with(suffix.as_ref())
            };
        }

        // Everything in front of the first wildcard is a literal that `value` has to start with.
        let has_literal_prefix = if ignore_case {
            match value.get(..pos) {
                Some(head) => head.eq_ignore_ascii_case(&self.text[..pos]),
                None => false,
            }
        } else {
            value.starts_with(&self.text[..pos])
        };
        has_literal_prefix && crate::wildmatch(self.text.as_bstr(), value, mode)
    }
}

/// Writes an optional `!` for negative patterns and an optional `/` for absolute ones, followed by the
/// pattern text and a trailing `/` if the pattern must match a directory.
impl fmt::Display for Pattern {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let flags = self.mode;
        // `pad()` is what displaying a `str` does, so formatting options keep applying to each part.
        if flags.contains(Mode::NEGATIVE) {
            f.pad("!")?;
        }
        if flags.contains(Mode::ABSOLUTE) {
            f.pad("/")?;
        }
        fmt::Display::fmt(&self.text, f)?;
        if flags.contains(Mode::MUST_BE_DIR) {
            f.pad("/")?;
        }
        Ok(())
    }
}