fancy_regex/
expand.rs

1use alloc::borrow::Cow;
2use alloc::string::{String, ToString};
3use alloc::vec::Vec;
4
5use crate::parse::{parse_decimal, parse_id};
6use crate::{Captures, CompileError, Error, ParseError, Regex};
7
/// A set of options for expanding a template string using the contents
/// of capture groups.
///
/// The value is a small bundle of syntax settings, so it can be cloned and
/// compared cheaply.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Expander {
    /// Character that introduces a group reference in a template; writing it
    /// twice in a row produces the character itself.
    sub_char: char,
    /// Delimiter that opens a delimited group name after `sub_char`.
    open: &'static str,
    /// Delimiter that closes a delimited group name.
    close: &'static str,
    /// Whether a group name may follow `sub_char` directly, without the
    /// `open`/`close` delimiters.
    allow_undelimited_name: bool,
}
17
18impl Default for Expander {
19    /// Returns the default expander used by [`Captures::expand`].
20    ///
21    /// [`Captures::expand`]: struct.Captures.html#expand
22    fn default() -> Self {
23        Expander {
24            sub_char: '$',
25            open: "{",
26            close: "}",
27            allow_undelimited_name: true,
28        }
29    }
30}
31
32impl Expander {
33    /// Returns an expander that uses Python-compatible syntax.
34    ///
35    /// Expands all instances of `\num` or `\g<name>` in `replacement`
36    /// to the corresponding capture group `num` or `name`, and writes
37    /// them to the `dst` buffer given.
38    ///
39    /// `name` may be an integer corresponding to the index of the
40    /// capture group (counted by order of opening parenthesis where `\0` is the
41    /// entire match) or it can be a name (consisting of letters, digits or
42    /// underscores) corresponding to a named capture group.
43    ///
44    /// `num` must be an integer corresponding to the index of the
45    /// capture group.
46    ///
47    /// If `num` or `name` isn't a valid capture group (whether the name doesn't exist
48    /// or isn't a valid index), then it is replaced with the empty string.
49    ///
50    /// The longest possible number is used. e.g., `\10` looks up capture
51    /// group 10 and not capture group 1 followed by a literal 0.
52    ///
53    /// To write a literal `\`, use `\\`.
54    pub fn python() -> Expander {
55        Expander {
56            sub_char: '\\',
57            open: "g<",
58            close: ">",
59            allow_undelimited_name: false,
60        }
61    }
62
63    /// Checks `template` for errors.  The following conditions are checked for:
64    ///
65    /// - A reference to a numbered group that does not exist in `regex`
66    /// - A reference to a numbered group (other than 0) when `regex` contains named groups
67    /// - A reference to a named group that does not occur in `regex`
68    /// - An opening group name delimiter without a closing delimiter
69    /// - Using an empty string as a group name
70    pub fn check(&self, template: &str, regex: &Regex) -> crate::Result<()> {
71        let on_group_num = |num| {
72            if num == 0 {
73                Ok(())
74            } else if !regex.named_groups.is_empty() {
75                Err(Error::CompileError(CompileError::NamedBackrefOnly))
76            } else if num < regex.captures_len() {
77                Ok(())
78            } else {
79                Err(Error::CompileError(CompileError::InvalidBackref))
80            }
81        };
82        self.exec(template, |step| match step {
83            Step::Char(_) => Ok(()),
84            Step::GroupName(name) => {
85                if regex.named_groups.contains_key(name) {
86                    Ok(())
87                } else if let Ok(num) = name.parse() {
88                    on_group_num(num)
89                } else {
90                    Err(Error::CompileError(CompileError::InvalidBackref))
91                }
92            }
93            Step::GroupNum(num) => on_group_num(num),
94            Step::Error => Err(Error::ParseError(
95                0,
96                ParseError::GeneralParseError(
97                    "parse error in template while expanding".to_string(),
98                ),
99            )),
100        })
101    }
102
103    /// Escapes the substitution character in `text` so it appears literally
104    /// in the output of `expansion`.
105    ///
106    /// ```
107    /// assert_eq!(
108    ///     fancy_regex::Expander::default().escape("Has a literal $ sign."),
109    ///     "Has a literal $$ sign.",
110    /// );
111    /// ```
112    pub fn escape<'a>(&self, text: &'a str) -> Cow<'a, str> {
113        if text.contains(self.sub_char) {
114            let mut quoted = String::with_capacity(self.sub_char.len_utf8() * 2);
115            quoted.push(self.sub_char);
116            quoted.push(self.sub_char);
117            Cow::Owned(text.replace(self.sub_char, &quoted))
118        } else {
119            Cow::Borrowed(text)
120        }
121    }
122
123    #[doc(hidden)]
124    #[deprecated(since = "0.4.0", note = "Use `escape` instead.")]
125    pub fn quote<'a>(&self, text: &'a str) -> Cow<'a, str> {
126        self.escape(text)
127    }
128
129    /// Expands the template string `template` using the syntax defined
130    /// by this expander and the values of capture groups from `captures`.
131    pub fn expansion(&self, template: &str, captures: &Captures<'_>) -> String {
132        let mut cursor = Vec::with_capacity(template.len());
133        #[cfg(feature = "std")]
134        self.write_expansion(&mut cursor, template, captures)
135            .expect("expansion succeeded");
136        #[cfg(not(feature = "std"))]
137        self.write_expansion_vec(&mut cursor, template, captures)
138            .expect("expansion succeeded");
139        String::from_utf8(cursor).expect("expansion is UTF-8")
140    }
141
142    /// Appends the expansion produced by `expansion` to `dst`.  Potentially more efficient
143    /// than calling `expansion` directly and appending to an existing string.
144    pub fn append_expansion(&self, dst: &mut String, template: &str, captures: &Captures<'_>) {
145        let mut cursor = core::mem::take(dst).into_bytes();
146        #[cfg(feature = "std")]
147        self.write_expansion(&mut cursor, template, captures)
148            .expect("expansion succeeded");
149        #[cfg(not(feature = "std"))]
150        self.write_expansion_vec(&mut cursor, template, captures)
151            .expect("expansion succeeded");
152        *dst = String::from_utf8(cursor).expect("expansion is UTF-8");
153    }
154
155    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
156    /// than calling `expansion` directly and writing the result.
157    #[cfg(feature = "std")]
158    pub fn write_expansion(
159        &self,
160        mut dst: impl std::io::Write,
161        template: &str,
162        captures: &Captures<'_>,
163    ) -> std::io::Result<()> {
164        self.exec(template, |step| match step {
165            Step::Char(c) => write!(dst, "{}", c),
166            Step::GroupName(name) => {
167                if let Some(m) = captures.name(name) {
168                    write!(dst, "{}", m.as_str())
169                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
170                    write!(dst, "{}", m.as_str())
171                } else {
172                    Ok(())
173                }
174            }
175            Step::GroupNum(num) => {
176                if let Some(m) = captures.get(num) {
177                    write!(dst, "{}", m.as_str())
178                } else {
179                    Ok(())
180                }
181            }
182            Step::Error => Ok(()),
183        })
184    }
185
186    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
187    /// than calling `expansion` directly and writing the result.
188    pub fn write_expansion_vec(
189        &self,
190        dst: &mut Vec<u8>,
191        template: &str,
192        captures: &Captures<'_>,
193    ) -> core::fmt::Result {
194        self.exec(template, |step| match step {
195            Step::Char(c) => Ok(dst.extend(c.to_string().as_bytes())),
196            Step::GroupName(name) => {
197                if let Some(m) = captures.name(name) {
198                    Ok(dst.extend(m.as_str().as_bytes()))
199                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
200                    Ok(dst.extend(m.as_str().as_bytes()))
201                } else {
202                    Ok(())
203                }
204            }
205            Step::GroupNum(num) => {
206                if let Some(m) = captures.get(num) {
207                    Ok(dst.extend(m.as_str().as_bytes()))
208                } else {
209                    Ok(())
210                }
211            }
212            Step::Error => Ok(()),
213        })
214    }
215
216    fn exec<'t, E>(
217        &self,
218        template: &'t str,
219        mut f: impl FnMut(Step<'t>) -> Result<(), E>,
220    ) -> Result<(), E> {
221        debug_assert!(!self.open.is_empty());
222        debug_assert!(!self.close.is_empty());
223        let mut iter = template.chars();
224        while let Some(c) = iter.next() {
225            if c == self.sub_char {
226                let tail = iter.as_str();
227                let skip = if tail.starts_with(self.sub_char) {
228                    f(Step::Char(self.sub_char))?;
229                    1
230                } else if let Some((id, skip)) = parse_id(tail, self.open, self.close, false)
231                    .or_else(|| {
232                        if self.allow_undelimited_name {
233                            parse_id(tail, "", "", false)
234                        } else {
235                            None
236                        }
237                    })
238                {
239                    f(Step::GroupName(id))?;
240                    skip
241                } else if let Some((skip, num)) = parse_decimal(tail, 0) {
242                    f(Step::GroupNum(num))?;
243                    skip
244                } else {
245                    f(Step::Error)?;
246                    f(Step::Char(self.sub_char))?;
247                    0
248                };
249                iter = iter.as_str()[skip..].chars();
250            } else {
251                f(Step::Char(c))?;
252            }
253        }
254        Ok(())
255    }
256}
257
/// One unit produced while scanning a template string.
enum Step<'t> {
    /// A character to be taken literally.
    Char(char),
    /// A reference to a group by name; the name borrows from the template.
    GroupName(&'t str),
    /// A reference to a group by index.
    GroupNum(usize),
    /// A substitution character that is not followed by a valid reference.
    Error,
}