grep_regex/matcher.rs
1use {
2 grep_matcher::{
3 ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher,
4 NoError,
5 },
6 regex_automata::{
7 meta::Regex, util::captures::Captures as AutomataCaptures, Input,
8 PatternID,
9 },
10};
11
12use crate::{config::Config, error::Error, literal::InnerLiterals};
13
14/// A builder for constructing a `Matcher` using regular expressions.
15///
16/// This builder re-exports many of the same options found on the regex crate's
17/// builder, in addition to a few other options such as smart case, word
18/// matching and the ability to set a line terminator which may enable certain
19/// types of optimizations.
20///
21/// The syntax supported is documented as part of the regex crate:
22/// <https://docs.rs/regex/#syntax>.
23#[derive(Clone, Debug)]
24pub struct RegexMatcherBuilder {
25 config: Config,
26}
27
28impl Default for RegexMatcherBuilder {
29 fn default() -> RegexMatcherBuilder {
30 RegexMatcherBuilder::new()
31 }
32}
33
34impl RegexMatcherBuilder {
35 /// Create a new builder for configuring a regex matcher.
36 pub fn new() -> RegexMatcherBuilder {
37 RegexMatcherBuilder { config: Config::default() }
38 }
39
40 /// Build a new matcher using the current configuration for the provided
41 /// pattern.
42 ///
43 /// The syntax supported is documented as part of the regex crate:
44 /// <https://docs.rs/regex/#syntax>.
45 pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
46 self.build_many(&[pattern])
47 }
48
49 /// Build a new matcher using the current configuration for the provided
50 /// patterns. The resulting matcher behaves as if all of the patterns
51 /// given are joined together into a single alternation. That is, it
52 /// reports matches where at least one of the given patterns matches.
53 pub fn build_many<P: AsRef<str>>(
54 &self,
55 patterns: &[P],
56 ) -> Result<RegexMatcher, Error> {
57 let mut chir = self.config.build_many(patterns)?;
58 // 'whole_line' is a strict subset of 'word', so when it is enabled,
59 // we don't need to both with any specific to word matching.
60 if chir.config().whole_line {
61 chir = chir.into_whole_line();
62 } else if chir.config().word {
63 chir = chir.into_word();
64 }
65 let regex = chir.to_regex()?;
66 log::trace!("final regex: {:?}", chir.hir().to_string());
67
68 let non_matching_bytes = chir.non_matching_bytes();
69 // If we can pick out some literals from the regex, then we might be
70 // able to build a faster regex that quickly identifies candidate
71 // matching lines. The regex engine will do what it can on its own, but
72 // we can specifically do a little more when a line terminator is set.
73 // For example, for a regex like `\w+foo\w+`, we can look for `foo`,
74 // and when a match is found, look for the line containing `foo` and
75 // then run the original regex on only that line. (In this case, the
76 // regex engine is likely to handle this case for us since it's so
77 // simple, but the idea applies.)
78 let fast_line_regex = InnerLiterals::new(&chir, ®ex).one_regex()?;
79
80 // We override the line terminator in case the configured HIR doesn't
81 // support it.
82 let mut config = self.config.clone();
83 config.line_terminator = chir.line_terminator();
84 Ok(RegexMatcher { config, regex, fast_line_regex, non_matching_bytes })
85 }
86
87 /// Build a new matcher from a plain alternation of literals.
88 ///
89 /// Depending on the configuration set by the builder, this may be able to
90 /// build a matcher substantially faster than by joining the patterns with
91 /// a `|` and calling `build`.
92 pub fn build_literals<B: AsRef<str>>(
93 &self,
94 literals: &[B],
95 ) -> Result<RegexMatcher, Error> {
96 self.build_many(literals)
97 }
98
99 /// Set the value for the case insensitive (`i`) flag.
100 ///
101 /// When enabled, letters in the pattern will match both upper case and
102 /// lower case variants.
103 pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
104 self.config.case_insensitive = yes;
105 self
106 }
107
108 /// Whether to enable "smart case" or not.
109 ///
110 /// When smart case is enabled, the builder will automatically enable
111 /// case insensitive matching based on how the pattern is written. Namely,
112 /// case insensitive mode is enabled when both of the following things
113 /// are true:
114 ///
115 /// 1. The pattern contains at least one literal character. For example,
116 /// `a\w` contains a literal (`a`) but `\w` does not.
117 /// 2. Of the literals in the pattern, none of them are considered to be
118 /// uppercase according to Unicode. For example, `foo\pL` has no
119 /// uppercase literals but `Foo\pL` does.
120 pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
121 self.config.case_smart = yes;
122 self
123 }
124
125 /// Set the value for the multi-line matching (`m`) flag.
126 ///
127 /// When enabled, `^` matches the beginning of lines and `$` matches the
128 /// end of lines.
129 ///
130 /// By default, they match beginning/end of the input.
131 pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
132 self.config.multi_line = yes;
133 self
134 }
135
136 /// Set the value for the any character (`s`) flag, where in `.` matches
137 /// anything when `s` is set and matches anything except for new line when
138 /// it is not set (the default).
139 ///
140 /// N.B. "matches anything" means "any byte" when Unicode is disabled and
141 /// means "any valid UTF-8 encoding of any Unicode scalar value" when
142 /// Unicode is enabled.
143 pub fn dot_matches_new_line(
144 &mut self,
145 yes: bool,
146 ) -> &mut RegexMatcherBuilder {
147 self.config.dot_matches_new_line = yes;
148 self
149 }
150
151 /// Set the value for the greedy swap (`U`) flag.
152 ///
153 /// When enabled, a pattern like `a*` is lazy (tries to find shortest
154 /// match) and `a*?` is greedy (tries to find longest match).
155 ///
156 /// By default, `a*` is greedy and `a*?` is lazy.
157 pub fn swap_greed(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
158 self.config.swap_greed = yes;
159 self
160 }
161
162 /// Set the value for the ignore whitespace (`x`) flag.
163 ///
164 /// When enabled, whitespace such as new lines and spaces will be ignored
165 /// between expressions of the pattern, and `#` can be used to start a
166 /// comment until the next new line.
167 pub fn ignore_whitespace(
168 &mut self,
169 yes: bool,
170 ) -> &mut RegexMatcherBuilder {
171 self.config.ignore_whitespace = yes;
172 self
173 }
174
175 /// Set the value for the Unicode (`u`) flag.
176 ///
177 /// Enabled by default. When disabled, character classes such as `\w` only
178 /// match ASCII word characters instead of all Unicode word characters.
179 pub fn unicode(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
180 self.config.unicode = yes;
181 self
182 }
183
184 /// Whether to support octal syntax or not.
185 ///
186 /// Octal syntax is a little-known way of uttering Unicode codepoints in
187 /// a regular expression. For example, `a`, `\x61`, `\u0061` and
188 /// `\141` are all equivalent regular expressions, where the last example
189 /// shows octal syntax.
190 ///
191 /// While supporting octal syntax isn't in and of itself a problem, it does
192 /// make good error messages harder. That is, in PCRE based regex engines,
193 /// syntax like `\0` invokes a backreference, which is explicitly
194 /// unsupported in Rust's regex engine. However, many users expect it to
195 /// be supported. Therefore, when octal support is disabled, the error
196 /// message will explicitly mention that backreferences aren't supported.
197 ///
198 /// Octal syntax is disabled by default.
199 pub fn octal(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
200 self.config.octal = yes;
201 self
202 }
203
204 /// Set the approximate size limit of the compiled regular expression.
205 ///
206 /// This roughly corresponds to the number of bytes occupied by a single
207 /// compiled program. If the program exceeds this number, then a
208 /// compilation error is returned.
209 pub fn size_limit(&mut self, bytes: usize) -> &mut RegexMatcherBuilder {
210 self.config.size_limit = bytes;
211 self
212 }
213
214 /// Set the approximate size of the cache used by the DFA.
215 ///
216 /// This roughly corresponds to the number of bytes that the DFA will
217 /// use while searching.
218 ///
219 /// Note that this is a *per thread* limit. There is no way to set a global
220 /// limit. In particular, if a regex is used from multiple threads
221 /// simultaneously, then each thread may use up to the number of bytes
222 /// specified here.
223 pub fn dfa_size_limit(
224 &mut self,
225 bytes: usize,
226 ) -> &mut RegexMatcherBuilder {
227 self.config.dfa_size_limit = bytes;
228 self
229 }
230
231 /// Set the nesting limit for this parser.
232 ///
233 /// The nesting limit controls how deep the abstract syntax tree is allowed
234 /// to be. If the AST exceeds the given limit (e.g., with too many nested
235 /// groups), then an error is returned by the parser.
236 ///
237 /// The purpose of this limit is to act as a heuristic to prevent stack
238 /// overflow for consumers that do structural induction on an `Ast` using
239 /// explicit recursion. While this crate never does this (instead using
240 /// constant stack space and moving the call stack to the heap), other
241 /// crates may.
242 ///
243 /// This limit is not checked until the entire Ast is parsed. Therefore,
244 /// if callers want to put a limit on the amount of heap space used, then
245 /// they should impose a limit on the length, in bytes, of the concrete
246 /// pattern string. In particular, this is viable since this parser
247 /// implementation will limit itself to heap space proportional to the
248 /// length of the pattern string.
249 ///
250 /// Note that a nest limit of `0` will return a nest limit error for most
251 /// patterns but not all. For example, a nest limit of `0` permits `a` but
252 /// not `ab`, since `ab` requires a concatenation, which results in a nest
253 /// depth of `1`. In general, a nest limit is not something that manifests
254 /// in an obvious way in the concrete syntax, therefore, it should not be
255 /// used in a granular way.
256 pub fn nest_limit(&mut self, limit: u32) -> &mut RegexMatcherBuilder {
257 self.config.nest_limit = limit;
258 self
259 }
260
261 /// Set an ASCII line terminator for the matcher.
262 ///
263 /// The purpose of setting a line terminator is to enable a certain class
264 /// of optimizations that can make line oriented searching faster. Namely,
265 /// when a line terminator is enabled, then the builder will guarantee that
266 /// the resulting matcher will never be capable of producing a match that
267 /// contains the line terminator. Because of this guarantee, users of the
268 /// resulting matcher do not need to slowly execute a search line by line
269 /// for line oriented search.
270 ///
271 /// If the aforementioned guarantee about not matching a line terminator
272 /// cannot be made because of how the pattern was written, then the builder
273 /// will return an error when attempting to construct the matcher. For
274 /// example, the pattern `a\sb` will be transformed such that it can never
275 /// match `a\nb` (when `\n` is the line terminator), but the pattern `a\nb`
276 /// will result in an error since the `\n` cannot be easily removed without
277 /// changing the fundamental intent of the pattern.
278 ///
279 /// If the given line terminator isn't an ASCII byte (`<=127`), then the
280 /// builder will return an error when constructing the matcher.
281 pub fn line_terminator(
282 &mut self,
283 line_term: Option<u8>,
284 ) -> &mut RegexMatcherBuilder {
285 self.config.line_terminator = line_term.map(LineTerminator::byte);
286 self
287 }
288
289 /// Ban a byte from occurring in a regular expression pattern.
290 ///
291 /// If this byte is found in the regex pattern, then an error will be
292 /// returned at construction time.
293 ///
294 /// This is useful when binary detection is enabled. Callers will likely
295 /// want to ban the same byte that is used to detect binary data, i.e.,
296 /// the NUL byte. The reason for this is that when binary detection is
297 /// enabled, it's impossible to match a NUL byte because binary detection
298 /// will either quit when one is found, or will convert NUL bytes to line
299 /// terminators to avoid exorbitant heap usage.
300 pub fn ban_byte(&mut self, byte: Option<u8>) -> &mut RegexMatcherBuilder {
301 self.config.ban = byte;
302 self
303 }
304
305 /// Set the line terminator to `\r\n` and enable CRLF matching for `$` in
306 /// regex patterns.
307 ///
308 /// This method sets two distinct settings:
309 ///
310 /// 1. It causes the line terminator for the matcher to be `\r\n`. Namely,
311 /// this prevents the matcher from ever producing a match that contains
312 /// a `\r` or `\n`.
313 /// 2. It enables CRLF mode for `^` and `$`. This means that line anchors
314 /// will treat both `\r` and `\n` as line terminators, but will never
315 /// match between a `\r` and `\n`.
316 ///
317 /// Note that if you do not wish to set the line terminator but would
318 /// still like `$` to match `\r\n` line terminators, then it is valid to
319 /// call `crlf(true)` followed by `line_terminator(None)`. Ordering is
320 /// important, since `crlf` sets the line terminator, but `line_terminator`
321 /// does not touch the `crlf` setting.
322 pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
323 if yes {
324 self.config.line_terminator = Some(LineTerminator::crlf());
325 } else {
326 self.config.line_terminator = None;
327 }
328 self.config.crlf = yes;
329 self
330 }
331
332 /// Require that all matches occur on word boundaries.
333 ///
334 /// Enabling this option is subtly different than putting `\b` assertions
335 /// on both sides of your pattern. In particular, a `\b` assertion requires
336 /// that one side of it match a word character while the other match a
337 /// non-word character. This option, in contrast, merely requires that
338 /// one side match a non-word character.
339 ///
340 /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
341 /// word character. However, `-2` with this `word` option enabled will
342 /// match the `-2` in `foo -2 bar`.
343 pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
344 self.config.word = yes;
345 self
346 }
347
348 /// Whether the patterns should be treated as literal strings or not. When
349 /// this is active, all characters, including ones that would normally be
350 /// special regex meta characters, are matched literally.
351 pub fn fixed_strings(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
352 self.config.fixed_strings = yes;
353 self
354 }
355
356 /// Whether each pattern should match the entire line or not. This is
357 /// equivalent to surrounding the pattern with `(?m:^)` and `(?m:$)`.
358 pub fn whole_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
359 self.config.whole_line = yes;
360 self
361 }
362}
363
364/// An implementation of the `Matcher` trait using Rust's standard regex
365/// library.
366#[derive(Clone, Debug)]
367pub struct RegexMatcher {
368 /// The configuration specified by the caller.
369 config: Config,
370 /// The regular expression compiled from the pattern provided by the
371 /// caller.
372 regex: Regex,
373 /// A regex that never reports false negatives but may report false
374 /// positives that is believed to be capable of being matched more quickly
375 /// than `regex`. Typically, this is a single literal or an alternation
376 /// of literals.
377 fast_line_regex: Option<Regex>,
378 /// A set of bytes that will never appear in a match.
379 non_matching_bytes: ByteSet,
380}
381
382impl RegexMatcher {
383 /// Create a new matcher from the given pattern using the default
384 /// configuration.
385 pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
386 RegexMatcherBuilder::new().build(pattern)
387 }
388
389 /// Create a new matcher from the given pattern using the default
390 /// configuration, but matches lines terminated by `\n`.
391 ///
392 /// This is meant to be a convenience constructor for
393 /// using a `RegexMatcherBuilder` and setting its
394 /// [`line_terminator`](RegexMatcherBuilder::method.line_terminator) to
395 /// `\n`. The purpose of using this constructor is to permit special
396 /// optimizations that help speed up line oriented search. These types of
397 /// optimizations are only appropriate when matches span no more than one
398 /// line. For this reason, this constructor will return an error if the
399 /// given pattern contains a literal `\n`. Other uses of `\n` (such as in
400 /// `\s`) are removed transparently.
401 pub fn new_line_matcher(pattern: &str) -> Result<RegexMatcher, Error> {
402 RegexMatcherBuilder::new().line_terminator(Some(b'\n')).build(pattern)
403 }
404}
405
406// This implementation just dispatches on the internal matcher impl except
407// for the line terminator optimization, which is possibly executed via
408// `fast_line_regex`.
409impl Matcher for RegexMatcher {
410 type Captures = RegexCaptures;
411 type Error = NoError;
412
413 #[inline]
414 fn find_at(
415 &self,
416 haystack: &[u8],
417 at: usize,
418 ) -> Result<Option<Match>, NoError> {
419 let input = Input::new(haystack).span(at..haystack.len());
420 Ok(self.regex.find(input).map(|m| Match::new(m.start(), m.end())))
421 }
422
423 #[inline]
424 fn new_captures(&self) -> Result<RegexCaptures, NoError> {
425 Ok(RegexCaptures::new(self.regex.create_captures()))
426 }
427
428 #[inline]
429 fn capture_count(&self) -> usize {
430 self.regex.captures_len()
431 }
432
433 #[inline]
434 fn capture_index(&self, name: &str) -> Option<usize> {
435 self.regex.group_info().to_index(PatternID::ZERO, name)
436 }
437
438 #[inline]
439 fn try_find_iter<F, E>(
440 &self,
441 haystack: &[u8],
442 mut matched: F,
443 ) -> Result<Result<(), E>, NoError>
444 where
445 F: FnMut(Match) -> Result<bool, E>,
446 {
447 for m in self.regex.find_iter(haystack) {
448 match matched(Match::new(m.start(), m.end())) {
449 Ok(true) => continue,
450 Ok(false) => return Ok(Ok(())),
451 Err(err) => return Ok(Err(err)),
452 }
453 }
454 Ok(Ok(()))
455 }
456
457 #[inline]
458 fn captures_at(
459 &self,
460 haystack: &[u8],
461 at: usize,
462 caps: &mut RegexCaptures,
463 ) -> Result<bool, NoError> {
464 let input = Input::new(haystack).span(at..haystack.len());
465 let caps = caps.captures_mut();
466 self.regex.search_captures(&input, caps);
467 Ok(caps.is_match())
468 }
469
470 #[inline]
471 fn shortest_match_at(
472 &self,
473 haystack: &[u8],
474 at: usize,
475 ) -> Result<Option<usize>, NoError> {
476 let input = Input::new(haystack).span(at..haystack.len());
477 Ok(self.regex.search_half(&input).map(|hm| hm.offset()))
478 }
479
480 #[inline]
481 fn non_matching_bytes(&self) -> Option<&ByteSet> {
482 Some(&self.non_matching_bytes)
483 }
484
485 #[inline]
486 fn line_terminator(&self) -> Option<LineTerminator> {
487 self.config.line_terminator
488 }
489
490 #[inline]
491 fn find_candidate_line(
492 &self,
493 haystack: &[u8],
494 ) -> Result<Option<LineMatchKind>, NoError> {
495 Ok(match self.fast_line_regex {
496 Some(ref regex) => {
497 let input = Input::new(haystack);
498 regex
499 .search_half(&input)
500 .map(|hm| LineMatchKind::Candidate(hm.offset()))
501 }
502 None => {
503 self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)
504 }
505 })
506 }
507}
508
509/// Represents the match offsets of each capturing group in a match.
510///
511/// The first, or `0`th capture group, always corresponds to the entire match
512/// and is guaranteed to be present when a match occurs. The next capture
513/// group, at index `1`, corresponds to the first capturing group in the regex,
514/// ordered by the position at which the left opening parenthesis occurs.
515///
516/// Note that not all capturing groups are guaranteed to be present in a match.
517/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
518/// or `bar` will ever be set in any given match.
519///
520/// In order to access a capture group by name, you'll need to first find the
521/// index of the group using the corresponding matcher's `capture_index`
522/// method, and then use that index with `RegexCaptures::get`.
523#[derive(Clone, Debug)]
524pub struct RegexCaptures {
525 /// Where the captures are stored.
526 caps: AutomataCaptures,
527}
528
529impl Captures for RegexCaptures {
530 #[inline]
531 fn len(&self) -> usize {
532 self.caps.group_info().all_group_len()
533 }
534
535 #[inline]
536 fn get(&self, i: usize) -> Option<Match> {
537 self.caps.get_group(i).map(|sp| Match::new(sp.start, sp.end))
538 }
539}
540
541impl RegexCaptures {
542 #[inline]
543 pub(crate) fn new(caps: AutomataCaptures) -> RegexCaptures {
544 RegexCaptures { caps }
545 }
546
547 #[inline]
548 pub(crate) fn captures_mut(&mut self) -> &mut AutomataCaptures {
549 &mut self.caps
550 }
551}
552
#[cfg(test)]
mod tests {
    use super::*;

    // The `word` option accepts a match that the same pattern wrapped in
    // explicit `\b` assertions rejects.
    #[test]
    fn word() {
        let haystack: &[u8] = b"abc -2 foo";

        let mut builder = RegexMatcherBuilder::new();
        builder.word(true);
        let with_word_option = builder.build(r"-2").unwrap();
        assert!(with_word_option.is_match(haystack).unwrap());

        let mut builder = RegexMatcherBuilder::new();
        builder.word(false);
        let with_boundaries = builder.build(r"\b-2\b").unwrap();
        assert!(!with_boundaries.is_match(haystack).unwrap());
    }

    // Once a line terminator is configured, a match can no longer span it.
    #[test]
    fn line_terminator() {
        let pattern = r"abc\sxyz";
        let haystack: &[u8] = b"abc\nxyz";

        // No line terminator is configured, so `\s` may match `\n`.
        let unrestricted = RegexMatcherBuilder::new().build(pattern).unwrap();
        assert!(unrestricted.is_match(haystack).unwrap());

        // With `\n` as the line terminator, the same search finds nothing.
        let mut builder = RegexMatcherBuilder::new();
        builder.line_terminator(Some(b'\n'));
        let restricted = builder.build(pattern).unwrap();
        assert!(!restricted.is_match(haystack).unwrap());
    }

    // Building must fail when a line terminator is set and the pattern
    // cannot be rewritten to exclude it.
    #[test]
    fn line_terminator_error() {
        let mut builder = RegexMatcherBuilder::new();
        builder.line_terminator(Some(b'\n'));
        assert!(builder.build(r"a\nz").is_err());
    }

    // Enabling CRLF lets `$` match just before a `\r\n` line ending.
    #[test]
    fn line_terminator_crlf() {
        let pattern = r"abc$";

        // Ordinary multi-line `$` matches before a `\n`.
        let mut builder = RegexMatcherBuilder::new();
        builder.multi_line(true);
        let matcher = builder.build(pattern).unwrap();
        assert!(matcher.is_match(b"abc\n").unwrap());

        // Without CRLF mode, `$` does not match before `\r\n`.
        let mut builder = RegexMatcherBuilder::new();
        builder.multi_line(true);
        let matcher = builder.build(pattern).unwrap();
        assert!(!matcher.is_match(b"abc\r\n").unwrap());

        // With CRLF mode, it does.
        let mut builder = RegexMatcherBuilder::new();
        builder.multi_line(true).crlf(true);
        let matcher = builder.build(pattern).unwrap();
        assert!(matcher.is_match(b"abc\r\n").unwrap());
    }

    // Smart case: an all-lowercase pattern matches case insensitively,
    // while a pattern containing an uppercase letter does not.
    #[test]
    fn case_smart() {
        let haystack: &[u8] = b"ABC";

        let mut builder = RegexMatcherBuilder::new();
        builder.case_smart(true);
        let lowercase = builder.build(r"abc").unwrap();
        assert!(lowercase.is_match(haystack).unwrap());

        let mut builder = RegexMatcherBuilder::new();
        builder.case_smart(true);
        let mixed_case = builder.build(r"aBc").unwrap();
        assert!(!mixed_case.is_match(haystack).unwrap());
    }

    // Test that finding candidate lines works as expected.
    // FIXME: Re-enable this test once inner literal extraction works.
    #[test]
    #[ignore]
    fn candidate_lines() {
        fn is_confirmed(m: LineMatchKind) -> bool {
            matches!(m, LineMatchKind::Confirmed(_))
        }
        fn is_candidate(m: LineMatchKind) -> bool {
            matches!(m, LineMatchKind::Candidate(_))
        }

        let pattern = r"\wfoo\s";
        let haystack: &[u8] = b"afoo ";

        // With no line terminator set, we can't employ any optimizations,
        // so we get a confirmed match.
        let plain = RegexMatcherBuilder::new().build(pattern).unwrap();
        let kind = plain.find_candidate_line(haystack).unwrap().unwrap();
        assert!(is_confirmed(kind));

        // With a line terminator and a regex specially crafted to have an
        // easy-to-detect inner literal, we can apply an optimization that
        // quickly finds candidate matches.
        let mut builder = RegexMatcherBuilder::new();
        builder.line_terminator(Some(b'\n'));
        let line_oriented = builder.build(pattern).unwrap();
        let kind =
            line_oriented.find_candidate_line(haystack).unwrap().unwrap();
        assert!(is_candidate(kind));
    }
}
668}