regex_automata/util/
captures.rs

1/*!
2Provides types for dealing with capturing groups.
3
4Capturing groups refer to sub-patterns of regexes that some regex engines can
5report matching offsets for. For example, matching `[a-z]([0-9]+)` against
6`a789` would give `a789` as the overall match (for the implicit capturing group
7at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an
8explicit capturing group at index `1`).
9
10Not all regex engines can report match offsets for capturing groups. Indeed,
11to a first approximation, regex engines that can report capturing group offsets
12tend to be quite a bit slower than regex engines that can't. This is because
13tracking capturing groups at search time usually requires more "power" that
14in turn adds overhead.
15
16Other regex implementations might call capturing groups "submatches."
17
18# Overview
19
20The main types in this module are:
21
22* [`Captures`] records the capturing group offsets found during a search. It
23provides convenience routines for looking up capturing group offsets by either
24index or name.
25* [`GroupInfo`] records the mapping between capturing groups and "slots,"
26where the latter are how capturing groups are recorded during a regex search.
27This also keeps a mapping from capturing group name to index, and capture
28group index to name. A `GroupInfo` is used by `Captures` internally to
29provide a convenient API. It is unlikely that you'll use a `GroupInfo`
30directly, but for example, if you've compiled an Thompson NFA, then you can use
31[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its
32underlying `GroupInfo`.
33*/
34
35use alloc::{string::String, sync::Arc, vec, vec::Vec};
36
37use crate::util::{
38    interpolate,
39    primitives::{
40        NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex,
41    },
42    search::{Match, Span},
43};
44
45/// The span offsets of capturing groups after a match has been found.
46///
47/// This type represents the output of regex engines that can report the
48/// offsets at which capturing groups matches or "submatches" occur. For
49/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match
50/// occurs, it will at minimum contain the [`PatternID`] of the pattern that
51/// matched. Depending upon how it was constructed, it may also contain the
52/// start/end offsets of the entire match of the pattern and the start/end
53/// offsets of each capturing group that participated in the match.
54///
55/// Values of this type are always created for a specific [`GroupInfo`]. It is
56/// unspecified behavior to use a `Captures` value in a search with any regex
57/// engine that has a different `GroupInfo` than the one the `Captures` were
58/// created with.
59///
60/// # Constructors
61///
62/// There are three constructors for this type that control what kind of
63/// information is available upon a match:
64///
65/// * [`Captures::all`]: Will store overall pattern match offsets in addition
66/// to the offsets of capturing groups that participated in the match.
67/// * [`Captures::matches`]: Will store only the overall pattern
68/// match offsets. The offsets of capturing groups (even ones that participated
69/// in the match) are not available.
70/// * [`Captures::empty`]: Will only store the pattern ID that matched. No
71/// match offsets are available at all.
72///
73/// If you aren't sure which to choose, then pick the first one. The first one
74/// is what convenience routines like,
75/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures),
76/// will use automatically.
77///
78/// The main difference between these choices is performance. Namely, if you
79/// ask for _less_ information, then the execution of regex search may be able
80/// to run more quickly.
81///
82/// # Notes
83///
84/// It is worth pointing out that this type is not coupled to any one specific
85/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the
86/// thing that is responsible for mapping capturing groups to "slot" offsets.
87/// Slot offsets are indices into a single sequence of memory at which matching
88/// haystack offsets for the corresponding group are written by regex engines.
89///
90/// # Example
91///
92/// This example shows how to parse a simple date and extract the components of
93/// the date via capturing groups:
94///
95/// ```
96/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
97///
98/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
99/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
100///
101/// re.captures(&mut cache, "2010-03-14", &mut caps);
102/// assert!(caps.is_match());
103/// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
104/// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
105/// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
106///
107/// # Ok::<(), Box<dyn std::error::Error>>(())
108/// ```
109///
110/// # Example: named capturing groups
111///
112/// This example is like the one above, but leverages the ability to name
113/// capturing groups in order to make the code a bit clearer:
114///
115/// ```
116/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
117///
118/// let re = PikeVM::new(r"^(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})$")?;
119/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
120///
121/// re.captures(&mut cache, "2010-03-14", &mut caps);
122/// assert!(caps.is_match());
123/// assert_eq!(Some(Span::from(0..4)), caps.get_group_by_name("y"));
124/// assert_eq!(Some(Span::from(5..7)), caps.get_group_by_name("m"));
125/// assert_eq!(Some(Span::from(8..10)), caps.get_group_by_name("d"));
126///
127/// # Ok::<(), Box<dyn std::error::Error>>(())
128/// ```
129#[derive(Clone)]
130pub struct Captures {
131    /// The group info that these capture groups are coupled to. This is what
132    /// gives the "convenience" of the `Captures` API. Namely, it provides the
133    /// slot mapping and the name|-->index mapping for capture lookups by name.
134    group_info: GroupInfo,
135    /// The ID of the pattern that matched. Regex engines must set this to
136    /// None when no match occurs.
137    pid: Option<PatternID>,
138    /// The slot values, i.e., submatch offsets.
139    ///
140    /// In theory, the smallest sequence of slots would be something like
141    /// `max(groups(pattern) for pattern in regex) * 2`, but instead, we use
142    /// `sum(groups(pattern) for pattern in regex) * 2`. Why?
143    ///
144    /// Well, the former could be used in theory, because we don't generally
145    /// have any overlapping APIs that involve capturing groups. Therefore,
146    /// there's technically never any need to have slots set for multiple
147    /// patterns. However, this might change some day, in which case, we would
148    /// need to have slots available.
149    ///
150    /// The other reason is that during the execution of some regex engines,
151    /// there exists a point in time where multiple slots for different
152    /// patterns may be written to before knowing which pattern has matched.
153    /// Therefore, the regex engines themselves, in order to support multiple
154    /// patterns correctly, must have all slots available. If `Captures`
155    /// doesn't have all slots available, then regex engines can't write
156    /// directly into the caller provided `Captures` and must instead write
157    /// into some other storage and then copy the slots involved in the match
158    /// at the end of the search.
159    ///
160    /// So overall, at least as of the time of writing, it seems like the path
161    /// of least resistance is to just require allocating all possible slots
162    /// instead of the conceptual minimum. Another way to justify this is that
163    /// the most common case is a single pattern, in which case, there is no
164    /// inefficiency here since the 'max' and 'sum' calculations above are
165    /// equivalent in that case.
166    ///
167    /// N.B. The mapping from group index to slot is maintained by `GroupInfo`
168    /// and is considered an API guarantee. See `GroupInfo` for more details on
169    /// that mapping.
170    ///
171    /// N.B. `Option<NonMaxUsize>` has the same size as a `usize`.
172    slots: Vec<Option<NonMaxUsize>>,
173}
174
175impl Captures {
176    /// Create new storage for the offsets of all matching capturing groups.
177    ///
178    /// This routine provides the most information for matches---namely, the
179    /// spans of matching capturing groups---but also requires the regex search
180    /// routines to do the most work.
181    ///
182    /// It is unspecified behavior to use the returned `Captures` value in a
183    /// search with a `GroupInfo` other than the one that is provided to this
184    /// constructor.
185    ///
186    /// # Example
187    ///
188    /// This example shows that all capturing groups---but only ones that
189    /// participated in a match---are available to query after a match has
190    /// been found:
191    ///
192    /// ```
193    /// use regex_automata::{
194    ///     nfa::thompson::pikevm::PikeVM,
195    ///     util::captures::Captures,
196    ///     Span, Match,
197    /// };
198    ///
199    /// let re = PikeVM::new(
200    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
201    /// )?;
202    /// let mut cache = re.create_cache();
203    /// let mut caps = Captures::all(re.get_nfa().group_info().clone());
204    ///
205    /// re.captures(&mut cache, "ABC123", &mut caps);
206    /// assert!(caps.is_match());
207    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
208    /// // The 'lower' group didn't match, so it won't have any offsets.
209    /// assert_eq!(None, caps.get_group_by_name("lower"));
210    /// assert_eq!(Some(Span::from(0..3)), caps.get_group_by_name("upper"));
211    /// assert_eq!(Some(Span::from(3..6)), caps.get_group_by_name("digits"));
212    ///
213    /// # Ok::<(), Box<dyn std::error::Error>>(())
214    /// ```
215    pub fn all(group_info: GroupInfo) -> Captures {
216        let slots = group_info.slot_len();
217        Captures { group_info, pid: None, slots: vec![None; slots] }
218    }
219
220    /// Create new storage for only the full match spans of a pattern. This
221    /// does not include any capturing group offsets.
222    ///
223    /// It is unspecified behavior to use the returned `Captures` value in a
224    /// search with a `GroupInfo` other than the one that is provided to this
225    /// constructor.
226    ///
227    /// # Example
228    ///
229    /// This example shows that only overall match offsets are reported when
230    /// this constructor is used. Accessing any capturing groups other than
231    /// the 0th will always return `None`.
232    ///
233    /// ```
234    /// use regex_automata::{
235    ///     nfa::thompson::pikevm::PikeVM,
236    ///     util::captures::Captures,
237    ///     Match,
238    /// };
239    ///
240    /// let re = PikeVM::new(
241    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
242    /// )?;
243    /// let mut cache = re.create_cache();
244    /// let mut caps = Captures::matches(re.get_nfa().group_info().clone());
245    ///
246    /// re.captures(&mut cache, "ABC123", &mut caps);
247    /// assert!(caps.is_match());
248    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
249    /// // We didn't ask for capturing group offsets, so they aren't available.
250    /// assert_eq!(None, caps.get_group_by_name("lower"));
251    /// assert_eq!(None, caps.get_group_by_name("upper"));
252    /// assert_eq!(None, caps.get_group_by_name("digits"));
253    ///
254    /// # Ok::<(), Box<dyn std::error::Error>>(())
255    /// ```
256    pub fn matches(group_info: GroupInfo) -> Captures {
257        // This is OK because we know there are at least this many slots,
258        // and GroupInfo construction guarantees that the number of slots fits
259        // into a usize.
260        let slots = group_info.pattern_len().checked_mul(2).unwrap();
261        Captures { group_info, pid: None, slots: vec![None; slots] }
262    }
263
264    /// Create new storage for only tracking which pattern matched. No offsets
265    /// are stored at all.
266    ///
267    /// It is unspecified behavior to use the returned `Captures` value in a
268    /// search with a `GroupInfo` other than the one that is provided to this
269    /// constructor.
270    ///
271    /// # Example
272    ///
273    /// This example shows that only the pattern that matched can be accessed
274    /// from a `Captures` value created via this constructor.
275    ///
276    /// ```
277    /// use regex_automata::{
278    ///     nfa::thompson::pikevm::PikeVM,
279    ///     util::captures::Captures,
280    ///     PatternID,
281    /// };
282    ///
283    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
284    /// let mut cache = re.create_cache();
285    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
286    ///
287    /// re.captures(&mut cache, "aABCz", &mut caps);
288    /// assert!(caps.is_match());
289    /// assert_eq!(Some(PatternID::must(0)), caps.pattern());
290    /// // We didn't ask for any offsets, so they aren't available.
291    /// assert_eq!(None, caps.get_match());
292    ///
293    /// re.captures(&mut cache, &"aABCz"[1..], &mut caps);
294    /// assert!(caps.is_match());
295    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
296    /// // We didn't ask for any offsets, so they aren't available.
297    /// assert_eq!(None, caps.get_match());
298    ///
299    /// # Ok::<(), Box<dyn std::error::Error>>(())
300    /// ```
301    pub fn empty(group_info: GroupInfo) -> Captures {
302        Captures { group_info, pid: None, slots: vec![] }
303    }
304
305    /// Returns true if and only if this capturing group represents a match.
306    ///
307    /// This is a convenience routine for `caps.pattern().is_some()`.
308    ///
309    /// # Example
310    ///
311    /// When using the PikeVM (for example), the lightest weight way of
312    /// detecting whether a match exists is to create capturing groups that
313    /// only track the ID of the pattern that match (if any):
314    ///
315    /// ```
316    /// use regex_automata::{
317    ///     nfa::thompson::pikevm::PikeVM,
318    ///     util::captures::Captures,
319    /// };
320    ///
321    /// let re = PikeVM::new(r"[a-z]+")?;
322    /// let mut cache = re.create_cache();
323    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
324    ///
325    /// re.captures(&mut cache, "aABCz", &mut caps);
326    /// assert!(caps.is_match());
327    ///
328    /// # Ok::<(), Box<dyn std::error::Error>>(())
329    /// ```
330    #[inline]
331    pub fn is_match(&self) -> bool {
332        self.pid.is_some()
333    }
334
335    /// Returns the identifier of the pattern that matched when this
336    /// capturing group represents a match. If no match was found, then this
337    /// always returns `None`.
338    ///
339    /// This returns a pattern ID in precisely the cases in which `is_match`
340    /// returns `true`. Similarly, the pattern ID returned is always the
341    /// same pattern ID found in the `Match` returned by `get_match`.
342    ///
343    /// # Example
344    ///
345    /// When using the PikeVM (for example), the lightest weight way of
346    /// detecting which pattern matched is to create capturing groups that only
347    /// track the ID of the pattern that match (if any):
348    ///
349    /// ```
350    /// use regex_automata::{
351    ///     nfa::thompson::pikevm::PikeVM,
352    ///     util::captures::Captures,
353    ///     PatternID,
354    /// };
355    ///
356    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
357    /// let mut cache = re.create_cache();
358    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
359    ///
360    /// re.captures(&mut cache, "ABC", &mut caps);
361    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
362    /// // Recall that offsets are only available when using a non-empty
363    /// // Captures value. So even though a match occurred, this returns None!
364    /// assert_eq!(None, caps.get_match());
365    ///
366    /// # Ok::<(), Box<dyn std::error::Error>>(())
367    /// ```
368    #[inline]
369    pub fn pattern(&self) -> Option<PatternID> {
370        self.pid
371    }
372
373    /// Returns the pattern ID and the span of the match, if one occurred.
374    ///
375    /// This always returns `None` when `Captures` was created with
376    /// [`Captures::empty`], even if a match was found.
377    ///
378    /// If this routine returns a non-`None` value, then `is_match` is
379    /// guaranteed to return `true` and `pattern` is also guaranteed to return
380    /// a non-`None` value.
381    ///
382    /// # Example
383    ///
384    /// This example shows how to get the full match from a search:
385    ///
386    /// ```
387    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
388    ///
389    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
390    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
391    ///
392    /// re.captures(&mut cache, "ABC", &mut caps);
393    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
394    ///
395    /// # Ok::<(), Box<dyn std::error::Error>>(())
396    /// ```
397    #[inline]
398    pub fn get_match(&self) -> Option<Match> {
399        Some(Match::new(self.pattern()?, self.get_group(0)?))
400    }
401
402    /// Returns the span of a capturing group match corresponding to the group
403    /// index given, only if both the overall pattern matched and the capturing
404    /// group participated in that match.
405    ///
406    /// This returns `None` if `index` is invalid. `index` is valid if and only
407    /// if it's less than [`Captures::group_len`] for the matching pattern.
408    ///
409    /// This always returns `None` when `Captures` was created with
410    /// [`Captures::empty`], even if a match was found. This also always
411    /// returns `None` for any `index > 0` when `Captures` was created with
412    /// [`Captures::matches`].
413    ///
414    /// If this routine returns a non-`None` value, then `is_match` is
415    /// guaranteed to return `true`, `pattern` is guaranteed to return a
416    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
417    /// value.
418    ///
419    /// By convention, the 0th capture group will always return the same
420    /// span as the span returned by `get_match`. This is because the 0th
421    /// capture group always corresponds to the entirety of the pattern's
422    /// match. (It is similarly always unnamed because it is implicit.) This
423    /// isn't necessarily true of all regex engines. For example, one can
424    /// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a
425    /// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't
426    /// technically forced to make the 0th capturing group always correspond to
427    /// the entire match.
428    ///
429    /// # Example
430    ///
431    /// This example shows how to get the capturing groups, by index, from a
432    /// match:
433    ///
434    /// ```
435    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
436    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
437    ///
438    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
439    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
440    ///
441    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
442    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
443    /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1));
444    /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2));
445    /// // Looking for a non-existent capturing group will return None:
446    /// assert_eq!(None, caps.get_group(3));
447    /// # // literals are too big for 32-bit usize: #1039
448    /// # #[cfg(target_pointer_width = "64")]
449    /// assert_eq!(None, caps.get_group(9944060567225171988));
450    ///
451    /// # Ok::<(), Box<dyn std::error::Error>>(())
452    /// ```
453    #[inline]
454    pub fn get_group(&self, index: usize) -> Option<Span> {
455        let pid = self.pattern()?;
456        // There's a little bit of work needed to map captures to slots in the
457        // fully general case. But in the overwhelming common case of a single
458        // pattern, we can just do some simple arithmetic.
459        let (slot_start, slot_end) = if self.group_info().pattern_len() == 1 {
460            (index.checked_mul(2)?, index.checked_mul(2)?.checked_add(1)?)
461        } else {
462            self.group_info().slots(pid, index)?
463        };
464        let start = self.slots.get(slot_start).copied()??;
465        let end = self.slots.get(slot_end).copied()??;
466        Some(Span { start: start.get(), end: end.get() })
467    }
468
469    /// Returns the span of a capturing group match corresponding to the group
470    /// name given, only if both the overall pattern matched and the capturing
471    /// group participated in that match.
472    ///
473    /// This returns `None` if `name` does not correspond to a valid capturing
474    /// group for the pattern that matched.
475    ///
476    /// This always returns `None` when `Captures` was created with
477    /// [`Captures::empty`], even if a match was found. This also always
478    /// returns `None` for any `index > 0` when `Captures` was created with
479    /// [`Captures::matches`].
480    ///
481    /// If this routine returns a non-`None` value, then `is_match` is
482    /// guaranteed to return `true`, `pattern` is guaranteed to return a
483    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
484    /// value.
485    ///
486    /// # Example
487    ///
488    /// This example shows how to get the capturing groups, by name, from a
489    /// match:
490    ///
491    /// ```
492    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
493    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
494    ///
495    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
496    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
497    ///
498    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
499    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
500    /// assert_eq!(Some(Span::from(0..5)), caps.get_group_by_name("first"));
501    /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last"));
502    /// // Looking for a non-existent capturing group will return None:
503    /// assert_eq!(None, caps.get_group_by_name("middle"));
504    ///
505    /// # Ok::<(), Box<dyn std::error::Error>>(())
506    /// ```
507    pub fn get_group_by_name(&self, name: &str) -> Option<Span> {
508        let index = self.group_info().to_index(self.pattern()?, name)?;
509        self.get_group(index)
510    }
511
512    /// Returns an iterator of possible spans for every capturing group in the
513    /// matching pattern.
514    ///
515    /// If this `Captures` value does not correspond to a match, then the
516    /// iterator returned yields no elements.
517    ///
518    /// Note that the iterator returned yields elements of type `Option<Span>`.
519    /// A span is present if and only if it corresponds to a capturing group
520    /// that participated in a match.
521    ///
522    /// # Example
523    ///
524    /// This example shows how to collect all capturing groups:
525    ///
526    /// ```
527    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
528    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
529    ///
530    /// let re = PikeVM::new(
531    ///     // Matches first/last names, with an optional middle name.
532    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
533    /// )?;
534    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
535    ///
536    /// re.captures(&mut cache, "Harry James Potter", &mut caps);
537    /// assert!(caps.is_match());
538    /// let groups: Vec<Option<Span>> = caps.iter().collect();
539    /// assert_eq!(groups, vec![
540    ///     Some(Span::from(0..18)),
541    ///     Some(Span::from(0..5)),
542    ///     Some(Span::from(6..11)),
543    ///     Some(Span::from(12..18)),
544    /// ]);
545    ///
546    /// # Ok::<(), Box<dyn std::error::Error>>(())
547    /// ```
548    ///
549    /// This example uses the same regex as the previous example, but with a
550    /// haystack that omits the middle name. This results in a capturing group
551    /// that is present in the elements yielded by the iterator but without a
552    /// match:
553    ///
554    /// ```
555    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
556    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
557    ///
558    /// let re = PikeVM::new(
559    ///     // Matches first/last names, with an optional middle name.
560    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
561    /// )?;
562    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
563    ///
564    /// re.captures(&mut cache, "Harry Potter", &mut caps);
565    /// assert!(caps.is_match());
566    /// let groups: Vec<Option<Span>> = caps.iter().collect();
567    /// assert_eq!(groups, vec![
568    ///     Some(Span::from(0..12)),
569    ///     Some(Span::from(0..5)),
570    ///     None,
571    ///     Some(Span::from(6..12)),
572    /// ]);
573    ///
574    /// # Ok::<(), Box<dyn std::error::Error>>(())
575    /// ```
576    pub fn iter(&self) -> CapturesPatternIter<'_> {
577        let names = self
578            .pattern()
579            .map_or(GroupInfoPatternNames::empty().enumerate(), |pid| {
580                self.group_info().pattern_names(pid).enumerate()
581            });
582        CapturesPatternIter { caps: self, names }
583    }
584
585    /// Return the total number of capturing groups for the matching pattern.
586    ///
587    /// If this `Captures` value does not correspond to a match, then this
588    /// always returns `0`.
589    ///
590    /// This always returns the same number of elements yielded by
591    /// [`Captures::iter`]. That is, the number includes capturing groups even
592    /// if they don't participate in the match.
593    ///
594    /// # Example
595    ///
596    /// This example shows how to count the total number of capturing groups
597    /// associated with a pattern. Notice that it includes groups that did not
598    /// participate in a match (just like `Captures::iter` does).
599    ///
600    /// ```
601    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
602    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
603    ///
604    /// let re = PikeVM::new(
605    ///     // Matches first/last names, with an optional middle name.
606    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
607    /// )?;
608    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
609    ///
610    /// re.captures(&mut cache, "Harry Potter", &mut caps);
611    /// assert_eq!(4, caps.group_len());
612    ///
613    /// # Ok::<(), Box<dyn std::error::Error>>(())
614    /// ```
615    pub fn group_len(&self) -> usize {
616        let pid = match self.pattern() {
617            None => return 0,
618            Some(pid) => pid,
619        };
620        self.group_info().group_len(pid)
621    }
622
623    /// Returns a reference to the underlying group info on which these
624    /// captures are based.
625    ///
626    /// The difference between `GroupInfo` and `Captures` is that the former
627    /// defines the structure of capturing groups where as the latter is what
628    /// stores the actual match information. So where as `Captures` only gives
629    /// you access to the current match, `GroupInfo` lets you query any
630    /// information about all capturing groups, even ones for patterns that
631    /// weren't involved in a match.
632    ///
633    /// Note that a `GroupInfo` uses reference counting internally, so it may
634    /// be cloned cheaply.
635    ///
636    /// # Example
637    ///
638    /// This example shows how to get all capturing group names from the
639    /// underlying `GroupInfo`. Notice that we don't even need to run a
640    /// search.
641    ///
642    /// ```
643    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
644    ///
645    /// let re = PikeVM::new_many(&[
646    ///     r"(?P<foo>a)",
647    ///     r"(a)(b)",
648    ///     r"ab",
649    ///     r"(?P<bar>a)(?P<quux>a)",
650    ///     r"(?P<foo>z)",
651    /// ])?;
652    /// let caps = re.create_captures();
653    ///
654    /// let expected = vec![
655    ///     (PatternID::must(0), 0, None),
656    ///     (PatternID::must(0), 1, Some("foo")),
657    ///     (PatternID::must(1), 0, None),
658    ///     (PatternID::must(1), 1, None),
659    ///     (PatternID::must(1), 2, None),
660    ///     (PatternID::must(2), 0, None),
661    ///     (PatternID::must(3), 0, None),
662    ///     (PatternID::must(3), 1, Some("bar")),
663    ///     (PatternID::must(3), 2, Some("quux")),
664    ///     (PatternID::must(4), 0, None),
665    ///     (PatternID::must(4), 1, Some("foo")),
666    /// ];
667    /// // We could also just use 're.get_nfa().group_info()'.
668    /// let got: Vec<(PatternID, usize, Option<&str>)> =
669    ///     caps.group_info().all_names().collect();
670    /// assert_eq!(expected, got);
671    ///
672    /// # Ok::<(), Box<dyn std::error::Error>>(())
673    /// ```
674    pub fn group_info(&self) -> &GroupInfo {
675        &self.group_info
676    }
677
678    /// Interpolates the capture references in `replacement` with the
679    /// corresponding substrings in `haystack` matched by each reference. The
680    /// interpolated string is returned.
681    ///
682    /// See the [`interpolate` module](interpolate) for documentation on the
683    /// format of the replacement string.
684    ///
685    /// # Example
686    ///
687    /// This example shows how to use interpolation, and also shows how it
688    /// can work with multi-pattern regexes.
689    ///
690    /// ```
691    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
692    ///
693    /// let re = PikeVM::new_many(&[
694    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
695    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
696    /// ])?;
697    /// let mut cache = re.create_cache();
698    /// let mut caps = re.create_captures();
699    ///
700    /// let replacement = "year=$year, month=$month, day=$day";
701    ///
702    /// // This matches the first pattern.
703    /// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
704    /// re.captures(&mut cache, hay, &mut caps);
705    /// let result = caps.interpolate_string(hay, replacement);
706    /// assert_eq!("year=2010, month=03, day=14", result);
707    ///
708    /// // And this matches the second pattern.
709    /// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
710    /// re.captures(&mut cache, hay, &mut caps);
711    /// let result = caps.interpolate_string(hay, replacement);
712    /// assert_eq!("year=2010, month=03, day=14", result);
713    ///
714    /// # Ok::<(), Box<dyn std::error::Error>>(())
715    /// ```
716    pub fn interpolate_string(
717        &self,
718        haystack: &str,
719        replacement: &str,
720    ) -> String {
721        let mut dst = String::new();
722        self.interpolate_string_into(haystack, replacement, &mut dst);
723        dst
724    }
725
726    /// Interpolates the capture references in `replacement` with the
727    /// corresponding substrings in `haystack` matched by each reference. The
728    /// interpolated string is written to `dst`.
729    ///
730    /// See the [`interpolate` module](interpolate) for documentation on the
731    /// format of the replacement string.
732    ///
733    /// # Example
734    ///
735    /// This example shows how to use interpolation, and also shows how it
736    /// can work with multi-pattern regexes.
737    ///
738    /// ```
739    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
740    ///
741    /// let re = PikeVM::new_many(&[
742    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
743    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
744    /// ])?;
745    /// let mut cache = re.create_cache();
746    /// let mut caps = re.create_captures();
747    ///
748    /// let replacement = "year=$year, month=$month, day=$day";
749    ///
750    /// // This matches the first pattern.
751    /// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
752    /// re.captures(&mut cache, hay, &mut caps);
753    /// let mut dst = String::new();
754    /// caps.interpolate_string_into(hay, replacement, &mut dst);
755    /// assert_eq!("year=2010, month=03, day=14", dst);
756    ///
757    /// // And this matches the second pattern.
758    /// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
759    /// re.captures(&mut cache, hay, &mut caps);
760    /// let mut dst = String::new();
761    /// caps.interpolate_string_into(hay, replacement, &mut dst);
762    /// assert_eq!("year=2010, month=03, day=14", dst);
763    ///
764    /// # Ok::<(), Box<dyn std::error::Error>>(())
765    /// ```
766    pub fn interpolate_string_into(
767        &self,
768        haystack: &str,
769        replacement: &str,
770        dst: &mut String,
771    ) {
772        interpolate::string(
773            replacement,
774            |index, dst| {
775                let span = match self.get_group(index) {
776                    None => return,
777                    Some(span) => span,
778                };
779                dst.push_str(&haystack[span]);
780            },
781            |name| self.group_info().to_index(self.pattern()?, name),
782            dst,
783        );
784    }
785
786    /// Interpolates the capture references in `replacement` with the
787    /// corresponding substrings in `haystack` matched by each reference. The
788    /// interpolated byte string is returned.
789    ///
790    /// See the [`interpolate` module](interpolate) for documentation on the
791    /// format of the replacement string.
792    ///
793    /// # Example
794    ///
795    /// This example shows how to use interpolation, and also shows how it
796    /// can work with multi-pattern regexes.
797    ///
798    /// ```
799    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
800    ///
801    /// let re = PikeVM::new_many(&[
802    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
803    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
804    /// ])?;
805    /// let mut cache = re.create_cache();
806    /// let mut caps = re.create_captures();
807    ///
808    /// let replacement = b"year=$year, month=$month, day=$day";
809    ///
810    /// // This matches the first pattern.
811    /// let hay = b"On 14-03-2010, I became a Tenneessee lamb.";
812    /// re.captures(&mut cache, hay, &mut caps);
813    /// let result = caps.interpolate_bytes(hay, replacement);
814    /// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
815    ///
816    /// // And this matches the second pattern.
817    /// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
818    /// re.captures(&mut cache, hay, &mut caps);
819    /// let result = caps.interpolate_bytes(hay, replacement);
820    /// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
821    ///
822    /// # Ok::<(), Box<dyn std::error::Error>>(())
823    /// ```
824    pub fn interpolate_bytes(
825        &self,
826        haystack: &[u8],
827        replacement: &[u8],
828    ) -> Vec<u8> {
829        let mut dst = vec![];
830        self.interpolate_bytes_into(haystack, replacement, &mut dst);
831        dst
832    }
833
834    /// Interpolates the capture references in `replacement` with the
835    /// corresponding substrings in `haystack` matched by each reference. The
836    /// interpolated byte string is written to `dst`.
837    ///
838    /// See the [`interpolate` module](interpolate) for documentation on the
839    /// format of the replacement string.
840    ///
841    /// # Example
842    ///
843    /// This example shows how to use interpolation, and also shows how it
844    /// can work with multi-pattern regexes.
845    ///
846    /// ```
847    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
848    ///
849    /// let re = PikeVM::new_many(&[
850    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
851    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
852    /// ])?;
853    /// let mut cache = re.create_cache();
854    /// let mut caps = re.create_captures();
855    ///
856    /// let replacement = b"year=$year, month=$month, day=$day";
857    ///
858    /// // This matches the first pattern.
859    /// let hay = b"On 14-03-2010, I became a Tenneessee lamb.";
860    /// re.captures(&mut cache, hay, &mut caps);
861    /// let mut dst = vec![];
862    /// caps.interpolate_bytes_into(hay, replacement, &mut dst);
863    /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
864    ///
865    /// // And this matches the second pattern.
866    /// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
867    /// re.captures(&mut cache, hay, &mut caps);
868    /// let mut dst = vec![];
869    /// caps.interpolate_bytes_into(hay, replacement, &mut dst);
870    /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
871    ///
872    /// # Ok::<(), Box<dyn std::error::Error>>(())
873    /// ```
874    pub fn interpolate_bytes_into(
875        &self,
876        haystack: &[u8],
877        replacement: &[u8],
878        dst: &mut Vec<u8>,
879    ) {
880        interpolate::bytes(
881            replacement,
882            |index, dst| {
883                let span = match self.get_group(index) {
884                    None => return,
885                    Some(span) => span,
886                };
887                dst.extend_from_slice(&haystack[span]);
888            },
889            |name| self.group_info().to_index(self.pattern()?, name),
890            dst,
891        );
892    }
893
894    /// This is a convenience routine for extracting the substrings
895    /// corresponding to matching capture groups in the given `haystack`. The
896    /// `haystack` should be the same substring used to find the match spans in
897    /// this `Captures` value.
898    ///
899    /// This is identical to [`Captures::extract_bytes`], except it works with
900    /// `&str` instead of `&[u8]`.
901    ///
902    /// # Panics
903    ///
904    /// This panics if the number of explicit matching groups in this
905    /// `Captures` value is less than `N`. This also panics if this `Captures`
906    /// value does not correspond to a match.
907    ///
908    /// Note that this does *not* panic if the number of explicit matching
909    /// groups is bigger than `N`. In that case, only the first `N` matching
910    /// groups are extracted.
911    ///
912    /// # Example
913    ///
914    /// ```
915    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
916    ///
917    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
918    /// let mut cache = re.create_cache();
919    /// let mut caps = re.create_captures();
920    ///
921    /// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
922    /// re.captures(&mut cache, hay, &mut caps);
923    /// assert!(caps.is_match());
924    /// let (full, [year, month, day]) = caps.extract(hay);
925    /// assert_eq!("2010-03-14", full);
926    /// assert_eq!("2010", year);
927    /// assert_eq!("03", month);
928    /// assert_eq!("14", day);
929    ///
930    /// // We can also ask for fewer than all capture groups.
931    /// let (full, [year]) = caps.extract(hay);
932    /// assert_eq!("2010-03-14", full);
933    /// assert_eq!("2010", year);
934    ///
935    /// # Ok::<(), Box<dyn std::error::Error>>(())
936    /// ```
937    pub fn extract<'h, const N: usize>(
938        &self,
939        haystack: &'h str,
940    ) -> (&'h str, [&'h str; N]) {
941        let mut matched = self.iter().flatten();
942        let whole_match = &haystack[matched.next().expect("a match")];
943        let group_matches = [0; N].map(|_| {
944            let sp = matched.next().expect("too few matching groups");
945            &haystack[sp]
946        });
947        (whole_match, group_matches)
948    }
949
950    /// This is a convenience routine for extracting the substrings
951    /// corresponding to matching capture groups in the given `haystack`. The
952    /// `haystack` should be the same substring used to find the match spans in
953    /// this `Captures` value.
954    ///
955    /// This is identical to [`Captures::extract`], except it works with
956    /// `&[u8]` instead of `&str`.
957    ///
958    /// # Panics
959    ///
960    /// This panics if the number of explicit matching groups in this
961    /// `Captures` value is less than `N`. This also panics if this `Captures`
962    /// value does not correspond to a match.
963    ///
964    /// Note that this does *not* panic if the number of explicit matching
965    /// groups is bigger than `N`. In that case, only the first `N` matching
966    /// groups are extracted.
967    ///
968    /// # Example
969    ///
970    /// ```
971    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
972    ///
973    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
974    /// let mut cache = re.create_cache();
975    /// let mut caps = re.create_captures();
976    ///
977    /// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
978    /// re.captures(&mut cache, hay, &mut caps);
979    /// assert!(caps.is_match());
980    /// let (full, [year, month, day]) = caps.extract_bytes(hay);
981    /// assert_eq!(b"2010-03-14", full);
982    /// assert_eq!(b"2010", year);
983    /// assert_eq!(b"03", month);
984    /// assert_eq!(b"14", day);
985    ///
986    /// // We can also ask for fewer than all capture groups.
987    /// let (full, [year]) = caps.extract_bytes(hay);
988    /// assert_eq!(b"2010-03-14", full);
989    /// assert_eq!(b"2010", year);
990    ///
991    /// # Ok::<(), Box<dyn std::error::Error>>(())
992    /// ```
993    pub fn extract_bytes<'h, const N: usize>(
994        &self,
995        haystack: &'h [u8],
996    ) -> (&'h [u8], [&'h [u8]; N]) {
997        let mut matched = self.iter().flatten();
998        let whole_match = &haystack[matched.next().expect("a match")];
999        let group_matches = [0; N].map(|_| {
1000            let sp = matched.next().expect("too few matching groups");
1001            &haystack[sp]
1002        });
1003        (whole_match, group_matches)
1004    }
1005}
1006
1007/// Lower level "slot" oriented APIs. One does not typically need to use these
1008/// when executing a search. They are instead mostly intended for folks that
1009/// are writing their own regex engine while reusing this `Captures` type.
1010impl Captures {
1011    /// Clear this `Captures` value.
1012    ///
1013    /// After clearing, all slots inside this `Captures` value will be set to
1014    /// `None`. Similarly, any pattern ID that it was previously associated
1015    /// with (for a match) is erased.
1016    ///
1017    /// It is not usually necessary to call this routine. Namely, a `Captures`
1018    /// value only provides high level access to the capturing groups of the
1019    /// pattern that matched, and only low level access to individual slots.
1020    /// Thus, even if slots corresponding to groups that aren't associated
1021    /// with the matching pattern are set, then it won't impact the higher
1022    /// level APIs. Namely, higher level APIs like [`Captures::get_group`] will
1023    /// return `None` if no pattern ID is present, even if there are spans set
1024    /// in the underlying slots.
1025    ///
1026    /// Thus, to "clear" a `Captures` value of a match, it is usually only
1027    /// necessary to call [`Captures::set_pattern`] with `None`.
1028    ///
1029    /// # Example
1030    ///
1031    /// This example shows what happens when a `Captures` value is cleared.
1032    ///
1033    /// ```
1034    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1035    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
1036    ///
1037    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1038    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1039    ///
1040    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1041    /// assert!(caps.is_match());
1042    /// let slots: Vec<Option<usize>> =
1043    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1044    /// // Note that the following ordering is considered an API guarantee.
1045    /// assert_eq!(slots, vec![
1046    ///     Some(0),
1047    ///     Some(17),
1048    ///     Some(0),
1049    ///     Some(5),
1050    ///     Some(6),
1051    ///     Some(17),
1052    /// ]);
1053    ///
1054    /// // Now clear the slots. Everything is gone and it is no longer a match.
1055    /// caps.clear();
1056    /// assert!(!caps.is_match());
1057    /// let slots: Vec<Option<usize>> =
1058    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1059    /// assert_eq!(slots, vec![
1060    ///     None,
1061    ///     None,
1062    ///     None,
1063    ///     None,
1064    ///     None,
1065    ///     None,
1066    /// ]);
1067    ///
1068    /// # Ok::<(), Box<dyn std::error::Error>>(())
1069    /// ```
1070    #[inline]
1071    pub fn clear(&mut self) {
1072        self.pid = None;
1073        for slot in self.slots.iter_mut() {
1074            *slot = None;
1075        }
1076    }
1077
1078    /// Set the pattern on this `Captures` value.
1079    ///
1080    /// When the pattern ID is `None`, then this `Captures` value does not
1081    /// correspond to a match (`is_match` will return `false`). Otherwise, it
1082    /// corresponds to a match.
1083    ///
1084    /// This is useful in search implementations where you might want to
1085    /// initially call `set_pattern(None)` in order to avoid the cost of
1086    /// calling `clear()` if it turns out to not be necessary.
1087    ///
1088    /// # Example
1089    ///
1090    /// This example shows that `set_pattern` merely overwrites the pattern ID.
1091    /// It does not actually change the underlying slot values.
1092    ///
1093    /// ```
1094    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1095    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
1096    ///
1097    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1098    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1099    ///
1100    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1101    /// assert!(caps.is_match());
1102    /// assert!(caps.pattern().is_some());
1103    /// let slots: Vec<Option<usize>> =
1104    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1105    /// // Note that the following ordering is considered an API guarantee.
1106    /// assert_eq!(slots, vec![
1107    ///     Some(0),
1108    ///     Some(17),
1109    ///     Some(0),
1110    ///     Some(5),
1111    ///     Some(6),
1112    ///     Some(17),
1113    /// ]);
1114    ///
1115    /// // Now set the pattern to None. Note that the slot values remain.
1116    /// caps.set_pattern(None);
1117    /// assert!(!caps.is_match());
1118    /// assert!(!caps.pattern().is_some());
1119    /// let slots: Vec<Option<usize>> =
1120    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1121    /// // Note that the following ordering is considered an API guarantee.
1122    /// assert_eq!(slots, vec![
1123    ///     Some(0),
1124    ///     Some(17),
1125    ///     Some(0),
1126    ///     Some(5),
1127    ///     Some(6),
1128    ///     Some(17),
1129    /// ]);
1130    ///
1131    /// # Ok::<(), Box<dyn std::error::Error>>(())
1132    /// ```
1133    #[inline]
1134    pub fn set_pattern(&mut self, pid: Option<PatternID>) {
1135        self.pid = pid;
1136    }
1137
1138    /// Returns the underlying slots, where each slot stores a single offset.
1139    ///
1140    /// Every matching capturing group generally corresponds to two slots: one
1141    /// slot for the starting position and another for the ending position.
1142    /// Typically, either both are present or neither are. (The weasel word
1143    /// "typically" is used here because it really depends on the regex engine
1144    /// implementation. Every sensible regex engine likely adheres to this
1145    /// invariant, and every regex engine in this crate is sensible.)
1146    ///
1147    /// Generally speaking, callers should prefer to use higher level routines
1148    /// like [`Captures::get_match`] or [`Captures::get_group`].
1149    ///
1150    /// An important note here is that a regex engine may not reset all of the
1151    /// slots to `None` values when no match occurs, or even when a match of
1152    /// a different pattern occurs. But this depends on how the regex engine
1153    /// implementation deals with slots.
1154    ///
1155    /// # Example
1156    ///
1157    /// This example shows how to get the underlying slots from a regex match.
1158    ///
1159    /// ```
1160    /// use regex_automata::{
1161    ///     nfa::thompson::pikevm::PikeVM,
1162    ///     util::primitives::{PatternID, NonMaxUsize},
1163    /// };
1164    ///
1165    /// let re = PikeVM::new_many(&[
1166    ///     r"[a-z]+",
1167    ///     r"[0-9]+",
1168    /// ])?;
1169    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1170    ///
1171    /// re.captures(&mut cache, "123", &mut caps);
1172    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
1173    /// // Note that the only guarantee we have here is that slots 2 and 3
1174    /// // are set to correct values. The contents of the first two slots are
1175    /// // unspecified since the 0th pattern did not match.
1176    /// let expected = &[
1177    ///     None,
1178    ///     None,
1179    ///     NonMaxUsize::new(0),
1180    ///     NonMaxUsize::new(3),
1181    /// ];
1182    /// assert_eq!(expected, caps.slots());
1183    ///
1184    /// # Ok::<(), Box<dyn std::error::Error>>(())
1185    /// ```
1186    #[inline]
1187    pub fn slots(&self) -> &[Option<NonMaxUsize>] {
1188        &self.slots
1189    }
1190
1191    /// Returns the underlying slots as a mutable slice, where each slot stores
1192    /// a single offset.
1193    ///
1194    /// This tends to be most useful for regex engine implementations for
1195    /// writing offsets for matching capturing groups to slots.
1196    ///
1197    /// See [`Captures::slots`] for more information about slots.
1198    #[inline]
1199    pub fn slots_mut(&mut self) -> &mut [Option<NonMaxUsize>] {
1200        &mut self.slots
1201    }
1202}
1203
1204impl core::fmt::Debug for Captures {
1205    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1206        let mut dstruct = f.debug_struct("Captures");
1207        dstruct.field("pid", &self.pid);
1208        if let Some(pid) = self.pid {
1209            dstruct.field("spans", &CapturesDebugMap { pid, caps: self });
1210        }
1211        dstruct.finish()
1212    }
1213}
1214
1215/// A little helper type to provide a nice map-like debug representation for
1216/// our capturing group spans.
1217struct CapturesDebugMap<'a> {
1218    pid: PatternID,
1219    caps: &'a Captures,
1220}
1221
1222impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
1223    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1224        struct Key<'a>(usize, Option<&'a str>);
1225
1226        impl<'a> core::fmt::Debug for Key<'a> {
1227            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1228                write!(f, "{}", self.0)?;
1229                if let Some(name) = self.1 {
1230                    write!(f, "/{:?}", name)?;
1231                }
1232                Ok(())
1233            }
1234        }
1235
1236        let mut map = f.debug_map();
1237        let names = self.caps.group_info().pattern_names(self.pid);
1238        for (group_index, maybe_name) in names.enumerate() {
1239            let key = Key(group_index, maybe_name);
1240            match self.caps.get_group(group_index) {
1241                None => map.entry(&key, &None::<()>),
1242                Some(span) => map.entry(&key, &span),
1243            };
1244        }
1245        map.finish()
1246    }
1247}
1248
1249/// An iterator over all capturing groups in a `Captures` value.
1250///
1251/// This iterator includes capturing groups that did not participate in a
1252/// match. See the [`Captures::iter`] method documentation for more details
1253/// and examples.
1254///
1255/// The lifetime parameter `'a` refers to the lifetime of the underlying
1256/// `Captures` value.
1257#[derive(Clone, Debug)]
1258pub struct CapturesPatternIter<'a> {
1259    caps: &'a Captures,
1260    names: core::iter::Enumerate<GroupInfoPatternNames<'a>>,
1261}
1262
1263impl<'a> Iterator for CapturesPatternIter<'a> {
1264    type Item = Option<Span>;
1265
1266    fn next(&mut self) -> Option<Option<Span>> {
1267        let (group_index, _) = self.names.next()?;
1268        Some(self.caps.get_group(group_index))
1269    }
1270
1271    fn size_hint(&self) -> (usize, Option<usize>) {
1272        self.names.size_hint()
1273    }
1274
1275    fn count(self) -> usize {
1276        self.names.count()
1277    }
1278}
1279
1280impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {}
1281impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {}
1282
1283/// Represents information about capturing groups in a compiled regex.
1284///
1285/// The information encapsulated by this type consists of the following. For
1286/// each pattern:
1287///
1288/// * A map from every capture group name to its corresponding capture group
1289/// index.
1290/// * A map from every capture group index to its corresponding capture group
1291/// name.
1292/// * A map from capture group index to its corresponding slot index. A slot
1293/// refers to one half of a capturing group. That is, a capture slot is either
1294/// the start or end of a capturing group. A slot is usually the mechanism
1295/// by which a regex engine records offsets for each capturing group during a
1296/// search.
1297///
1298/// A `GroupInfo` uses reference counting internally and is thus cheap to
1299/// clone.
1300///
1301/// # Mapping from capture groups to slots
1302///
1303/// One of the main responsibilities of a `GroupInfo` is to build a mapping
1304/// from `(PatternID, u32)` (where the `u32` is a capture index) to something
1305/// called a "slot." As mentioned above, a slot refers to one half of a
1306/// capturing group. Both combined provide the start and end offsets of
1307/// a capturing group that participated in a match.
1308///
1309/// **The mapping between group indices and slots is an API guarantee.** That
1310/// is, the mapping won't change within a semver compatible release.
1311///
1312/// Slots exist primarily because this is a convenient mechanism by which
1313/// regex engines report group offsets at search time. For example, the
1314/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture)
1315/// NFA state includes the slot index. When a regex engine transitions through
1316/// this state, it will likely use the slot index to write the current haystack
1317/// offset to some region of memory. When a match is found, those slots are
1318/// then reported to the caller, typically via a convenient abstraction like a
1319/// [`Captures`] value.
1320///
1321/// Because this crate provides first class support for multi-pattern regexes,
1322/// and because of some performance related reasons, the mapping between
1323/// capturing groups and slots is a little complex. However, in the case of a
1324/// single pattern, the mapping can be described very simply: for all capture
1325/// group indices `i`, its corresponding slots are at `i * 2` and `i * 2 + 1`.
1326/// Notice that the pattern ID isn't involved at all here, because it only
1327/// applies to a single-pattern regex, it is therefore always `0`.
1328///
1329/// In the multi-pattern case, the mapping is a bit more complicated. To talk
1330/// about it, we must define what we mean by "implicit" vs "explicit"
1331/// capturing groups:
1332///
1333/// * An **implicit** capturing group refers to the capturing group that is
1334/// present for every pattern automatically, and corresponds to the overall
1335/// match of a pattern. Every pattern has precisely one implicit capturing
1336/// group. It is always unnamed and it always corresponds to the capture group
1337/// index `0`.
1338/// * An **explicit** capturing group refers to any capturing group that
1339/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand
1340/// built without any concrete syntax, it refers to any capturing group with an
1341/// index greater than `0`.)
1342///
1343/// Some examples:
1344///
1345/// * `\w+` has one implicit capturing group and zero explicit capturing
1346/// groups.
1347/// * `(\w+)` has one implicit group and one explicit group.
1348/// * `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit groups.
1349///
1350/// Turning back to the slot mapping, we can now state it as follows:
1351///
1352/// * Given a pattern ID `pid`, the slots for its implicit group are always
1353/// at `pid * 2` and `pid * 2 + 1`.
1354/// * Given a pattern ID `0`, the slots for its explicit groups start
1355/// at `group_info.pattern_len() * 2`.
1356/// * Given a pattern ID `pid > 0`, the slots for its explicit groups start
1357/// immediately following where the slots for the explicit groups of `pid - 1`
1358/// end.
1359///
1360/// In particular, while there is a concrete formula one can use to determine
1361/// where the slots for the implicit group of any pattern are, there is no
1362/// general formula for determining where the slots for explicit capturing
1363/// groups are. This is because each pattern can contain a different number
1364/// of groups.
1365///
1366/// The intended way of getting the slots for a particular capturing group
1367/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or
1368/// [`GroupInfo::slots`] method.
1369///
1370/// See below for a concrete example of how capturing groups get mapped to
1371/// slots.
1372///
1373/// # Example
1374///
1375/// This example shows how to build a new `GroupInfo` and query it for
1376/// information.
1377///
1378/// ```
1379/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1380///
1381/// let info = GroupInfo::new(vec![
1382///     vec![None, Some("foo")],
1383///     vec![None],
1384///     vec![None, None, None, Some("bar"), None],
1385///     vec![None, None, Some("foo")],
1386/// ])?;
1387/// // The number of patterns being tracked.
1388/// assert_eq!(4, info.pattern_len());
1389/// // We can query the number of groups for any pattern.
1390/// assert_eq!(2, info.group_len(PatternID::must(0)));
1391/// assert_eq!(1, info.group_len(PatternID::must(1)));
1392/// assert_eq!(5, info.group_len(PatternID::must(2)));
1393/// assert_eq!(3, info.group_len(PatternID::must(3)));
1394/// // An invalid pattern always has zero groups.
1395/// assert_eq!(0, info.group_len(PatternID::must(999)));
1396/// // 2 slots per group
1397/// assert_eq!(22, info.slot_len());
1398///
1399/// // We can map a group index for a particular pattern to its name, if
1400/// // one exists.
1401/// assert_eq!(Some("foo"), info.to_name(PatternID::must(3), 2));
1402/// assert_eq!(None, info.to_name(PatternID::must(2), 4));
1403/// // Or map a name to its group index.
1404/// assert_eq!(Some(1), info.to_index(PatternID::must(0), "foo"));
1405/// assert_eq!(Some(2), info.to_index(PatternID::must(3), "foo"));
1406///
1407/// # Ok::<(), Box<dyn std::error::Error>>(())
1408/// ```
1409///
1410/// # Example: mapping from capture groups to slots
1411///
1412/// This example shows the specific mapping from capture group indices for
1413/// each pattern to their corresponding slots. The slot values shown in this
1414/// example are considered an API guarantee.
1415///
1416/// ```
1417/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1418///
1419/// let info = GroupInfo::new(vec![
1420///     vec![None, Some("foo")],
1421///     vec![None],
1422///     vec![None, None, None, Some("bar"), None],
1423///     vec![None, None, Some("foo")],
1424/// ])?;
1425///
1426/// // We first show the slots for each pattern's implicit group.
1427/// assert_eq!(Some((0, 1)), info.slots(PatternID::must(0), 0));
1428/// assert_eq!(Some((2, 3)), info.slots(PatternID::must(1), 0));
1429/// assert_eq!(Some((4, 5)), info.slots(PatternID::must(2), 0));
1430/// assert_eq!(Some((6, 7)), info.slots(PatternID::must(3), 0));
1431///
1432/// // And now we show the slots for each pattern's explicit group.
1433/// assert_eq!(Some((8, 9)), info.slots(PatternID::must(0), 1));
1434/// assert_eq!(Some((10, 11)), info.slots(PatternID::must(2), 1));
1435/// assert_eq!(Some((12, 13)), info.slots(PatternID::must(2), 2));
1436/// assert_eq!(Some((14, 15)), info.slots(PatternID::must(2), 3));
1437/// assert_eq!(Some((16, 17)), info.slots(PatternID::must(2), 4));
1438/// assert_eq!(Some((18, 19)), info.slots(PatternID::must(3), 1));
1439/// assert_eq!(Some((20, 21)), info.slots(PatternID::must(3), 2));
1440///
1441/// // Asking for the slots for an invalid pattern ID or even for an invalid
1442/// // group index for a specific pattern will return None. So for example,
1443/// // you're guaranteed to not get the slots for a different pattern than the
1444/// // one requested.
1445/// assert_eq!(None, info.slots(PatternID::must(5), 0));
1446/// assert_eq!(None, info.slots(PatternID::must(1), 1));
1447///
1448/// # Ok::<(), Box<dyn std::error::Error>>(())
1449/// ```
1450#[derive(Clone, Debug, Default)]
1451pub struct GroupInfo(Arc<GroupInfoInner>);
1452
1453impl GroupInfo {
1454    /// Creates a new group info from a sequence of patterns, where each
1455    /// sequence of patterns yields a sequence of possible group names. The
1456    /// index of each pattern in the sequence corresponds to its `PatternID`,
1457    /// and the index of each group in each pattern's sequence corresponds to
1458    /// its corresponding group index.
1459    ///
1460    /// While this constructor is very generic and therefore perhaps hard to
1461    /// chew on, an example of a valid concrete type that can be passed to
1462    /// this constructor is `Vec<Vec<Option<String>>>`. The outer `Vec`
1463    /// corresponds to the patterns, i.e., one `Vec<Option<String>>` per
1464    /// pattern. The inner `Vec` corresponds to the capturing groups for
1465    /// each pattern. The `Option<String>` corresponds to the name of the
1466    /// capturing group, if present.
1467    ///
1468    /// It is legal to pass an empty iterator to this constructor. It will
1469    /// return an empty group info with zero slots. An empty group info is
1470    /// useful for cases where you have no patterns or for cases where slots
1471    /// aren't being used at all (e.g., for most DFAs in this crate).
1472    ///
1473    /// # Errors
1474    ///
1475    /// This constructor returns an error if the given capturing groups are
1476    /// invalid in some way. Those reasons include, but are not necessarily
1477    /// limited to:
1478    ///
1479    /// * Too many patterns (i.e., `PatternID` would overflow).
1480    /// * Too many capturing groups (e.g., `u32` would overflow).
1481    /// * A pattern is given that has no capturing groups. (All patterns must
1482    /// have at least an implicit capturing group at index `0`.)
1483    /// * The capturing group at index `0` has a name. It must be unnamed.
1484    /// * There are duplicate capturing group names within the same pattern.
1485    /// (Multiple capturing groups with the same name may exist, but they
1486    /// must be in different patterns.)
1487    ///
1488    /// An example below shows how to trigger some of the above error
1489    /// conditions.
1490    ///
1491    /// # Example
1492    ///
1493    /// This example shows how to build a new `GroupInfo` and query it for
1494    /// information.
1495    ///
1496    /// ```
1497    /// use regex_automata::util::captures::GroupInfo;
1498    ///
1499    /// let info = GroupInfo::new(vec![
1500    ///     vec![None, Some("foo")],
1501    ///     vec![None],
1502    ///     vec![None, None, None, Some("bar"), None],
1503    ///     vec![None, None, Some("foo")],
1504    /// ])?;
1505    /// // The number of patterns being tracked.
1506    /// assert_eq!(4, info.pattern_len());
1507    /// // 2 slots per group
1508    /// assert_eq!(22, info.slot_len());
1509    ///
1510    /// # Ok::<(), Box<dyn std::error::Error>>(())
1511    /// ```
1512    ///
1513    /// # Example: empty `GroupInfo`
1514    ///
1515    /// This example shows how to build a new `GroupInfo` and query it for
1516    /// information.
1517    ///
1518    /// ```
1519    /// use regex_automata::util::captures::GroupInfo;
1520    ///
1521    /// let info = GroupInfo::empty();
1522    /// // Everything is zero.
1523    /// assert_eq!(0, info.pattern_len());
1524    /// assert_eq!(0, info.slot_len());
1525    ///
1526    /// # Ok::<(), Box<dyn std::error::Error>>(())
1527    /// ```
1528    ///
1529    /// # Example: error conditions
1530    ///
1531    /// This example shows how to provoke some of the ways in which building
1532    /// a `GroupInfo` can fail.
1533    ///
1534    /// ```
1535    /// use regex_automata::util::captures::GroupInfo;
1536    ///
1537    /// // Either the group info is empty, or all patterns must have at least
1538    /// // one capturing group.
1539    /// assert!(GroupInfo::new(vec![
1540    ///     vec![None, Some("a")], // ok
1541    ///     vec![None], // ok
1542    ///     vec![], // not ok
1543    /// ]).is_err());
1544    /// // Note that building an empty group info is OK.
1545    /// assert!(GroupInfo::new(Vec::<Vec<Option<String>>>::new()).is_ok());
1546    ///
1547    /// // The first group in each pattern must correspond to an implicit
1548    /// // anonymous group. i.e., One that is not named. By convention, this
1549    /// // group corresponds to the overall match of a regex. Every other group
1550    /// // in a pattern is explicit and optional.
1551    /// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err());
1552    ///
1553    /// // There must not be duplicate group names within the same pattern.
1554    /// assert!(GroupInfo::new(vec![
1555    ///     vec![None, Some("foo"), Some("foo")],
1556    /// ]).is_err());
1557    /// // But duplicate names across distinct patterns is OK.
1558    /// assert!(GroupInfo::new(vec![
1559    ///     vec![None, Some("foo")],
1560    ///     vec![None, Some("foo")],
1561    /// ]).is_ok());
1562    ///
1563    /// # Ok::<(), Box<dyn std::error::Error>>(())
1564    /// ```
1565    ///
1566    /// There are other ways for building a `GroupInfo` to fail but are
1567    /// difficult to show. For example, if the number of patterns given would
1568    /// overflow `PatternID`.
1569    pub fn new<P, G, N>(pattern_groups: P) -> Result<GroupInfo, GroupInfoError>
1570    where
1571        P: IntoIterator<Item = G>,
1572        G: IntoIterator<Item = Option<N>>,
1573        N: AsRef<str>,
1574    {
1575        let mut group_info = GroupInfoInner {
1576            slot_ranges: vec![],
1577            name_to_index: vec![],
1578            index_to_name: vec![],
1579            memory_extra: 0,
1580        };
1581        for (pattern_index, groups) in pattern_groups.into_iter().enumerate() {
1582            // If we can't convert the pattern index to an ID, then the caller
1583            // tried to build capture info for too many patterns.
1584            let pid = PatternID::new(pattern_index)
1585                .map_err(GroupInfoError::too_many_patterns)?;
1586
1587            let mut groups_iter = groups.into_iter().enumerate();
1588            match groups_iter.next() {
1589                None => return Err(GroupInfoError::missing_groups(pid)),
1590                Some((_, Some(_))) => {
1591                    return Err(GroupInfoError::first_must_be_unnamed(pid))
1592                }
1593                Some((_, None)) => {}
1594            }
1595            group_info.add_first_group(pid);
1596            // Now iterate over the rest, which correspond to all of the
1597            // (conventionally) explicit capture groups in a regex pattern.
1598            for (group_index, maybe_name) in groups_iter {
1599                // Just like for patterns, if the group index can't be
1600                // converted to a "small" index, then the caller has given too
1601                // many groups for a particular pattern.
1602                let group = SmallIndex::new(group_index).map_err(|_| {
1603                    GroupInfoError::too_many_groups(pid, group_index)
1604                })?;
1605                group_info.add_explicit_group(pid, group, maybe_name)?;
1606            }
1607        }
1608        group_info.fixup_slot_ranges()?;
1609        Ok(GroupInfo(Arc::new(group_info)))
1610    }
1611
1612    /// This creates an empty `GroupInfo`.
1613    ///
1614    /// This is a convenience routine for calling `GroupInfo::new` with an
1615    /// iterator that yields no elements.
1616    ///
1617    /// # Example
1618    ///
1619    /// This example shows how to build a new empty `GroupInfo` and query it
1620    /// for information.
1621    ///
1622    /// ```
1623    /// use regex_automata::util::captures::GroupInfo;
1624    ///
1625    /// let info = GroupInfo::empty();
1626    /// // Everything is zero.
1627    /// assert_eq!(0, info.pattern_len());
1628    /// assert_eq!(0, info.all_group_len());
1629    /// assert_eq!(0, info.slot_len());
1630    ///
1631    /// # Ok::<(), Box<dyn std::error::Error>>(())
1632    /// ```
1633    pub fn empty() -> GroupInfo {
1634        GroupInfo::new(core::iter::empty::<[Option<&str>; 0]>())
1635            .expect("empty group info is always valid")
1636    }
1637
1638    /// Return the capture group index corresponding to the given name in the
1639    /// given pattern. If no such capture group name exists in the given
1640    /// pattern, then this returns `None`.
1641    ///
1642    /// If the given pattern ID is invalid, then this returns `None`.
1643    ///
1644    /// This also returns `None` for all inputs if these captures are empty
1645    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1646    /// are present for a specific pattern, use [`GroupInfo::group_len`].
1647    ///
1648    /// # Example
1649    ///
1650    /// This example shows how to find the capture index for the given pattern
1651    /// and group name.
1652    ///
1653    /// Remember that capture indices are relative to the pattern, such that
1654    /// the same capture index value may refer to different capturing groups
1655    /// for distinct patterns.
1656    ///
1657    /// ```
1658    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1659    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1660    ///
1661    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
1662    ///
1663    /// let nfa = NFA::new_many(&[
1664    ///     r"a(?P<quux>\w+)z(?P<foo>\s+)",
1665    ///     r"a(?P<foo>\d+)z",
1666    /// ])?;
1667    /// let groups = nfa.group_info();
1668    /// assert_eq!(Some(2), groups.to_index(pid0, "foo"));
1669    /// // Recall that capture index 0 is always unnamed and refers to the
1670    /// // entire pattern. So the first capturing group present in the pattern
1671    /// // itself always starts at index 1.
1672    /// assert_eq!(Some(1), groups.to_index(pid1, "foo"));
1673    ///
1674    /// // And if a name does not exist for a particular pattern, None is
1675    /// // returned.
1676    /// assert!(groups.to_index(pid0, "quux").is_some());
1677    /// assert!(groups.to_index(pid1, "quux").is_none());
1678    ///
1679    /// # Ok::<(), Box<dyn std::error::Error>>(())
1680    /// ```
1681    #[inline]
1682    pub fn to_index(&self, pid: PatternID, name: &str) -> Option<usize> {
1683        let indices = self.0.name_to_index.get(pid.as_usize())?;
1684        indices.get(name).cloned().map(|i| i.as_usize())
1685    }
1686
1687    /// Return the capture name for the given index and given pattern. If the
1688    /// corresponding group does not have a name, then this returns `None`.
1689    ///
1690    /// If the pattern ID is invalid, then this returns `None`.
1691    ///
1692    /// If the group index is invalid for the given pattern, then this returns
1693    /// `None`. A group `index` is valid for a pattern `pid` in an `nfa` if and
1694    /// only if `index < nfa.pattern_capture_len(pid)`.
1695    ///
1696    /// This also returns `None` for all inputs if these captures are empty
1697    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1698    /// are present for a specific pattern, use [`GroupInfo::group_len`].
1699    ///
1700    /// # Example
1701    ///
1702    /// This example shows how to find the capture group name for the given
1703    /// pattern and group index.
1704    ///
1705    /// ```
1706    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1707    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1708    ///
1709    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
1710    ///
1711    /// let nfa = NFA::new_many(&[
1712    ///     r"a(?P<foo>\w+)z(\s+)x(\d+)",
1713    ///     r"a(\d+)z(?P<foo>\s+)",
1714    /// ])?;
1715    /// let groups = nfa.group_info();
1716    /// assert_eq!(None, groups.to_name(pid0, 0));
1717    /// assert_eq!(Some("foo"), groups.to_name(pid0, 1));
1718    /// assert_eq!(None, groups.to_name(pid0, 2));
1719    /// assert_eq!(None, groups.to_name(pid0, 3));
1720    ///
1721    /// assert_eq!(None, groups.to_name(pid1, 0));
1722    /// assert_eq!(None, groups.to_name(pid1, 1));
1723    /// assert_eq!(Some("foo"), groups.to_name(pid1, 2));
1724    /// // '3' is not a valid capture index for the second pattern.
1725    /// assert_eq!(None, groups.to_name(pid1, 3));
1726    ///
1727    /// # Ok::<(), Box<dyn std::error::Error>>(())
1728    /// ```
1729    #[inline]
1730    pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> {
1731        let pattern_names = self.0.index_to_name.get(pid.as_usize())?;
1732        pattern_names.get(group_index)?.as_deref()
1733    }
1734
1735    /// Return an iterator of all capture groups and their names (if present)
1736    /// for a particular pattern.
1737    ///
1738    /// If the given pattern ID is invalid or if this `GroupInfo` is empty,
1739    /// then the iterator yields no elements.
1740    ///
1741    /// The number of elements yielded by this iterator is always equal to
1742    /// the result of calling [`GroupInfo::group_len`] with the same
1743    /// `PatternID`.
1744    ///
1745    /// # Example
1746    ///
1747    /// This example shows how to get a list of all capture group names for
1748    /// a particular pattern.
1749    ///
1750    /// ```
1751    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1752    ///
1753    /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?;
1754    /// // The first is the implicit group that is always unnammed. The next
1755    /// // 5 groups are the explicit groups found in the concrete syntax above.
1756    /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")];
1757    /// let got: Vec<Option<&str>> =
1758    ///     nfa.group_info().pattern_names(PatternID::ZERO).collect();
1759    /// assert_eq!(expected, got);
1760    ///
1761    /// // Using an invalid pattern ID will result in nothing yielded.
1762    /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count();
1763    /// assert_eq!(0, got);
1764    ///
1765    /// # Ok::<(), Box<dyn std::error::Error>>(())
1766    /// ```
1767    #[inline]
1768    pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> {
1769        GroupInfoPatternNames {
1770            it: self
1771                .0
1772                .index_to_name
1773                .get(pid.as_usize())
1774                .map(|indices| indices.iter())
1775                .unwrap_or([].iter()),
1776        }
1777    }
1778
1779    /// Return an iterator of all capture groups for all patterns supported by
1780    /// this `GroupInfo`. Each item yielded is a triple of the group's pattern
1781    /// ID, index in the pattern and the group's name, if present.
1782    ///
1783    /// # Example
1784    ///
1785    /// This example shows how to get a list of all capture groups found in
1786    /// one NFA, potentially spanning multiple patterns.
1787    ///
1788    /// ```
1789    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1790    ///
1791    /// let nfa = NFA::new_many(&[
1792    ///     r"(?P<foo>a)",
1793    ///     r"a",
1794    ///     r"(a)",
1795    /// ])?;
1796    /// let expected = vec![
1797    ///     (PatternID::must(0), 0, None),
1798    ///     (PatternID::must(0), 1, Some("foo")),
1799    ///     (PatternID::must(1), 0, None),
1800    ///     (PatternID::must(2), 0, None),
1801    ///     (PatternID::must(2), 1, None),
1802    /// ];
1803    /// let got: Vec<(PatternID, usize, Option<&str>)> =
1804    ///     nfa.group_info().all_names().collect();
1805    /// assert_eq!(expected, got);
1806    ///
1807    /// # Ok::<(), Box<dyn std::error::Error>>(())
1808    /// ```
1809    ///
1810    /// Unlike other capturing group related routines, this routine doesn't
1811    /// panic even if captures aren't enabled on this NFA:
1812    ///
1813    /// ```
1814    /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
1815    ///
1816    /// let nfa = NFA::compiler()
1817    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1818    ///     .build_many(&[
1819    ///         r"(?P<foo>a)",
1820    ///         r"a",
1821    ///         r"(a)",
1822    ///     ])?;
1823    /// // When captures aren't enabled, there's nothing to return.
1824    /// assert_eq!(0, nfa.group_info().all_names().count());
1825    ///
1826    /// # Ok::<(), Box<dyn std::error::Error>>(())
1827    /// ```
1828    #[inline]
1829    pub fn all_names(&self) -> GroupInfoAllNames<'_> {
1830        GroupInfoAllNames {
1831            group_info: self,
1832            pids: PatternID::iter(self.pattern_len()),
1833            current_pid: None,
1834            names: None,
1835        }
1836    }
1837
1838    /// Returns the starting and ending slot corresponding to the given
1839    /// capturing group for the given pattern. The ending slot is always one
1840    /// more than the starting slot returned.
1841    ///
1842    /// Note that this is like [`GroupInfo::slot`], except that it also returns
1843    /// the ending slot value for convenience.
1844    ///
1845    /// If either the pattern ID or the capture index is invalid, then this
1846    /// returns None.
1847    ///
1848    /// # Example
1849    ///
1850    /// This example shows that the starting slots for the first capturing
1851    /// group of each pattern are distinct.
1852    ///
1853    /// ```
1854    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1855    ///
1856    /// let nfa = NFA::new_many(&["a", "b"])?;
1857    /// assert_ne!(
1858    ///     nfa.group_info().slots(PatternID::must(0), 0),
1859    ///     nfa.group_info().slots(PatternID::must(1), 0),
1860    /// );
1861    ///
1862    /// // Also, the start and end slot values are never equivalent.
1863    /// let (start, end) = nfa.group_info().slots(PatternID::ZERO, 0).unwrap();
1864    /// assert_ne!(start, end);
1865    ///
1866    /// # Ok::<(), Box<dyn std::error::Error>>(())
1867    /// ```
1868    #[inline]
1869    pub fn slots(
1870        &self,
1871        pid: PatternID,
1872        group_index: usize,
1873    ) -> Option<(usize, usize)> {
1874        // Since 'slot' only even returns valid starting slots, we know that
1875        // there must also be an end slot and that end slot is always one more
1876        // than the start slot.
1877        self.slot(pid, group_index).map(|start| (start, start + 1))
1878    }
1879
1880    /// Returns the starting slot corresponding to the given capturing group
1881    /// for the given pattern. The ending slot is always one more than the
1882    /// value returned.
1883    ///
1884    /// If either the pattern ID or the capture index is invalid, then this
1885    /// returns None.
1886    ///
1887    /// # Example
1888    ///
1889    /// This example shows that the starting slots for the first capturing
1890    /// group of each pattern are distinct.
1891    ///
1892    /// ```
1893    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1894    ///
1895    /// let nfa = NFA::new_many(&["a", "b"])?;
1896    /// assert_ne!(
1897    ///     nfa.group_info().slot(PatternID::must(0), 0),
1898    ///     nfa.group_info().slot(PatternID::must(1), 0),
1899    /// );
1900    ///
1901    /// # Ok::<(), Box<dyn std::error::Error>>(())
1902    /// ```
1903    #[inline]
1904    pub fn slot(&self, pid: PatternID, group_index: usize) -> Option<usize> {
1905        if group_index >= self.group_len(pid) {
1906            return None;
1907        }
1908        // At this point, we know that 'pid' refers to a real pattern and that
1909        // 'group_index' refers to a real group. We therefore also know that
1910        // the pattern and group can be combined to return a correct slot.
1911        // That's why we don't need to use checked arithmetic below.
1912        if group_index == 0 {
1913            Some(pid.as_usize() * 2)
1914        } else {
1915            // As above, we don't need to check that our slot is less than the
1916            // end of our range since we already know the group index is a
1917            // valid index for the given pattern.
1918            let (start, _) = self.0.slot_ranges[pid];
1919            Some(start.as_usize() + ((group_index - 1) * 2))
1920        }
1921    }
1922
1923    /// Returns the total number of patterns in this `GroupInfo`.
1924    ///
1925    /// This may return zero if the `GroupInfo` was constructed with no
1926    /// patterns.
1927    ///
1928    /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because
1929    /// `GroupInfo` construction will fail if too many patterns are added.
1930    ///
1931    /// # Example
1932    ///
1933    /// ```
1934    /// use regex_automata::nfa::thompson::NFA;
1935    ///
1936    /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
1937    /// assert_eq!(3, nfa.group_info().pattern_len());
1938    ///
1939    /// let nfa = NFA::never_match();
1940    /// assert_eq!(0, nfa.group_info().pattern_len());
1941    ///
1942    /// let nfa = NFA::always_match();
1943    /// assert_eq!(1, nfa.group_info().pattern_len());
1944    ///
1945    /// # Ok::<(), Box<dyn std::error::Error>>(())
1946    /// ```
1947    #[inline]
1948    pub fn pattern_len(&self) -> usize {
1949        self.0.pattern_len()
1950    }
1951
1952    /// Return the number of capture groups in a pattern.
1953    ///
1954    /// If the pattern ID is invalid, then this returns `0`.
1955    ///
1956    /// # Example
1957    ///
1958    /// This example shows how the values returned by this routine may vary
1959    /// for different patterns and NFA configurations.
1960    ///
1961    /// ```
1962    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
1963    ///
1964    /// let nfa = NFA::new(r"(a)(b)(c)")?;
1965    /// // There are 3 explicit groups in the pattern's concrete syntax and
1966    /// // 1 unnamed and implicit group spanning the entire pattern.
1967    /// assert_eq!(4, nfa.group_info().group_len(PatternID::ZERO));
1968    ///
1969    /// let nfa = NFA::new(r"abc")?;
1970    /// // There is just the unnamed implicit group.
1971    /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO));
1972    ///
1973    /// let nfa = NFA::compiler()
1974    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1975    ///     .build(r"abc")?;
1976    /// // We disabled capturing groups, so there are none.
1977    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
1978    ///
1979    /// let nfa = NFA::compiler()
1980    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1981    ///     .build(r"(a)(b)(c)")?;
1982    /// // We disabled capturing groups, so there are none, even if there are
1983    /// // explicit groups in the concrete syntax.
1984    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
1985    ///
1986    /// # Ok::<(), Box<dyn std::error::Error>>(())
1987    /// ```
1988    #[inline]
1989    pub fn group_len(&self, pid: PatternID) -> usize {
1990        self.0.group_len(pid)
1991    }
1992
1993    /// Return the total number of capture groups across all patterns.
1994    ///
1995    /// This includes implicit groups that represent the entire match of a
1996    /// pattern.
1997    ///
1998    /// # Example
1999    ///
2000    /// This example shows how the values returned by this routine may vary
2001    /// for different patterns and NFA configurations.
2002    ///
2003    /// ```
2004    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
2005    ///
2006    /// let nfa = NFA::new(r"(a)(b)(c)")?;
2007    /// // There are 3 explicit groups in the pattern's concrete syntax and
2008    /// // 1 unnamed and implicit group spanning the entire pattern.
2009    /// assert_eq!(4, nfa.group_info().all_group_len());
2010    ///
2011    /// let nfa = NFA::new(r"abc")?;
2012    /// // There is just the unnamed implicit group.
2013    /// assert_eq!(1, nfa.group_info().all_group_len());
2014    ///
2015    /// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?;
2016    /// // Each pattern has one implicit groups, and two
2017    /// // patterns have one explicit group each.
2018    /// assert_eq!(5, nfa.group_info().all_group_len());
2019    ///
2020    /// let nfa = NFA::compiler()
2021    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
2022    ///     .build(r"abc")?;
2023    /// // We disabled capturing groups, so there are none.
2024    /// assert_eq!(0, nfa.group_info().all_group_len());
2025    ///
2026    /// let nfa = NFA::compiler()
2027    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
2028    ///     .build(r"(a)(b)(c)")?;
2029    /// // We disabled capturing groups, so there are none, even if there are
2030    /// // explicit groups in the concrete syntax.
2031    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
2032    ///
2033    /// # Ok::<(), Box<dyn std::error::Error>>(())
2034    /// ```
2035    #[inline]
2036    pub fn all_group_len(&self) -> usize {
2037        self.slot_len() / 2
2038    }
2039
2040    /// Returns the total number of slots in this `GroupInfo` across all
2041    /// patterns.
2042    ///
2043    /// The total number of slots is always twice the total number of capturing
2044    /// groups, including both implicit and explicit groups.
2045    ///
2046    /// # Example
2047    ///
2048    /// This example shows the relationship between the number of capturing
2049    /// groups and slots.
2050    ///
2051    /// ```
2052    /// use regex_automata::util::captures::GroupInfo;
2053    ///
2054    /// // There are 11 total groups here.
2055    /// let info = GroupInfo::new(vec![
2056    ///     vec![None, Some("foo")],
2057    ///     vec![None],
2058    ///     vec![None, None, None, Some("bar"), None],
2059    ///     vec![None, None, Some("foo")],
2060    /// ])?;
2061    /// // 2 slots per group gives us 11*2=22 slots.
2062    /// assert_eq!(22, info.slot_len());
2063    ///
2064    /// # Ok::<(), Box<dyn std::error::Error>>(())
2065    /// ```
2066    #[inline]
2067    pub fn slot_len(&self) -> usize {
2068        self.0.small_slot_len().as_usize()
2069    }
2070
2071    /// Returns the total number of slots for implicit capturing groups.
2072    ///
2073    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
2074    /// explicit slots for each pattern. Since there are always exactly 2
2075    /// implicit slots for each pattern, the number of implicit slots is always
2076    /// equal to twice the number of patterns.
2077    ///
2078    /// # Example
2079    ///
2080    /// This example shows the relationship between the number of capturing
2081    /// groups, implicit slots and explicit slots.
2082    ///
2083    /// ```
2084    /// use regex_automata::util::captures::GroupInfo;
2085    ///
2086    /// // There are 11 total groups here.
2087    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2088    /// // 2 slots per group gives us 11*2=22 slots.
2089    /// assert_eq!(6, info.slot_len());
2090    /// // 2 implicit slots per pattern gives us 2 implicit slots since there
2091    /// // is 1 pattern.
2092    /// assert_eq!(2, info.implicit_slot_len());
2093    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
2094    /// assert_eq!(4, info.explicit_slot_len());
2095    ///
2096    /// # Ok::<(), Box<dyn std::error::Error>>(())
2097    /// ```
2098    #[inline]
2099    pub fn implicit_slot_len(&self) -> usize {
2100        self.pattern_len() * 2
2101    }
2102
2103    /// Returns the total number of slots for explicit capturing groups.
2104    ///
2105    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
2106    /// implicit slots for each pattern. (There are always 2 implicit slots for
2107    /// each pattern.)
2108    ///
2109    /// For a non-empty `GroupInfo`, it is always the case that `slot_len` is
2110    /// strictly greater than `explicit_slot_len`. For an empty `GroupInfo`,
2111    /// both the total number of slots and the number of explicit slots is
2112    /// `0`.
2113    ///
2114    /// # Example
2115    ///
2116    /// This example shows the relationship between the number of capturing
2117    /// groups, implicit slots and explicit slots.
2118    ///
2119    /// ```
2120    /// use regex_automata::util::captures::GroupInfo;
2121    ///
2122    /// // There are 11 total groups here.
2123    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2124    /// // 2 slots per group gives us 11*2=22 slots.
2125    /// assert_eq!(6, info.slot_len());
2126    /// // 2 implicit slots per pattern gives us 2 implicit slots since there
2127    /// // is 1 pattern.
2128    /// assert_eq!(2, info.implicit_slot_len());
2129    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
2130    /// assert_eq!(4, info.explicit_slot_len());
2131    ///
2132    /// # Ok::<(), Box<dyn std::error::Error>>(())
2133    /// ```
2134    #[inline]
2135    pub fn explicit_slot_len(&self) -> usize {
2136        self.slot_len().saturating_sub(self.implicit_slot_len())
2137    }
2138
2139    /// Returns the memory usage, in bytes, of this `GroupInfo`.
2140    ///
2141    /// This does **not** include the stack size used up by this `GroupInfo`.
2142    /// To compute that, use `std::mem::size_of::<GroupInfo>()`.
2143    #[inline]
2144    pub fn memory_usage(&self) -> usize {
2145        use core::mem::size_of as s;
2146
2147        s::<GroupInfoInner>()
2148            + self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>()
2149            + self.0.name_to_index.len() * s::<CaptureNameMap>()
2150            + self.0.index_to_name.len() * s::<Vec<Option<Arc<str>>>>()
2151            + self.0.memory_extra
2152    }
2153}
2154
2155/// A map from capture group name to its corresponding capture group index.
2156///
2157/// This type is actually wrapped inside a Vec indexed by pattern ID on a
2158/// `GroupInfo`, since multiple patterns may have the same capture group name.
2159/// That is, each pattern gets its own namespace of capture group names.
2160///
2161/// Perhaps a more memory efficient representation would be
2162/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to look
2163/// up a capture index by name without producing a `Arc<str>`, which requires
2164/// an allocation. To fix this, I think we'd need to define our own unsized
2165/// type or something? Anyway, I didn't give this much thought since it
2166/// probably doesn't matter much in the grand scheme of things. But it did
2167/// stand out to me as mildly wasteful.
2168#[cfg(feature = "std")]
2169type CaptureNameMap = std::collections::HashMap<Arc<str>, SmallIndex>;
2170#[cfg(not(feature = "std"))]
2171type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, SmallIndex>;
2172
2173/// The inner guts of `GroupInfo`. This type only exists so that it can
2174/// be wrapped in an `Arc` to make `GroupInfo` reference counted.
2175#[derive(Debug, Default)]
2176struct GroupInfoInner {
2177    slot_ranges: Vec<(SmallIndex, SmallIndex)>,
2178    name_to_index: Vec<CaptureNameMap>,
2179    index_to_name: Vec<Vec<Option<Arc<str>>>>,
2180    memory_extra: usize,
2181}
2182
2183impl GroupInfoInner {
2184    /// This adds the first unnamed group for the given pattern ID. The given
2185    /// pattern ID must be zero if this is the first time this method is
2186    /// called, or must be exactly one more than the pattern ID supplied to the
2187    /// previous call to this method. (This method panics if this rule is
2188    /// violated.)
2189    ///
2190    /// This can be thought of as initializing the GroupInfo state for the
2191    /// given pattern and closing off the state for any previous pattern.
2192    fn add_first_group(&mut self, pid: PatternID) {
2193        assert_eq!(pid.as_usize(), self.slot_ranges.len());
2194        assert_eq!(pid.as_usize(), self.name_to_index.len());
2195        assert_eq!(pid.as_usize(), self.index_to_name.len());
2196        // This is the start of our slots for the explicit capturing groups.
2197        // Note that since the slots for the 0th group for every pattern appear
2198        // before any slots for the nth group (where n > 0) in any pattern, we
2199        // will have to fix up the slot ranges once we know how many patterns
2200        // we've added capture groups for.
2201        let slot_start = self.small_slot_len();
2202        self.slot_ranges.push((slot_start, slot_start));
2203        self.name_to_index.push(CaptureNameMap::new());
2204        self.index_to_name.push(vec![None]);
2205        self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2206    }
2207
2208    /// Add an explicit capturing group for the given pattern with the given
2209    /// index. If the group has a name, then that must be given as well.
2210    ///
2211    /// Note that every capturing group except for the first or zeroth group is
2212    /// explicit.
2213    ///
2214    /// This returns an error if adding this group would result in overflowing
2215    /// slot indices or if a capturing group with the same name for this
2216    /// pattern has already been added.
2217    fn add_explicit_group<N: AsRef<str>>(
2218        &mut self,
2219        pid: PatternID,
2220        group: SmallIndex,
2221        maybe_name: Option<N>,
2222    ) -> Result<(), GroupInfoError> {
2223        // We also need to check that the slot index generated for
2224        // this group is also valid. Although, this is a little weird
2225        // because we offset these indices below, at which point, we'll
2226        // have to recheck them. Gosh this is annoying. Note that
2227        // the '+2' below is OK because 'end' is guaranteed to be less
2228        // than isize::MAX.
2229        let end = &mut self.slot_ranges[pid].1;
2230        *end = SmallIndex::new(end.as_usize() + 2).map_err(|_| {
2231            GroupInfoError::too_many_groups(pid, group.as_usize())
2232        })?;
2233        if let Some(name) = maybe_name {
2234            let name = Arc::<str>::from(name.as_ref());
2235            if self.name_to_index[pid].contains_key(&*name) {
2236                return Err(GroupInfoError::duplicate(pid, &name));
2237            }
2238            let len = name.len();
2239            self.name_to_index[pid].insert(Arc::clone(&name), group);
2240            self.index_to_name[pid].push(Some(name));
2241            // Adds the memory used by the Arc<str> in both maps.
2242            self.memory_extra +=
2243                2 * (len + core::mem::size_of::<Option<Arc<str>>>());
2244            // And also the value entry for the 'name_to_index' map.
2245            // This is probably an underestimate for 'name_to_index' since
2246            // hashmaps/btrees likely have some non-zero overhead, but we
2247            // assume here that they have zero overhead.
2248            self.memory_extra += core::mem::size_of::<SmallIndex>();
2249        } else {
2250            self.index_to_name[pid].push(None);
2251            self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2252        }
2253        // This is a sanity assert that checks that our group index
2254        // is in line with the number of groups added so far for this
2255        // pattern.
2256        assert_eq!(group.one_more(), self.group_len(pid));
2257        // And is also in line with the 'index_to_name' map.
2258        assert_eq!(group.one_more(), self.index_to_name[pid].len());
2259        Ok(())
2260    }
2261
2262    /// This corrects the slot ranges to account for the slots corresponding
2263    /// to the zeroth group of each pattern. That is, every slot range is
2264    /// offset by 'pattern_len() * 2', since each pattern uses two slots to
2265    /// represent the zeroth group.
2266    fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> {
2267        use crate::util::primitives::IteratorIndexExt;
2268        // Since we know number of patterns fits in PatternID and
2269        // PatternID::MAX < isize::MAX, it follows that multiplying by 2 will
2270        // never overflow usize.
2271        let offset = self.pattern_len().checked_mul(2).unwrap();
2272        for (pid, &mut (ref mut start, ref mut end)) in
2273            self.slot_ranges.iter_mut().with_pattern_ids()
2274        {
2275            let group_len = 1 + ((end.as_usize() - start.as_usize()) / 2);
2276            let new_end = match end.as_usize().checked_add(offset) {
2277                Some(new_end) => new_end,
2278                None => {
2279                    return Err(GroupInfoError::too_many_groups(
2280                        pid, group_len,
2281                    ))
2282                }
2283            };
2284            *end = SmallIndex::new(new_end).map_err(|_| {
2285                GroupInfoError::too_many_groups(pid, group_len)
2286            })?;
2287            // Since start <= end, if end is valid then start must be too.
2288            *start = SmallIndex::new(start.as_usize() + offset).unwrap();
2289        }
2290        Ok(())
2291    }
2292
2293    /// Return the total number of patterns represented by this capture slot
2294    /// info.
2295    fn pattern_len(&self) -> usize {
2296        self.slot_ranges.len()
2297    }
2298
2299    /// Return the total number of capturing groups for the given pattern. If
2300    /// the given pattern isn't valid for this capture slot info, then 0 is
2301    /// returned.
2302    fn group_len(&self, pid: PatternID) -> usize {
2303        let (start, end) = match self.slot_ranges.get(pid.as_usize()) {
2304            None => return 0,
2305            Some(range) => range,
2306        };
2307        // The difference between any two SmallIndex values always fits in a
2308        // usize since we know that SmallIndex::MAX <= isize::MAX-1. We also
2309        // know that start<=end by construction and that the number of groups
2310        // never exceeds SmallIndex and thus never overflows usize.
2311        1 + ((end.as_usize() - start.as_usize()) / 2)
2312    }
2313
2314    /// Return the total number of slots in this capture slot info as a
2315    /// "small index."
2316    fn small_slot_len(&self) -> SmallIndex {
2317        // Since slots are allocated in order of pattern (starting at 0) and
2318        // then in order of capture group, it follows that the number of slots
2319        // is the end of the range of slots for the last pattern. This is
2320        // true even when the last pattern has no capturing groups, since
2321        // 'slot_ranges' will still represent it explicitly with an empty
2322        // range.
2323        self.slot_ranges.last().map_or(SmallIndex::ZERO, |&(_, end)| end)
2324    }
2325}
2326
2327/// An error that may occur when building a `GroupInfo`.
2328///
2329/// Building a `GroupInfo` does a variety of checks to make sure the
2330/// capturing groups satisfy a number of invariants. This includes, but is not
2331/// limited to, ensuring that the first capturing group is unnamed and that
2332/// there are no duplicate capture groups for a specific pattern.
2333#[derive(Clone, Debug)]
2334pub struct GroupInfoError {
2335    kind: GroupInfoErrorKind,
2336}
2337
2338/// The kind of error that occurs when building a `GroupInfo` fails.
2339///
2340/// We keep this un-exported because it's not clear how useful it is to
2341/// export it.
2342#[derive(Clone, Debug)]
2343enum GroupInfoErrorKind {
2344    /// This occurs when too many patterns have been added. i.e., It would
2345    /// otherwise overflow a `PatternID`.
2346    TooManyPatterns { err: PatternIDError },
2347    /// This occurs when too many capturing groups have been added for a
2348    /// particular pattern.
2349    TooManyGroups {
2350        /// The ID of the pattern that had too many groups.
2351        pattern: PatternID,
2352        /// The minimum number of groups that the caller has tried to add for
2353        /// a pattern.
2354        minimum: usize,
2355    },
2356    /// An error that occurs when a pattern has no capture groups. Either the
2357    /// group info must be empty, or all patterns must have at least one group
2358    /// (corresponding to the unnamed group for the entire pattern).
2359    MissingGroups {
2360        /// The ID of the pattern that had no capturing groups.
2361        pattern: PatternID,
2362    },
2363    /// An error that occurs when one tries to provide a name for the capture
2364    /// group at index 0. This capturing group must currently always be
2365    /// unnamed.
2366    FirstMustBeUnnamed {
2367        /// The ID of the pattern that was found to have a named first
2368        /// capturing group.
2369        pattern: PatternID,
2370    },
2371    /// An error that occurs when duplicate capture group names for the same
2372    /// pattern are added.
2373    ///
2374    /// NOTE: At time of writing, this error can never occur if you're using
2375    /// regex-syntax, since the parser itself will reject patterns with
2376    /// duplicate capture group names. This error can only occur when the
2377    /// builder is used to hand construct NFAs.
2378    Duplicate {
2379        /// The pattern in which the duplicate capture group name was found.
2380        pattern: PatternID,
2381        /// The duplicate name.
2382        name: String,
2383    },
2384}
2385
2386impl GroupInfoError {
2387    fn too_many_patterns(err: PatternIDError) -> GroupInfoError {
2388        GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } }
2389    }
2390
2391    fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError {
2392        GroupInfoError {
2393            kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum },
2394        }
2395    }
2396
2397    fn missing_groups(pattern: PatternID) -> GroupInfoError {
2398        GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } }
2399    }
2400
2401    fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError {
2402        GroupInfoError {
2403            kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern },
2404        }
2405    }
2406
2407    fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError {
2408        GroupInfoError {
2409            kind: GroupInfoErrorKind::Duplicate {
2410                pattern,
2411                name: String::from(name),
2412            },
2413        }
2414    }
2415}
2416
2417#[cfg(feature = "std")]
2418impl std::error::Error for GroupInfoError {
2419    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
2420        match self.kind {
2421            GroupInfoErrorKind::TooManyPatterns { .. }
2422            | GroupInfoErrorKind::TooManyGroups { .. }
2423            | GroupInfoErrorKind::MissingGroups { .. }
2424            | GroupInfoErrorKind::FirstMustBeUnnamed { .. }
2425            | GroupInfoErrorKind::Duplicate { .. } => None,
2426        }
2427    }
2428}
2429
2430impl core::fmt::Display for GroupInfoError {
2431    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2432        use self::GroupInfoErrorKind::*;
2433
2434        match self.kind {
2435            TooManyPatterns { ref err } => {
2436                write!(f, "too many patterns to build capture info: {}", err)
2437            }
2438            TooManyGroups { pattern, minimum } => {
2439                write!(
2440                    f,
2441                    "too many capture groups (at least {}) were \
2442                     found for pattern {}",
2443                    minimum,
2444                    pattern.as_usize()
2445                )
2446            }
2447            MissingGroups { pattern } => write!(
2448                f,
2449                "no capturing groups found for pattern {} \
2450                 (either all patterns have zero groups or all patterns have \
2451                  at least one group)",
2452                pattern.as_usize(),
2453            ),
2454            FirstMustBeUnnamed { pattern } => write!(
2455                f,
2456                "first capture group (at index 0) for pattern {} has a name \
2457                 (it must be unnamed)",
2458                pattern.as_usize(),
2459            ),
2460            Duplicate { pattern, ref name } => write!(
2461                f,
2462                "duplicate capture group name '{}' found for pattern {}",
2463                name,
2464                pattern.as_usize(),
2465            ),
2466        }
2467    }
2468}
2469
2470/// An iterator over capturing groups and their names for a specific pattern.
2471///
2472/// This iterator is created by [`GroupInfo::pattern_names`].
2473///
2474/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2475/// from which this iterator was created.
2476#[derive(Clone, Debug)]
2477pub struct GroupInfoPatternNames<'a> {
2478    it: core::slice::Iter<'a, Option<Arc<str>>>,
2479}
2480
2481impl GroupInfoPatternNames<'static> {
2482    fn empty() -> GroupInfoPatternNames<'static> {
2483        GroupInfoPatternNames { it: [].iter() }
2484    }
2485}
2486
2487impl<'a> Iterator for GroupInfoPatternNames<'a> {
2488    type Item = Option<&'a str>;
2489
2490    fn next(&mut self) -> Option<Option<&'a str>> {
2491        self.it.next().map(|x| x.as_deref())
2492    }
2493
2494    fn size_hint(&self) -> (usize, Option<usize>) {
2495        self.it.size_hint()
2496    }
2497
2498    fn count(self) -> usize {
2499        self.it.count()
2500    }
2501}
2502
2503impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {}
2504impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {}
2505
2506/// An iterator over capturing groups and their names for a `GroupInfo`.
2507///
2508/// This iterator is created by [`GroupInfo::all_names`].
2509///
2510/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2511/// from which this iterator was created.
2512#[derive(Debug)]
2513pub struct GroupInfoAllNames<'a> {
2514    group_info: &'a GroupInfo,
2515    pids: PatternIDIter,
2516    current_pid: Option<PatternID>,
2517    names: Option<core::iter::Enumerate<GroupInfoPatternNames<'a>>>,
2518}
2519
2520impl<'a> Iterator for GroupInfoAllNames<'a> {
2521    type Item = (PatternID, usize, Option<&'a str>);
2522
2523    fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> {
2524        // If the group info has no captures, then we never have anything
2525        // to yield. We need to consider this case explicitly (at time of
2526        // writing) because 'pattern_capture_names' will panic if captures
2527        // aren't enabled.
2528        if self.group_info.0.index_to_name.is_empty() {
2529            return None;
2530        }
2531        if self.current_pid.is_none() {
2532            self.current_pid = Some(self.pids.next()?);
2533        }
2534        let pid = self.current_pid.unwrap();
2535        if self.names.is_none() {
2536            self.names = Some(self.group_info.pattern_names(pid).enumerate());
2537        }
2538        let (group_index, name) = match self.names.as_mut().unwrap().next() {
2539            Some((group_index, name)) => (group_index, name),
2540            None => {
2541                self.current_pid = None;
2542                self.names = None;
2543                return self.next();
2544            }
2545        };
2546        Some((pid, group_index, name))
2547    }
2548}