/*!
Types and routines specific to lazy DFAs.

This module is the home of [`hybrid::dfa::DFA`](DFA).

This module also contains a [`hybrid::dfa::Builder`](Builder) and a
[`hybrid::dfa::Config`](Config) for configuring and building a lazy DFA.
*/

use core::{borrow::Borrow, iter, mem::size_of};

use alloc::{sync::Arc, vec::Vec};

use crate::{
    hybrid::{
        error::{BuildError, CacheError},
        id::{LazyStateID, LazyStateIDError, OverlappingState},
        search,
    },
    nfa::thompson,
    util::{
        alphabet::{self, ByteClasses, ByteSet},
        determinize::{self, State, StateBuilderEmpty, StateBuilderNFA},
        id::{PatternID, StateID as NFAStateID},
        matchtypes::{HalfMatch, MatchError, MatchKind},
        prefilter,
        sparse_set::SparseSets,
        start::Start,
    },
};

/// The mininum number of states that a lazy DFA's cache size must support.
///
/// This is checked at time of construction to ensure that at least some small
/// number of states can fit in the given capacity allotment. If we can't fit
/// at least this number of states, then the thinking is that it's pretty
/// senseless to use the lazy DFA. More to the point, parts of the code do
/// assume that the cache can fit at least some small number of states.
const MIN_STATES: usize = 5;

/// A hybrid NFA/DFA (also called a "lazy DFA") for regex searching.
///
/// A lazy DFA is a DFA that builds itself at search time. It otherwise has
/// very similar characteristics as a [`dense::DFA`](crate::dfa::dense::DFA).
/// Indeed, both support precisely the same regex features with precisely the
/// same semantics.
///
/// Where as a `dense::DFA` must be completely built to handle any input before
/// it may be used for search, a lazy DFA starts off effectively empty. During
/// a search, a lazy DFA will build itself depending on whether it has already
/// computed the next transition or not. If it has, then it looks a lot like
/// a `dense::DFA` internally: it does a very fast table based access to find
/// the next transition. Otherwise, if the state hasn't been computed, then it
/// does determinization _for that specific transition_ to compute the next DFA
/// state.
///
/// The main selling point of a lazy DFA is that, in practice, it has
/// the performance profile of a `dense::DFA` without the weakness of it
/// taking worst case exponential time to build. Indeed, for each byte of
/// input, the lazy DFA will construct as most one new DFA state. Thus, a
/// lazy DFA achieves worst case `O(mn)` time for regex search (where `m ~
/// pattern.len()` and `n ~ haystack.len()`).
///
/// The main downsides of a lazy DFA are:
///
/// 1. It requires mutable "cache" space during search. This is where the
/// transition table, among other things, is stored.
/// 2. In pathological cases (e.g., if the cache is too small), it will run
/// out of room and either require a bigger cache capacity or will repeatedly
/// clear the cache and thus repeatedly regenerate DFA states. Overall, this
/// will tend to be slower than a typical NFA simulation.
///
/// # Capabilities
///
/// Like a `dense::DFA`, a single lazy DFA fundamentally supports the following
/// operations:
///
/// 1. Detection of a match.
/// 2. Location of the end of a match.
/// 3. In the case of a lazy DFA with multiple patterns, which pattern matched
/// is reported as well.
///
/// A notable absence from the above list of capabilities is the location of
/// the *start* of a match. In order to provide both the start and end of
/// a match, *two* lazy DFAs are required. This functionality is provided by a
/// [`Regex`](crate::hybrid::regex::Regex).
///
/// # Example
///
/// This shows how to build a lazy DFA with the default configuration and
/// execute a search. Notice how, in contrast to a `dense::DFA`, we must create
/// a cache and pass it to our search routine.
///
/// ```
/// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
///
/// let dfa = DFA::new("foo[0-9]+")?;
/// let mut cache = dfa.create_cache();
///
/// let expected = Some(HalfMatch::must(0, 8));
/// assert_eq!(expected, dfa.find_leftmost_fwd(&mut cache, b"foo12345")?);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Clone, Debug)]
pub struct DFA {
    nfa: Arc<thompson::NFA>,
    stride2: usize,
    classes: ByteClasses,
    quitset: ByteSet,
    anchored: bool,
    match_kind: MatchKind,
    starts_for_each_pattern: bool,
    cache_capacity: usize,
    minimum_cache_clear_count: Option<usize>,
}

impl DFA {
    /// Parse the given regular expression using a default configuration and
    /// return the corresponding lazy DFA.
    ///
    /// If you want a non-default configuration, then use the [`Builder`] to
    /// set your own configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::new("foo[0-9]+bar")?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let expected = HalfMatch::must(0, 11);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345bar")?,
    /// );
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new(pattern: &str) -> Result<DFA, BuildError> {
        DFA::builder().build(pattern)
    }

    /// Parse the given regular expressions using a default configuration and
    /// return the corresponding lazy multi-DFA.
    ///
    /// If you want a non-default configuration, then use the [`Builder`] to
    /// set your own configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+"])?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let expected = HalfMatch::must(1, 3);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345bar")?,
    /// );
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<DFA, BuildError> {
        DFA::builder().build_many(patterns)
    }

    /// Create a new lazy DFA that matches every input.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::always_match()?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let expected = HalfMatch::must(0, 0);
    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"")?);
    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"foo")?);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn always_match() -> Result<DFA, BuildError> {
        let nfa = thompson::NFA::always_match();
        Builder::new().build_from_nfa(Arc::new(nfa))
    }

    /// Create a new lazy DFA that never matches any input.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::never_match()?;
    /// let mut cache = dfa.create_cache();
    ///
    /// assert_eq!(None, dfa.find_leftmost_fwd(&mut cache, b"")?);
    /// assert_eq!(None, dfa.find_leftmost_fwd(&mut cache, b"foo")?);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn never_match() -> Result<DFA, BuildError> {
        let nfa = thompson::NFA::never_match();
        Builder::new().build_from_nfa(Arc::new(nfa))
    }

    /// Return a default configuration for a `DFA`.
    ///
    /// This is a convenience routine to avoid needing to import the `Config`
    /// type when customizing the construction of a lazy DFA.
    ///
    /// # Example
    ///
    /// This example shows how to build a lazy DFA that only executes searches
    /// in anchored mode.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let re = DFA::builder()
    ///     .configure(DFA::config().anchored(true))
    ///     .build(r"[0-9]+")?;
    /// let mut cache = re.create_cache();
    ///
    /// let haystack = "abc123xyz".as_bytes();
    /// assert_eq!(None, re.find_leftmost_fwd(&mut cache, haystack)?);
    /// assert_eq!(
    ///     Some(HalfMatch::must(0, 3)),
    ///     re.find_leftmost_fwd(&mut cache, &haystack[3..6])?,
    /// );
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn config() -> Config {
        Config::new()
    }

    /// Return a builder for configuring the construction of a `Regex`.
    ///
    /// This is a convenience routine to avoid needing to import the
    /// [`Builder`] type in common cases.
    ///
    /// # Example
    ///
    /// This example shows how to use the builder to disable UTF-8 mode
    /// everywhere for lazy DFAs. This includes disabling it for both the
    /// concrete syntax (e.g., `.` matches any byte and Unicode character
    /// classes like `\p{Letter}` are not allowed) and for the unanchored
    /// search prefix. The latter enables the regex to match anywhere in a
    /// sequence of arbitrary bytes. (Typically, the unanchored search prefix
    /// will only permit matching valid UTF-8.)
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::dfa::DFA,
    ///     nfa::thompson,
    ///     HalfMatch, SyntaxConfig,
    /// };
    ///
    /// let re = DFA::builder()
    ///     .syntax(SyntaxConfig::new().utf8(false))
    ///     .thompson(thompson::Config::new().utf8(false))
    ///     .build(r"foo(?-u:[^b])ar.*")?;
    /// let mut cache = re.create_cache();
    ///
    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
    /// let expected = Some(HalfMatch::must(0, 9));
    /// let got = re.find_leftmost_fwd(&mut cache, haystack)?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn builder() -> Builder {
        Builder::new()
    }

    /// Create a new cache for this lazy DFA.
    ///
    /// The cache returned should only be used for searches for this
    /// lazy DFA. If you want to reuse the cache for another DFA, then
    /// you must call [`Cache::reset`] with that DFA (or, equivalently,
    /// [`DFA::reset_cache`]).
    pub fn create_cache(&self) -> Cache {
        Cache::new(self)
    }

    /// Reset the given cache such that it can be used for searching with the
    /// this lazy DFA (and only this DFA).
    ///
    /// A cache reset permits reusing memory already allocated in this cache
    /// with a different lazy DFA.
    ///
    /// Resetting a cache sets its "clear count" to 0. This is relevant if the
    /// lazy DFA has been configured to "give up" after it has cleared the
    /// cache a certain number of times.
    ///
    /// Any lazy state ID generated by the cache prior to resetting it is
    /// invalid after the reset.
    ///
    /// # Example
    ///
    /// This shows how to re-purpose a cache for use with a different DFA.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa1 = DFA::new(r"\w")?;
    /// let dfa2 = DFA::new(r"\W")?;
    ///
    /// let mut cache = dfa1.create_cache();
    /// assert_eq!(
    ///     Some(HalfMatch::must(0, 2)),
    ///     dfa1.find_leftmost_fwd(&mut cache, "Δ".as_bytes())?,
    /// );
    ///
    /// // Using 'cache' with dfa2 is not allowed. It may result in panics or
    /// // incorrect results. In order to re-purpose the cache, we must reset
    /// // it with the DFA we'd like to use it with.
    /// //
    /// // Similarly, after this reset, using the cache with 'dfa1' is also not
    /// // allowed.
    /// dfa2.reset_cache(&mut cache);
    /// assert_eq!(
    ///     Some(HalfMatch::must(0, 3)),
    ///     dfa2.find_leftmost_fwd(&mut cache, "☃".as_bytes())?,
    /// );
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn reset_cache(&self, cache: &mut Cache) {
        Lazy::new(self, cache).reset_cache()
    }

    /// Returns the total number of patterns compiled into this lazy DFA.
    ///
    /// In the case of a DFA that contains no patterns, this returns `0`.
    ///
    /// # Example
    ///
    /// This example shows the pattern count for a DFA that never matches:
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::never_match()?;
    /// assert_eq!(dfa.pattern_count(), 0);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// And another example for a DFA that matches at every position:
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::always_match()?;
    /// assert_eq!(dfa.pattern_count(), 1);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// And finally, a DFA that was constructed from multiple patterns:
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
    /// assert_eq!(dfa.pattern_count(), 3);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn pattern_count(&self) -> usize {
        self.nfa.pattern_len()
    }

    /// Returns a reference to the underlying NFA.
    pub fn nfa(&self) -> &Arc<thompson::NFA> {
        &self.nfa
    }

    /// Returns the stride, as a base-2 exponent, required for these
    /// equivalence classes.
    ///
    /// The stride is always the smallest power of 2 that is greater than or
    /// equal to the alphabet length. This is done so that converting between
    /// state IDs and indices can be done with shifts alone, which is much
    /// faster than integer division.
    fn stride2(&self) -> usize {
        self.stride2
    }

    /// Returns the total stride for every state in this lazy DFA. This
    /// corresponds to the total number of transitions used by each state in
    /// this DFA's transition table.
    fn stride(&self) -> usize {
        1 << self.stride2()
    }

    /// Returns the total number of elements in the alphabet for this
    /// transition table. This is always less than or equal to `self.stride()`.
    /// It is only equal when the alphabet length is a power of 2. Otherwise,
    /// it is always strictly less.
    fn alphabet_len(&self) -> usize {
        self.classes.alphabet_len()
    }

    /// Returns the memory usage, in bytes, of this lazy DFA.
    ///
    /// This does **not** include the stack size used up by this lazy DFA. To
    /// compute that, use `std::mem::size_of::<DFA>()`. This also does
    /// not include the size of the `Cache` used.
    pub fn memory_usage(&self) -> usize {
        // Everything else is on the stack.
        self.nfa.memory_usage()
    }
}

impl DFA {
    /// Executes a forward search and returns the end position of the first
    /// match that is found as early as possible. If no match exists, then
    /// `None` is returned.
    ///
    /// This routine stops scanning input as soon as the search observes a
    /// match state. This is useful for implementing boolean `is_match`-like
    /// routines, where as little work is done as possible.
    ///
    /// See [`DFA::find_earliest_fwd_at`] for additional functionality, such as
    /// providing a prefilter, a specific pattern to match and the bounds of
    /// the search within the haystack. This routine is meant as a convenience
    /// for common cases where the additional functionality is not needed.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Example
    ///
    /// This example demonstrates how the position returned might differ from
    /// what one might expect when executing a traditional leftmost search.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::new("foo[0-9]+")?;
    /// let mut cache = dfa.create_cache();
    /// // Normally, the end of the leftmost first match here would be 8,
    /// // corresponding to the end of the input. But the "earliest" semantics
    /// // this routine cause it to stop as soon as a match is known, which
    /// // occurs once 'foo[0-9]' has matched.
    /// let expected = HalfMatch::must(0, 4);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_earliest_fwd(&mut cache, b"foo12345")?,
    /// );
    ///
    /// let dfa = DFA::new("abc|a")?;
    /// let mut cache = dfa.create_cache();
    /// // Normally, the end of the leftmost first match here would be 3,
    /// // but the shortest match semantics detect a match earlier.
    /// let expected = HalfMatch::must(0, 1);
    /// assert_eq!(Some(expected), dfa.find_earliest_fwd(&mut cache, b"abc")?);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_earliest_fwd(
        &self,
        cache: &mut Cache,
        bytes: &[u8],
    ) -> Result<Option<HalfMatch>, MatchError> {
        self.find_earliest_fwd_at(cache, None, None, bytes, 0, bytes.len())
    }

    /// Executes a reverse search and returns the start position of the first
    /// match that is found as early as possible. If no match exists, then
    /// `None` is returned.
    ///
    /// This routine stops scanning input as soon as the search observes a
    /// match state.
    ///
    /// Note that while it is not technically necessary to build a reverse
    /// automaton to use a reverse search, it is likely that you'll want to do
    /// so. Namely, the typical use of a reverse search is to find the starting
    /// location of a match once its end is discovered from a forward search. A
    /// reverse DFA automaton can be built by configuring the intermediate NFA
    /// to be reversed via
    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse).
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Example
    ///
    /// This example demonstrates how the position returned might differ from
    /// what one might expect when executing a traditional leftmost reverse
    /// search.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, nfa::thompson, HalfMatch};
    ///
    /// let dfa = DFA::builder()
    ///     .thompson(thompson::Config::new().reverse(true))
    ///     .build("[a-z]+[0-9]+")?;
    /// let mut cache = dfa.create_cache();
    /// // Normally, the end of the leftmost first match here would be 0,
    /// // corresponding to the beginning of the input. But the "earliest"
    /// // semantics of this routine cause it to stop as soon as a match is
    /// // known, which occurs once '[a-z][0-9]+' has matched.
    /// let expected = HalfMatch::must(0, 2);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_earliest_rev(&mut cache, b"foo12345")?,
    /// );
    ///
    /// let dfa = DFA::builder()
    ///     .thompson(thompson::Config::new().reverse(true))
    ///     .build("abc|c")?;
    /// let mut cache = dfa.create_cache();
    /// // Normally, the end of the leftmost first match here would be 0,
    /// // but the shortest match semantics detect a match earlier.
    /// let expected = HalfMatch::must(0, 2);
    /// assert_eq!(Some(expected), dfa.find_earliest_rev(&mut cache, b"abc")?);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_earliest_rev(
        &self,
        cache: &mut Cache,
        bytes: &[u8],
    ) -> Result<Option<HalfMatch>, MatchError> {
        self.find_earliest_rev_at(cache, None, bytes, 0, bytes.len())
    }

    /// Executes a forward search and returns the end position of the leftmost
    /// match that is found. If no match exists, then `None` is returned.
    ///
    /// In particular, this method continues searching even after it enters
    /// a match state. The search only terminates once it has reached the
    /// end of the input or when it has entered a dead or quit state. Upon
    /// termination, the position of the last byte seen while still in a match
    /// state is returned.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Example
    ///
    /// Leftmost first match semantics corresponds to the match with the
    /// smallest starting offset, but where the end offset is determined by
    /// preferring earlier branches in the original regular expression. For
    /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam`
    /// will match `Samwise` in `Samwise`.
    ///
    /// Generally speaking, the "leftmost first" match is how most backtracking
    /// regular expressions tend to work. This is in contrast to POSIX-style
    /// regular expressions that yield "leftmost longest" matches. Namely,
    /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using
    /// leftmost longest semantics. (This crate does not currently support
    /// leftmost longest semantics.)
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::new("foo[0-9]+")?;
    /// let mut cache = dfa.create_cache();
    /// let expected = HalfMatch::must(0, 8);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345")?,
    /// );
    ///
    /// // Even though a match is found after reading the first byte (`a`),
    /// // the leftmost first match semantics demand that we find the earliest
    /// // match that prefers earlier parts of the pattern over latter parts.
    /// let dfa = DFA::new("abc|a")?;
    /// let mut cache = dfa.create_cache();
    /// let expected = HalfMatch::must(0, 3);
    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"abc")?);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_leftmost_fwd(
        &self,
        cache: &mut Cache,
        bytes: &[u8],
    ) -> Result<Option<HalfMatch>, MatchError> {
        self.find_leftmost_fwd_at(cache, None, None, bytes, 0, bytes.len())
    }

    /// Executes a reverse search and returns the start of the position of the
    /// leftmost match that is found. If no match exists, then `None` is
    /// returned.
    ///
    /// In particular, this method continues searching even after it enters
    /// a match state. The search only terminates once it has reached the
    /// end of the input or when it has entered a dead or quit state. Upon
    /// termination, the position of the last byte seen while still in a match
    /// state is returned.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Example
    ///
    /// In particular, this routine is principally
    /// useful when used in conjunction with the
    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::revers
    /// e) configuration. In general, it's unlikely to be correct to use both
    /// `find_leftmost_fwd` and `find_leftmost_rev` with the same DFA since
    /// any particular DFA will only support searching in one direction with
    /// respect to the pattern.
    ///
    /// ```
    /// use regex_automata::{nfa::thompson, hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa = DFA::builder()
    ///     .thompson(thompson::Config::new().reverse(true))
    ///     .build("foo[0-9]+")?;
    /// let mut cache = dfa.create_cache();
    /// let expected = HalfMatch::must(0, 0);
    /// assert_eq!(
    ///     Some(expected),
    ///     dfa.find_leftmost_rev(&mut cache, b"foo12345")?,
    /// );
    ///
    /// // Even though a match is found after reading the last byte (`c`),
    /// // the leftmost first match semantics demand that we find the earliest
    /// // match that prefers earlier parts of the pattern over latter parts.
    /// let dfa = DFA::builder()
    ///     .thompson(thompson::Config::new().reverse(true))
    ///     .build("abc|c")?;
    /// let mut cache = dfa.create_cache();
    /// let expected = HalfMatch::must(0, 0);
    /// assert_eq!(Some(expected), dfa.find_leftmost_rev(&mut cache, b"abc")?);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_leftmost_rev(
        &self,
        cache: &mut Cache,
        bytes: &[u8],
    ) -> Result<Option<HalfMatch>, MatchError> {
        self.find_leftmost_rev_at(cache, None, bytes, 0, bytes.len())
    }

    /// Executes an overlapping forward search and returns the end position of
    /// matches as they are found. If no match exists, then `None` is returned.
    ///
    /// This routine is principally only useful when searching for multiple
    /// patterns on inputs where multiple patterns may match the same regions
    /// of text. In particular, callers must preserve the automaton's search
    /// state from prior calls so that the implementation knows where the last
    /// match occurred.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Example
    ///
    /// This example shows how to run a basic overlapping search. Notice
    /// that we build the automaton with a `MatchKind::All` configuration.
    /// Overlapping searches are unlikely to work as one would expect when
    /// using the default `MatchKind::LeftmostFirst` match semantics, since
    /// leftmost-first matching is fundamentally incompatible with overlapping
    /// searches. Namely, overlapping searches need to report matches as they
    /// are seen, where as leftmost-first searches will continue searching even
    /// after a match has been observed in order to find the conventional end
    /// position of the match. More concretely, leftmost-first searches use
    /// dead states to terminate a search after a specific match can no longer
    /// be extended. Overlapping searches instead do the opposite by continuing
    /// the search to find totally new matches (potentially of other patterns).
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::{dfa::DFA, OverlappingState},
    ///     HalfMatch,
    ///     MatchKind,
    /// };
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().match_kind(MatchKind::All))
    ///     .build_many(&[r"\w+$", r"\S+$"])?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "@foo".as_bytes();
    /// let mut state = OverlappingState::start();
    ///
    /// let expected = Some(HalfMatch::must(1, 4));
    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
    /// assert_eq!(expected, got);
    ///
    /// // The first pattern also matches at the same position, so re-running
    /// // the search will yield another match. Notice also that the first
    /// // pattern is returned after the second. This is because the second
    /// // pattern begins its match before the first, is therefore an earlier
    /// // match and is thus reported first.
    /// let expected = Some(HalfMatch::must(0, 4));
    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_overlapping_fwd(
        &self,
        cache: &mut Cache,
        bytes: &[u8],
        state: &mut OverlappingState,
    ) -> Result<Option<HalfMatch>, MatchError> {
        self.find_overlapping_fwd_at(
            cache,
            None,
            None,
            bytes,
            0,
            bytes.len(),
            state,
        )
    }

    /// Executes a forward search and returns the end position of the first
    /// match that is found as early as possible. If no match exists, then
    /// `None` is returned.
    ///
    /// This routine stops scanning input as soon as the search observes a
    /// match state. This is useful for implementing boolean `is_match`-like
    /// routines, where as little work is done as possible.
    ///
    /// This is like [`DFA::find_earliest_fwd`], except it provides some
    /// additional control over how the search is executed:
    ///
    /// * `pre` is a prefilter scanner that, when given, is used whenever the
    /// DFA enters its starting state. This is meant to speed up searches where
    /// one or a small number of literal prefixes are known.
    /// * `pattern_id` specifies a specific pattern in the DFA to run an
    /// anchored search for. If not given, then a search for any pattern is
    /// performed. For lazy DFAs, [`Config::starts_for_each_pattern`] must be
    /// enabled to use this functionality.
    /// * `start` and `end` permit searching a specific region of the haystack
    /// `bytes`. This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `bytes`. (Because the existence of look-around
    /// operations such as `\b`, `^` and `$` need to take the surrounding
    /// context into account. This cannot be done if the haystack doesn't
    /// contain it.)
    ///
    /// The examples below demonstrate each of these additional parameters.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Panics
    ///
    /// This routine panics if a `pattern_id` is given and this lazy DFA does
    /// not support specific pattern searches.
    ///
    /// It also panics if the given haystack range is not valid.
    ///
    /// # Example: prefilter
    ///
    /// This example shows how to provide a prefilter for a pattern where all
    /// matches start with a `z` byte.
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::dfa::DFA,
    ///     util::prefilter::{Candidate, Prefilter, Scanner, State},
    ///     HalfMatch,
    /// };
    ///
    /// #[derive(Debug)]
    /// pub struct ZPrefilter;
    ///
    /// impl Prefilter for ZPrefilter {
    ///     fn next_candidate(
    ///         &self,
    ///         _: &mut State,
    ///         haystack: &[u8],
    ///         at: usize,
    ///     ) -> Candidate {
    ///         // Try changing b'z' to b'q' and observe this test fail since
    ///         // the prefilter will skip right over the match.
    ///         match haystack.iter().position(|&b| b == b'z') {
    ///             None => Candidate::None,
    ///             Some(i) => Candidate::PossibleStartOfMatch(at + i),
    ///         }
    ///     }
    ///
    ///     fn heap_bytes(&self) -> usize {
    ///         0
    ///     }
    /// }
    ///
    /// let dfa = DFA::new("z[0-9]{3}")?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "foobar z123 q123".as_bytes();
    /// // A scanner executes a prefilter while tracking some state that helps
    /// // determine whether a prefilter is still "effective" or not.
    /// let mut scanner = Scanner::new(&ZPrefilter);
    ///
    /// let expected = Some(HalfMatch::must(0, 11));
    /// let got = dfa.find_earliest_fwd_at(
    ///     &mut cache,
    ///     Some(&mut scanner),
    ///     None,
    ///     haystack,
    ///     0,
    ///     haystack.len(),
    /// )?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Example: specific pattern search
    ///
    /// This example shows how to build a lazy multi-DFA that permits searching
    /// for specific patterns.
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::dfa::DFA,
    ///     HalfMatch,
    ///     PatternID,
    /// };
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().starts_for_each_pattern(true))
    ///     .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "foo123".as_bytes();
    ///
    /// // Since we are using the default leftmost-first match and both
    /// // patterns match at the same starting position, only the first pattern
    /// // will be returned in this case when doing a search for any of the
    /// // patterns.
    /// let expected = Some(HalfMatch::must(0, 6));
    /// let got = dfa.find_earliest_fwd_at(
    ///     &mut cache,
    ///     None,
    ///     None,
    ///     haystack,
    ///     0,
    ///     haystack.len(),
    /// )?;
    /// assert_eq!(expected, got);
    ///
    /// // But if we want to check whether some other pattern matches, then we
    /// // can provide its pattern ID.
    /// let expected = Some(HalfMatch::must(1, 6));
    /// let got = dfa.find_earliest_fwd_at(
    ///     &mut cache,
    ///     None,
    ///     Some(PatternID::must(1)),
    ///     haystack,
    ///     0,
    ///     haystack.len(),
    /// )?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Example: specifying the bounds of a search
    ///
    /// This example shows how providing the bounds of a search can produce
    /// different results than simply sub-slicing the haystack.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// // N.B. We disable Unicode here so that we use a simple ASCII word
    /// // boundary. Alternatively, we could enable heuristic support for
    /// // Unicode word boundaries since our haystack is pure ASCII.
    /// let dfa = DFA::new(r"(?-u)\b[0-9]{3}\b")?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "foo123bar".as_bytes();
    ///
    /// // Since we sub-slice the haystack, the search doesn't know about the
    /// // larger context and assumes that `123` is surrounded by word
    /// // boundaries. And of course, the match position is reported relative
    /// // to the sub-slice as well, which means we get `3` instead of `6`.
    /// let expected = Some(HalfMatch::must(0, 3));
    /// let got = dfa.find_earliest_fwd_at(
    ///     &mut cache,
    ///     None,
    ///     None,
    ///     &haystack[3..6],
    ///     0,
    ///     haystack[3..6].len(),
    /// )?;
    /// assert_eq!(expected, got);
    ///
    /// // But if we provide the bounds of the search within the context of the
    /// // entire haystack, then the search can take the surrounding context
    /// // into account. (And if we did find a match, it would be reported
    /// // as a valid offset into `haystack` instead of its sub-slice.)
    /// let expected = None;
    /// let got = dfa.find_earliest_fwd_at(
    ///     &mut cache,
    ///     None,
    ///     None,
    ///     haystack,
    ///     3,
    ///     6,
    /// )?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn find_earliest_fwd_at(
        &self,
        cache: &mut Cache,
        pre: Option<&mut prefilter::Scanner>,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<HalfMatch>, MatchError> {
        search::find_earliest_fwd(
            pre, self, cache, pattern_id, bytes, start, end,
        )
    }

    /// Executes a reverse search and returns the start position of the first
    /// match that is found as early as possible. If no match exists, then
    /// `None` is returned.
    ///
    /// This routine stops scanning input as soon as the search observes a
    /// match state.
    ///
    /// This is like [`DFA::find_earliest_rev`], except it provides some
    /// additional control over how the search is executed. See the
    /// documentation of [`DFA::find_earliest_fwd_at`] for more details
    /// on the additional parameters along with examples of their usage.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Panics
    ///
    /// This routine panics if a `pattern_id` is given and the underlying
    /// DFA does not support specific pattern searches.
    ///
    /// It also panics if the given haystack range is not valid.
    #[inline]
    pub fn find_earliest_rev_at(
        &self,
        cache: &mut Cache,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<HalfMatch>, MatchError> {
        search::find_earliest_rev(self, cache, pattern_id, bytes, start, end)
    }

    /// Executes a forward search and returns the end position of the leftmost
    /// match that is found. If no match exists, then `None` is returned.
    ///
    /// This is like [`DFA::find_leftmost_fwd`], except it provides some
    /// additional control over how the search is executed. See the
    /// documentation of [`DFA::find_earliest_fwd_at`] for more details on the
    /// additional parameters along with examples of their usage.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Panics
    ///
    /// This routine panics if a `pattern_id` is given and the underlying
    /// DFA does not support specific pattern searches.
    ///
    /// It also panics if the given haystack range is not valid.
    #[inline]
    pub fn find_leftmost_fwd_at(
        &self,
        cache: &mut Cache,
        pre: Option<&mut prefilter::Scanner>,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<HalfMatch>, MatchError> {
        search::find_leftmost_fwd(
            pre, self, cache, pattern_id, bytes, start, end,
        )
    }

    /// Executes a reverse search and returns the start of the position of the
    /// leftmost match that is found. If no match exists, then `None` is
    /// returned.
    ///
    /// This is like [`DFA::find_leftmost_rev`], except it provides some
    /// additional control over how the search is executed. See the
    /// documentation of [`DFA::find_earliest_fwd_at`] for more details on the
    /// additional parameters along with examples of their usage.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Panics
    ///
    /// This routine panics if a `pattern_id` is given and the underlying
    /// DFA does not support specific pattern searches.
    ///
    /// It also panics if the given haystack range is not valid.
    #[inline]
    pub fn find_leftmost_rev_at(
        &self,
        cache: &mut Cache,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<HalfMatch>, MatchError> {
        search::find_leftmost_rev(self, cache, pattern_id, bytes, start, end)
    }

    /// Executes an overlapping forward search and returns the end position of
    /// matches as they are found. If no match exists, then `None` is returned.
    ///
    /// This routine is principally only useful when searching for multiple
    /// patterns on inputs where multiple patterns may match the same regions
    /// of text. In particular, callers must preserve the automaton's search
    /// state from prior calls so that the implementation knows where the last
    /// match occurred.
    ///
    /// This is like [`DFA::find_overlapping_fwd`], except it provides
    /// some additional control over how the search is executed. See the
    /// documentation of [`DFA::find_earliest_fwd_at`] for more details
    /// on the additional parameters along with examples of their usage.
    ///
    /// When using this routine to implement an iterator of overlapping
    /// matches, the `start` of the search should always be set to the end
    /// of the last match. If more patterns match at the previous location,
    /// then they will be immediately returned. (This is tracked by the given
    /// overlapping state.) Otherwise, the search continues at the starting
    /// position given.
    ///
    /// If for some reason you want the search to forget about its previous
    /// state and restart the search at a particular position, then setting the
    /// state to [`OverlappingState::start`] will accomplish that.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// lazy DFAs generated by this crate, this only occurs in non-default
    /// configurations where quit bytes are used, Unicode word boundaries are
    /// heuristically enabled or limits are set on the number of times the lazy
    /// DFA's cache may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// # Panics
    ///
    /// This routine panics if a `pattern_id` is given and the underlying
    /// DFA does not support specific pattern searches.
    ///
    /// It also panics if the given haystack range is not valid.
    #[inline]
    pub fn find_overlapping_fwd_at(
        &self,
        cache: &mut Cache,
        pre: Option<&mut prefilter::Scanner>,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
        state: &mut OverlappingState,
    ) -> Result<Option<HalfMatch>, MatchError> {
        search::find_overlapping_fwd(
            pre, self, cache, pattern_id, bytes, start, end, state,
        )
    }
}

impl DFA {
    /// Transitions from the current state to the next state, given the next
    /// byte of input.
    ///
    /// The given cache is used to either reuse pre-computed state
    /// transitions, or to store this newly computed transition for future
    /// reuse. Thus, this routine guarantees that it will never return a state
    /// ID that has an "unknown" tag.
    ///
    /// # State identifier validity
    ///
    /// The only valid value for `current` is the lazy state ID returned
    /// by the most recent call to `next_state`, `next_state_untagged`,
    /// `next_state_untagged_unchecked`, `start_state_forward` or
    /// `state_state_reverse` for the given `cache`. Any state ID returned from
    /// prior calls to these routines (with the same `cache`) is considered
    /// invalid (even if it gives an appearance of working). State IDs returned
    /// from _any_ prior call for different `cache` values are also always
    /// invalid.
    ///
    /// The returned ID is always a valid ID when `current` refers to a valid
    /// ID. Moreover, this routine is defined for all possible values of
    /// `input`.
    ///
    /// These validity rules are not checked, even in debug mode. Callers are
    /// required to uphold these rules themselves.
    ///
    /// Violating these state ID validity rules will not sacrifice memory
    /// safety, but _may_ produce an incorrect result or a panic.
    ///
    /// # Panics
    ///
    /// If the given ID does not refer to a valid state, then this routine
    /// may panic but it also may not panic and instead return an invalid or
    /// incorrect ID.
    ///
    /// # Example
    ///
    /// This shows a simplistic example for walking a lazy DFA for a given
    /// haystack by using the `next_state` method.
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::new(r"[a-z]+r")?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "bar".as_bytes();
    ///
    /// // The start state is determined by inspecting the position and the
    /// // initial bytes of the haystack.
    /// let mut sid = dfa.start_state_forward(
    ///     &mut cache, None, haystack, 0, haystack.len(),
    /// )?;
    /// // Walk all the bytes in the haystack.
    /// for &b in haystack {
    ///     sid = dfa.next_state(&mut cache, sid, b)?;
    /// }
    /// // Matches are always delayed by 1 byte, so we must explicitly walk the
    /// // special "EOI" transition at the end of the search.
    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
    /// assert!(sid.is_match());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn next_state(
        &self,
        cache: &mut Cache,
        current: LazyStateID,
        input: u8,
    ) -> Result<LazyStateID, CacheError> {
        let class = usize::from(self.classes.get(input));
        let offset = current.as_usize_untagged() + class;
        let sid = cache.trans[offset];
        if !sid.is_unknown() {
            return Ok(sid);
        }
        let unit = alphabet::Unit::u8(input);
        Lazy::new(self, cache).cache_next_state(current, unit)
    }

    /// Transitions from the current state to the next state, given the next
    /// byte of input and a state ID that is not tagged.
    ///
    /// The only reason to use this routine is performance. In particular, the
    /// `next_state` method needs to do some additional checks, among them is
    /// to account for identifiers to states that are not yet computed. In
    /// such a case, the transition is computed on the fly. However, if it is
    /// known that the `current` state ID is untagged, then these checks can be
    /// omitted.
    ///
    /// Since this routine does not compute states on the fly, it does not
    /// modify the cache and thus cannot return an error. Consequently, `cache`
    /// does not need to be mutable and it is possible for this routine to
    /// return a state ID corresponding to the special "unknown" state. In
    /// this case, it is the caller's responsibility to use the prior state
    /// ID and `input` with `next_state` in order to force the computation of
    /// the unknown transition. Otherwise, trying to use the "unknown" state
    /// ID will just result in transitioning back to itself, and thus never
    /// terminating. (This is technically a special exemption to the state ID
    /// validity rules, but is permissible since this routine is guarateed to
    /// never mutate the given `cache`, and thus the identifier is guaranteed
    /// to remain valid.)
    ///
    /// See [`LazyStateID`] for more details on what it means for a state ID
    /// to be tagged. Also, see
    /// [`next_state_untagged_unchecked`](DFA::next_state_untagged_unchecked)
    /// for this same idea, but with bounds checks forcefully elided.
    ///
    /// # State identifier validity
    ///
    /// The only valid value for `current` is an **untagged** lazy
    /// state ID returned by the most recent call to `next_state`,
    /// `next_state_untagged`, `next_state_untagged_unchecked`,
    /// `start_state_forward` or `state_state_reverse` for the given `cache`.
    /// Any state ID returned from prior calls to these routines (with the
    /// same `cache`) is considered invalid (even if it gives an appearance
    /// of working). State IDs returned from _any_ prior call for different
    /// `cache` values are also always invalid.
    ///
    /// The returned ID is always a valid ID when `current` refers to a valid
    /// ID, although it may be tagged. Moreover, this routine is defined for
    /// all possible values of `input`.
    ///
    /// Not all validity rules are checked, even in debug mode. Callers are
    /// required to uphold these rules themselves.
    ///
    /// Violating these state ID validity rules will not sacrifice memory
    /// safety, but _may_ produce an incorrect result or a panic.
    ///
    /// # Panics
    ///
    /// If the given ID does not refer to a valid state, then this routine
    /// may panic but it also may not panic and instead return an invalid or
    /// incorrect ID.
    ///
    /// # Example
    ///
    /// This shows a simplistic example for walking a lazy DFA for a given
    /// haystack by using the `next_state_untagged` method where possible.
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::new(r"[a-z]+r")?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "bar".as_bytes();
    ///
    /// // The start state is determined by inspecting the position and the
    /// // initial bytes of the haystack.
    /// let mut sid = dfa.start_state_forward(
    ///     &mut cache, None, haystack, 0, haystack.len(),
    /// )?;
    /// // Walk all the bytes in the haystack.
    /// let mut at = 0;
    /// while at < haystack.len() {
    ///     if sid.is_tagged() {
    ///         sid = dfa.next_state(&mut cache, sid, haystack[at])?;
    ///     } else {
    ///         let mut prev_sid = sid;
    ///         // We attempt to chew through as much as we can while moving
    ///         // through untagged state IDs. Thus, the transition function
    ///         // does less work on average per byte. (Unrolling this loop
    ///         // may help even more.)
    ///         while at < haystack.len() {
    ///             prev_sid = sid;
    ///             sid = dfa.next_state_untagged(
    ///                 &mut cache, sid, haystack[at],
    ///             );
    ///             at += 1;
    ///             if sid.is_tagged() {
    ///                 break;
    ///             }
    ///         }
    ///         // We must ensure that we never proceed to the next iteration
    ///         // with an unknown state ID. If we don't account for this
    ///         // case, then search isn't guaranteed to terminate since all
    ///         // transitions on unknown states loop back to itself.
    ///         if sid.is_unknown() {
    ///             sid = dfa.next_state(
    ///                 &mut cache, prev_sid, haystack[at - 1],
    ///             )?;
    ///         }
    ///     }
    /// }
    /// // Matches are always delayed by 1 byte, so we must explicitly walk the
    /// // special "EOI" transition at the end of the search.
    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
    /// assert!(sid.is_match());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn next_state_untagged(
        &self,
        cache: &Cache,
        current: LazyStateID,
        input: u8,
    ) -> LazyStateID {
        debug_assert!(!current.is_tagged());
        let class = usize::from(self.classes.get(input));
        let offset = current.as_usize_unchecked() + class;
        cache.trans[offset]
    }

    /// Transitions from the current state to the next state, eliding bounds
    /// checks, given the next byte of input and a state ID that is not tagged.
    ///
    /// The only reason to use this routine is performance. In particular, the
    /// `next_state` method needs to do some additional checks, among them is
    /// to account for identifiers to states that are not yet computed. In
    /// such a case, the transition is computed on the fly. However, if it is
    /// known that the `current` state ID is untagged, then these checks can be
    /// omitted.
    ///
    /// Since this routine does not compute states on the fly, it does not
    /// modify the cache and thus cannot return an error. Consequently, `cache`
    /// does not need to be mutable and it is possible for this routine to
    /// return a state ID corresponding to the special "unknown" state. In
    /// this case, it is the caller's responsibility to use the prior state
    /// ID and `input` with `next_state` in order to force the computation of
    /// the unknown transition. Otherwise, trying to use the "unknown" state
    /// ID will just result in transitioning back to itself, and thus never
    /// terminating. (This is technically a special exemption to the state ID
    /// validity rules, but is permissible since this routine is guarateed to
    /// never mutate the given `cache`, and thus the identifier is guaranteed
    /// to remain valid.)
    ///
    /// See [`LazyStateID`] for more details on what it means for a state ID
    /// to be tagged. Also, see
    /// [`next_state_untagged`](DFA::next_state_untagged)
    /// for this same idea, but with memory safety guaranteed by retaining
    /// bounds checks.
    ///
    /// # State identifier validity
    ///
    /// The only valid value for `current` is an **untagged** lazy
    /// state ID returned by the most recent call to `next_state`,
    /// `next_state_untagged`, `next_state_untagged_unchecked`,
    /// `start_state_forward` or `state_state_reverse` for the given `cache`.
    /// Any state ID returned from prior calls to these routines (with the
    /// same `cache`) is considered invalid (even if it gives an appearance
    /// of working). State IDs returned from _any_ prior call for different
    /// `cache` values are also always invalid.
    ///
    /// The returned ID is always a valid ID when `current` refers to a valid
    /// ID, although it may be tagged. Moreover, this routine is defined for
    /// all possible values of `input`.
    ///
    /// Not all validity rules are checked, even in debug mode. Callers are
    /// required to uphold these rules themselves.
    ///
    /// Violating these state ID validity rules will not sacrifice memory
    /// safety, but _may_ produce an incorrect result or a panic.
    ///
    /// # Safety
    ///
    /// Callers of this method must guarantee that `current` refers to a valid
    /// state ID according to the rules described above. If `current` is not a
    /// valid state ID for this automaton, then calling this routine may result
    /// in undefined behavior.
    ///
    /// If `current` is valid, then the ID returned is valid for all possible
    /// values of `input`.
    #[inline]
    pub unsafe fn next_state_untagged_unchecked(
        &self,
        cache: &Cache,
        current: LazyStateID,
        input: u8,
    ) -> LazyStateID {
        debug_assert!(!current.is_tagged());
        let class = usize::from(self.classes.get(input));
        let offset = current.as_usize_unchecked() + class;
        *cache.trans.get_unchecked(offset)
    }

    /// Transitions from the current state to the next state for the special
    /// EOI symbol.
    ///
    /// The given cache is used to either reuse pre-computed state
    /// transitions, or to store this newly computed transition for future
    /// reuse. Thus, this routine guarantees that it will never return a state
    /// ID that has an "unknown" tag.
    ///
    /// This routine must be called at the end of every search in a correct
    /// implementation of search. Namely, lazy DFAs in this crate delay matches
    /// by one byte in order to support look-around operators. Thus, after
    /// reaching the end of a haystack, a search implementation must follow one
    /// last EOI transition.
    ///
    /// It is best to think of EOI as an additional symbol in the alphabet of a
    /// DFA that is distinct from every other symbol. That is, the alphabet of
    /// lazy DFAs in this crate has a logical size of 257 instead of 256, where
    /// 256 corresponds to every possible inhabitant of `u8`. (In practice, the
    /// physical alphabet size may be smaller because of alphabet compression
    /// via equivalence classes, but EOI is always represented somehow in the
    /// alphabet.)
    ///
    /// # State identifier validity
    ///
    /// The only valid value for `current` is the lazy state ID returned
    /// by the most recent call to `next_state`, `next_state_untagged`,
    /// `next_state_untagged_unchecked`, `start_state_forward` or
    /// `state_state_reverse` for the given `cache`. Any state ID returned from
    /// prior calls to these routines (with the same `cache`) is considered
    /// invalid (even if it gives an appearance of working). State IDs returned
    /// from _any_ prior call for different `cache` values are also always
    /// invalid.
    ///
    /// The returned ID is always a valid ID when `current` refers to a valid
    /// ID.
    ///
    /// These validity rules are not checked, even in debug mode. Callers are
    /// required to uphold these rules themselves.
    ///
    /// Violating these state ID validity rules will not sacrifice memory
    /// safety, but _may_ produce an incorrect result or a panic.
    ///
    /// # Panics
    ///
    /// If the given ID does not refer to a valid state, then this routine
    /// may panic but it also may not panic and instead return an invalid or
    /// incorrect ID.
    ///
    /// # Example
    ///
    /// This shows a simplistic example for walking a DFA for a given haystack,
    /// and then finishing the search with the final EOI transition.
    ///
    /// ```
    /// use regex_automata::hybrid::dfa::DFA;
    ///
    /// let dfa = DFA::new(r"[a-z]+r")?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "bar".as_bytes();
    ///
    /// // The start state is determined by inspecting the position and the
    /// // initial bytes of the haystack.
    /// let mut sid = dfa.start_state_forward(
    ///     &mut cache, None, haystack, 0, haystack.len(),
    /// )?;
    /// // Walk all the bytes in the haystack.
    /// for &b in haystack {
    ///     sid = dfa.next_state(&mut cache, sid, b)?;
    /// }
    /// // Matches are always delayed by 1 byte, so we must explicitly walk
    /// // the special "EOI" transition at the end of the search. Without this
    /// // final transition, the assert below will fail since the DFA will not
    /// // have entered a match state yet!
    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
    /// assert!(sid.is_match());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn next_eoi_state(
        &self,
        cache: &mut Cache,
        current: LazyStateID,
    ) -> Result<LazyStateID, CacheError> {
        let eoi = self.classes.eoi().as_usize();
        let offset = current.as_usize_untagged() + eoi;
        let sid = cache.trans[offset];
        if !sid.is_unknown() {
            return Ok(sid);
        }
        let unit = self.classes.eoi();
        Lazy::new(self, cache).cache_next_state(current, unit)
    }

    /// Return the ID of the start state for this lazy DFA when executing a
    /// forward search.
    ///
    /// Unlike typical DFA implementations, the start state for DFAs in this
    /// crate is dependent on a few different factors:
    ///
    /// * The pattern ID, if present. When the underlying DFA has been
    /// configured with multiple patterns _and_ the DFA has been configured to
    /// build an anchored start state for each pattern, then a pattern ID may
    /// be specified to execute an anchored search for that specific pattern.
    /// If `pattern_id` is invalid or if the DFA isn't configured to build
    /// start states for each pattern, then implementations must panic. DFAs in
    /// this crate can be configured to build start states for each pattern via
    /// [`Config::starts_for_each_pattern`].
    /// * When `start > 0`, the byte at index `start - 1` may influence the
    /// start state if the regex uses `^` or `\b`.
    /// * Similarly, when `start == 0`, it may influence the start state when
    /// the regex uses `^` or `\A`.
    /// * Currently, `end` is unused.
    /// * Whether the search is a forward or reverse search. This routine can
    /// only be used for forward searches.
    ///
    /// # Panics
    ///
    /// This panics if `start..end` is not a valid sub-slice of `bytes`. This
    /// also panics if `pattern_id` is non-None and does not refer to a valid
    /// pattern, or if the DFA was not configured to build anchored start
    /// states for each pattern.
    #[inline]
    pub fn start_state_forward(
        &self,
        cache: &mut Cache,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<LazyStateID, CacheError> {
        let mut lazy = Lazy::new(self, cache);
        let start_type = Start::from_position_fwd(bytes, start, end);
        let sid = lazy.as_ref().get_cached_start_id(pattern_id, start_type);
        if !sid.is_unknown() {
            return Ok(sid);
        }
        lazy.cache_start_group(pattern_id, start_type)
    }

    /// Return the ID of the start state for this lazy DFA when executing a
    /// reverse search.
    ///
    /// Unlike typical DFA implementations, the start state for DFAs in this
    /// crate is dependent on a few different factors:
    ///
    /// * The pattern ID, if present. When the underlying DFA has been
    /// configured with multiple patterns _and_ the DFA has been configured to
    /// build an anchored start state for each pattern, then a pattern ID may
    /// be specified to execute an anchored search for that specific pattern.
    /// If `pattern_id` is invalid or if the DFA isn't configured to build
    /// start states for each pattern, then implementations must panic. DFAs in
    /// this crate can be configured to build start states for each pattern via
    /// [`Config::starts_for_each_pattern`].
    /// * When `end < bytes.len()`, the byte at index `end` may influence the
    /// start state if the regex uses `$` or `\b`.
    /// * Similarly, when `end == bytes.len()`, it may influence the start
    /// state when the regex uses `$` or `\z`.
    /// * Currently, `start` is unused.
    /// * Whether the search is a forward or reverse search. This routine can
    /// only be used for reverse searches.
    ///
    /// # Panics
    ///
    /// This panics if `start..end` is not a valid sub-slice of `bytes`. This
    /// also panics if `pattern_id` is non-None and does not refer to a valid
    /// pattern, or if the DFA was not configured to build anchored start
    /// states for each pattern.
    #[inline]
    pub fn start_state_reverse(
        &self,
        cache: &mut Cache,
        pattern_id: Option<PatternID>,
        bytes: &[u8],
        start: usize,
        end: usize,
    ) -> Result<LazyStateID, CacheError> {
        let mut lazy = Lazy::new(self, cache);
        let start_type = Start::from_position_rev(bytes, start, end);
        let sid = lazy.as_ref().get_cached_start_id(pattern_id, start_type);
        if !sid.is_unknown() {
            return Ok(sid);
        }
        lazy.cache_start_group(pattern_id, start_type)
    }

    /// Returns the total number of patterns that match in this state.
    ///
    /// If the lazy DFA was compiled with one pattern, then this must
    /// necessarily always return `1` for all match states.
    ///
    /// A lazy DFA guarantees that [`DFA::match_pattern`] can be called with
    /// indices up to (but not including) the count returned by this routine
    /// without panicking.
    ///
    /// If the given state is not a match state, then this may either panic
    /// or return an incorrect result.
    ///
    /// # Example
    ///
    /// This example shows a simple instance of implementing overlapping
    /// matches. In particular, it shows not only how to determine how many
    /// patterns have matched in a particular state, but also how to access
    /// which specific patterns have matched.
    ///
    /// Notice that we must use [`MatchKind::All`](crate::MatchKind::All)
    /// when building the DFA. If we used
    /// [`MatchKind::LeftmostFirst`](crate::MatchKind::LeftmostFirst)
    /// instead, then the DFA would not be constructed in a way that supports
    /// overlapping matches. (It would only report a single pattern that
    /// matches at any particular point in time.)
    ///
    /// Another thing to take note of is the patterns used and the order in
    /// which the pattern IDs are reported. In the example below, pattern `3`
    /// is yielded first. Why? Because it corresponds to the match that
    /// appears first. Namely, the `@` symbol is part of `\S+` but not part
    /// of any of the other patterns. Since the `\S+` pattern has a match that
    /// starts to the left of any other pattern, its ID is returned before any
    /// other.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, MatchKind};
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().match_kind(MatchKind::All))
    ///     .build_many(&[
    ///         r"\w+", r"[a-z]+", r"[A-Z]+", r"\S+",
    ///     ])?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "@bar".as_bytes();
    ///
    /// // The start state is determined by inspecting the position and the
    /// // initial bytes of the haystack.
    /// let mut sid = dfa.start_state_forward(
    ///     &mut cache, None, haystack, 0, haystack.len(),
    /// )?;
    /// // Walk all the bytes in the haystack.
    /// for &b in haystack {
    ///     sid = dfa.next_state(&mut cache, sid, b)?;
    /// }
    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
    ///
    /// assert!(sid.is_match());
    /// assert_eq!(dfa.match_count(&mut cache, sid), 3);
    /// // The following calls are guaranteed to not panic since `match_count`
    /// // returned `3` above.
    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 0).as_usize(), 3);
    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 1).as_usize(), 0);
    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 2).as_usize(), 1);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    pub fn match_count(&self, cache: &Cache, id: LazyStateID) -> usize {
        assert!(id.is_match());
        LazyRef::new(self, cache).get_cached_state(id).match_count()
    }

    /// Returns the pattern ID corresponding to the given match index in the
    /// given state.
    ///
    /// See [`DFA::match_count`] for an example of how to use this method
    /// correctly. Note that if you know your lazy DFA is configured with a
    /// single pattern, then this routine is never necessary since it will
    /// always return a pattern ID of `0` for an index of `0` when `id`
    /// corresponds to a match state.
    ///
    /// Typically, this routine is used when implementing an overlapping
    /// search, as the example for `DFA::match_count` does.
    ///
    /// # Panics
    ///
    /// If the state ID is not a match state or if the match index is out
    /// of bounds for the given state, then this routine may either panic
    /// or produce an incorrect result. If the state ID is correct and the
    /// match index is correct, then this routine always produces a valid
    /// `PatternID`.
    #[inline]
    pub fn match_pattern(
        &self,
        cache: &Cache,
        id: LazyStateID,
        match_index: usize,
    ) -> PatternID {
        // This is an optimization for the very common case of a DFA with a
        // single pattern. This conditional avoids a somewhat more costly path
        // that finds the pattern ID from the corresponding `State`, which
        // requires a bit of slicing/pointer-chasing. This optimization tends
        // to only matter when matches are frequent.
        if self.pattern_count() == 1 {
            return PatternID::ZERO;
        }
        LazyRef::new(self, cache)
            .get_cached_state(id)
            .match_pattern(match_index)
    }
}

/// A cache represents a partially computed DFA.
///
/// A cache is the key component that differentiates a classical DFA and a
/// hybrid NFA/DFA (also called a "lazy DFA"). Where a classical DFA builds a
/// complete transition table that can handle all possible inputs, a hybrid
/// NFA/DFA starts with an empty transition table and builds only the parts
/// required during search. The parts that are built are stored in a cache. For
/// this reason, a cache is a required parameter for nearly every operation on
/// a [`DFA`].
///
/// Caches can be created from their corresponding DFA via
/// [`DFA::create_cache`]. A cache can only be used with either the DFA that
/// created it, or the DFA that was most recently used to reset it with
/// [`Cache::reset`]. Using a cache with any other DFA may result in panics
/// or incorrect results.
#[derive(Clone, Debug)]
pub struct Cache {
    // N.B. If you're looking to understand how determinization works, it
    // is probably simpler to first grok src/dfa/determinize.rs, since that
    // doesn't have the "laziness" component.
    /// The transition table.
    ///
    /// Given a `current` LazyStateID and an `input` byte, the next state can
    /// be computed via `trans[untagged(current) + equiv_class(input)]`. Notice
    /// that no multiplication is used. That's because state identifiers are
    /// "premultiplied."
    ///
    /// Note that the next state may be the "unknown" state. In this case, the
    /// next state is not known and determinization for `current` on `input`
    /// must be performed.
    trans: Vec<LazyStateID>,
    /// The starting states for this DFA.
    ///
    /// These are computed lazily. Initially, these are all set to "unknown"
    /// lazy state IDs.
    ///
    /// When 'starts_for_each_pattern' is disabled (the default), then the size
    /// of this is constrained to the possible starting configurations based
    /// on the search parameters. (At time of writing, that's 4.) However,
    /// when starting states for each pattern is enabled, then there are N
    /// additional groups of starting states, where each group reflects the
    /// different possible configurations and N is the number of patterns.
    starts: Vec<LazyStateID>,
    /// A sequence of NFA/DFA powerset states that have been computed for this
    /// lazy DFA. This sequence is indexable by untagged LazyStateIDs. (Every
    /// tagged LazyStateID can be used to index this sequence by converting it
    /// to its untagged form.)
    states: Vec<State>,
    /// A map from states to their corresponding IDs. This map may be accessed
    /// via the raw byte representation of a state, which means that a `State`
    /// does not need to be allocated to determine whether it already exists
    /// in this map. Indeed, the existence of such a state is what determines
    /// whether we allocate a new `State` or not.
    ///
    /// The higher level idea here is that we do just enough determinization
    /// for a state to check whether we've already computed it. If we have,
    /// then we can save a little (albeit not much) work. The real savings is
    /// in memory usage. If we never checked for trivially duplicate states,
    /// then our memory usage would explode to unreasonable levels.
    states_to_id: StateMap,
    /// Sparse sets used to track which NFA states have been visited during
    /// various traversals.
    sparses: SparseSets,
    /// Scratch space for traversing the NFA graph. (We use space on the heap
    /// instead of the call stack.)
    stack: Vec<NFAStateID>,
    /// Scratch space for building a NFA/DFA powerset state. This is used to
    /// help amortize allocation since not every powerset state generated is
    /// added to the cache. In particular, if it already exists in the cache,
    /// then there is no need to allocate a new `State` for it.
    scratch_state_builder: StateBuilderEmpty,
    /// A simple abstraction for handling the saving of at most a single state
    /// across a cache clearing. This is required for correctness. Namely, if
    /// adding a new state after clearing the cache fails, then the caller
    /// must retain the ability to continue using the state ID given. The
    /// state corresponding to the state ID is what we preserve across cache
    /// clearings.
    state_saver: StateSaver,
    /// The memory usage, in bytes, used by 'states' and 'states_to_id'. We
    /// track this as new states are added since states use a variable amount
    /// of heap. Tracking this as we add states makes it possible to compute
    /// the total amount of memory used by the determinizer in constant time.
    memory_usage_state: usize,
    /// The number of times the cache has been cleared. When a minimum cache
    /// clear count is set, then the cache will return an error instead of
    /// clearing the cache if the count has been exceeded.
    clear_count: usize,
}

impl Cache {
    /// Create a new cache for the given lazy DFA.
    ///
    /// The cache returned should only be used for searches for the given DFA.
    /// If you want to reuse the cache for another DFA, then you must call
    /// [`Cache::reset`] with that DFA.
    pub fn new(dfa: &DFA) -> Cache {
        let mut cache = Cache {
            trans: alloc::vec![],
            starts: alloc::vec![],
            states: alloc::vec![],
            states_to_id: StateMap::new(),
            sparses: SparseSets::new(dfa.nfa.len()),
            stack: alloc::vec![],
            scratch_state_builder: StateBuilderEmpty::new(),
            state_saver: StateSaver::none(),
            memory_usage_state: 0,
            clear_count: 0,
        };
        Lazy { dfa, cache: &mut cache }.init_cache();
        cache
    }

    /// Reset this cache such that it can be used for searching with the given
    /// lazy DFA (and only that DFA).
    ///
    /// A cache reset permits reusing memory already allocated in this cache
    /// with a different lazy DFA.
    ///
    /// Resetting a cache sets its "clear count" to 0. This is relevant if the
    /// lazy DFA has been configured to "give up" after it has cleared the
    /// cache a certain number of times.
    ///
    /// Any lazy state ID generated by the cache prior to resetting it is
    /// invalid after the reset.
    ///
    /// # Example
    ///
    /// This shows how to re-purpose a cache for use with a different DFA.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let dfa1 = DFA::new(r"\w")?;
    /// let dfa2 = DFA::new(r"\W")?;
    ///
    /// let mut cache = dfa1.create_cache();
    /// assert_eq!(
    ///     Some(HalfMatch::must(0, 2)),
    ///     dfa1.find_leftmost_fwd(&mut cache, "Δ".as_bytes())?,
    /// );
    ///
    /// // Using 'cache' with dfa2 is not allowed. It may result in panics or
    /// // incorrect results. In order to re-purpose the cache, we must reset
    /// // it with the DFA we'd like to use it with.
    /// //
    /// // Similarly, after this reset, using the cache with 'dfa1' is also not
    /// // allowed.
    /// cache.reset(&dfa2);
    /// assert_eq!(
    ///     Some(HalfMatch::must(0, 3)),
    ///     dfa2.find_leftmost_fwd(&mut cache, "☃".as_bytes())?,
    /// );
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn reset(&mut self, dfa: &DFA) {
        Lazy::new(dfa, self).reset_cache()
    }

    /// Returns the total number of times this cache has been cleared since it
    /// was either created or last reset.
    ///
    /// This is useful for informational purposes or if you want to change
    /// search strategies based on the number of times the cache has been
    /// cleared.
    pub fn clear_count(&self) -> usize {
        self.clear_count
    }

    /// Returns the heap memory usage, in bytes, of this cache.
    ///
    /// This does **not** include the stack size used up by this cache. To
    /// compute that, use `std::mem::size_of::<Cache>()`.
    pub fn memory_usage(&self) -> usize {
        const ID_SIZE: usize = size_of::<LazyStateID>();
        const STATE_SIZE: usize = size_of::<State>();

        self.trans.len() * ID_SIZE
        + self.starts.len() * ID_SIZE
        + self.states.len() * STATE_SIZE
        // Maps likely use more memory than this, but it's probably close.
        + self.states_to_id.len() * (STATE_SIZE + ID_SIZE)
        + self.sparses.memory_usage()
        + self.stack.capacity() * ID_SIZE
        + self.scratch_state_builder.capacity()
        // Heap memory used by 'State' in both 'states' and 'states_to_id'.
        + self.memory_usage_state
    }
}

/// A map from states to state identifiers. When using std, we use a standard
/// hashmap, since it's a bit faster for this use case. (Other maps, like
/// one's based on FNV, have not yet been benchmarked.)
///
/// The main purpose of this map is to reuse states where possible. This won't
/// fully minimize the DFA, but it works well in a lot of cases.
#[cfg(feature = "std")]
type StateMap = std::collections::HashMap<State, LazyStateID>;
#[cfg(not(feature = "std"))]
type StateMap = alloc::collections::BTreeMap<State, LazyStateID>;

/// A type that groups methods that require the base NFA/DFA and writable
/// access to the cache.
#[derive(Debug)]
struct Lazy<'i, 'c> {
    dfa: &'i DFA,
    cache: &'c mut Cache,
}

impl<'i, 'c> Lazy<'i, 'c> {
    /// Creates a new 'Lazy' wrapper for a DFA and its corresponding cache.
    fn new(dfa: &'i DFA, cache: &'c mut Cache) -> Lazy<'i, 'c> {
        Lazy { dfa, cache }
    }

    /// Return an immutable view by downgrading a writable cache to a read-only
    /// cache.
    fn as_ref<'a>(&'a self) -> LazyRef<'i, 'a> {
        LazyRef::new(self.dfa, self.cache)
    }

    /// This is marked as 'inline(never)' to avoid bloating methods on 'DFA'
    /// like 'next_state' and 'next_eoi_state' that are called in critical
    /// areas. The idea is to let the optimizer focus on the other areas of
    /// those methods as the hot path.
    ///
    /// Here's an example that justifies 'inline(never)'
    ///
    /// ```ignore
    /// regex-cli find hybrid dfa \
    ///   @all-codepoints-utf8-100x '\pL{100}' --cache-capacity 10000000
    /// ```
    ///
    /// Where 'all-codepoints-utf8-100x' is the UTF-8 encoding of every
    /// codepoint, in sequence, repeated 100 times.
    ///
    /// With 'inline(never)' hyperfine reports 1.1s per run. With
    /// 'inline(always)', hyperfine reports 1.23s. So that's a 10% improvement.
    #[inline(never)]
    fn cache_next_state(
        &mut self,
        mut current: LazyStateID,
        unit: alphabet::Unit,
    ) -> Result<LazyStateID, CacheError> {
        let stride2 = self.dfa.stride2();
        let empty_builder = self.get_state_builder();
        let builder = determinize::next(
            &self.dfa.nfa,
            self.dfa.match_kind,
            &mut self.cache.sparses,
            &mut self.cache.stack,
            &self.cache.states[current.as_usize_untagged() >> stride2],
            unit,
            empty_builder,
        );
        let save_state = !self.as_ref().state_builder_fits_in_cache(&builder);
        if save_state {
            self.save_state(current);
        }
        let next = self.add_builder_state(builder, |sid| sid)?;
        if save_state {
            current = self.saved_state_id();
        }
        // This is the payoff. The next time 'next_state' is called with this
        // state and alphabet unit, it will find this transition and avoid
        // having to re-determinize this transition.
        self.set_transition(current, unit, next);
        Ok(next)
    }

    /// Compute and cache the starting state for the given pattern ID (if
    /// present) and the starting configuration.
    ///
    /// This panics if a pattern ID is given and the DFA isn't configured to
    /// build anchored start states for each pattern.
    ///
    /// This will never return an unknown lazy state ID.
    ///
    /// If caching this state would otherwise result in a cache that has been
    /// cleared too many times, then an error is returned.
    fn cache_start_group(
        &mut self,
        pattern_id: Option<PatternID>,
        start: Start,
    ) -> Result<LazyStateID, CacheError> {
        let nfa_start_id = match pattern_id {
            Some(pid) => {
                assert!(
                    self.dfa.starts_for_each_pattern,
                    "attempted to search for a specific pattern \
                     without enabling starts_for_each_pattern",
                );
                self.dfa.nfa.start_pattern(pid)
            }
            None if self.dfa.anchored => self.dfa.nfa.start_anchored(),
            None => self.dfa.nfa.start_unanchored(),
        };

        let id = self.cache_start_one(nfa_start_id, start)?;
        self.set_start_state(pattern_id, start, id);
        Ok(id)
    }

    /// Compute and cache the starting state for the given NFA state ID and the
    /// starting configuration. The NFA state ID might be one of the following:
    ///
    /// 1) An unanchored start state to match any pattern.
    /// 2) An anchored start state to match any pattern.
    /// 3) An anchored start state for a particular pattern.
    ///
    /// This will never return an unknown lazy state ID.
    ///
    /// If caching this state would otherwise result in a cache that has been
    /// cleared too many times, then an error is returned.
    fn cache_start_one(
        &mut self,
        nfa_start_id: NFAStateID,
        start: Start,
    ) -> Result<LazyStateID, CacheError> {
        let mut builder_matches = self.get_state_builder().into_matches();
        determinize::set_lookbehind_from_start(&start, &mut builder_matches);
        self.cache.sparses.set1.clear();
        determinize::epsilon_closure(
            self.dfa.nfa.borrow(),
            nfa_start_id,
            *builder_matches.look_have(),
            &mut self.cache.stack,
            &mut self.cache.sparses.set1,
        );
        let mut builder = builder_matches.into_nfa();
        determinize::add_nfa_states(
            self.dfa.nfa.borrow(),
            &self.cache.sparses.set1,
            &mut builder,
        );
        self.add_builder_state(builder, |id| id.to_start())
    }

    /// Either add the given builder state to this cache, or return an ID to an
    /// equivalent state already in this cache.
    ///
    /// In the case where no equivalent state exists, the idmap function given
    /// may be used to transform the identifier allocated. This is useful if
    /// the caller needs to tag the ID with additional information.
    ///
    /// This will never return an unknown lazy state ID.
    ///
    /// If caching this state would otherwise result in a cache that has been
    /// cleared too many times, then an error is returned.
    fn add_builder_state(
        &mut self,
        builder: StateBuilderNFA,
        idmap: impl Fn(LazyStateID) -> LazyStateID,
    ) -> Result<LazyStateID, CacheError> {
        if let Some(&cached_id) =
            self.cache.states_to_id.get(builder.as_bytes())
        {
            // Since we have a cached state, put the constructed state's
            // memory back into our scratch space, so that it can be reused.
            self.put_state_builder(builder);
            return Ok(cached_id);
        }
        let result = self.add_state(builder.to_state(), idmap);
        self.put_state_builder(builder);
        result
    }

    /// Allocate a new state ID and add the given state to this cache.
    ///
    /// The idmap function given may be used to transform the identifier
    /// allocated. This is useful if the caller needs to tag the ID with
    /// additional information.
    ///
    /// This will never return an unknown lazy state ID.
    ///
    /// If caching this state would otherwise result in a cache that has been
    /// cleared too many times, then an error is returned.
    fn add_state(
        &mut self,
        state: State,
        idmap: impl Fn(LazyStateID) -> LazyStateID,
    ) -> Result<LazyStateID, CacheError> {
        if !self.as_ref().state_fits_in_cache(&state) {
            self.try_clear_cache()?;
        }
        // It's important for this to come second, since the above may clear
        // the cache. If we clear the cache after ID generation, then the ID
        // is likely bunk since it would have been generated based on a larger
        // transition table.
        let mut id = idmap(self.next_state_id()?);
        if state.is_match() {
            id = id.to_match();
        }
        // Add room in the transition table. Since this is a fresh state, all
        // of its transitions are unknown.
        self.cache.trans.extend(
            iter::repeat(self.as_ref().unknown_id()).take(self.dfa.stride()),
        );
        // When we add a sentinel state, we never want to set any quit
        // transitions. Technically, this is harmless, since sentinel states
        // have all of their transitions set to loop back to themselves. But
        // when creating sentinel states before the quit sentinel state,
        // this will try to call 'set_transition' on a state ID that doesn't
        // actually exist yet, which isn't allowed. So we just skip doing so
        // entirely.
        if !self.dfa.quitset.is_empty() && !self.as_ref().is_sentinel(id) {
            let quit_id = self.as_ref().quit_id();
            for b in self.dfa.quitset.iter() {
                self.set_transition(id, alphabet::Unit::u8(b), quit_id);
            }
        }
        self.cache.memory_usage_state += state.memory_usage();
        self.cache.states.push(state.clone());
        self.cache.states_to_id.insert(state, id);
        Ok(id)
    }

    /// Allocate a new state ID.
    ///
    /// This will never return an unknown lazy state ID.
    ///
    /// If caching this state would otherwise result in a cache that has been
    /// cleared too many times, then an error is returned.
    fn next_state_id(&mut self) -> Result<LazyStateID, CacheError> {
        let sid = match LazyStateID::new(self.cache.trans.len()) {
            Ok(sid) => sid,
            Err(_) => {
                self.try_clear_cache()?;
                // This has to pass since we check that ID capacity at
                // construction time can fit at least MIN_STATES states.
                LazyStateID::new(self.cache.trans.len()).unwrap()
            }
        };
        Ok(sid)
    }

    /// Attempt to clear the cache used by this lazy DFA.
    ///
    /// If clearing the cache exceeds the minimum number of required cache
    /// clearings, then this will return a cache error. In this case,
    /// callers should bubble this up as the cache can't be used until it is
    /// reset. Implementations of search should convert this error into a
    /// `MatchError::GaveUp`.
    ///
    /// If 'self.state_saver' is set to save a state, then this state is
    /// persisted through cache clearing. Otherwise, the cache is returned to
    /// its state after initialization with two exceptions: its clear count
    /// is incremented and some of its memory likely has additional capacity.
    /// That is, clearing a cache does _not_ release memory.
    ///
    /// Otherwise, any lazy state ID generated by the cache prior to resetting
    /// it is invalid after the reset.
    fn try_clear_cache(&mut self) -> Result<(), CacheError> {
        // Currently, the only heuristic we use is the minimum cache clear
        // count. If we pass that minimum, then we give up.
        //
        // It would be good to also add a heuristic based on "bytes searched
        // per generated state," but this requires API design work. Namely,
        // we really do not want to add a counter increment to the transition
        // function, which implies we need to expose APIs to update the number
        // of bytes searched by implementers of the search routines. And that
        // doesn't seem great... But we should do it if this heuristic isn't
        // enough. (The original lazy DFA implementation in the 'regex' crate
        // had this heuristic, since the lazy DFA was coupled with the search
        // routines.)
        if let Some(min_count) = self.dfa.minimum_cache_clear_count {
            if self.cache.clear_count >= min_count {
                return Err(CacheError::too_many_cache_clears());
            }
        }
        self.clear_cache();
        Ok(())
    }

    /// Clears _and_ resets the cache. Resetting the cache means that no
    /// states are persisted and the clear count is reset to 0. No heap memory
    /// is released.
    ///
    /// Note that the caller may reset a cache with a different DFA than what
    /// it was created from. In which case, the cache can now be used with the
    /// new DFA (and not the old DFA).
    fn reset_cache(&mut self) {
        self.cache.state_saver = StateSaver::none();
        self.clear_cache();
        // If a new DFA is used, it might have a different number of NFA
        // states, so we need to make sure our sparse sets have the appropriate
        // size.
        self.cache.sparses.resize(self.dfa.nfa.len());
        self.cache.clear_count = 0;
    }

    /// Clear the cache used by this lazy DFA.
    ///
    /// If clearing the cache exceeds the minimum number of required cache
    /// clearings, then this will return a cache error. In this case,
    /// callers should bubble this up as the cache can't be used until it is
    /// reset. Implementations of search should convert this error into a
    /// `MatchError::GaveUp`.
    ///
    /// If 'self.state_saver' is set to save a state, then this state is
    /// persisted through cache clearing. Otherwise, the cache is returned to
    /// its state after initialization with two exceptions: its clear count
    /// is incremented and some of its memory likely has additional capacity.
    /// That is, clearing a cache does _not_ release memory.
    ///
    /// Otherwise, any lazy state ID generated by the cache prior to resetting
    /// it is invalid after the reset.
    fn clear_cache(&mut self) {
        self.cache.trans.clear();
        self.cache.starts.clear();
        self.cache.states.clear();
        self.cache.states_to_id.clear();
        self.cache.memory_usage_state = 0;
        self.cache.clear_count += 1;
        trace!(
            "lazy DFA cache has been cleared (count: {})",
            self.cache.clear_count
        );
        self.init_cache();
        // If the state we want to save is one of the sentinel
        // (unknown/dead/quit) states, then 'init_cache' adds those back, and
        // their identifier values remains invariant. So there's no need to add
        // it again. (And indeed, doing so would be incorrect!)
        if let Some((old_id, state)) = self.cache.state_saver.take_to_save() {
            // If the state is one of the special sentinel states, then it is
            // automatically added by cache initialization and its ID always
            // remains the same. With that said, this should never occur since
            // the sentinel states are all loop states back to themselves. So
            // we should never be in a position where we're attempting to save
            // a sentinel state since we never compute transitions out of a
            // sentinel state.
            assert!(
                !self.as_ref().is_sentinel(old_id),
                "cannot save sentinel state"
            );
            let new_id = self
                .add_state(state, |id| {
                    if old_id.is_start() {
                        id.to_start()
                    } else {
                        id
                    }
                })
                // The unwrap here is OK because lazy DFA creation ensures that
                // we have room in the cache to add MIN_STATES states. Since
                // 'init_cache' above adds 3, this adds a 4th.
                .expect("adding one state after cache clear must work");
            self.cache.state_saver = StateSaver::Saved(new_id);
        }
    }

    /// Initialize this cache from emptiness to a place where it can be used
    /// for search.
    ///
    /// This is called both at cache creation time and after the cache has been
    /// cleared.
    ///
    /// Primarily, this adds the three sentinel states and allocates some
    /// initial memory.
    fn init_cache(&mut self) {
        let mut starts_len = Start::count();
        if self.dfa.starts_for_each_pattern {
            starts_len += Start::count() * self.dfa.pattern_count();
        }
        self.cache
            .starts
            .extend(iter::repeat(self.as_ref().unknown_id()).take(starts_len));
        // This is the set of NFA states that corresponds to each of our three
        // sentinel states: the empty set.
        let dead = State::dead();
        // This sets up some states that we use as sentinels that are present
        // in every DFA. While it would be technically possible to implement
        // this DFA without explicitly putting these states in the transition
        // table, this is convenient to do to make `next_state` correct for all
        // valid state IDs without needing explicit conditionals to special
        // case these sentinel states.
        //
        // All three of these states are "dead" states. That is, all of
        // them transition only to themselves. So once you enter one of
        // these states, it's impossible to leave them. Thus, any correct
        // search routine must explicitly check for these state types. (Sans
        // `unknown`, since that is only used internally to represent missing
        // states.)
        let unk_id =
            self.add_state(dead.clone(), |id| id.to_unknown()).unwrap();
        let dead_id = self.add_state(dead.clone(), |id| id.to_dead()).unwrap();
        let quit_id = self.add_state(dead.clone(), |id| id.to_quit()).unwrap();
        assert_eq!(unk_id, self.as_ref().unknown_id());
        assert_eq!(dead_id, self.as_ref().dead_id());
        assert_eq!(quit_id, self.as_ref().quit_id());
        // The idea here is that if you start in an unknown/dead/quit state and
        // try to transition on them, then you should end up where you started.
        self.set_all_transitions(unk_id, unk_id);
        self.set_all_transitions(dead_id, dead_id);
        self.set_all_transitions(quit_id, quit_id);
        // All of these states are technically equivalent from the FSM
        // perspective, so putting all three of them in the cache isn't
        // possible. (They are distinct merely because we use their
        // identifiers as sentinels to mean something, as indicated by the
        // names.) Moreover, we wouldn't want to do that. Unknown and quit
        // states are special in that they are artificial constructions
        // this implementation. But dead states are a natural part of
        // determinization. When you reach a point in the NFA where you cannot
        // go anywhere else, a dead state will naturally arise and we MUST
        // reuse the canonical dead state that we've created here. Why? Because
        // it is the state ID that tells the search routine whether a state is
        // dead or not, and thus, whether to stop the search. Having a bunch of
        // distinct dead states would be quite wasteful!
        self.cache.states_to_id.insert(dead, dead_id);
    }

    /// Save the state corresponding to the ID given such that the state
    /// persists through a cache clearing.
    ///
    /// While the state may persist, the ID may not. In order to discover the
    /// new state ID, one must call 'saved_state_id' after a cache clearing.
    fn save_state(&mut self, id: LazyStateID) {
        let state = self.as_ref().get_cached_state(id).clone();
        self.cache.state_saver = StateSaver::ToSave { id, state };
    }

    /// Returns the updated lazy state ID for a state that was persisted
    /// through a cache clearing.
    ///
    /// It is only correct to call this routine when both a state has been
    /// saved and the cache has just been cleared. Otherwise, this panics.
    fn saved_state_id(&mut self) -> LazyStateID {
        self.cache
            .state_saver
            .take_saved()
            .expect("state saver does not have saved state ID")
    }

    /// Set all transitions on the state 'from' to 'to'.
    fn set_all_transitions(&mut self, from: LazyStateID, to: LazyStateID) {
        for unit in self.dfa.classes.representatives() {
            self.set_transition(from, unit, to);
        }
    }

    /// Set the transition on 'from' for 'unit' to 'to'.
    ///
    /// This panics if either 'from' or 'to' is invalid.
    ///
    /// All unit values are OK.
    fn set_transition(
        &mut self,
        from: LazyStateID,
        unit: alphabet::Unit,
        to: LazyStateID,
    ) {
        assert!(self.as_ref().is_valid(from), "invalid 'from' id: {:?}", from);
        assert!(self.as_ref().is_valid(to), "invalid 'to' id: {:?}", to);
        let offset =
            from.as_usize_untagged() + self.dfa.classes.get_by_unit(unit);
        self.cache.trans[offset] = to;
    }

    /// Set the start ID for the given pattern ID (if given) and starting
    /// configuration to the ID given.
    ///
    /// This panics if 'id' is not valid or if a pattern ID is given and
    /// 'starts_for_each_pattern' is not enabled.
    fn set_start_state(
        &mut self,
        pattern_id: Option<PatternID>,
        start: Start,
        id: LazyStateID,
    ) {
        assert!(self.as_ref().is_valid(id));
        let start_index = start.as_usize();
        let index = match pattern_id {
            None => start_index,
            Some(pid) => {
                assert!(
                    self.dfa.starts_for_each_pattern,
                    "attempted to search for a specific pattern \
                     without enabling starts_for_each_pattern",
                );
                let pid = pid.as_usize();
                Start::count() + (Start::count() * pid) + start_index
            }
        };
        self.cache.starts[index] = id;
    }

    /// Returns a state builder from this DFA that might have existing
    /// capacity. This helps avoid allocs in cases where a state is built that
    /// turns out to already be cached.
    ///
    /// Callers must put the state builder back with 'put_state_builder',
    /// otherwise the allocation reuse won't work.
    fn get_state_builder(&mut self) -> StateBuilderEmpty {
        core::mem::replace(
            &mut self.cache.scratch_state_builder,
            StateBuilderEmpty::new(),
        )
    }

    /// Puts the given state builder back into this DFA for reuse.
    ///
    /// Note that building a 'State' from a builder always creates a new alloc,
    /// so callers should always put the builder back.
    fn put_state_builder(&mut self, builder: StateBuilderNFA) {
        let _ = core::mem::replace(
            &mut self.cache.scratch_state_builder,
            builder.clear(),
        );
    }
}

/// A type that groups methods that require the base NFA/DFA and read-only
/// access to the cache.
#[derive(Debug)]
struct LazyRef<'i, 'c> {
    dfa: &'i DFA,
    cache: &'c Cache,
}

impl<'i, 'c> LazyRef<'i, 'c> {
    /// Creates a new 'Lazy' wrapper for a DFA and its corresponding cache.
    fn new(dfa: &'i DFA, cache: &'c Cache) -> LazyRef<'i, 'c> {
        LazyRef { dfa, cache }
    }

    /// Return the ID of the start state for the given configuration.
    ///
    /// If the start state has not yet been computed, then this returns an
    /// unknown lazy state ID.
    fn get_cached_start_id(
        &self,
        pattern_id: Option<PatternID>,
        start: Start,
    ) -> LazyStateID {
        let start_index = start.as_usize();
        let index = match pattern_id {
            None => start_index,
            Some(pid) => {
                let pid = pid.as_usize();
                assert!(
                    pid < self.dfa.pattern_count(),
                    "invalid pattern ID: {:?}",
                    pid
                );
                Start::count() + (Start::count() * pid) + start_index
            }
        };
        self.cache.starts[index]
    }

    /// Return the cached NFA/DFA powerset state for the given ID.
    ///
    /// This panics if the given ID does not address a valid state.
    fn get_cached_state(&self, sid: LazyStateID) -> &State {
        let index = sid.as_usize_untagged() >> self.dfa.stride2();
        &self.cache.states[index]
    }

    /// Returns true if and only if the given ID corresponds to a "sentinel"
    /// state.
    ///
    /// A sentinel state is a state that signifies a special condition of
    /// search, and where every transition maps back to itself. See LazyStateID
    /// for more details. Note that start and match states are _not_ sentinels
    /// since they may otherwise be real states with non-trivial transitions.
    /// The purposes of sentinel states is purely to indicate something. Their
    /// transitions are not meant to be followed.
    fn is_sentinel(&self, id: LazyStateID) -> bool {
        id == self.unknown_id() || id == self.dead_id() || id == self.quit_id()
    }

    /// Returns the ID of the unknown state for this lazy DFA.
    fn unknown_id(&self) -> LazyStateID {
        // This unwrap is OK since 0 is always a valid state ID.
        LazyStateID::new(0).unwrap().to_unknown()
    }

    /// Returns the ID of the dead state for this lazy DFA.
    fn dead_id(&self) -> LazyStateID {
        // This unwrap is OK since the maximum value here is 1 * 512 = 512,
        // which is <= 2047 (the maximum state ID on 16-bit systems). Where
        // 512 is the worst case for our equivalence classes (every byte is a
        // distinct class).
        LazyStateID::new(1 << self.dfa.stride2()).unwrap().to_dead()
    }

    /// Returns the ID of the quit state for this lazy DFA.
    fn quit_id(&self) -> LazyStateID {
        // This unwrap is OK since the maximum value here is 2 * 512 = 1024,
        // which is <= 2047 (the maximum state ID on 16-bit systems). Where
        // 512 is the worst case for our equivalence classes (every byte is a
        // distinct class).
        LazyStateID::new(2 << self.dfa.stride2()).unwrap().to_quit()
    }

    /// Returns true if and only if the given ID is valid.
    ///
    /// An ID is valid if it is both a valid index into the transition table
    /// and is a multiple of the DFA's stride.
    fn is_valid(&self, id: LazyStateID) -> bool {
        let id = id.as_usize_untagged();
        id < self.cache.trans.len() && id % self.dfa.stride() == 0
    }

    /// Returns true if adding the state given would fit in this cache.
    fn state_fits_in_cache(&self, state: &State) -> bool {
        let needed = self.cache.memory_usage()
            + self.memory_usage_for_one_more_state(state.memory_usage());
        needed <= self.dfa.cache_capacity
    }

    /// Returns true if adding the state to be built by the given builder would
    /// fit in this cache.
    fn state_builder_fits_in_cache(&self, state: &StateBuilderNFA) -> bool {
        let needed = self.cache.memory_usage()
            + self.memory_usage_for_one_more_state(state.as_bytes().len());
        needed <= self.dfa.cache_capacity
    }

    /// Returns the additional memory usage, in bytes, required to add one more
    /// state to this cache. The given size should be the heap size, in bytes,
    /// that would be used by the new state being added.
    fn memory_usage_for_one_more_state(
        &self,
        state_heap_size: usize,
    ) -> usize {
        const ID_SIZE: usize = size_of::<LazyStateID>();
        const STATE_SIZE: usize = size_of::<State>();

        self.dfa.stride() * ID_SIZE // additional space needed in trans table
        + STATE_SIZE // space in cache.states
        + (STATE_SIZE + ID_SIZE) // space in cache.states_to_id
        + state_heap_size // heap memory used by state itself
    }
}

/// A simple type that encapsulates the saving of a state ID through a cache
/// clearing.
///
/// A state ID can be marked for saving with ToSave, while a state ID can be
/// saved itself with Saved.
#[derive(Clone, Debug)]
enum StateSaver {
    /// An empty state saver. In this case, no states (other than the special
    /// sentinel states) are preserved after clearing the cache.
    None,
    /// An ID of a state (and the state itself) that should be preserved after
    /// the lazy DFA's cache has been cleared. After clearing, the updated ID
    /// is stored in 'Saved' since it may have changed.
    ToSave { id: LazyStateID, state: State },
    /// An ID that of a state that has been persisted through a lazy DFA
    /// cache clearing. The ID recorded here corresonds to an ID that was
    /// once marked as ToSave. The IDs are likely not equivalent even though
    /// the states they point to are.
    Saved(LazyStateID),
}

impl StateSaver {
    /// Create an empty state saver.
    fn none() -> StateSaver {
        StateSaver::None
    }

    /// Replace this state saver with an empty saver, and if this saver is a
    /// request to save a state, return that request.
    fn take_to_save(&mut self) -> Option<(LazyStateID, State)> {
        match core::mem::replace(self, StateSaver::None) {
            StateSaver::None | StateSaver::Saved(_) => None,
            StateSaver::ToSave { id, state } => Some((id, state)),
        }
    }

    /// Replace this state saver with an empty saver, and if this saver is a
    /// saved state (or a request to save a state), return that state's ID.
    ///
    /// The idea here is that a request to save a state isn't necessarily
    /// honored because it might not be needed. e.g., Some higher level code
    /// might request a state to be saved on the off chance that the cache gets
    /// cleared when a new state is added at a lower level. But if that new
    /// state is never added, then the cache is never cleared and the state and
    /// its ID remain unchanged.
    fn take_saved(&mut self) -> Option<LazyStateID> {
        match core::mem::replace(self, StateSaver::None) {
            StateSaver::None => None,
            StateSaver::Saved(id) | StateSaver::ToSave { id, .. } => Some(id),
        }
    }
}

/// The configuration used for building a lazy DFA.
///
/// As a convenience, [`DFA::config`] is an alias for [`Config::new`]. The
/// advantage of the former is that it often lets you avoid importing the
/// `Config` type directly.
///
/// A lazy DFA configuration is a simple data object that is typically used
/// with [`Builder::configure`].
///
/// The default configuration guarantees that a search will _never_ return
/// a [`MatchError`] for any haystack or pattern. Setting a quit byte with
/// [`Config::quit`], enabling heuristic support for Unicode word boundaries
/// with [`Config::unicode_word_boundary`], or setting a minimum cache clear
/// count with [`Config::minimum_cache_clear_count`] can in turn cause a search
/// to return an error. See the corresponding configuration options for more
/// details on when those error conditions arise.
#[derive(Clone, Copy, Debug, Default)]
pub struct Config {
    // As with other configuration types in this crate, we put all our knobs
    // in options so that we can distinguish between "default" and "not set."
    // This makes it possible to easily combine multiple configurations
    // without default values overwriting explicitly specified values. See the
    // 'overwrite' method.
    //
    // For docs on the fields below, see the corresponding method setters.
    anchored: Option<bool>,
    match_kind: Option<MatchKind>,
    starts_for_each_pattern: Option<bool>,
    byte_classes: Option<bool>,
    unicode_word_boundary: Option<bool>,
    quitset: Option<ByteSet>,
    cache_capacity: Option<usize>,
    skip_cache_capacity_check: Option<bool>,
    minimum_cache_clear_count: Option<Option<usize>>,
}

impl Config {
    /// Return a new default lazy DFA builder configuration.
    pub fn new() -> Config {
        Config::default()
    }

    /// Set whether matching must be anchored at the beginning of the input.
    ///
    /// When enabled, a match must begin at the start of a search. When
    /// disabled (the default), the lazy DFA will act as if the pattern started
    /// with a `(?s:.)*?`, which enables a match to appear anywhere.
    ///
    /// Note that if you want to run both anchored and unanchored
    /// searches without building multiple automatons, you can enable the
    /// [`Config::starts_for_each_pattern`] configuration instead. This will
    /// permit unanchored any-pattern searches and pattern-specific anchored
    /// searches. See the documentation for that configuration for an example.
    ///
    /// By default this is disabled.
    ///
    /// **WARNING:** this is subtly different than using a `^` at the start of
    /// your regex. A `^` forces a regex to match exclusively at the start of
    /// input, regardless of where you begin your search. In contrast, enabling
    /// this option will allow your regex to match anywhere in your input,
    /// but the match must start at the beginning of a search. (Most of the
    /// higher level convenience search routines make "start of input" and
    /// "start of search" equivalent, but some routines allow treating these as
    /// orthogonal.)
    ///
    /// For example, consider the haystack `aba` and the following searches:
    ///
    /// 1. The regex `^a` is compiled with `anchored=false` and searches
    ///    `aba` starting at position `2`. Since `^` requires the match to
    ///    start at the beginning of the input and `2 > 0`, no match is found.
    /// 2. The regex `a` is compiled with `anchored=true` and searches `aba`
    ///    starting at position `2`. This reports a match at `[2, 3]` since
    ///    the match starts where the search started. Since there is no `^`,
    ///    there is no requirement for the match to start at the beginning of
    ///    the input.
    /// 3. The regex `a` is compiled with `anchored=true` and searches `aba`
    ///    starting at position `1`. Since `b` corresponds to position `1` and
    ///    since the regex is anchored, it finds no match.
    /// 4. The regex `a` is compiled with `anchored=false` and searches `aba`
    ///    startting at position `1`. Since the regex is neither anchored nor
    ///    starts with `^`, the regex is compiled with an implicit `(?s:.)*?`
    ///    prefix that permits it to match anywhere. Thus, it reports a match
    ///    at `[2, 3]`.
    ///
    /// # Example
    ///
    /// This demonstrates the differences between an anchored search and
    /// a pattern that begins with `^` (as described in the above warning
    /// message).
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
    ///
    /// let haystack = "aba".as_bytes();
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().anchored(false)) // default
    ///     .build(r"^a")?;
    /// let mut cache = dfa.create_cache();
    /// let got = dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, None, haystack, 2, 3,
    /// )?;
    /// // No match is found because 2 is not the beginning of the haystack,
    /// // which is what ^ requires.
    /// let expected = None;
    /// assert_eq!(expected, got);
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().anchored(true))
    ///     .build(r"a")?;
    /// let mut cache = dfa.create_cache();
    /// let got = dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, None, haystack, 2, 3,
    /// )?;
    /// // An anchored search can still match anywhere in the haystack, it just
    /// // must begin at the start of the search which is '2' in this case.
    /// let expected = Some(HalfMatch::must(0, 3));
    /// assert_eq!(expected, got);
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().anchored(true))
    ///     .build(r"a")?;
    /// let mut cache = dfa.create_cache();
    /// let got = dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, None, haystack, 1, 3,
    /// )?;
    /// // No match is found since we start searching at offset 1 which
    /// // corresponds to 'b'. Since there is no '(?s:.)*?' prefix, no match
    /// // is found.
    /// let expected = None;
    /// assert_eq!(expected, got);
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().anchored(false))
    ///     .build(r"a")?;
    /// let mut cache = dfa.create_cache();
    /// let got = dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, None, haystack, 1, 3,
    /// )?;
    /// // Since anchored=false, an implicit '(?s:.)*?' prefix was added to the
    /// // pattern. Even though the search starts at 'b', the 'match anything'
    /// // prefix allows the search to match 'a'.
    /// let expected = Some(HalfMatch::must(0, 3));
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn anchored(mut self, yes: bool) -> Config {
        self.anchored = Some(yes);
        self
    }

    /// Set the desired match semantics.
    ///
    /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
    /// match semantics of Perl-like regex engines. That is, when multiple
    /// patterns would match at the same leftmost position, the pattern that
    /// appears first in the concrete syntax is chosen.
    ///
    /// Currently, the only other kind of match semantics supported is
    /// [`MatchKind::All`]. This corresponds to classical DFA construction
    /// where all possible matches are added to the lazy DFA.
    ///
    /// Typically, `All` is used when one wants to execute an overlapping
    /// search and `LeftmostFirst` otherwise. In particular, it rarely makes
    /// sense to use `All` with the various "leftmost" find routines, since the
    /// leftmost routines depend on the `LeftmostFirst` automata construction
    /// strategy. Specifically, `LeftmostFirst` adds dead states to the
    /// lazy DFA as a way to terminate the search and report a match.
    /// `LeftmostFirst` also supports non-greedy matches using this strategy
    /// where as `All` does not.
    ///
    /// # Example: overlapping search
    ///
    /// This example shows the typical use of `MatchKind::All`, which is to
    /// report overlapping matches.
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::{dfa::DFA, OverlappingState},
    ///     HalfMatch, MatchKind,
    /// };
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().match_kind(MatchKind::All))
    ///     .build_many(&[r"\w+$", r"\S+$"])?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = "@foo".as_bytes();
    /// let mut state = OverlappingState::start();
    ///
    /// let expected = Some(HalfMatch::must(1, 4));
    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
    /// assert_eq!(expected, got);
    ///
    /// // The first pattern also matches at the same position, so re-running
    /// // the search will yield another match. Notice also that the first
    /// // pattern is returned after the second. This is because the second
    /// // pattern begins its match before the first, is therefore an earlier
    /// // match and is thus reported first.
    /// let expected = Some(HalfMatch::must(0, 4));
    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Example: reverse automaton to find start of match
    ///
    /// Another example for using `MatchKind::All` is for constructing a
    /// reverse automaton to find the start of a match. `All` semantics are
    /// used for this in order to find the longest possible match, which
    /// corresponds to the leftmost starting position.
    ///
    /// Note that if you need the starting position then
    /// [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) will handle this
    /// for you, so it's usually not necessary to do this yourself.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchKind};
    ///
    /// let haystack = "123foobar456".as_bytes();
    /// let pattern = r"[a-z]+";
    ///
    /// let dfa_fwd = DFA::new(pattern)?;
    /// let dfa_rev = DFA::builder()
    ///     .configure(DFA::config()
    ///         .anchored(true)
    ///         .match_kind(MatchKind::All)
    ///     )
    ///     .build(pattern)?;
    /// let mut cache_fwd = dfa_fwd.create_cache();
    /// let mut cache_rev = dfa_rev.create_cache();
    ///
    /// let expected_fwd = HalfMatch::must(0, 9);
    /// let expected_rev = HalfMatch::must(0, 3);
    /// let got_fwd = dfa_fwd.find_leftmost_fwd(
    ///     &mut cache_fwd, haystack,
    /// )?.unwrap();
    /// // Here we don't specify the pattern to search for since there's only
    /// // one pattern and we're doing a leftmost search. But if this were an
    /// // overlapping search, you'd need to specify the pattern that matched
    /// // in the forward direction. (Otherwise, you might wind up finding the
    /// // starting position of a match of some other pattern.) That in turn
    /// // requires building the reverse automaton with starts_for_each_pattern
    /// // enabled. Indeed, this is what Regex does internally.
    /// let got_rev = dfa_rev.find_leftmost_rev_at(
    ///     &mut cache_rev, None, haystack, 0, got_fwd.offset(),
    /// )?.unwrap();
    /// assert_eq!(expected_fwd, got_fwd);
    /// assert_eq!(expected_rev, got_rev);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn match_kind(mut self, kind: MatchKind) -> Config {
        self.match_kind = Some(kind);
        self
    }

    /// Whether to compile a separate start state for each pattern in the
    /// lazy DFA.
    ///
    /// When enabled, a separate **anchored** start state is added for each
    /// pattern in the lazy DFA. When this start state is used, then the DFA
    /// will only search for matches for the pattern specified, even if there
    /// are other patterns in the DFA.
    ///
    /// The main downside of this option is that it can potentially increase
    /// the size of the DFA and/or increase the time it takes to build the
    /// DFA at search time. However, since this is configuration for a lazy
    /// DFA, these states aren't actually built unless they're used. Enabling
    /// this isn't necessarily free, however, as it may result in higher cache
    /// usage.
    ///
    /// There are a few reasons one might want to enable this (it's disabled
    /// by default):
    ///
    /// 1. When looking for the start of an overlapping match (using a reverse
    /// DFA), doing it correctly requires starting the reverse search using the
    /// starting state of the pattern that matched in the forward direction.
    /// Indeed, when building a [`Regex`](crate::hybrid::regex::Regex), it
    /// will automatically enable this option when building the reverse DFA
    /// internally.
    /// 2. When you want to use a DFA with multiple patterns to both search
    /// for matches of any pattern or to search for anchored matches of one
    /// particular pattern while using the same DFA. (Otherwise, you would need
    /// to compile a new DFA for each pattern.)
    /// 3. Since the start states added for each pattern are anchored, if you
    /// compile an unanchored DFA with one pattern while also enabling this
    /// option, then you can use the same DFA to perform anchored or unanchored
    /// searches. The latter you get with the standard search APIs. The former
    /// you get from the various `_at` search methods that allow you specify a
    /// pattern ID to search for.
    ///
    /// By default this is disabled.
    ///
    /// # Example
    ///
    /// This example shows how to use this option to permit the same lazy DFA
    /// to run both anchored and unanchored searches for a single pattern.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, PatternID};
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().starts_for_each_pattern(true))
    ///     .build(r"foo[0-9]+")?;
    /// let mut cache = dfa.create_cache();
    /// let haystack = b"quux foo123";
    ///
    /// // Here's a normal unanchored search. Notice that we use 'None' for the
    /// // pattern ID. Since the DFA was built as an unanchored machine, it
    /// // uses its default unanchored starting state.
    /// let expected = HalfMatch::must(0, 11);
    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, None, haystack, 0, haystack.len(),
    /// )?);
    /// // But now if we explicitly specify the pattern to search ('0' being
    /// // the only pattern in the DFA), then it will use the starting state
    /// // for that specific pattern which is always anchored. Since the
    /// // pattern doesn't have a match at the beginning of the haystack, we
    /// // find nothing.
    /// assert_eq!(None, dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, Some(PatternID::must(0)), haystack, 0, haystack.len(),
    /// )?);
    /// // And finally, an anchored search is not the same as putting a '^' at
    /// // beginning of the pattern. An anchored search can only match at the
    /// // beginning of the *search*, which we can change:
    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd_at(
    ///     &mut cache, None, Some(PatternID::must(0)), haystack, 5, haystack.len(),
    /// )?);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn starts_for_each_pattern(mut self, yes: bool) -> Config {
        self.starts_for_each_pattern = Some(yes);
        self
    }

    /// Whether to attempt to shrink the size of the lazy DFA's alphabet or
    /// not.
    ///
    /// This option is enabled by default and should never be disabled unless
    /// one is debugging the lazy DFA.
    ///
    /// When enabled, the lazy DFA will use a map from all possible bytes
    /// to their corresponding equivalence class. Each equivalence class
    /// represents a set of bytes that does not discriminate between a match
    /// and a non-match in the DFA. For example, the pattern `[ab]+` has at
    /// least two equivalence classes: a set containing `a` and `b` and a set
    /// containing every byte except for `a` and `b`. `a` and `b` are in the
    /// same equivalence classes because they never discriminate between a
    /// match and a non-match.
    ///
    /// The advantage of this map is that the size of the transition table
    /// can be reduced drastically from `#states * 256 * sizeof(LazyStateID)`
    /// to `#states * k * sizeof(LazyStateID)` where `k` is the number of
    /// equivalence classes (rounded up to the nearest power of 2). As a
    /// result, total space usage can decrease substantially. Moreover, since a
    /// smaller alphabet is used, DFA compilation during search becomes faster
    /// as well since it will potentially be able to reuse a single transition
    /// for multiple bytes.
    ///
    /// **WARNING:** This is only useful for debugging lazy DFAs. Disabling
    /// this does not yield any speed advantages. Namely, even when this is
    /// disabled, a byte class map is still used while searching. The only
    /// difference is that every byte will be forced into its own distinct
    /// equivalence class. This is useful for debugging the actual generated
    /// transitions because it lets one see the transitions defined on actual
    /// bytes instead of the equivalence classes.
    pub fn byte_classes(mut self, yes: bool) -> Config {
        self.byte_classes = Some(yes);
        self
    }

    /// Heuristically enable Unicode word boundaries.
    ///
    /// When set, this will attempt to implement Unicode word boundaries as if
    /// they were ASCII word boundaries. This only works when the search input
    /// is ASCII only. If a non-ASCII byte is observed while searching, then a
    /// [`MatchError::Quit`](crate::MatchError::Quit) error is returned.
    ///
    /// A possible alternative to enabling this option is to simply use an
    /// ASCII word boundary, e.g., via `(?-u:\b)`. The main reason to use this
    /// option is if you absolutely need Unicode support. This option lets one
    /// use a fast search implementation (a DFA) for some potentially very
    /// common cases, while providing the option to fall back to some other
    /// regex engine to handle the general case when an error is returned.
    ///
    /// If the pattern provided has no Unicode word boundary in it, then this
    /// option has no effect. (That is, quitting on a non-ASCII byte only
    /// occurs when this option is enabled _and_ a Unicode word boundary is
    /// present in the pattern.)
    ///
    /// This is almost equivalent to setting all non-ASCII bytes to be quit
    /// bytes. The only difference is that this will cause non-ASCII bytes to
    /// be quit bytes _only_ when a Unicode word boundary is present in the
    /// pattern.
    ///
    /// When enabling this option, callers _must_ be prepared to handle
    /// a [`MatchError`](crate::MatchError) error during search.
    /// When using a [`Regex`](crate::hybrid::regex::Regex), this
    /// corresponds to using the `try_` suite of methods. Alternatively,
    /// if callers can guarantee that their input is ASCII only, then a
    /// [`MatchError::Quit`](crate::MatchError::Quit) error will never be
    /// returned while searching.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// This example shows how to heuristically enable Unicode word boundaries
    /// in a pattern. It also shows what happens when a search comes across a
    /// non-ASCII byte.
    ///
    /// ```
    /// use regex_automata::{
    ///     hybrid::dfa::DFA,
    ///     HalfMatch, MatchError, MatchKind,
    /// };
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().unicode_word_boundary(true))
    ///     .build(r"\b[0-9]+\b")?;
    /// let mut cache = dfa.create_cache();
    ///
    /// // The match occurs before the search ever observes the snowman
    /// // character, so no error occurs.
    /// let haystack = "foo 123 ☃".as_bytes();
    /// let expected = Some(HalfMatch::must(0, 7));
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
    /// assert_eq!(expected, got);
    ///
    /// // Notice that this search fails, even though the snowman character
    /// // occurs after the ending match offset. This is because search
    /// // routines read one byte past the end of the search to account for
    /// // look-around, and indeed, this is required here to determine whether
    /// // the trailing \b matches.
    /// let haystack = "foo 123☃".as_bytes();
    /// let expected = MatchError::Quit { byte: 0xE2, offset: 7 };
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack);
    /// assert_eq!(Err(expected), got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn unicode_word_boundary(mut self, yes: bool) -> Config {
        // We have a separate option for this instead of just setting the
        // appropriate quit bytes here because we don't want to set quit bytes
        // for every regex. We only want to set them when the regex contains a
        // Unicode word boundary.
        self.unicode_word_boundary = Some(yes);
        self
    }

    /// Add a "quit" byte to the lazy DFA.
    ///
    /// When a quit byte is seen during search time, then search will return
    /// a [`MatchError::Quit`](crate::MatchError::Quit) error indicating the
    /// offset at which the search stopped.
    ///
    /// A quit byte will always overrule any other aspects of a regex. For
    /// example, if the `x` byte is added as a quit byte and the regex `\w` is
    /// used, then observing `x` will cause the search to quit immediately
    /// despite the fact that `x` is in the `\w` class.
    ///
    /// This mechanism is primarily useful for heuristically enabling certain
    /// features like Unicode word boundaries in a DFA. Namely, if the input
    /// to search is ASCII, then a Unicode word boundary can be implemented
    /// via an ASCII word boundary with no change in semantics. Thus, a DFA
    /// can attempt to match a Unicode word boundary but give up as soon as it
    /// observes a non-ASCII byte. Indeed, if callers set all non-ASCII bytes
    /// to be quit bytes, then Unicode word boundaries will be permitted when
    /// building lazy DFAs. Of course, callers should enable
    /// [`Config::unicode_word_boundary`] if they want this behavior instead.
    /// (The advantage being that non-ASCII quit bytes will only be added if a
    /// Unicode word boundary is in the pattern.)
    ///
    /// When enabling this option, callers _must_ be prepared to handle a
    /// [`MatchError`](crate::MatchError) error during search. When using a
    /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the
    /// `try_` suite of methods.
    ///
    /// By default, there are no quit bytes set.
    ///
    /// # Panics
    ///
    /// This panics if heuristic Unicode word boundaries are enabled and any
    /// non-ASCII byte is removed from the set of quit bytes. Namely, enabling
    /// Unicode word boundaries requires setting every non-ASCII byte to a quit
    /// byte. So if the caller attempts to undo any of that, then this will
    /// panic.
    ///
    /// # Example
    ///
    /// This example shows how to cause a search to terminate if it sees a
    /// `\n` byte. This could be useful if, for example, you wanted to prevent
    /// a user supplied pattern from matching across a line boundary.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
    ///
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().quit(b'\n', true))
    ///     .build(r"foo\p{any}+bar")?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "foo\nbar".as_bytes();
    /// // Normally this would produce a match, since \p{any} contains '\n'.
    /// // But since we instructed the automaton to enter a quit state if a
    /// // '\n' is observed, this produces a match error instead.
    /// let expected = MatchError::Quit { byte: 0x0A, offset: 3 };
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack).unwrap_err();
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn quit(mut self, byte: u8, yes: bool) -> Config {
        if self.get_unicode_word_boundary() && !byte.is_ascii() && !yes {
            panic!(
                "cannot set non-ASCII byte to be non-quit when \
                 Unicode word boundaries are enabled"
            );
        }
        if self.quitset.is_none() {
            self.quitset = Some(ByteSet::empty());
        }
        if yes {
            self.quitset.as_mut().unwrap().add(byte);
        } else {
            self.quitset.as_mut().unwrap().remove(byte);
        }
        self
    }

    /// Sets the maximum amount of heap memory, in bytes, to allocate to the
    /// cache for use during a lazy DFA search. If the lazy DFA would otherwise
    /// use more heap memory, then, depending on other configuration knobs,
    /// either stop the search and return an error or clear the cache and
    /// continue the search.
    ///
    /// The default cache capacity is some "reasonable" number that will
    /// accommodate most regular expressions. You may find that if you need
    /// to build a large DFA then it may be necessary to increase the cache
    /// capacity.
    ///
    /// Note that while building a lazy DFA will do a "minimum" check to ensure
    /// the capacity is big enough, this is more or less about correctness.
    /// If the cache is bigger than the minimum but still too small, then the
    /// lazy DFA could wind up spending a lot of time clearing the cache and
    /// recomputing transitions, thus negating the performance benefits of a
    /// lazy DFA. Thus, setting the cache capacity is mostly an experimental
    /// endeavor. For most common patterns, however, the default should be
    /// sufficient.
    ///
    /// For more details on how the lazy DFA's cache is used, see the
    /// documentation for [`Cache`].
    ///
    /// # Example
    ///
    /// This example shows what happens if the configured cache capacity is
    /// too small. In such cases, one can override the cache capacity to make
    /// it bigger. Alternatively, one might want to use less memory by setting
    /// a smaller cache capacity.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
    ///
    /// let pattern = r"\p{L}{1000}";
    ///
    /// // The default cache capacity is likely too small to deal with regexes
    /// // that are very large. Large repetitions of large Unicode character
    /// // classes are a common way to make very large regexes.
    /// let _ = DFA::new(pattern).unwrap_err();
    /// // Bump up the capacity to something bigger.
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().cache_capacity(100 * (1<<20))) // 100 MB
    ///     .build(pattern)?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
    /// let expected = Some(HalfMatch::must(0, 2000));
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack.as_bytes())?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn cache_capacity(mut self, bytes: usize) -> Config {
        self.cache_capacity = Some(bytes);
        self
    }

    /// Configures construction of a lazy DFA to use the minimum cache capacity
    /// if the configured capacity is otherwise too small for the provided NFA.
    ///
    /// This is useful if you never want lazy DFA construction to fail because
    /// of a capacity that is too small.
    ///
    /// In general, this option is typically not a good idea. In particular,
    /// while a minimum cache capacity does permit the lazy DFA to function
    /// where it otherwise couldn't, it's plausible that it may not function
    /// well if it's constantly running out of room. In that case, the speed
    /// advantages of the lazy DFA may be negated.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// This example shows what happens if the configured cache capacity is
    /// too small. In such cases, one could override the capacity explicitly.
    /// An alternative, demonstrated here, let's us force construction to use
    /// the minimum cache capacity if the configured capacity is otherwise
    /// too small.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
    ///
    /// let pattern = r"\p{L}{1000}";
    ///
    /// // The default cache capacity is likely too small to deal with regexes
    /// // that are very large. Large repetitions of large Unicode character
    /// // classes are a common way to make very large regexes.
    /// let _ = DFA::new(pattern).unwrap_err();
    /// // Configure construction such it automatically selects the minimum
    /// // cache capacity if it would otherwise be too small.
    /// let dfa = DFA::builder()
    ///     .configure(DFA::config().skip_cache_capacity_check(true))
    ///     .build(pattern)?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
    /// let expected = Some(HalfMatch::must(0, 2000));
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack.as_bytes())?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn skip_cache_capacity_check(mut self, yes: bool) -> Config {
        self.skip_cache_capacity_check = Some(yes);
        self
    }

    /// Configure a lazy DFA search to quit after a certain number of cache
    /// clearings.
    ///
    /// When a minimum is set, then a lazy DFA search will "give up" after
    /// the minimum number of cache clearings has occurred. This is typically
    /// useful in scenarios where callers want to detect whether the lazy DFA
    /// search is "efficient" or not. If the cache is cleared too many times,
    /// this is a good indicator that it is not efficient, and thus, the caller
    /// may wish to use some other regex engine.
    ///
    /// Note that the number of times a cache is cleared is a property of
    /// the cache itself. Thus, if a cache is used in a subsequent search
    /// with a similarly configured lazy DFA, then it would cause the
    /// search to "give up" if the cache needed to be cleared. The cache
    /// clear count can only be reset to `0` via [`DFA::reset_cache`] (or
    /// [`Regex::reset_cache`](crate::hybrid::regex::Regex::reset_cache) if
    /// you're using the `Regex` API).
    ///
    /// By default, no minimum is configured. Thus, a lazy DFA search will
    /// never give up due to cache clearings.
    ///
    /// # Example
    ///
    /// This example uses a somewhat pathological configuration to demonstrate
    /// the _possible_ behavior of cache clearing and how it might result
    /// in a search that returns an error.
    ///
    /// It is important to note that the precise mechanics of how and when
    /// a cache gets cleared is an implementation detail. Thus, the asserts
    /// in the tests below with respect to the particular offsets at which a
    /// search gave up should be viewed strictly as a demonstration. They are
    /// not part of any API guarantees offered by this crate.
    ///
    /// ```
    /// use regex_automata::{hybrid::dfa::DFA, MatchError};
    ///
    /// // This is a carefully chosen regex. The idea is to pick one
    /// // that requires some decent number of states (hence the bounded
    /// // repetition). But we specifically choose to create a class with an
    /// // ASCII letter and a non-ASCII letter so that we can check that no new
    /// // states are created once the cache is full. Namely, if we fill up the
    /// // cache on a haystack of 'a's, then in order to match one 'β', a new
    /// // state will need to be created since a 'β' is encoded with multiple
    /// // bytes. Since there's no room for this state, the search should quit
    /// // at the very first position.
    /// let pattern = r"[aβ]{100}";
    /// let dfa = DFA::builder()
    ///     .configure(
    ///         // Configure it so that we have the minimum cache capacity
    ///         // possible. And that if any clearings occur, the search quits.
    ///         DFA::config()
    ///             .skip_cache_capacity_check(true)
    ///             .cache_capacity(0)
    ///             .minimum_cache_clear_count(Some(0)),
    ///     )
    ///     .build(pattern)?;
    /// let mut cache = dfa.create_cache();
    ///
    /// let haystack = "a".repeat(101).into_bytes();
    /// assert_eq!(
    ///     dfa.find_leftmost_fwd(&mut cache, &haystack),
    ///     Err(MatchError::GaveUp { offset: 25 }),
    /// );
    ///
    /// // Now that we know the cache is full, if we search a haystack that we
    /// // know will require creating at least one new state, it should not
    /// // be able to make any progress.
    /// let haystack = "β".repeat(101).into_bytes();
    /// assert_eq!(
    ///     dfa.find_leftmost_fwd(&mut cache, &haystack),
    ///     Err(MatchError::GaveUp { offset: 0 }),
    /// );
    ///
    /// // If we reset the cache, then we should be able to create more states
    /// // and make more progress with searching for betas.
    /// cache.reset(&dfa);
    /// let haystack = "β".repeat(101).into_bytes();
    /// assert_eq!(
    ///     dfa.find_earliest_fwd(&mut cache, &haystack),
    ///     Err(MatchError::GaveUp { offset: 26 }),
    /// );
    ///
    /// // ... switching back to ASCII still makes progress since it just needs
    /// // to set transitions on existing states!
    /// let haystack = "a".repeat(101).into_bytes();
    /// assert_eq!(
    ///     dfa.find_earliest_fwd(&mut cache, &haystack),
    ///     Err(MatchError::GaveUp { offset: 13 }),
    /// );
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn minimum_cache_clear_count(mut self, min: Option<usize>) -> Config {
        self.minimum_cache_clear_count = Some(min);
        self
    }

    /// Returns whether this configuration has enabled anchored searches.
    pub fn get_anchored(&self) -> bool {
        self.anchored.unwrap_or(false)
    }

    /// Returns the match semantics set in this configuration.
    pub fn get_match_kind(&self) -> MatchKind {
        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
    }

    /// Returns whether this configuration has enabled anchored starting states
    /// for every pattern in the DFA.
    pub fn get_starts_for_each_pattern(&self) -> bool {
        self.starts_for_each_pattern.unwrap_or(false)
    }

    /// Returns whether this configuration has enabled byte classes or not.
    /// This is typically a debugging oriented option, as disabling it confers
    /// no speed benefit.
    pub fn get_byte_classes(&self) -> bool {
        self.byte_classes.unwrap_or(true)
    }

    /// Returns whether this configuration has enabled heuristic Unicode word
    /// boundary support. When enabled, it is possible for a search to return
    /// an error.
    pub fn get_unicode_word_boundary(&self) -> bool {
        self.unicode_word_boundary.unwrap_or(false)
    }

    /// Returns whether this configuration will instruct the DFA to enter a
    /// quit state whenever the given byte is seen during a search. When at
    /// least one byte has this enabled, it is possible for a search to return
    /// an error.
    pub fn get_quit(&self, byte: u8) -> bool {
        self.quitset.map_or(false, |q| q.contains(byte))
    }

    /// Returns the cache capacity set on this configuration.
    pub fn get_cache_capacity(&self) -> usize {
        self.cache_capacity.unwrap_or(2 * (1 << 20))
    }

    /// Returns whether the cache capacity check should be skipped.
    pub fn get_skip_cache_capacity_check(&self) -> bool {
        self.skip_cache_capacity_check.unwrap_or(false)
    }

    /// Returns, if set, the minimum number of times the cache must be cleared
    /// before a lazy DFA search can give up. When no minimum is set, then a
    /// search will never quit and will always clear the cache whenever it
    /// fills up.
    pub fn get_minimum_cache_clear_count(&self) -> Option<usize> {
        self.minimum_cache_clear_count.unwrap_or(None)
    }

    /// Returns the minimum lazy DFA cache capacity required for the given NFA.
    ///
    /// The cache capacity required for a particular NFA may change without
    /// notice. Callers should not rely on it being stable.
    ///
    /// This is useful for informational purposes, but can also be useful for
    /// other reasons. For example, if one wants to check the minimum cache
    /// capacity themselves or if one wants to set the capacity based on the
    /// minimum.
    ///
    /// This may return an error if this configuration does not support all of
    /// the instructions used in the given NFA. For example, if the NFA has a
    /// Unicode word boundary but this configuration does not enable heuristic
    /// support for Unicode word boundaries.
    pub fn get_minimum_cache_capacity(
        &self,
        nfa: &thompson::NFA,
    ) -> Result<usize, BuildError> {
        let quitset = self.quit_set_from_nfa(nfa)?;
        let classes = self.byte_classes_from_nfa(nfa, &quitset);
        let starts = self.get_starts_for_each_pattern();
        Ok(minimum_cache_capacity(nfa, &classes, starts))
    }

    /// Returns the byte class map used during search from the given NFA.
    ///
    /// If byte classes are disabled on this configuration, then a map is
    /// returned that puts each byte in its own equivalent class.
    fn byte_classes_from_nfa(
        &self,
        nfa: &thompson::NFA,
        quit: &ByteSet,
    ) -> ByteClasses {
        if !self.get_byte_classes() {
            // The lazy DFA will always use the equivalence class map, but
            // enabling this option is useful for debugging. Namely, this will
            // cause all transitions to be defined over their actual bytes
            // instead of an opaque equivalence class identifier. The former is
            // much easier to grok as a human.
            ByteClasses::singletons()
        } else {
            let mut set = nfa.byte_class_set().clone();
            // It is important to distinguish any "quit" bytes from all other
            // bytes. Otherwise, a non-quit byte may end up in the same class
            // as a quit byte, and thus cause the DFA stop when it shouldn't.
            if !quit.is_empty() {
                set.add_set(&quit);
            }
            set.byte_classes()
        }
    }

    /// Return the quit set for this configuration and the given NFA.
    ///
    /// This may return an error if the NFA is incompatible with this
    /// configuration's quit set. For example, if the NFA has a Unicode word
    /// boundary and the quit set doesn't include non-ASCII bytes.
    fn quit_set_from_nfa(
        &self,
        nfa: &thompson::NFA,
    ) -> Result<ByteSet, BuildError> {
        let mut quit = self.quitset.unwrap_or(ByteSet::empty());
        if nfa.has_word_boundary_unicode() {
            if self.get_unicode_word_boundary() {
                for b in 0x80..=0xFF {
                    quit.add(b);
                }
            } else {
                // If heuristic support for Unicode word boundaries wasn't
                // enabled, then we can still check if our quit set is correct.
                // If the caller set their quit bytes in a way that causes the
                // DFA to quit on at least all non-ASCII bytes, then that's all
                // we need for heuristic support to work.
                if !quit.contains_range(0x80, 0xFF) {
                    return Err(
                        BuildError::unsupported_dfa_word_boundary_unicode(),
                    );
                }
            }
        }
        Ok(quit)
    }

    /// Overwrite the default configuration such that the options in `o` are
    /// always used. If an option in `o` is not set, then the corresponding
    /// option in `self` is used. If it's not set in `self` either, then it
    /// remains not set.
    fn overwrite(self, o: Config) -> Config {
        Config {
            anchored: o.anchored.or(self.anchored),
            match_kind: o.match_kind.or(self.match_kind),
            starts_for_each_pattern: o
                .starts_for_each_pattern
                .or(self.starts_for_each_pattern),
            byte_classes: o.byte_classes.or(self.byte_classes),
            unicode_word_boundary: o
                .unicode_word_boundary
                .or(self.unicode_word_boundary),
            quitset: o.quitset.or(self.quitset),
            cache_capacity: o.cache_capacity.or(self.cache_capacity),
            skip_cache_capacity_check: o
                .skip_cache_capacity_check
                .or(self.skip_cache_capacity_check),
            minimum_cache_clear_count: o
                .minimum_cache_clear_count
                .or(self.minimum_cache_clear_count),
        }
    }
}

/// A builder for constructing a lazy deterministic finite automaton from
/// regular expressions.
///
/// As a convenience, [`DFA::builder`] is an alias for [`Builder::new`]. The
/// advantage of the former is that it often lets you avoid importing the
/// `Builder` type directly.
///
/// This builder provides two main things:
///
/// 1. It provides a few different `build` routines for actually constructing
/// a DFA from different kinds of inputs. The most convenient is
/// [`Builder::build`], which builds a DFA directly from a pattern string. The
/// most flexible is [`Builder::build_from_nfa`], which builds a DFA straight
/// from an NFA.
/// 2. The builder permits configuring a number of things.
/// [`Builder::configure`] is used with [`Config`] to configure aspects of
/// the DFA and the construction process itself. [`Builder::syntax`] and
/// [`Builder::thompson`] permit configuring the regex parser and Thompson NFA
/// construction, respectively. The syntax and thompson configurations only
/// apply when building from a pattern string.
///
/// This builder always constructs a *single* lazy DFA. As such, this builder
/// can only be used to construct regexes that either detect the presence
/// of a match or find the end location of a match. A single DFA cannot
/// produce both the start and end of a match. For that information, use a
/// [`Regex`](crate::hybrid::regex::Regex), which can be similarly configured
/// using [`regex::Builder`](crate::hybrid::regex::Builder). The main reason
/// to use a DFA directly is if the end location of a match is enough for your
/// use case. Namely, a `Regex` will construct two lazy DFAs instead of one,
/// since a second reverse DFA is needed to find the start of a match.
///
/// # Example
///
/// This example shows how to build a lazy DFA that uses a tiny cache capacity
/// and completely disables Unicode. That is:
///
/// * Things such as `\w`, `.` and `\b` are no longer Unicode-aware. `\w`
///   and `\b` are ASCII-only while `.` matches any byte except for `\n`
///   (instead of any UTF-8 encoding of a Unicode scalar value except for
///   `\n`). Things that are Unicode only, such as `\pL`, are not allowed.
/// * The pattern itself is permitted to match invalid UTF-8. For example,
///   things like `[^a]` that match any byte except for `a` are permitted.
/// * Unanchored patterns can search through invalid UTF-8. That is, for
///   unanchored patterns, the implicit prefix is `(?s-u:.)*?` instead of
///   `(?s:.)*?`.
///
/// ```
/// use regex_automata::{
///     hybrid::dfa::DFA,
///     nfa::thompson,
///     HalfMatch, SyntaxConfig,
/// };
///
/// let dfa = DFA::builder()
///     .configure(DFA::config().cache_capacity(5_000))
///     .syntax(SyntaxConfig::new().unicode(false).utf8(false))
///     .thompson(thompson::Config::new().utf8(false))
///     .build(r"foo[^b]ar.*")?;
/// let mut cache = dfa.create_cache();
///
/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n";
/// let expected = Some(HalfMatch::must(0, 10));
/// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
/// assert_eq!(expected, got);
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Clone, Debug)]
pub struct Builder {
    config: Config,
    thompson: thompson::Builder,
}

impl Builder {
    /// Create a new lazy DFA builder with the default configuration.
    pub fn new() -> Builder {
        Builder {
            config: Config::default(),
            thompson: thompson::Builder::new(),
        }
    }

    /// Build a lazy DFA from the given pattern.
    ///
    /// If there was a problem parsing or compiling the pattern, then an error
    /// is returned.
    pub fn build(&self, pattern: &str) -> Result<DFA, BuildError> {
        self.build_many(&[pattern])
    }

    /// Build a lazy DFA from the given patterns.
    ///
    /// When matches are returned, the pattern ID corresponds to the index of
    /// the pattern in the slice given.
    pub fn build_many<P: AsRef<str>>(
        &self,
        patterns: &[P],
    ) -> Result<DFA, BuildError> {
        let nfa =
            self.thompson.build_many(patterns).map_err(BuildError::nfa)?;
        self.build_from_nfa(Arc::new(nfa))
    }

    /// Build a DFA from the given NFA.
    ///
    /// Note that this requires an `Arc<thompson::NFA>` instead of a
    /// `&thompson::NFA` because the lazy DFA builds itself from the NFA at
    /// search time. This means that the lazy DFA must hold on to its source
    /// NFA for the entirety of its lifetime. An `Arc` is used so that callers
    /// aren't forced to clone the NFA if it is needed elsewhere.
    ///
    /// # Example
    ///
    /// This example shows how to build a lazy DFA if you already have an NFA
    /// in hand.
    ///
    /// ```
    /// use std::sync::Arc;
    /// use regex_automata::{hybrid::dfa::DFA, nfa::thompson, HalfMatch};
    ///
    /// let haystack = "foo123bar".as_bytes();
    ///
    /// // This shows how to set non-default options for building an NFA.
    /// let nfa = thompson::Builder::new()
    ///     .configure(thompson::Config::new().shrink(false))
    ///     .build(r"[0-9]+")?;
    /// let dfa = DFA::builder().build_from_nfa(Arc::new(nfa))?;
    /// let mut cache = dfa.create_cache();
    /// let expected = Some(HalfMatch::must(0, 6));
    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn build_from_nfa(
        &self,
        nfa: Arc<thompson::NFA>,
    ) -> Result<DFA, BuildError> {
        let quitset = self.config.quit_set_from_nfa(&nfa)?;
        let classes = self.config.byte_classes_from_nfa(&nfa, &quitset);
        // Check that we can fit at least a few states into our cache,
        // otherwise it's pretty senseless to use the lazy DFA. This does have
        // a possible failure mode though. This assumes the maximum size of a
        // state in powerset space (so, the total number of NFA states), which
        // may never actually materialize, and could be quite a bit larger
        // than the actual biggest state. If this turns out to be a problem,
        // we could expose a knob that disables this check. But if so, we have
        // to be careful not to panic in other areas of the code (the cache
        // clearing and init code) that tend to assume some minimum useful
        // cache capacity.
        let min_cache = minimum_cache_capacity(
            &nfa,
            &classes,
            self.config.get_starts_for_each_pattern(),
        );
        let mut cache_capacity = self.config.get_cache_capacity();
        if cache_capacity < min_cache {
            // When the caller has asked us to skip the cache capacity check,
            // then we simply force the cache capacity to its minimum amount
            // and mush on.
            if self.config.get_skip_cache_capacity_check() {
                trace!(
                    "given capacity ({}) is too small, \
                     since skip_cache_capacity_check is enabled, \
                     setting cache capacity to minimum ({})",
                    cache_capacity,
                    min_cache,
                );
                cache_capacity = min_cache;
            } else {
                return Err(BuildError::insufficient_cache_capacity(
                    min_cache,
                    cache_capacity,
                ));
            }
        }
        // We also need to check that we can fit at least some small number
        // of states in our state ID space. This is unlikely to trigger in
        // >=32-bit systems, but 16-bit systems have a pretty small state ID
        // space since a number of bits are used up as sentinels.
        if let Err(err) = minimum_lazy_state_id(&nfa, &classes) {
            return Err(BuildError::insufficient_state_id_capacity(err));
        }
        let stride2 = classes.stride2();
        Ok(DFA {
            nfa,
            stride2,
            classes,
            quitset,
            anchored: self.config.get_anchored(),
            match_kind: self.config.get_match_kind(),
            starts_for_each_pattern: self.config.get_starts_for_each_pattern(),
            cache_capacity,
            minimum_cache_clear_count: self
                .config
                .get_minimum_cache_clear_count(),
        })
    }

    /// Apply the given lazy DFA configuration options to this builder.
    pub fn configure(&mut self, config: Config) -> &mut Builder {
        self.config = self.config.overwrite(config);
        self
    }

    /// Set the syntax configuration for this builder using
    /// [`SyntaxConfig`](crate::SyntaxConfig).
    ///
    /// This permits setting things like case insensitivity, Unicode and multi
    /// line mode.
    ///
    /// These settings only apply when constructing a lazy DFA directly from a
    /// pattern.
    pub fn syntax(
        &mut self,
        config: crate::util::syntax::SyntaxConfig,
    ) -> &mut Builder {
        self.thompson.syntax(config);
        self
    }

    /// Set the Thompson NFA configuration for this builder using
    /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
    ///
    /// This permits setting things like whether the DFA should match the regex
    /// in reverse or if additional time should be spent shrinking the size of
    /// the NFA.
    ///
    /// These settings only apply when constructing a DFA directly from a
    /// pattern.
    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
        self.thompson.configure(config);
        self
    }
}

/// Based on the minimum number of states required for a useful lazy DFA cache,
/// this returns the minimum lazy state ID that must be representable.
///
/// It's likely not plausible for this to impose constraints on 32-bit systems
/// (or higher), but on 16-bit systems, the lazy state ID space is quite
/// constrained and thus may be insufficient for bigger regexes.
fn minimum_lazy_state_id(
    nfa: &thompson::NFA,
    classes: &ByteClasses,
) -> Result<LazyStateID, LazyStateIDError> {
    let stride = 1 << classes.stride2();
    let min_state_index = MIN_STATES.checked_sub(1).unwrap();
    LazyStateID::new(min_state_index * stride)
}

/// Based on the minimum number of states required for a useful lazy DFA cache,
/// this returns a heuristic minimum number of bytes of heap space required.
///
/// This is a "heuristic" because the minimum it returns is likely bigger than
/// the true minimum. Namely, it assumes that each powerset NFA/DFA state uses
/// the maximum number of NFA states (all of them). This is likely bigger
/// than what is required in practice. Computing the true minimum effectively
/// requires determinization, which is probably too much work to do for a
/// simple check like this.
fn minimum_cache_capacity(
    nfa: &thompson::NFA,
    classes: &ByteClasses,
    starts_for_each_pattern: bool,
) -> usize {
    const ID_SIZE: usize = size_of::<LazyStateID>();
    let stride = 1 << classes.stride2();

    let sparses = 2 * nfa.len() * NFAStateID::SIZE;
    let trans = MIN_STATES * stride * ID_SIZE;

    let mut starts = Start::count() * ID_SIZE;
    if starts_for_each_pattern {
        starts += (Start::count() * nfa.pattern_len()) * ID_SIZE;
    }

    // Every `State` has three bytes for flags, 4 bytes (max) for the number
    // of patterns, followed by 32-bit encodings of patterns and then delta
    // varint encodings of NFA state IDs. We use the worst case (which isn't
    // technically possible) of 5 bytes for each NFA state ID.
    //
    // HOWEVER, three of the states needed by a lazy DFA are just the sentinel
    // unknown, dead and quit states. Those states have a known size and it is
    // small.
    assert!(MIN_STATES >= 3, "minimum number of states has to be at least 3");
    let dead_state_size = State::dead().memory_usage();
    let max_state_size = 3 + 4 + (nfa.pattern_len() * 4) + (nfa.len() * 5);
    let states = (3 * (size_of::<State>() + dead_state_size))
        + ((MIN_STATES - 3) * (size_of::<State>() + max_state_size));
    let states_to_sid = states + (MIN_STATES * ID_SIZE);
    let stack = nfa.len() * NFAStateID::SIZE;
    let scratch_state_builder = max_state_size;

    trans
        + starts
        + states
        + states_to_sid
        + sparses
        + stack
        + scratch_state_builder
}