Struct regex_automata::dfa::dense::DFA

source · [−]

pub struct DFA<T> { /* private fields */ }

Expand description

A dense table-based deterministic finite automaton (DFA).

All dense DFAs have one or more start states, zero or more match states and a transition table that maps the current state and the current byte of input to the next state. A DFA can use this information to implement fast searching. In particular, the use of a dense DFA generally makes the trade off that match speed is the most valuable characteristic, even if building the DFA may take significant time and space. (More concretely, building a DFA takes time and space that is exponential in the size of the pattern in the worst case.) As such, the processing of every byte of input is done with a small constant number of operations that does not vary with the pattern, its size or the size of the alphabet. If your needs don’t line up with this trade off, then a dense DFA may not be an adequate solution to your problem.

In contrast, a sparse::DFA makes the opposite trade off: it uses less space but will execute a variable number of instructions per byte at match time, which makes it slower for matching. (Note that space usage is still exponential in the size of the pattern in the worst case.)

A DFA can be built using the default configuration via the DFA::new constructor. Otherwise, one can configure various aspects via dense::Builder.

A single DFA fundamentally supports the following operations:

Detection of a match.
Location of the end of a match.
In the case of a DFA with multiple patterns, which pattern matched is reported as well.

A notable absence from the above list of capabilities is the location of the start of a match. In order to provide both the start and end of a match, two DFAs are required. This functionality is provided by a Regex.

Type parameters

A DFA has one type parameter, T, which is used to represent state IDs, pattern IDs and accelerators. T is typically a Vec<u32> or a &[u32].

The `Automaton` trait

This type implements the Automaton trait, which means it can be used for searching. For example:

use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch};

let dfa = DFA::new("foo[0-9]+")?;
let expected = HalfMatch::must(0, 8);
assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?);

Struct regex_automata::dfa::dense::DFA

Implementations

impl DFA<Vec<u32>>

pub fn new(pattern: &str) -> Result<DFA<Vec<u32>>, Error>

pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<DFA<Vec<u32>>, Error>

impl DFA<Vec<u32>>

pub fn always_match() -> Result<DFA<Vec<u32>>, Error>

pub fn never_match() -> Result<DFA<Vec<u32>>, Error>

impl<T: AsRef<[u32]>> DFA<T>

pub fn as_ref(&self) -> DFA<&[u32]>

pub fn to_owned(&self) -> DFA<Vec<u32>>

pub fn has_starts_for_each_pattern(&self) -> bool

pub fn alphabet_len(&self) -> usize

pub fn stride2(&self) -> usize

pub fn stride(&self) -> usize

pub fn memory_usage(&self) -> usize

impl<T: AsRef<[u32]>> DFA<T>

pub fn to_sparse(&self) -> Result<sparse::DFA<Vec<u8>>, Error>

pub fn to_bytes_little_endian(&self) -> (Vec<u8>, usize)

pub fn to_bytes_big_endian(&self) -> (Vec<u8>, usize)

pub fn to_bytes_native_endian(&self) -> (Vec<u8>, usize)

pub fn write_to_little_endian( &self, dst: &mut [u8]) -> Result<usize, SerializeError>

pub fn write_to_big_endian( &self, dst: &mut [u8]) -> Result<usize, SerializeError>

pub fn write_to_native_endian( &self, dst: &mut [u8]) -> Result<usize, SerializeError>

pub fn write_to_len(&self) -> usize

impl<'a> DFA<&'a [u32]>

pub fn from_bytes( slice: &'a [u8]) -> Result<(DFA<&'a [u32]>, usize), DeserializeError>

pub unsafe fn from_bytes_unchecked( slice: &'a [u8]) -> Result<(DFA<&'a [u32]>, usize), DeserializeError>

Trait Implementations

impl<T: AsRef<[u32]>> Automaton for DFA<T>

fn is_special_state(&self, id: StateID) -> bool

fn is_dead_state(&self, id: StateID) -> bool

fn is_quit_state(&self, id: StateID) -> bool

fn is_match_state(&self, id: StateID) -> bool

fn is_start_state(&self, id: StateID) -> bool

fn is_accel_state(&self, id: StateID) -> bool

fn next_state(&self, current: StateID, input: u8) -> StateID

unsafe fn next_state_unchecked(&self, current: StateID, input: u8) -> StateID

fn next_eoi_state(&self, current: StateID) -> StateID

fn pattern_count(&self) -> usize

fn match_count(&self, id: StateID) -> usize

fn match_pattern(&self, id: StateID, match_index: usize) -> PatternID

fn start_state_forward( &self, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> StateID

fn start_state_reverse( &self, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> StateID

fn accelerator(&self, id: StateID) -> &[u8]
Notable traits for &'_ [u8]: impl<'_> Read for &'_ [u8]; impl<'_> Write for &'_ mut [u8]

fn find_earliest_fwd( &self, bytes: &[u8]) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_rev( &self, bytes: &[u8]) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_fwd( &self, bytes: &[u8]) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_rev( &self, bytes: &[u8]) -> Result<Option<HalfMatch>, MatchError>

fn find_overlapping_fwd( &self, bytes: &[u8], state: &mut OverlappingState) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_fwd_at( &self, pre: Option<&mut Scanner<'_>>, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_rev_at( &self, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_fwd_at( &self, pre: Option<&mut Scanner<'_>>, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_rev_at( &self, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize) -> Result<Option<HalfMatch>, MatchError>

fn find_overlapping_fwd_at( &self, pre: Option<&mut Scanner<'_>>, pattern_id: Option<PatternID>, bytes: &[u8], start: usize, end: usize, state: &mut OverlappingState) -> Result<Option<HalfMatch>, MatchError>

impl<T: Clone> Clone for DFA<T>

fn clone(&self) -> DFA<T>

fn clone_from(&mut self, source: &Self)

impl<T: AsRef<[u32]>> Debug for DFA<T>

fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result

Auto Trait Implementations

impl<T> RefUnwindSafe for DFA<T> where T: RefUnwindSafe,

impl<T> Send for DFA<T> where T: Send,

impl<T> Sync for DFA<T> where T: Sync,

impl<T> Unpin for DFA<T> where T: Unpin,

impl<T> UnwindSafe for DFA<T> where T: UnwindSafe,

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

impl<T: AsRef<[u32]>> DFA<T>

pub fn as_ref(&self) -> DFA<&[u32]>

impl<T: AsRef<[u32]>> DFA<T>

pub fn to_bytes_little_endian(&self) -> (Vec<u8>, usize)

pub fn to_bytes_big_endian(&self) -> (Vec<u8>, usize)

pub fn to_bytes_native_endian(&self) -> (Vec<u8>, usize)

pub fn write_to_little_endian(
&self,
dst: &mut [u8]
) -> Result<usize, SerializeError>

pub fn write_to_big_endian(
&self,
dst: &mut [u8]
) -> Result<usize, SerializeError>

pub fn write_to_native_endian(
&self,
dst: &mut [u8]
) -> Result<usize, SerializeError>

impl<'a> DFA<&'a [u32]>

pub fn from_bytes(
slice: &'a [u8]
) -> Result<(DFA<&'a [u32]>, usize), DeserializeError>

pub unsafe fn from_bytes_unchecked(
slice: &'a [u8]
) -> Result<(DFA<&'a [u32]>, usize), DeserializeError>

impl<T: AsRef<[u32]>> Automaton for DFA<T>

fn start_state_forward(
&self,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> StateID

fn start_state_reverse(
&self,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> StateID

fn accelerator(&self, id: StateID) -> &[u8]
Notable traits for &'_ [u8]: impl<'_> Read for &'_ [u8]; impl<'_> Write for &'_ mut [u8]

fn find_earliest_fwd(
&self,
bytes: &[u8]
) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_rev(
&self,
bytes: &[u8]
) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_fwd(
&self,
bytes: &[u8]
) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_rev(
&self,
bytes: &[u8]
) -> Result<Option<HalfMatch>, MatchError>

fn find_overlapping_fwd(
&self,
bytes: &[u8],
state: &mut OverlappingState
) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_fwd_at(
&self,
pre: Option<&mut Scanner<'_>>,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> Result<Option<HalfMatch>, MatchError>

fn find_earliest_rev_at(
&self,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_fwd_at(
&self,
pre: Option<&mut Scanner<'_>>,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> Result<Option<HalfMatch>, MatchError>

fn find_leftmost_rev_at(
&self,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize
) -> Result<Option<HalfMatch>, MatchError>

fn find_overlapping_fwd_at(
&self,
pre: Option<&mut Scanner<'_>>,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
state: &mut OverlappingState
) -> Result<Option<HalfMatch>, MatchError>

impl<T: AsRef<[u32]>> Debug for DFA<T>

impl<T> RefUnwindSafe for DFA<T> where
T: RefUnwindSafe,

impl<T> Send for DFA<T> where
T: Send,

impl<T> Sync for DFA<T> where
T: Sync,

impl<T> Unpin for DFA<T> where
T: Unpin,

impl<T> UnwindSafe for DFA<T> where
T: UnwindSafe,

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T> ToOwned for T where
T: Clone,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,