#![allow(dead_code)]
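//! A generic, table-driven LR parser driver.
//!
//! The parse tables and semantic actions are supplied by an implementation of
//! [`ParserDefinition`]; this module only provides the state-machine loop,
//! error reporting, and (optional) error recovery that run on top of them.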
use std::fmt::Debug;
const DEBUG_ENABLED: bool = false;
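// Internal tracing macro: forwards to `eprintln!` only when the
// `DEBUG_ENABLED` constant above is flipped to `true`.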
macro_rules! debug {
($($args:expr),* $(,)?) => {
if DEBUG_ENABLED {
eprintln!($($args),*);
}
}
}
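/// Supplies everything grammar-specific that the driver needs: the start
/// state and location, token classification (`token_to_index`,
/// `token_to_symbol`), the action/goto tables, expected-token reporting for
/// error messages, and the `reduce`/`simulate_reduce` hooks that execute or
/// simulate reductions. The [`Parser`] below never inspects tokens directly;
/// it only consults these methods.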
pub trait ParserDefinition: Sized {
type Location: Clone + Debug;
type Error;
type Token: Clone + Debug;
type TokenIndex: Copy + Clone + Debug;
type Symbol;
type Success;
type StateIndex: Copy + Clone + Debug;
type Action: ParserAction<Self>;
type ReduceIndex: Copy + Clone + Debug;
type NonterminalIndex: Copy + Clone + Debug;
fn start_location(&self) -> Self::Location;
fn start_state(&self) -> Self::StateIndex;
fn token_to_index(&self, token: &Self::Token) -> Option<Self::TokenIndex>;
fn action(&self, state: Self::StateIndex, token_index: Self::TokenIndex) -> Self::Action;
fn error_action(&self, state: Self::StateIndex) -> Self::Action;
fn eof_action(&self, state: Self::StateIndex) -> Self::Action;
fn goto(&self, state: Self::StateIndex, nt: Self::NonterminalIndex) -> Self::StateIndex;
fn token_to_symbol(&self, token_index: Self::TokenIndex, token: Self::Token) -> Self::Symbol;
fn expected_tokens(&self, state: Self::StateIndex) -> Vec<String>;
fn uses_error_recovery(&self) -> bool;
fn error_recovery_symbol(&self, recovery: ErrorRecovery<Self>) -> Self::Symbol;
fn reduce(
&mut self,
reduce_index: Self::ReduceIndex,
start_location: Option<&Self::Location>,
states: &mut Vec<Self::StateIndex>,
symbols: &mut Vec<SymbolTriple<Self>>,
) -> Option<ParseResult<Self>>;
fn simulate_reduce(&self, action: Self::ReduceIndex) -> SimulatedReduce<Self>;
}
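/// A single entry in the action table: either a shift to a new state, a
/// reduction by some production, or an error.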
pub trait ParserAction<D: ParserDefinition>: Copy + Clone + Debug {
fn as_shift(self) -> Option<D::StateIndex>;
fn as_reduce(self) -> Option<D::ReduceIndex>;
fn is_shift(self) -> bool;
fn is_reduce(self) -> bool;
fn is_error(self) -> bool;
}
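/// The effect of a reduction, described without running its semantic action:
/// either pop a number of states and produce a nonterminal, or accept. Used by
/// the `accepts` check during error recovery.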
pub enum SimulatedReduce<D: ParserDefinition> {
Reduce {
states_to_pop: usize,
nonterminal_produced: D::NonterminalIndex,
},
Accept,
}
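// Shorthand aliases over a `ParserDefinition`'s associated types, used
// throughout the rest of this module.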
#[doc(hidden)]
pub type Location<D> = <D as ParserDefinition>::Location;
#[doc(hidden)]
pub type Token<D> = <D as ParserDefinition>::Token;
#[doc(hidden)]
pub type Error<D> = <D as ParserDefinition>::Error;
#[doc(hidden)]
pub type Success<D> = <D as ParserDefinition>::Success;
#[doc(hidden)]
pub type Symbol<D> = <D as ParserDefinition>::Symbol;
pub type ParseError<D> = crate::ParseError<Location<D>, Token<D>, Error<D>>;
pub type ParseResult<D> = Result<Success<D>, ParseError<D>>;
pub type TokenTriple<D> = (Location<D>, Token<D>, Location<D>);
pub type SymbolTriple<D> = (Location<D>, Symbol<D>, Location<D>);
pub type ErrorRecovery<D> = crate::ErrorRecovery<Location<D>, Token<D>, Error<D>>;
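/// The generic parser driver: owns the definition, the token stream, the two
/// LR stacks (state indices and symbols with their source locations), and the
/// location of the last token seen (for EOF error reporting).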
pub struct Parser<D, I>
where
D: ParserDefinition,
I: Iterator<Item = Result<TokenTriple<D>, ParseError<D>>>,
{
definition: D,
tokens: I,
states: Vec<D::StateIndex>,
symbols: Vec<SymbolTriple<D>>,
last_location: D::Location,
}
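/// What `next_token` found: a recognized token plus its terminal index, the
/// end of input, or an error/result that ends the parse.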
enum NextToken<D: ParserDefinition> {
FoundToken(TokenTriple<D>, D::TokenIndex),
EOF,
Done(ParseResult<D>),
}
impl<D, I> Parser<D, I>
where
D: ParserDefinition,
I: Iterator<Item = Result<TokenTriple<D>, ParseError<D>>>,
{
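/// Runs the parser to completion over `tokens`, returning either the
/// definition's `Success` value or a `ParseError`.
///
/// Illustrative sketch only: `MyDefinition` and `my_lexer` below are
/// hypothetical stand-ins for a concrete `ParserDefinition` and a lexer that
/// yields `(Location, Token, Location)` triples.
///
/// ```ignore
/// let definition = MyDefinition::new();
/// // Iterator<Item = Result<TokenTriple<_>, ParseError<_>>>
/// let tokens = my_lexer(input);
/// match Parser::drive(definition, tokens) {
///     Ok(value) => println!("parsed: {:?}", value),
///     Err(err) => eprintln!("parse error: {:?}", err),
/// }
/// ```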
pub fn drive(definition: D, tokens: I) -> ParseResult<D> {
let last_location = definition.start_location();
let start_state = definition.start_state();
Parser {
definition,
tokens,
states: vec![start_state],
symbols: vec![],
last_location,
}
.parse()
}
fn top_state(&self) -> D::StateIndex {
*self.states.last().unwrap()
}
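// Main shift/reduce loop: fetch a lookahead token (outer `'shift` loop), then
// repeatedly consult the action table for the current top state (inner
// `'inner` loop) until the token is shifted, the parse finishes, or an error
// forces recovery.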
fn parse(&mut self) -> ParseResult<D> {
'shift: loop {
let (mut lookahead, mut token_index) = match self.next_token() {
NextToken::FoundToken(l, i) => (l, i),
NextToken::EOF => return self.parse_eof(),
NextToken::Done(e) => return e,
};
debug!("+ SHIFT: {:?}", lookahead);
debug!("\\ token_index: {:?}", token_index);
'inner: loop {
let top_state = self.top_state();
let action = self.definition.action(top_state, token_index);
debug!("\\ action: {:?}", action);
if let Some(target_state) = action.as_shift() {
debug!("\\ shift to: {:?}", target_state);
let symbol = self.definition.token_to_symbol(token_index, lookahead.1);
self.states.push(target_state);
self.symbols.push((lookahead.0, symbol, lookahead.2));
continue 'shift;
} else if let Some(reduce_index) = action.as_reduce() {
debug!("\\ reduce to: {:?}", reduce_index);
if let Some(r) = self.reduce(reduce_index, Some(&lookahead.0)) {
return match r {
Ok(_) => Err(crate::ParseError::ExtraToken { token: lookahead }),
Err(e) => Err(e),
};
}
} else {
debug!("\\ error -- initiating error recovery!");
match self.error_recovery(Some(lookahead), Some(token_index)) {
NextToken::FoundToken(l, i) => {
lookahead = l;
token_index = i;
continue 'inner;
}
NextToken::EOF => return self.parse_eof(),
NextToken::Done(e) => return e,
}
}
}
}
}
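// Input exhausted: repeatedly apply the EOF action, reducing until the
// definition signals acceptance; any non-reduce action triggers error
// recovery.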
fn parse_eof(&mut self) -> ParseResult<D> {
loop {
let top_state = self.top_state();
let action = self.definition.eof_action(top_state);
if let Some(reduce_index) = action.as_reduce() {
if let Some(result) =
self.definition
.reduce(reduce_index, None, &mut self.states, &mut self.symbols)
{
return result;
}
} else {
match self.error_recovery(None, None) {
NextToken::FoundToken(..) => panic!("cannot find token at EOF"),
NextToken::Done(e) => return e,
NextToken::EOF => continue,
}
}
}
}
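// Error recovery: perform any pending reductions, then scan down the state
// stack for a state whose error action shifts into a state that can make
// progress on the lookahead. If no such state exists, drop the lookahead
// token and retry with the next one. The skipped states, symbols, and dropped
// tokens are replaced by a single error-recovery symbol built by the
// definition, spanning the recovered region.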
fn error_recovery(
&mut self,
mut opt_lookahead: Option<TokenTriple<D>>,
mut opt_token_index: Option<D::TokenIndex>,
) -> NextToken<D> {
debug!(
"\\+ error_recovery(opt_lookahead={:?}, opt_token_index={:?})",
opt_lookahead, opt_token_index,
);
if !self.definition.uses_error_recovery() {
debug!("\\ error -- no error recovery!");
return NextToken::Done(Err(
self.unrecognized_token_error(opt_lookahead, self.top_state())
));
}
let error = self.unrecognized_token_error(opt_lookahead.clone(), self.top_state());
let mut dropped_tokens = vec![];
loop {
let state = self.top_state();
let action = self.definition.error_action(state);
if let Some(reduce_index) = action.as_reduce() {
debug!("\\\\ reducing: {:?}", reduce_index);
if let Some(result) =
self.reduce(reduce_index, opt_lookahead.as_ref().map(|l| &l.0))
{
debug!("\\\\ reduced to a result");
return NextToken::Done(result);
}
} else {
break;
}
}
let states_len = self.states.len();
let top = 'find_state: loop {
debug!(
"\\\\+ error_recovery: find_state loop, {:?} states = {:?}",
self.states.len(),
self.states,
);
for top in (0..states_len).rev() {
let state = self.states[top];
debug!("\\\\\\ top = {:?}, state = {:?}", top, state);
let action = self.definition.error_action(state);
debug!("\\\\\\ action = {:?}", action);
if let Some(error_state) = action.as_shift() {
if self.accepts(error_state, &self.states[..=top], opt_token_index) {
debug!("\\\\\\ accepted!");
break 'find_state top;
}
} else {
continue;
}
}
match opt_lookahead.take() {
None => {
debug!("\\\\\\ no more lookahead, report error");
return NextToken::Done(Err(error));
}
Some(lookahead) => {
debug!("\\\\\\ dropping lookahead token");
dropped_tokens.push(lookahead);
match self.next_token() {
NextToken::FoundToken(next_lookahead, next_token_index) => {
opt_lookahead = Some(next_lookahead);
opt_token_index = Some(next_token_index);
}
NextToken::EOF => {
debug!("\\\\\\ reached EOF");
opt_lookahead = None;
opt_token_index = None;
}
NextToken::Done(e) => {
debug!("\\\\\\ no more tokens");
return NextToken::Done(e);
}
}
}
}
};
let start = if let Some(popped_sym) = self.symbols.get(top) {
popped_sym.0.clone()
} else if let Some(dropped_token) = dropped_tokens.first() {
dropped_token.0.clone()
} else if top > 0 {
self.symbols[top - 1].2.clone()
} else {
self.definition.start_location()
};
let end = if let Some(dropped_token) = dropped_tokens.last() {
dropped_token.2.clone()
} else if states_len - 1 > top {
self.symbols.last().unwrap().2.clone()
} else if let Some(lookahead) = opt_lookahead.as_ref() {
lookahead.0.clone()
} else {
start.clone()
};
self.states.truncate(top + 1);
self.symbols.truncate(top);
let recover_state = self.states[top];
let error_action = self.definition.error_action(recover_state);
let error_state = error_action.as_shift().unwrap();
self.states.push(error_state);
let recovery = self.definition.error_recovery_symbol(crate::ErrorRecovery {
error,
dropped_tokens,
});
self.symbols.push((start, recovery, end));
match (opt_lookahead, opt_token_index) {
(Some(l), Some(i)) => NextToken::FoundToken(l, i),
(None, None) => NextToken::EOF,
(l, i) => panic!("lookahead and token_index mismatched: {:?}, {:?}", l, i),
}
}
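// Simulates parsing from `states` plus `error_state` against the given
// lookahead (or EOF), using `simulate_reduce` instead of real reductions, to
// check whether recovering at this point would let the parser make progress.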
fn accepts(
&self,
error_state: D::StateIndex,
states: &[D::StateIndex],
opt_token_index: Option<D::TokenIndex>,
) -> bool {
debug!(
"\\\\\\+ accepts(error_state={:?}, states={:?}, opt_token_index={:?})",
error_state, states, opt_token_index,
);
let mut states = states.to_vec();
states.push(error_state);
loop {
let mut states_len = states.len();
let top = states[states_len - 1];
let action = match opt_token_index {
None => self.definition.eof_action(top),
Some(i) => self.definition.action(top, i),
};
if action.is_error() {
debug!("\\\\\\\\ accepts: error");
return false;
}
if let Some(reduce_action) = action.as_reduce() {
match self.definition.simulate_reduce(reduce_action) {
SimulatedReduce::Reduce {
states_to_pop,
nonterminal_produced,
} => {
states_len -= states_to_pop;
states.truncate(states_len);
let top = states[states_len - 1];
let next_state = self.definition.goto(top, nonterminal_produced);
states.push(next_state);
}
SimulatedReduce::Accept => {
debug!("\\\\\\\\ accepts: reduce accepts!");
return true;
}
}
} else {
debug!("\\\\\\\\ accepts: shift accepts!");
assert!(action.is_shift());
return true;
}
}
}
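// Delegates a reduction to the definition, handing it the live stacks so it
// can pop the production's right-hand side and push the resulting state and
// symbol.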
fn reduce(
&mut self,
action: D::ReduceIndex,
lookahead_start: Option<&D::Location>,
) -> Option<ParseResult<D>> {
self.definition
.reduce(action, lookahead_start, &mut self.states, &mut self.symbols)
}
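// Builds the appropriate "unrecognized token" / "unrecognized EOF" error,
// including the set of tokens expected in `top_state`.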
fn unrecognized_token_error(
&self,
token: Option<TokenTriple<D>>,
top_state: D::StateIndex,
) -> ParseError<D> {
match token {
Some(token) => crate::ParseError::UnrecognizedToken {
token,
expected: self.definition.expected_tokens(top_state),
},
None => crate::ParseError::UnrecognizedEOF {
location: self.last_location.clone(),
expected: self.definition.expected_tokens(top_state),
},
}
}
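// Pulls the next token triple from the lexer, remembering its end location
// for EOF error reporting, and maps it to a terminal index; tokens the
// grammar does not know become `UnrecognizedToken` errors.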
fn next_token(&mut self) -> NextToken<D> {
let token = match self.tokens.next() {
Some(Ok(v)) => v,
Some(Err(e)) => return NextToken::Done(Err(e)),
None => return NextToken::EOF,
};
self.last_location = token.2.clone();
let token_index = match self.definition.token_to_index(&token.1) {
Some(i) => i,
None => {
return NextToken::Done(Err(
self.unrecognized_token_error(Some(token), self.top_state())
))
}
};
NextToken::FoundToken(token, token_index)
}
}
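// Actions packed into a signed integer: a positive value `v` encodes
// "shift to state v - 1", a negative value `v` encodes "reduce by rule
// -(v + 1)", and 0 encodes "error". For example, 3 means shift to state 2,
// -1 means reduce by rule 0, and -3 means reduce by rule 2.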
macro_rules! integral_indices {
($t:ty) => {
impl<D: ParserDefinition<StateIndex = $t, ReduceIndex = $t>> ParserAction<D> for $t {
fn as_shift(self) -> Option<D::StateIndex> {
if self > 0 {
Some(self - 1)
} else {
None
}
}
fn as_reduce(self) -> Option<D::ReduceIndex> {
if self < 0 {
Some(-(self + 1))
} else {
None
}
}
fn is_shift(self) -> bool {
self > 0
}
fn is_reduce(self) -> bool {
self < 0
}
fn is_error(self) -> bool {
self == 0
}
}
};
}
integral_indices!(i32);
integral_indices!(i16);
integral_indices!(i8);