lalrpop_util/
lib.rs

//! Runtime utilities for lalrpop generated parsers.
//!
//! For information about using lalrpop, see the [lalrpop
//! documentation](https://docs.rs/lalrpop/latest/lalrpop).
//!
//! This crate is designed to be used in conjunction with the lalrpop crate to provide runtime
//! support when interacting with lalrpop generated parsers. Generally speaking, if you are using
//! lalrpop, you want lalrpop specified as a build-dependency and lalrpop-util as a dependency.
//! Version numbers for the two crates are kept in sync, and we recommend using the same version
//! number for each crate.
#![cfg_attr(not(feature = "std"), no_std)]
#![warn(rust_2018_idioms)]
#![warn(missing_docs)]

extern crate alloc;

use alloc::{string::String, vec::Vec};
#[rustversion::since(1.81)]
#[cfg(not(feature = "std"))]
use core::error::Error;
use core::fmt;
#[cfg(feature = "std")]
use std::error::Error;

#[cfg(feature = "lexer")]
pub mod lexer;
pub mod state_machine;

/// Error type for errors returned by lalrpop parsers.
///
/// For the built-in lexer, the generic parameters default to
/// `ParseError<usize, lexer::Token<'_>, &'static str>`.
///
/// * `L`: the location of the token where the error occurred
/// * `T`: the token encountered where the error occurred
/// * `E`: a custom user-defined error
///
/// `L` and `T` are fixed types as listed above for the built-in lexer, and can be defined to
/// whatever type you would like if implementing a custom lexer.
///
/// The type of `E` can be overridden by specifying `type Error` in your grammar like so:
///
/// ```ignore
/// extern {
///     type Error = MyCustomErrorType;
/// }
/// ```
///
/// A value of type `E` converts into [`ParseError::User`] through the `From<E>` implementation,
/// and the `map_location`, `map_token` and `map_error` methods convert between different
/// instantiations of the three type parameters.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseError<L, T, E> {
    /// Generated by the internal lexer when it encounters a token (or EOF) it
    /// did not expect.
    InvalidToken {
        /// The end of the invalid token.
        location: L,
    },

    /// Generated by the parser when it encounters an EOF it did not expect.
    UnrecognizedEof {
        /// The end of the final token.
        location: L,

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters a token it did not expect.
    ///
    /// This means that the next token in the stream was not valid at this
    /// point in the grammar.
    UnrecognizedToken {
        /// The unexpected token of type `T`, with a span given by the two `L` values
        /// (the first `L` is the start, the second the end).
        token: (L, T, L),

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters additional, unexpected tokens.
    ExtraToken {
        /// The extra token of type `T`, with a span given by the two `L` values
        /// (the first `L` is the start, the second the end).
        token: (L, T, L),
    },

    /// Custom error type.
    User {
        /// Custom user error.
        error: E,
    },
}

impl<L, T, E> ParseError<L, T, E> {
    fn map_intern<LL, TT, EE>(
        self,
        mut loc_op: impl FnMut(L) -> LL,
        tok_op: impl FnOnce(T) -> TT,
        err_op: impl FnOnce(E) -> EE,
    ) -> ParseError<LL, TT, EE> {
        // The signature of token is (L, T, L), so we need to call loc_op on both the "start" and
        // "end" values for a token.
        let maptok = |(s, t, e): (L, T, L)| (loc_op(s), tok_op(t), loc_op(e));
        match self {
            ParseError::InvalidToken { location } => ParseError::InvalidToken {
                location: loc_op(location),
            },
            ParseError::UnrecognizedEof { location, expected } => ParseError::UnrecognizedEof {
                location: loc_op(location),
                expected,
            },
            ParseError::UnrecognizedToken { token, expected } => ParseError::UnrecognizedToken {
                token: maptok(token),
                expected,
            },
            ParseError::ExtraToken { token } => ParseError::ExtraToken {
                token: maptok(token),
            },
            ParseError::User { error } => ParseError::User {
                error: err_op(error),
            },
        }
    }

    /// Transform a `ParseError` by applying a function to the location field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to apply an offset to a location.
    ///
    /// (Note that unlike `map_token()` and `map_error()`, the closure argument
    /// for this function is `FnMut`.  This is so that it can be called
    /// multiple times to apply to the starting and ending location of tokens.)
    pub fn map_location<LL>(self, op: impl FnMut(L) -> LL) -> ParseError<LL, T, E> {
        self.map_intern(op, |x| x, |x| x)
    }

    /// Transform a `ParseError` by applying a function to the token field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to transform a token in some way (eg escaping).
    pub fn map_token<TT>(self, op: impl FnOnce(T) -> TT) -> ParseError<L, TT, E> {
        self.map_intern(|x| x, op, |x| x)
    }

    /// Transform a `ParseError` by applying a function to the error field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to transform to a different error type in place.
    pub fn map_error<EE>(self, op: impl FnOnce(E) -> EE) -> ParseError<L, T, EE> {
        self.map_intern(|x| x, |x| x, op)
    }
}

/// Write the "Expected one of ..." line for a list of expected token names.
///
/// Nothing is written when `expected` is empty. Otherwise a newline is
/// emitted first, followed by the names: the first is introduced with
/// `Expected one of`, intermediate names are separated by `,` and the final
/// name (when there is more than one) is joined with `or`.
fn fmt_expected(f: &mut fmt::Formatter<'_>, expected: &[String]) -> fmt::Result {
    let (first, rest) = match expected.split_first() {
        Some(parts) => parts,
        None => return Ok(()),
    };

    writeln!(f)?;
    write!(f, "Expected one of {}", first)?;

    // Everything after the first name: comma-separated, except the very last one.
    if let Some((last, middle)) = rest.split_last() {
        for name in middle {
            write!(f, ", {}", name)?;
        }
        write!(f, " or {}", last)?;
    }
    Ok(())
}

impl<L, T, E> fmt::Display for ParseError<L, T, E>
where
    L: fmt::Display,
    T: fmt::Display,
    E: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use self::ParseError::*;
        match *self {
            User { ref error } => write!(f, "{}", error),
            InvalidToken { ref location } => write!(f, "Invalid token at {}", location),
            UnrecognizedEof {
                ref location,
                ref expected,
            } => {
                write!(f, "Unrecognized EOF found at {}", location)?;
                fmt_expected(f, expected)
            }
            UnrecognizedToken {
                token: (ref start, ref token, ref end),
                ref expected,
            } => {
                write!(
                    f,
                    "Unrecognized token `{}` found at {}:{}",
                    token, start, end
                )?;
                fmt_expected(f, expected)
            }
            ExtraToken {
                token: (ref start, ref token, ref end),
            } => write!(f, "Extra token {} found at {}:{}", token, start, end),
        }
    }
}

impl<L, T, E> From<E> for ParseError<L, T, E> {
    fn from(error: E) -> Self {
        ParseError::User { error }
    }
}

#[cfg_attr(not(feature = "std"), rustversion::since(1.81))]
impl<L, T, E> Error for ParseError<L, T, E>
where
    L: fmt::Debug + fmt::Display,
    T: fmt::Debug + fmt::Display,
    E: fmt::Debug + fmt::Display,
{
    fn description(&self) -> &str {
        "parse error"
    }
}

/// The error type of a recoverable parse error.
///
/// For a full description of error recovery, see [the lalrpop
/// book](https://lalrpop.github.io/lalrpop/tutorial/008_error_recovery.html).
///
/// This is the type of the variable resulting from binding a `!` symbol in your lalrpop grammar.
///
/// The type parameters `L`, `T` and `E` are the same location, token and user error types
/// that parameterize [`ParseError`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ErrorRecovery<L, T, E> {
    /// The parse error that was recovered from.
    pub error: ParseError<L, T, E>,
    /// The tokens discarded prior to resuming parsing, each stored as an `(L, T, L)` triple
    /// like the `token` fields of [`ParseError`].
    pub dropped_tokens: Vec<(L, T, L)>,
}

/// Define a module using the generated parser from a `.lalrpop` file.
///
/// You have to specify the name of the module and the path of the file
/// generated by LALRPOP. The path is appended to the `OUT_DIR` environment
/// variable, so it must start with a `/`. If the input is in the root
/// directory, you can omit the path: it then defaults to `/<module name>.rs`.
///
/// Any attributes and visibility written before the module name are applied
/// to the generated module.
///
/// # Example
/// ```ignore
/// // load parser in src/parser.lalrpop
/// lalrpop_mod!(parser);
///
/// // load parser in src/lex/parser.lalrpop
/// lalrpop_mod!(parser, "/lex/parser.rs");
///
/// // define a public module
/// lalrpop_mod!(pub parser);
///
/// // specify attributes for the generated module
/// lalrpop_mod!(#[allow(clippy::ptr_arg)]#[rustfmt::skip] parser);
/// ```
#[macro_export]
macro_rules! lalrpop_mod {
    // Short form: derive the generated file name from the module name.
    ($(#[$attr:meta])* $vis:vis $modname:ident) => {
        // `$crate` always resolves to this crate, so the recursion keeps working when the
        // dependency is renamed or the macro is reached through a re-export.
        $crate::lalrpop_mod!($(#[$attr])* $vis $modname, concat!("/", stringify!($modname), ".rs"));
    };

    // Full form: `$source` is the path of the generated file relative to `OUT_DIR`.
    ($(#[$attr:meta])* $vis:vis $modname:ident, $source:expr) => {
        // Generated code is not meant to be formatted or linted like hand-written code.
        #[rustfmt::skip]
        #[allow(clippy::extra_unused_lifetimes)]
        #[allow(clippy::needless_lifetimes)]
        #[allow(clippy::let_unit_value)]
        #[allow(clippy::just_underscores_and_digits)]
        $(#[$attr])* $vis mod $modname { include!(concat!(env!("OUT_DIR"), $source)); }
    };
}

#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{string::ToString, vec};

    // Checks the `Display` output for an unrecognized token: the message itself,
    // then the list of expected tokens on a second line.
    #[test]
    fn test() {
        let expected = vec!["t1", "t2", "t3"]
            .into_iter()
            .map(|name| name.to_string())
            .collect();
        let err: ParseError<i32, &str, &str> = ParseError::UnrecognizedToken {
            token: (1, "t0", 2),
            expected,
        };

        let rendered = err.to_string();

        assert_eq!(
            rendered,
            "Unrecognized token `t0` found at 1:2\nExpected one of t1, t2 or t3"
        );
    }
}
lalrpop_util/lib.rs

lalrpop_util/
lib.rs