lalrpop_util/
lib.rs

//! Runtime utilities for lalrpop generated parsers.
//!
//! For information about using lalrpop, see the [lalrpop
//! documentation](https://docs.rs/lalrpop/latest/lalrpop).
//!
//! This crate is designed to be used in conjunction with the lalrpop crate to provide runtime
//! support when interacting with lalrpop generated parsers. Generally speaking, if you are using
//! lalrpop, you want lalrpop specified as a build-dependency (it runs from `build.rs`) and
//! lalrpop-util as a regular dependency. Version numbers for the two crates are kept in sync, and
//! we recommend using the same version number for each crate.
11#![cfg_attr(not(feature = "std"), no_std)]
12#![warn(rust_2018_idioms)]
13#![warn(missing_docs)]
14
15extern crate alloc;
16
17use alloc::{string::String, vec::Vec};
18#[rustversion::since(1.81)]
19#[cfg(not(feature = "std"))]
20use core::error::Error;
21use core::fmt;
22#[cfg(feature = "std")]
23use std::error::Error;
24
25#[cfg(feature = "lexer")]
26pub mod lexer;
27pub mod state_machine;
28
/// Error type for errors returned by lalrpop parsers.
///
/// For the built-in lexer, the generic parameters default to
/// `ParseError<usize, lexer::Token<'_>, &'static str>`.
///
/// * `L`: the location of the token where the error occurred
/// * `T`: the token encountered where the error occurred
/// * `E`: a custom user-defined error
///
/// `L` and `T` are fixed types as listed above for built in lexers, and can be defined to
/// whatever type you would like if implementing a custom lexer.
///
/// The type of `E` can be overridden by specifying `type Error` in your grammar like so:
///
/// ```ignore
/// extern {
///     type Error = MyCustomErrorType;
/// }
/// ```
///
/// Note that the derived ordering follows the declaration order of the variants.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseError<L, T, E> {
    /// Generated by the internal lexer when it encounters a token (or EOF) it
    /// did not expect.
    InvalidToken {
        /// The end of the invalid token.
        location: L,
    },

    /// Generated by the parser when it encounters an EOF it did not expect.
    UnrecognizedEof {
        /// The end of the final token.
        location: L,

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters a token it did not expect.
    ///
    /// This means that the next token in the stream was not valid at this
    /// point in the grammar.
    UnrecognizedToken {
        /// The unexpected token of type `T` with a span given by the two `L` values.
        token: (L, T, L),

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters additional, unexpected tokens.
    ExtraToken {
        /// The extra token, with a type of `T` with a span given by the two `L` values.
        token: (L, T, L),
    },

    /// Custom error type.
    User {
        /// Custom user error.
        error: E,
    },
}
94
95impl<L, T, E> ParseError<L, T, E> {
96    fn map_intern<LL, TT, EE>(
97        self,
98        mut loc_op: impl FnMut(L) -> LL,
99        tok_op: impl FnOnce(T) -> TT,
100        err_op: impl FnOnce(E) -> EE,
101    ) -> ParseError<LL, TT, EE> {
102        // The signature of token is (L, T, L), so we need to call loc_op on both the "start" and
103        // "end" values for a token.
104        let maptok = |(s, t, e): (L, T, L)| (loc_op(s), tok_op(t), loc_op(e));
105        match self {
106            ParseError::InvalidToken { location } => ParseError::InvalidToken {
107                location: loc_op(location),
108            },
109            ParseError::UnrecognizedEof { location, expected } => ParseError::UnrecognizedEof {
110                location: loc_op(location),
111                expected,
112            },
113            ParseError::UnrecognizedToken { token, expected } => ParseError::UnrecognizedToken {
114                token: maptok(token),
115                expected,
116            },
117            ParseError::ExtraToken { token } => ParseError::ExtraToken {
118                token: maptok(token),
119            },
120            ParseError::User { error } => ParseError::User {
121                error: err_op(error),
122            },
123        }
124    }
125
126    /// Transform a `ParseError` by applying a function to the location field.
127    ///
128    /// This could be useful to ensure that all fields implement some trait, or
129    /// to apply an offset to a location.
130    ///
131    /// (Note that unlike `map_token()` and `map_error()`, the closure argument
132    /// for this function is `FnMut`.  This is so that it can be called
133    /// multiple times to apply to the starting and ending location of tokens.)
134    pub fn map_location<LL>(self, op: impl FnMut(L) -> LL) -> ParseError<LL, T, E> {
135        self.map_intern(op, |x| x, |x| x)
136    }
137
138    /// Transform a `ParseError` by applying a function to the token field.
139    ///
140    /// This could be useful to ensure that all fields implement some trait, or
141    /// to transform a token in some way (eg escaping).
142    pub fn map_token<TT>(self, op: impl FnOnce(T) -> TT) -> ParseError<L, TT, E> {
143        self.map_intern(|x| x, op, |x| x)
144    }
145
146    /// Transform a `ParseError` by applying a function to the error field.
147    ///
148    /// This could be useful to ensure that all fields implement some trait, or
149    /// to transform to a different error type in place.
150    pub fn map_error<EE>(self, op: impl FnOnce(E) -> EE) -> ParseError<L, T, EE> {
151        self.map_intern(|x| x, |x| x, op)
152    }
153}
154
/// Format a list of expected tokens.
///
/// Writes nothing for an empty list. Otherwise writes a newline followed by
/// `Expected one of a, b or c`: items are separated by `, ` except the final
/// item, which is introduced by ` or`.
fn fmt_expected(f: &mut fmt::Formatter<'_>, expected: &[String]) -> fmt::Result {
    let (first, rest) = match expected.split_first() {
        Some(parts) => parts,
        // Nothing to report: leave the output untouched.
        None => return Ok(()),
    };

    writeln!(f)?;
    write!(f, "Expected one of {}", first)?;

    // Everything after the first item: a comma-separated middle, then " or <last>".
    if let Some((last, middle)) = rest.split_last() {
        for name in middle {
            write!(f, ", {}", name)?;
        }
        write!(f, " or {}", last)?;
    }
    Ok(())
}
171
172impl<L, T, E> fmt::Display for ParseError<L, T, E>
173where
174    L: fmt::Display,
175    T: fmt::Display,
176    E: fmt::Display,
177{
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        use self::ParseError::*;
180        match *self {
181            User { ref error } => write!(f, "{}", error),
182            InvalidToken { ref location } => write!(f, "Invalid token at {}", location),
183            UnrecognizedEof {
184                ref location,
185                ref expected,
186            } => {
187                write!(f, "Unrecognized EOF found at {}", location)?;
188                fmt_expected(f, expected)
189            }
190            UnrecognizedToken {
191                token: (ref start, ref token, ref end),
192                ref expected,
193            } => {
194                write!(
195                    f,
196                    "Unrecognized token `{}` found at {}:{}",
197                    token, start, end
198                )?;
199                fmt_expected(f, expected)
200            }
201            ExtraToken {
202                token: (ref start, ref token, ref end),
203            } => write!(f, "Extra token {} found at {}:{}", token, start, end),
204        }
205    }
206}
207
208impl<L, T, E> From<E> for ParseError<L, T, E> {
209    fn from(error: E) -> Self {
210        ParseError::User { error }
211    }
212}
213
214#[cfg_attr(not(feature = "std"), rustversion::since(1.81))]
215impl<L, T, E> Error for ParseError<L, T, E>
216where
217    L: fmt::Debug + fmt::Display,
218    T: fmt::Debug + fmt::Display,
219    E: fmt::Debug + fmt::Display,
220{
221    fn description(&self) -> &str {
222        "parse error"
223    }
224}
225
226/// The error type of a recoverable parse error
227///
228/// For a full description of error recovery, see [the lalrpop
229/// book](https://lalrpop.github.io/lalrpop/tutorial/008_error_recovery.html).
230///
231/// This is the type of the variable resulting from binding a `!` symbol in your lalrpop grammar.
232#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
233pub struct ErrorRecovery<L, T, E> {
234    /// The parse error that was recovered from
235    pub error: ParseError<L, T, E>,
236    /// The tokens discarded prior to resuming parsing
237    pub dropped_tokens: Vec<(L, T, L)>,
238}
239
/// Define a module using the generated parse from a `.lalrpop` file.
///
/// You have to specify the name of the module and the path of the file
/// generated by LALRPOP. If the input is in the root directory, you can
/// omit it.
///
/// The path is appended to the `OUT_DIR` environment variable, so it must
/// start with a `/`.
///
/// # Example
/// ```ignore
/// // load parser in src/parser.lalrpop
/// lalrpop_mod!(parser);
///
/// // load parser in src/lex/parser.lalrpop
/// lalrpop_mod!(parser, "/lex/parser.rs");
///
/// // define a public module
/// lalrpop_mod!(pub parser);
///
/// // specify attributes for the generated module
/// lalrpop_mod!(#[allow(clippy::ptr_arg)]#[rustfmt::skip] parser);
/// ```
#[macro_export]
macro_rules! lalrpop_mod {
    // Short form: derive the file name from the module name.
    ($(#[$attr:meta])* $vis:vis $modname:ident) => {
        // `$crate` keeps the recursive call working even if the crate is
        // renamed or re-exported by the user.
        $crate::lalrpop_mod!($(#[$attr])* $vis $modname, concat!("/", stringify!($modname), ".rs"));
    };

    // Full form: include the generated file found at `$OUT_DIR$source`.
    ($(#[$attr:meta])* $vis:vis $modname:ident, $source:expr) => {
        #[rustfmt::skip]
        #[allow(clippy::extra_unused_lifetimes)]
        #[allow(clippy::needless_lifetimes)]
        #[allow(clippy::let_unit_value)]
        #[allow(clippy::just_underscores_and_digits)]
        $(#[$attr])* $vis mod $modname { include!(concat!(env!("OUT_DIR"), $source)); }
    };
}
275
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{format, string::ToString, vec};

    /// Three expected tokens: comma-separated, with " or" before the last one.
    #[test]
    fn test() {
        let err = ParseError::UnrecognizedToken::<i32, &str, &str> {
            token: (1, "t0", 2),
            expected: vec!["t1", "t2", "t3"]
                .into_iter()
                .map(|s| s.to_string())
                .collect(),
        };
        assert_eq!(
            format!("{}", err),
            "Unrecognized token `t0` found at 1:2\n\
             Expected one of t1, t2 or t3"
        );
    }

    /// An empty expected list adds nothing after the main message.
    #[test]
    fn display_without_expected_tokens() {
        let err = ParseError::UnrecognizedEof::<i32, &str, &str> {
            location: 5,
            expected: vec![],
        };
        assert_eq!(format!("{}", err), "Unrecognized EOF found at 5");
    }

    /// Two expected tokens are joined by " or" with no comma.
    #[test]
    fn display_two_expected_tokens() {
        let err = ParseError::UnrecognizedEof::<i32, &str, &str> {
            location: 5,
            expected: vec!["t1".to_string(), "t2".to_string()],
        };
        assert_eq!(
            format!("{}", err),
            "Unrecognized EOF found at 5\n\
             Expected one of t1 or t2"
        );
    }

    /// `map_location` is applied to both the start and the end of a token span.
    #[test]
    fn map_location_maps_both_ends() {
        let err = ParseError::ExtraToken::<i32, &str, &str> {
            token: (1, "t0", 2),
        };
        assert_eq!(
            err.map_location(|l| l + 10),
            ParseError::ExtraToken {
                token: (11, "t0", 12),
            }
        );
    }
}