lalrpop_util/lib.rs
1//! Runtime utilities for lalrpop generated parsers.
2//!
3//! For information about using lalrpop, see the [lalrpop
4//! documentation](https://docs.rs/lalrpop/latest/lalrpop).
5//!
6//! This crate is designed to be used in conjunction with the lalrpop crate to provide runtime
7//! support when interacting with lalrpop generated parsers. Generally speaking, if you are using
//! lalrpop, you want lalrpop specified as a build-dependency and lalrpop-util as a dependency.
9//! Version numbers for the two crates are kept in sync, and we recommend using the same version
10//! number for each crate.
11#![cfg_attr(not(feature = "std"), no_std)]
12#![warn(rust_2018_idioms)]
13#![warn(missing_docs)]
14
15extern crate alloc;
16
17use alloc::{string::String, vec::Vec};
18#[rustversion::since(1.81)]
19#[cfg(not(feature = "std"))]
20use core::error::Error;
21use core::fmt;
22#[cfg(feature = "std")]
23use std::error::Error;
24
25#[cfg(feature = "lexer")]
26pub mod lexer;
27pub mod state_machine;
28
/// Error type for errors returned by lalrpop parsers.
///
/// For the built-in lexer, the generic parameters default to
/// `ParseError<usize, lexer::Token<'_>, &'static str>`.
///
/// * `L`: the location of the token where the error occurred
/// * `T`: the token encountered where the error occurred
/// * `E`: a custom user-defined error
///
/// `L` and `T` are fixed types as listed above for built-in lexers, and can be defined to
/// whatever type you would like if implementing a custom lexer.
///
/// The type of `E` can be overridden by specifying `type Error` in your grammar like so:
///
/// ```ignore
/// extern {
///     type Error = MyCustomErrorType;
/// }
/// ```
// NOTE: `PartialOrd`/`Ord` are derived, so the ordering of values follows the declaration order
// of the variants (and of the fields inside each variant). Do not reorder them casually.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseError<L, T, E> {
    /// Generated by the internal lexer when it encounters a token (or EOF) it
    /// did not expect.
    InvalidToken {
        /// The end of the invalid token.
        location: L,
    },

    /// Generated by the parser when it encounters an EOF it did not expect.
    UnrecognizedEof {
        /// The end of the final token.
        location: L,

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters a token it did not expect.
    ///
    /// This means that the next token in the stream was not valid at this
    /// point in the grammar.
    UnrecognizedToken {
        /// The unexpected token of type `T`, with a span given by the two `L` values
        /// (start first, end last).
        token: (L, T, L),

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters additional, unexpected tokens.
    ExtraToken {
        /// The extra token of type `T`, with a span given by the two `L` values
        /// (start first, end last).
        token: (L, T, L),
    },

    /// Custom error type.
    User {
        /// Custom user error.
        error: E,
    },
}
94
95impl<L, T, E> ParseError<L, T, E> {
96 fn map_intern<LL, TT, EE>(
97 self,
98 mut loc_op: impl FnMut(L) -> LL,
99 tok_op: impl FnOnce(T) -> TT,
100 err_op: impl FnOnce(E) -> EE,
101 ) -> ParseError<LL, TT, EE> {
102 // The signature of token is (L, T, L), so we need to call loc_op on both the "start" and
103 // "end" values for a token.
104 let maptok = |(s, t, e): (L, T, L)| (loc_op(s), tok_op(t), loc_op(e));
105 match self {
106 ParseError::InvalidToken { location } => ParseError::InvalidToken {
107 location: loc_op(location),
108 },
109 ParseError::UnrecognizedEof { location, expected } => ParseError::UnrecognizedEof {
110 location: loc_op(location),
111 expected,
112 },
113 ParseError::UnrecognizedToken { token, expected } => ParseError::UnrecognizedToken {
114 token: maptok(token),
115 expected,
116 },
117 ParseError::ExtraToken { token } => ParseError::ExtraToken {
118 token: maptok(token),
119 },
120 ParseError::User { error } => ParseError::User {
121 error: err_op(error),
122 },
123 }
124 }
125
126 /// Transform a `ParseError` by applying a function to the location field.
127 ///
128 /// This could be useful to ensure that all fields implement some trait, or
129 /// to apply an offset to a location.
130 ///
131 /// (Note that unlike `map_token()` and `map_error()`, the closure argument
132 /// for this function is `FnMut`. This is so that it can be called
133 /// multiple times to apply to the starting and ending location of tokens.)
134 pub fn map_location<LL>(self, op: impl FnMut(L) -> LL) -> ParseError<LL, T, E> {
135 self.map_intern(op, |x| x, |x| x)
136 }
137
138 /// Transform a `ParseError` by applying a function to the token field.
139 ///
140 /// This could be useful to ensure that all fields implement some trait, or
141 /// to transform a token in some way (eg escaping).
142 pub fn map_token<TT>(self, op: impl FnOnce(T) -> TT) -> ParseError<L, TT, E> {
143 self.map_intern(|x| x, op, |x| x)
144 }
145
146 /// Transform a `ParseError` by applying a function to the error field.
147 ///
148 /// This could be useful to ensure that all fields implement some trait, or
149 /// to transform to a different error type in place.
150 pub fn map_error<EE>(self, op: impl FnOnce(E) -> EE) -> ParseError<L, T, EE> {
151 self.map_intern(|x| x, |x| x, op)
152 }
153}
154
/// Write the list of expected tokens on a fresh line.
///
/// Nothing is written for an empty list. Otherwise the output is a newline followed by
/// `Expected one of a, b or c`: entries are comma separated, except that the final entry
/// (when there is more than one) is introduced by ` or`.
fn fmt_expected(f: &mut fmt::Formatter<'_>, expected: &[String]) -> fmt::Result {
    if let Some((first, rest)) = expected.split_first() {
        writeln!(f)?;
        write!(f, "Expected one of {}", first)?;
        if let Some((last, middle)) = rest.split_last() {
            for name in middle {
                write!(f, ", {}", name)?;
            }
            write!(f, " or {}", last)?;
        }
    }
    Ok(())
}
171
172impl<L, T, E> fmt::Display for ParseError<L, T, E>
173where
174 L: fmt::Display,
175 T: fmt::Display,
176 E: fmt::Display,
177{
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 use self::ParseError::*;
180 match *self {
181 User { ref error } => write!(f, "{}", error),
182 InvalidToken { ref location } => write!(f, "Invalid token at {}", location),
183 UnrecognizedEof {
184 ref location,
185 ref expected,
186 } => {
187 write!(f, "Unrecognized EOF found at {}", location)?;
188 fmt_expected(f, expected)
189 }
190 UnrecognizedToken {
191 token: (ref start, ref token, ref end),
192 ref expected,
193 } => {
194 write!(
195 f,
196 "Unrecognized token `{}` found at {}:{}",
197 token, start, end
198 )?;
199 fmt_expected(f, expected)
200 }
201 ExtraToken {
202 token: (ref start, ref token, ref end),
203 } => write!(f, "Extra token {} found at {}:{}", token, start, end),
204 }
205 }
206}
207
208impl<L, T, E> From<E> for ParseError<L, T, E> {
209 fn from(error: E) -> Self {
210 ParseError::User { error }
211 }
212}
213
214#[cfg_attr(not(feature = "std"), rustversion::since(1.81))]
215impl<L, T, E> Error for ParseError<L, T, E>
216where
217 L: fmt::Debug + fmt::Display,
218 T: fmt::Debug + fmt::Display,
219 E: fmt::Debug + fmt::Display,
220{
221 fn description(&self) -> &str {
222 "parse error"
223 }
224}
225
/// The error type of a recoverable parse error.
///
/// For a full description of error recovery, see [the lalrpop
/// book](https://lalrpop.github.io/lalrpop/tutorial/008_error_recovery.html).
///
/// This is the type of the variable resulting from binding a `!` symbol in your lalrpop grammar.
///
/// `L`, `T` and `E` have the same meaning as on [`ParseError`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ErrorRecovery<L, T, E> {
    /// The parse error that was recovered from.
    pub error: ParseError<L, T, E>,
    /// The tokens discarded prior to resuming parsing, each stored as a token of type `T`
    /// between two `L` values.
    pub dropped_tokens: Vec<(L, T, L)>,
}
239
/// Define a module using the generated parser from a `.lalrpop` file.
///
/// You have to specify the name of the module and the path of the file
/// generated by LALRPOP, relative to `OUT_DIR` and starting with a `/`. If the
/// path is omitted it defaults to `"/<modname>.rs"`, which is what you want when
/// the input is in the root directory.
///
/// Any attributes and a visibility given before the module name are applied to
/// the generated module.
///
/// # Example
/// ```ignore
/// // load parser in src/parser.lalrpop
/// lalrpop_mod!(parser);
///
/// // load parser in src/lex/parser.lalrpop
/// lalrpop_mod!(parser, "/lex/parser.rs");
///
/// // define a public module
/// lalrpop_mod!(pub parser);
///
/// // specify attributes for the generated module
/// lalrpop_mod!(#[allow(clippy::ptr_arg)]#[rustfmt::skip] parser);
/// ```
#[macro_export]
macro_rules! lalrpop_mod {
    // Short form: derive the source path from the module name and defer to the full form.
    // `$crate` (rather than a hard-coded `lalrpop_util::` path) keeps the recursive call
    // working when the dependency is renamed or re-exported, or when the macro is used
    // inside this crate.
    ($(#[$attr:meta])* $vis:vis $modname:ident) => {
        $crate::lalrpop_mod!($(#[$attr])* $vis $modname, concat!("/", stringify!($modname), ".rs"));
    };

    // Full form: include the generated file from `OUT_DIR` as a module. The lints below are
    // silenced on the generated module before any user-supplied attributes are applied.
    ($(#[$attr:meta])* $vis:vis $modname:ident, $source:expr) => {
        #[rustfmt::skip]
        #[allow(clippy::extra_unused_lifetimes)]
        #[allow(clippy::needless_lifetimes)]
        #[allow(clippy::let_unit_value)]
        #[allow(clippy::just_underscores_and_digits)]
        $(#[$attr])* $vis mod $modname { include!(concat!(env!("OUT_DIR"), $source)); }
    };
}
275
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{format, string::ToString, vec};

    /// An unrecognized-token error prints the token, its span and the expected-token list.
    #[test]
    fn test() {
        let expected = vec!["t1".to_string(), "t2".to_string(), "t3".to_string()];
        let err: ParseError<i32, &str, &str> = ParseError::UnrecognizedToken {
            token: (1, "t0", 2),
            expected,
        };
        let rendered = format!("{}", err);
        assert_eq!(
            rendered,
            "Unrecognized token `t0` found at 1:2\nExpected one of t1, t2 or t3"
        );
    }
}
296}