lalrpop_util/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
//! Runtime utilities for lalrpop generated parsers.
//!
//! For information about using lalrpop, see the [lalrpop
//! documentation](https://docs.rs/lalrpop/latest/lalrpop).
//!
//! This crate is designed to be used in conjunction with the lalrpop crate to provide runtime
//! support when interacting with lalrpop generated parsers. Generally speaking, if you are using
//! lalrpop, you want lalrpop specified as a dev-dependency and lalrpop-util as a dependency.
//! Version numbers for the two crates are kept in sync, and we recommend using the same version
//! number for each crate.
#![cfg_attr(not(feature = "std"), no_std)]
#![warn(rust_2018_idioms)]
#![warn(missing_docs)]

extern crate alloc;

use alloc::{string::String, vec::Vec};
#[rustversion::since(1.81)]
#[cfg(not(feature = "std"))]
use core::error::Error;
use core::fmt;
#[cfg(feature = "std")]
use std::error::Error;

#[cfg(feature = "lexer")]
pub mod lexer;
pub mod state_machine;

/// Error type for errors returned by lalrpop parsers.
///
/// For the built-in lexer, the generic parameters default to:
/// ParseError<usize, lexer::Token<'_>, &'static str>.
///
/// L: the location of the Token where the error occurred
/// T: The token encountered where the error occurred
/// E: A custom user-defined error
///
/// L and T are fixed types as listed above for built in lexers, and can be defined to whatever
/// type you would like if implementing a custom lexer.
///
/// The type of E can be overridden by specifying `type Error` in your grammar like so:
///
/// ```ignore
/// extern {
///     type Error = MyCustomErrorType;
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseError<L, T, E> {
    /// Generated by the internal lexer when it encounters a token (or EOF) it
    /// did not expect.
    InvalidToken {
        /// The end of the invalid token.
        location: L,
    },

    /// Generated by the parser when it encounters an EOF it did not expect.
    UnrecognizedEof {
        /// The end of the final token.
        location: L,

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters a token it did not expect.
    ///
    /// This means that the next token in the stream was not valid at this
    /// point in the grammar.
    UnrecognizedToken {
        /// The unexpected token of type `T` with a span given by the two `L` values.
        token: (L, T, L),

        /// The set of expected tokens: these names are taken from the
        /// grammar and hence may not necessarily be suitable for
        /// presenting to the user.
        expected: Vec<String>,
    },

    /// Generated by the parser when it encounters additional, unexpected tokens.
    ExtraToken {
        /// The extra token, with a type of `T` with a span given by the two `L` values.
        token: (L, T, L),
    },

    /// Custom error type.
    User {
        /// Custom user error.
        error: E,
    },
}

impl<L, T, E> ParseError<L, T, E> {
    fn map_intern<LL, TT, EE>(
        self,
        mut loc_op: impl FnMut(L) -> LL,
        tok_op: impl FnOnce(T) -> TT,
        err_op: impl FnOnce(E) -> EE,
    ) -> ParseError<LL, TT, EE> {
        // The signature of token is (L, T, L), so we need to call loc_op on both the "start" and
        // "end" values for a token.
        let maptok = |(s, t, e): (L, T, L)| (loc_op(s), tok_op(t), loc_op(e));
        match self {
            ParseError::InvalidToken { location } => ParseError::InvalidToken {
                location: loc_op(location),
            },
            ParseError::UnrecognizedEof { location, expected } => ParseError::UnrecognizedEof {
                location: loc_op(location),
                expected,
            },
            ParseError::UnrecognizedToken { token, expected } => ParseError::UnrecognizedToken {
                token: maptok(token),
                expected,
            },
            ParseError::ExtraToken { token } => ParseError::ExtraToken {
                token: maptok(token),
            },
            ParseError::User { error } => ParseError::User {
                error: err_op(error),
            },
        }
    }

    /// Transform a `ParseError` by applying a function to the location field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to apply an offset to a location.
    ///
    /// (Note that unlike `map_token()` and `map_error()`, the closure argument
    /// for this function is `FnMut`.  This is so that it can be called
    /// multiple times to apply to the starting and ending location of tokens.)
    pub fn map_location<LL>(self, op: impl FnMut(L) -> LL) -> ParseError<LL, T, E> {
        self.map_intern(op, |x| x, |x| x)
    }

    /// Transform a `ParseError` by applying a function to the token field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to transform a token in some way (eg escaping).
    pub fn map_token<TT>(self, op: impl FnOnce(T) -> TT) -> ParseError<L, TT, E> {
        self.map_intern(|x| x, op, |x| x)
    }

    /// Transform a `ParseError` by applying a function to the error field.
    ///
    /// This could be useful to ensure that all fields implement some trait, or
    /// to transform to a different error type in place.
    pub fn map_error<EE>(self, op: impl FnOnce(E) -> EE) -> ParseError<L, T, EE> {
        self.map_intern(|x| x, |x| x, op)
    }
}

/// Format a list of expected tokens.
fn fmt_expected(f: &mut fmt::Formatter<'_>, expected: &[String]) -> fmt::Result {
    if !expected.is_empty() {
        writeln!(f)?;
        for (i, e) in expected.iter().enumerate() {
            let sep = match i {
                0 => "Expected one of",
                _ if i < expected.len() - 1 => ",",
                // Last expected message to be written
                _ => " or",
            };
            write!(f, "{} {}", sep, e)?;
        }
    }
    Ok(())
}

impl<L, T, E> fmt::Display for ParseError<L, T, E>
where
    L: fmt::Display,
    T: fmt::Display,
    E: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use self::ParseError::*;
        match *self {
            User { ref error } => write!(f, "{}", error),
            InvalidToken { ref location } => write!(f, "Invalid token at {}", location),
            UnrecognizedEof {
                ref location,
                ref expected,
            } => {
                write!(f, "Unrecognized EOF found at {}", location)?;
                fmt_expected(f, expected)
            }
            UnrecognizedToken {
                token: (ref start, ref token, ref end),
                ref expected,
            } => {
                write!(
                    f,
                    "Unrecognized token `{}` found at {}:{}",
                    token, start, end
                )?;
                fmt_expected(f, expected)
            }
            ExtraToken {
                token: (ref start, ref token, ref end),
            } => write!(f, "Extra token {} found at {}:{}", token, start, end),
        }
    }
}

impl<L, T, E> From<E> for ParseError<L, T, E> {
    fn from(error: E) -> Self {
        ParseError::User { error }
    }
}

#[cfg_attr(not(feature = "std"), rustversion::since(1.81))]
impl<L, T, E> Error for ParseError<L, T, E>
where
    L: fmt::Debug + fmt::Display,
    T: fmt::Debug + fmt::Display,
    E: fmt::Debug + fmt::Display,
{
    fn description(&self) -> &str {
        "parse error"
    }
}

/// The error type of a recoverable parse error
///
/// For a full description of error recovery, see [the lalrpop
/// book](https://lalrpop.github.io/lalrpop/tutorial/008_error_recovery.html).
///
/// This is the type of the variable resulting from binding a `!` symbol in your lalrpop grammar.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ErrorRecovery<L, T, E> {
    /// The parse error that was recovered from
    pub error: ParseError<L, T, E>,
    /// The tokens discarded prior to resuming parsing
    pub dropped_tokens: Vec<(L, T, L)>,
}

/// Define a module using the generated parse from a `.lalrpop` file.
///
/// You have to specify the name of the module and the path of the file
/// generated by LALRPOP. If the input is in the root directory, you can
/// omit it.
///
/// # Example
/// ```ignore
/// // load parser in src/parser.lalrpop
/// lalrpop_mod!(parser);
///
/// // load parser in src/lex/parser.lalrpop
/// lalrpop_mod!(parser, "/lex/parser.rs");
///
/// // define a public module
/// lalrpop_mod!(pub parser);
///
/// // specify attributes for the generated module
/// lalrpop_mod!(#[allow(clippy::ptr_arg)]#[rustfmt::skip] parser);
/// ```
#[macro_export]
macro_rules! lalrpop_mod {
    ($(#[$attr:meta])* $vis:vis $modname:ident) => {
        lalrpop_util::lalrpop_mod!($(#[$attr])* $vis $modname, concat!("/", stringify!($modname), ".rs"));
    };

    ($(#[$attr:meta])* $vis:vis $modname:ident, $source:expr) => {
        #[rustfmt::skip]
        #[allow(clippy::extra_unused_lifetimes)]
        #[allow(clippy::needless_lifetimes)]
        #[allow(clippy::let_unit_value)]
        #[allow(clippy::just_underscores_and_digits)]
        $(#[$attr])* $vis mod $modname { include!(concat!(env!("OUT_DIR"), $source)); }
    };
}

#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{format, string::ToString, vec};

    #[test]
    fn test() {
        let err = ParseError::UnrecognizedToken::<i32, &str, &str> {
            token: (1, "t0", 2),
            expected: vec!["t1", "t2", "t3"]
                .into_iter()
                .map(|s| s.to_string())
                .collect(),
        };
        assert_eq!(
            format!("{}", err),
            "Unrecognized token `t0` found at 1:2\n\
             Expected one of t1, t2 or t3"
        );
    }
}