tree_sitter_generate/prepare_grammar/
mod.rs

1mod expand_repeats;
2mod expand_tokens;
3mod extract_default_aliases;
4mod extract_tokens;
5mod flatten_grammar;
6mod intern_symbols;
7mod process_inlines;
8
9use std::{
10    cmp::Ordering,
11    collections::{hash_map, HashMap, HashSet},
12    mem,
13};
14
15use anyhow::Result;
16pub use expand_tokens::ExpandTokensError;
17pub use extract_tokens::ExtractTokensError;
18pub use flatten_grammar::FlattenGrammarError;
19pub use intern_symbols::InternSymbolsError;
20pub use process_inlines::ProcessInlinesError;
21use serde::Serialize;
22use thiserror::Error;
23
24pub use self::expand_tokens::expand_tokens;
25use self::{
26    expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
27    extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
28    intern_symbols::intern_symbols, process_inlines::process_inlines,
29};
30use super::{
31    grammars::{
32        ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
33        SyntaxGrammar, Variable,
34    },
35    rules::{AliasMap, Precedence, Rule, Symbol},
36};
37use crate::grammars::ReservedWordContext;
38
39pub struct IntermediateGrammar<T, U> {
40    variables: Vec<Variable>,
41    extra_symbols: Vec<T>,
42    expected_conflicts: Vec<Vec<Symbol>>,
43    precedence_orderings: Vec<Vec<PrecedenceEntry>>,
44    external_tokens: Vec<U>,
45    variables_to_inline: Vec<Symbol>,
46    supertype_symbols: Vec<Symbol>,
47    word_token: Option<Symbol>,
48    reserved_word_sets: Vec<ReservedWordContext<T>>,
49}
50
51pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
52
53pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
54
55#[derive(Debug, PartialEq, Eq)]
56pub struct ExtractedLexicalGrammar {
57    pub variables: Vec<Variable>,
58    pub separators: Vec<Rule>,
59}
60
61impl<T, U> Default for IntermediateGrammar<T, U> {
62    fn default() -> Self {
63        Self {
64            variables: Vec::default(),
65            extra_symbols: Vec::default(),
66            expected_conflicts: Vec::default(),
67            precedence_orderings: Vec::default(),
68            external_tokens: Vec::default(),
69            variables_to_inline: Vec::default(),
70            supertype_symbols: Vec::default(),
71            word_token: Option::default(),
72            reserved_word_sets: Vec::default(),
73        }
74    }
75}
76
77pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
78
79#[derive(Debug, Error, Serialize)]
80#[error(transparent)]
81pub enum PrepareGrammarError {
82    ValidatePrecedences(#[from] ValidatePrecedenceError),
83    InternSymbols(#[from] InternSymbolsError),
84    ExtractTokens(#[from] ExtractTokensError),
85    FlattenGrammar(#[from] FlattenGrammarError),
86    ExpandTokens(#[from] ExpandTokensError),
87    ProcessInlines(#[from] ProcessInlinesError),
88}
89
90pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
91
92#[derive(Debug, Error, Serialize)]
93#[error(transparent)]
94pub enum ValidatePrecedenceError {
95    Undeclared(#[from] UndeclaredPrecedenceError),
96    Ordering(#[from] ConflictingPrecedenceOrderingError),
97}
98
99#[derive(Debug, Error, Serialize)]
100pub struct UndeclaredPrecedenceError {
101    pub precedence: String,
102    pub rule: String,
103}
104
105impl std::fmt::Display for UndeclaredPrecedenceError {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        write!(
108            f,
109            "Undeclared precedence '{}' in rule '{}'",
110            self.precedence, self.rule
111        )?;
112        Ok(())
113    }
114}
115
116#[derive(Debug, Error, Serialize)]
117pub struct ConflictingPrecedenceOrderingError {
118    pub precedence_1: String,
119    pub precedence_2: String,
120}
121
122impl std::fmt::Display for ConflictingPrecedenceOrderingError {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        write!(
125            f,
126            "Conflicting orderings for precedences {} and {}",
127            self.precedence_1, self.precedence_2
128        )?;
129        Ok(())
130    }
131}
132
133/// Transform an input grammar into separate components that are ready
134/// for parse table construction.
135pub fn prepare_grammar(
136    input_grammar: &InputGrammar,
137) -> PrepareGrammarResult<(
138    SyntaxGrammar,
139    LexicalGrammar,
140    InlinedProductionMap,
141    AliasMap,
142)> {
143    validate_precedences(input_grammar)?;
144
145    let interned_grammar = intern_symbols(input_grammar)?;
146    let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
147    let syntax_grammar = expand_repeats(syntax_grammar);
148    let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
149    let lexical_grammar = expand_tokens(lexical_grammar)?;
150    let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
151    let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;
152    Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
153}
154
155/// Check that all of the named precedences used in the grammar are declared
156/// within the `precedences` lists, and also that there are no conflicting
157/// precedence orderings declared in those lists.
158fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
159    // Check that no rule contains a named precedence that is not present in
160    // any of the `precedences` lists.
161    fn validate(
162        rule_name: &str,
163        rule: &Rule,
164        names: &HashSet<&String>,
165    ) -> ValidatePrecedenceResult<()> {
166        match rule {
167            Rule::Repeat(rule) => validate(rule_name, rule, names),
168            Rule::Seq(elements) | Rule::Choice(elements) => elements
169                .iter()
170                .try_for_each(|e| validate(rule_name, e, names)),
171            Rule::Metadata { rule, params } => {
172                if let Precedence::Name(n) = &params.precedence {
173                    if !names.contains(n) {
174                        Err(UndeclaredPrecedenceError {
175                            precedence: n.to_string(),
176                            rule: rule_name.to_string(),
177                        })?;
178                    }
179                }
180                validate(rule_name, rule, names)?;
181                Ok(())
182            }
183            _ => Ok(()),
184        }
185    }
186
187    // For any two precedence names `a` and `b`, if `a` comes before `b`
188    // in some list, then it cannot come *after* `b` in any list.
189    let mut pairs = HashMap::new();
190    for list in &grammar.precedence_orderings {
191        for (i, mut entry1) in list.iter().enumerate() {
192            for mut entry2 in list.iter().skip(i + 1) {
193                if entry2 == entry1 {
194                    continue;
195                }
196                let mut ordering = Ordering::Greater;
197                if entry1 > entry2 {
198                    ordering = Ordering::Less;
199                    mem::swap(&mut entry1, &mut entry2);
200                }
201                match pairs.entry((entry1, entry2)) {
202                    hash_map::Entry::Vacant(e) => {
203                        e.insert(ordering);
204                    }
205                    hash_map::Entry::Occupied(e) => {
206                        if e.get() != &ordering {
207                            Err(ConflictingPrecedenceOrderingError {
208                                precedence_1: entry1.to_string(),
209                                precedence_2: entry2.to_string(),
210                            })?;
211                        }
212                    }
213                }
214            }
215        }
216    }
217
218    let precedence_names = grammar
219        .precedence_orderings
220        .iter()
221        .flat_map(|l| l.iter())
222        .filter_map(|p| {
223            if let PrecedenceEntry::Name(n) = p {
224                Some(n)
225            } else {
226                None
227            }
228        })
229        .collect::<HashSet<&String>>();
230    for variable in &grammar.variables {
231        validate(&variable.name, &variable.rule, &precedence_names)?;
232    }
233
234    Ok(())
235}
236
237#[cfg(test)]
238mod tests {
239    use super::*;
240    use crate::grammars::VariableType;
241
242    #[test]
243    fn test_validate_precedences_with_undeclared_precedence() {
244        let grammar = InputGrammar {
245            precedence_orderings: vec![
246                vec![
247                    PrecedenceEntry::Name("a".to_string()),
248                    PrecedenceEntry::Name("b".to_string()),
249                ],
250                vec![
251                    PrecedenceEntry::Name("b".to_string()),
252                    PrecedenceEntry::Name("c".to_string()),
253                    PrecedenceEntry::Name("d".to_string()),
254                ],
255            ],
256            variables: vec![
257                Variable {
258                    name: "v1".to_string(),
259                    kind: VariableType::Named,
260                    rule: Rule::Seq(vec![
261                        Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
262                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
263                    ]),
264                },
265                Variable {
266                    name: "v2".to_string(),
267                    kind: VariableType::Named,
268                    rule: Rule::repeat(Rule::Choice(vec![
269                        Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
270                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
271                    ])),
272                },
273            ],
274            ..Default::default()
275        };
276
277        let result = validate_precedences(&grammar);
278        assert_eq!(
279            result.unwrap_err().to_string(),
280            "Undeclared precedence 'omg' in rule 'v2'",
281        );
282    }
283
284    #[test]
285    fn test_validate_precedences_with_conflicting_order() {
286        let grammar = InputGrammar {
287            precedence_orderings: vec![
288                vec![
289                    PrecedenceEntry::Name("a".to_string()),
290                    PrecedenceEntry::Name("b".to_string()),
291                ],
292                vec![
293                    PrecedenceEntry::Name("b".to_string()),
294                    PrecedenceEntry::Name("c".to_string()),
295                    PrecedenceEntry::Name("a".to_string()),
296                ],
297            ],
298            variables: vec![
299                Variable {
300                    name: "v1".to_string(),
301                    kind: VariableType::Named,
302                    rule: Rule::Seq(vec![
303                        Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
304                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
305                    ]),
306                },
307                Variable {
308                    name: "v2".to_string(),
309                    kind: VariableType::Named,
310                    rule: Rule::repeat(Rule::Choice(vec![
311                        Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
312                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
313                    ])),
314                },
315            ],
316            ..Default::default()
317        };
318
319        let result = validate_precedences(&grammar);
320        assert_eq!(
321            result.unwrap_err().to_string(),
322            "Conflicting orderings for precedences 'a' and 'b'",
323        );
324    }
325}