1use std::collections::HashSet;
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use serde_json::{Map, Value};
6use thiserror::Error;
7
8use super::{
9 grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
10 rules::{Precedence, Rule},
11};
12use crate::grammars::ReservedWordContext;
13
14#[derive(Deserialize)]
15#[serde(tag = "type")]
16#[allow(non_camel_case_types)]
17#[allow(clippy::upper_case_acronyms)]
18enum RuleJSON {
19 ALIAS {
20 content: Box<RuleJSON>,
21 named: bool,
22 value: String,
23 },
24 BLANK,
25 STRING {
26 value: String,
27 },
28 PATTERN {
29 value: String,
30 flags: Option<String>,
31 },
32 SYMBOL {
33 name: String,
34 },
35 CHOICE {
36 members: Vec<RuleJSON>,
37 },
38 FIELD {
39 name: String,
40 content: Box<RuleJSON>,
41 },
42 SEQ {
43 members: Vec<RuleJSON>,
44 },
45 REPEAT {
46 content: Box<RuleJSON>,
47 },
48 REPEAT1 {
49 content: Box<RuleJSON>,
50 },
51 PREC_DYNAMIC {
52 value: i32,
53 content: Box<RuleJSON>,
54 },
55 PREC_LEFT {
56 value: PrecedenceValueJSON,
57 content: Box<RuleJSON>,
58 },
59 PREC_RIGHT {
60 value: PrecedenceValueJSON,
61 content: Box<RuleJSON>,
62 },
63 PREC {
64 value: PrecedenceValueJSON,
65 content: Box<RuleJSON>,
66 },
67 TOKEN {
68 content: Box<RuleJSON>,
69 },
70 IMMEDIATE_TOKEN {
71 content: Box<RuleJSON>,
72 },
73 RESERVED {
74 context_name: String,
75 content: Box<RuleJSON>,
76 },
77}
78
79#[derive(Deserialize)]
80#[serde(untagged)]
81enum PrecedenceValueJSON {
82 Integer(i32),
83 Name(String),
84}
85
86#[derive(Deserialize)]
87pub struct GrammarJSON {
88 pub name: String,
89 rules: Map<String, Value>,
90 #[serde(default)]
91 precedences: Vec<Vec<RuleJSON>>,
92 #[serde(default)]
93 conflicts: Vec<Vec<String>>,
94 #[serde(default)]
95 externals: Vec<RuleJSON>,
96 #[serde(default)]
97 extras: Vec<RuleJSON>,
98 #[serde(default)]
99 inline: Vec<String>,
100 #[serde(default)]
101 supertypes: Vec<String>,
102 #[serde(default)]
103 word: Option<String>,
104 #[serde(default)]
105 reserved: Map<String, Value>,
106}
107
108pub type ParseGrammarResult<T> = Result<T, ParseGrammarError>;
109
110#[derive(Debug, Error, Serialize)]
111pub enum ParseGrammarError {
112 #[error("{0}")]
113 Serialization(String),
114 #[error("Rules in the `extras` array must not contain empty strings")]
115 InvalidExtra,
116 #[error("Invalid rule in precedences array. Only strings and symbols are allowed")]
117 Unexpected,
118 #[error("Reserved word sets must be arrays")]
119 InvalidReservedWordSet,
120 #[error("Grammar Error: Unexpected rule `{0}` in `token()` call")]
121 UnexpectedRule(String),
122}
123
124impl From<serde_json::Error> for ParseGrammarError {
125 fn from(value: serde_json::Error) -> Self {
126 Self::Serialization(value.to_string())
127 }
128}
129
130fn rule_is_referenced(rule: &Rule, target: &str, is_external: bool) -> bool {
139 match rule {
140 Rule::NamedSymbol(name) => name == target && !is_external,
141 Rule::Choice(rules) | Rule::Seq(rules) => {
142 rules.iter().any(|r| rule_is_referenced(r, target, false))
143 }
144 Rule::Metadata { rule, .. } | Rule::Reserved { rule, .. } => {
145 rule_is_referenced(rule, target, is_external)
146 }
147 Rule::Repeat(inner) => rule_is_referenced(inner, target, false),
148 Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
149 }
150}
151
152fn variable_is_used(
153 grammar_rules: &[(String, Rule)],
154 extras: &[Rule],
155 externals: &[Rule],
156 target_name: &str,
157 in_progress: &mut HashSet<String>,
158) -> bool {
159 let root = &grammar_rules.first().unwrap().0;
160 if target_name == root {
161 return true;
162 }
163
164 if extras
165 .iter()
166 .any(|rule| rule_is_referenced(rule, target_name, false))
167 {
168 return true;
169 }
170
171 if externals
172 .iter()
173 .any(|rule| rule_is_referenced(rule, target_name, true))
174 {
175 return true;
176 }
177
178 in_progress.insert(target_name.to_string());
179 let result = grammar_rules
180 .iter()
181 .filter(|(key, _)| *key != target_name)
182 .any(|(name, rule)| {
183 if !rule_is_referenced(rule, target_name, false) || in_progress.contains(name) {
184 return false;
185 }
186 variable_is_used(grammar_rules, extras, externals, name, in_progress)
187 });
188 in_progress.remove(target_name);
189
190 result
191}
192
193pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
194 let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
195
196 let mut extra_symbols =
197 grammar_json
198 .extras
199 .into_iter()
200 .try_fold(Vec::<Rule>::new(), |mut acc, item| {
201 let rule = parse_rule(item, false)?;
202 if let Rule::String(ref value) = rule {
203 if value.is_empty() {
204 Err(ParseGrammarError::InvalidExtra)?;
205 }
206 }
207 acc.push(rule);
208 ParseGrammarResult::Ok(acc)
209 })?;
210
211 let mut external_tokens = grammar_json
212 .externals
213 .into_iter()
214 .map(|e| parse_rule(e, false))
215 .collect::<ParseGrammarResult<Vec<_>>>()?;
216
217 let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
218 for list in grammar_json.precedences {
219 let mut ordering = Vec::with_capacity(list.len());
220 for entry in list {
221 ordering.push(match entry {
222 RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
223 RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
224 _ => Err(ParseGrammarError::Unexpected)?,
225 });
226 }
227 precedence_orderings.push(ordering);
228 }
229
230 let mut variables = Vec::with_capacity(grammar_json.rules.len());
231
232 let rules = grammar_json
233 .rules
234 .into_iter()
235 .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?, false)?)))
236 .collect::<ParseGrammarResult<Vec<_>>>()?;
237
238 let mut in_progress = HashSet::new();
239
240 for (name, rule) in &rules {
241 if !variable_is_used(
242 &rules,
243 &extra_symbols,
244 &external_tokens,
245 name,
246 &mut in_progress,
247 ) && grammar_json.word.as_ref().is_none_or(|w| w != name)
248 {
249 grammar_json.conflicts.retain(|r| !r.contains(name));
250 grammar_json.supertypes.retain(|r| r != name);
251 grammar_json.inline.retain(|r| r != name);
252 extra_symbols.retain(|r| !rule_is_referenced(r, name, true));
253 external_tokens.retain(|r| !rule_is_referenced(r, name, true));
254 precedence_orderings.retain(|r| {
255 !r.iter().any(|e| {
256 let PrecedenceEntry::Symbol(s) = e else {
257 return false;
258 };
259 s == name
260 })
261 });
262 continue;
263 }
264 variables.push(Variable {
265 name: name.clone(),
266 kind: VariableType::Named,
267 rule: rule.clone(),
268 });
269 }
270
271 let reserved_words = grammar_json
272 .reserved
273 .into_iter()
274 .map(|(name, rule_values)| {
275 let mut reserved_words = Vec::new();
276
277 let Value::Array(rule_values) = rule_values else {
278 Err(ParseGrammarError::InvalidReservedWordSet)?
279 };
280
281 for value in rule_values {
282 reserved_words.push(parse_rule(serde_json::from_value(value)?, false)?);
283 }
284 Ok(ReservedWordContext {
285 name,
286 reserved_words,
287 })
288 })
289 .collect::<ParseGrammarResult<Vec<_>>>()?;
290
291 Ok(InputGrammar {
292 name: grammar_json.name,
293 word_token: grammar_json.word,
294 expected_conflicts: grammar_json.conflicts,
295 supertype_symbols: grammar_json.supertypes,
296 variables_to_inline: grammar_json.inline,
297 precedence_orderings,
298 variables,
299 extra_symbols,
300 external_tokens,
301 reserved_words,
302 })
303}
304
305fn parse_rule(json: RuleJSON, is_token: bool) -> ParseGrammarResult<Rule> {
306 match json {
307 RuleJSON::ALIAS {
308 content,
309 value,
310 named,
311 } => parse_rule(*content, is_token).map(|r| Rule::alias(r, value, named)),
312 RuleJSON::BLANK => Ok(Rule::Blank),
313 RuleJSON::STRING { value } => Ok(Rule::String(value)),
314 RuleJSON::PATTERN { value, flags } => Ok(Rule::Pattern(
315 value,
316 flags.map_or(String::new(), |f| {
317 f.matches(|c| {
318 if c == 'i' {
319 true
320 } else {
321 if c != 'u' && c != 'v' {
323 eprintln!("Warning: unsupported flag {c}");
324 }
325 false
326 }
327 })
328 .collect()
329 }),
330 )),
331 RuleJSON::SYMBOL { name } => {
332 if is_token {
333 Err(ParseGrammarError::UnexpectedRule(name))?
334 } else {
335 Ok(Rule::NamedSymbol(name))
336 }
337 }
338 RuleJSON::CHOICE { members } => members
339 .into_iter()
340 .map(|m| parse_rule(m, is_token))
341 .collect::<ParseGrammarResult<Vec<_>>>()
342 .map(Rule::choice),
343 RuleJSON::FIELD { content, name } => {
344 parse_rule(*content, is_token).map(|r| Rule::field(name, r))
345 }
346 RuleJSON::SEQ { members } => members
347 .into_iter()
348 .map(|m| parse_rule(m, is_token))
349 .collect::<ParseGrammarResult<Vec<_>>>()
350 .map(Rule::seq),
351 RuleJSON::REPEAT1 { content } => parse_rule(*content, is_token).map(Rule::repeat),
352 RuleJSON::REPEAT { content } => {
353 parse_rule(*content, is_token).map(|m| Rule::choice(vec![Rule::repeat(m), Rule::Blank]))
354 }
355 RuleJSON::PREC { value, content } => {
356 parse_rule(*content, is_token).map(|r| Rule::prec(value.into(), r))
357 }
358 RuleJSON::PREC_LEFT { value, content } => {
359 parse_rule(*content, is_token).map(|r| Rule::prec_left(value.into(), r))
360 }
361 RuleJSON::PREC_RIGHT { value, content } => {
362 parse_rule(*content, is_token).map(|r| Rule::prec_right(value.into(), r))
363 }
364 RuleJSON::PREC_DYNAMIC { value, content } => {
365 parse_rule(*content, is_token).map(|r| Rule::prec_dynamic(value, r))
366 }
367 RuleJSON::RESERVED {
368 content,
369 context_name,
370 } => parse_rule(*content, is_token).map(|r| Rule::Reserved {
371 rule: Box::new(r),
372 context_name,
373 }),
374 RuleJSON::TOKEN { content } => parse_rule(*content, true).map(Rule::token),
375 RuleJSON::IMMEDIATE_TOKEN { content } => {
376 parse_rule(*content, is_token).map(Rule::immediate_token)
377 }
378 }
379}
380
381impl From<PrecedenceValueJSON> for Precedence {
382 fn from(val: PrecedenceValueJSON) -> Self {
383 match val {
384 PrecedenceValueJSON::Integer(i) => Self::Integer(i),
385 PrecedenceValueJSON::Name(i) => Self::Name(i),
386 }
387 }
388}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Variable` that `parse_grammar` is expected to produce for a kept rule.
    fn named_variable(name: &str, rule: Rule) -> Variable {
        Variable {
            name: name.to_string(),
            kind: VariableType::Named,
            rule,
        }
    }

    /// A two-rule grammar parses into two named variables, in document order.
    #[test]
    fn test_parse_grammar() {
        let source = r#"{
            "name": "my_lang",
            "rules": {
                "file": {
                    "type": "REPEAT1",
                    "content": {
                        "type": "SYMBOL",
                        "name": "statement"
                    }
                },
                "statement": {
                    "type": "STRING",
                    "value": "foo"
                }
            }
        }"#;

        let grammar = parse_grammar(source).unwrap();

        assert_eq!(grammar.name, "my_lang");

        let expected = vec![
            named_variable(
                "file",
                Rule::repeat(Rule::NamedSymbol("statement".to_string())),
            ),
            named_variable("statement", Rule::String("foo".to_string())),
        ];
        assert_eq!(grammar.variables, expected);
    }
}