1use std::{
2 env, fs,
3 io::Write,
4 path::{Path, PathBuf},
5 process::{Command, Stdio},
6 sync::LazyLock,
7};
8
9use anyhow::Result;
10use regex::{Regex, RegexBuilder};
11use semver::Version;
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15mod build_tables;
16mod dedup;
17mod grammar_files;
18mod grammars;
19mod nfa;
20mod node_types;
21pub mod parse_grammar;
22mod prepare_grammar;
23mod render;
24mod rules;
25mod tables;
26
27use build_tables::build_tables;
28pub use build_tables::ParseTableBuilderError;
29use grammars::InputGrammar;
30pub use node_types::VariableInfoError;
31use parse_grammar::parse_grammar;
32pub use parse_grammar::ParseGrammarError;
33use prepare_grammar::prepare_grammar;
34pub use prepare_grammar::PrepareGrammarError;
35use render::render_c_code;
36pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN};
37
38static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
39 RegexBuilder::new("^\\s*//.*")
40 .multi_line(true)
41 .build()
42 .unwrap()
43});
44
/// The textual artifacts produced by a single parser-generation run.
struct GeneratedParser {
    /// C source returned by `render_c_code`.
    c_code: String,
    /// Pretty-printed JSON serialization of the generated node types.
    node_types_json: String,
}
49
/// Contents of `templates/alloc.h`, embedded at compile time.
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
/// Contents of `templates/array.h`, embedded at compile time.
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
52
/// Result alias whose error type is [`GenerateError`].
pub type GenerateResult<T> = Result<T, GenerateError>;

/// Top-level error returned by the parser-generation entry points.
///
/// I/O failures are stored as their formatted message rather than as `std::io::Error`.
/// NOTE(review): presumably so the enum can derive `Serialize` — confirm.
#[derive(Debug, Error, Serialize)]
pub enum GenerateError {
    /// Checking whether the user-supplied grammar path exists failed.
    #[error("Error with specified path -- {0}")]
    GrammarPath(String),
    /// An I/O operation failed; holds the already-formatted message.
    #[error("{0}")]
    IO(String),
    /// The grammar file could not be loaded.
    #[error(transparent)]
    LoadGrammarFile(#[from] LoadGrammarError),
    /// The grammar JSON could not be parsed into an input grammar.
    #[error(transparent)]
    ParseGrammar(#[from] ParseGrammarError),
    /// Error reported by `prepare_grammar`.
    #[error(transparent)]
    Prepare(#[from] PrepareGrammarError),
    /// Error reported while computing variable info for node types.
    #[error(transparent)]
    VariableInfo(#[from] VariableInfoError),
    /// Error reported by `build_tables`.
    #[error(transparent)]
    BuildTables(#[from] ParseTableBuilderError),
    /// The grammar version in `tree-sitter.json` could not be read or parsed.
    #[error(transparent)]
    ParseVersion(#[from] ParseVersionError),
}
74
75impl From<std::io::Error> for GenerateError {
76 fn from(value: std::io::Error) -> Self {
77 Self::IO(value.to_string())
78 }
79}
80
/// Result alias whose error type is [`LoadGrammarError`].
pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;

/// Errors raised while loading a grammar file from disk.
#[derive(Debug, Error, Serialize)]
pub enum LoadGrammarError {
    /// The supplied path points at a directory instead of a grammar file.
    #[error("Path to a grammar file with `.js` or `.json` extension is required")]
    InvalidPath,
    /// Evaluating a `.js` grammar with the JavaScript runtime failed.
    #[error("Failed to load grammar.js -- {0}")]
    LoadJSGrammarFile(#[from] JSError),
    /// An I/O error occurred (produced from `std::io::Error`); holds the formatted message.
    #[error("Failed to load grammar.json -- {0}")]
    IO(String),
    /// The file extension is neither `js` nor `json`; holds the offending path.
    #[error("Unknown grammar file extension: {0:?}")]
    FileExtension(PathBuf),
}
94
95impl From<std::io::Error> for LoadGrammarError {
96 fn from(value: std::io::Error) -> Self {
97 Self::IO(value.to_string())
98 }
99}
100
/// Errors raised while reading the grammar version from `tree-sitter.json`.
///
/// Every variant carries a fully formatted, user-facing message.
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
    /// The version string is not valid semver.
    #[error("{0}")]
    Version(String),
    /// The file contents could not be deserialized as JSON of the expected shape.
    #[error("{0}")]
    JSON(String),
    /// The file could not be read.
    #[error("{0}")]
    IO(String),
}
110
111pub type JSResult<T> = Result<T, JSError>;
112
113#[derive(Debug, Error, Serialize)]
114pub enum JSError {
115 #[error("Failed to run `{runtime}` -- {error}")]
116 JSRuntimeSpawn { runtime: String, error: String },
117 #[error("Got invalid UTF8 from `{runtime}` -- {error}")]
118 JSRuntimeUtf8 { runtime: String, error: String },
119 #[error("`{runtime}` process exited with status {code}")]
120 JSRuntimeExit { runtime: String, code: i32 },
121 #[error("{0}")]
122 IO(String),
123 #[error("Could not parse this package's version as semver -- {0}")]
124 Semver(String),
125 #[error("Failed to serialze grammar JSON -- {0}")]
126 Serialzation(String),
127}
128
129impl From<std::io::Error> for JSError {
130 fn from(value: std::io::Error) -> Self {
131 Self::IO(value.to_string())
132 }
133}
134
135impl From<serde_json::Error> for JSError {
136 fn from(value: serde_json::Error) -> Self {
137 Self::Serialzation(value.to_string())
138 }
139}
140
141impl From<semver::Error> for JSError {
142 fn from(value: semver::Error) -> Self {
143 Self::Semver(value.to_string())
144 }
145}
146
147pub fn generate_parser_in_directory(
148 repo_path: &Path,
149 out_path: Option<&str>,
150 grammar_path: Option<&str>,
151 mut abi_version: usize,
152 report_symbol_name: Option<&str>,
153 js_runtime: Option<&str>,
154) -> GenerateResult<()> {
155 let mut repo_path = repo_path.to_owned();
156 let mut grammar_path = grammar_path;
157
158 if let Some(path) = grammar_path {
160 let path = PathBuf::from(path);
161 if !path
162 .try_exists()
163 .map_err(|e| GenerateError::GrammarPath(e.to_string()))?
164 {
165 fs::create_dir_all(&path)?;
166 grammar_path = None;
167 repo_path = path;
168 }
169 }
170
171 let grammar_path = grammar_path.map_or_else(|| repo_path.join("grammar.js"), PathBuf::from);
172
173 let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;
175
176 let src_path = out_path.map_or_else(|| repo_path.join("src"), PathBuf::from);
177 let header_path = src_path.join("tree_sitter");
178
179 fs::create_dir_all(&src_path)?;
181 fs::create_dir_all(&header_path)?;
182
183 if grammar_path.file_name().unwrap() != "grammar.json" {
184 fs::write(src_path.join("grammar.json"), &grammar_json).map_err(|e| {
185 GenerateError::IO(format!(
186 "Failed to write grammar.json to {src_path:?} -- {e}"
187 ))
188 })?;
189 }
190
191 let input_grammar = parse_grammar(&grammar_json)?;
193
194 let semantic_version = read_grammar_version(&repo_path)?;
195
196 if semantic_version.is_none() && abi_version > ABI_VERSION_MIN {
197 println!("Warning: No `tree-sitter.json` file found in your grammar, this file is required to generate with ABI {abi_version}. Using ABI version {ABI_VERSION_MIN} instead.");
198 println!("This file can be set up with `tree-sitter init`. For more information, see https://tree-sitter.github.io/tree-sitter/cli/init.");
199 abi_version = ABI_VERSION_MIN;
200 }
201
202 let GeneratedParser {
204 c_code,
205 node_types_json,
206 } = generate_parser_for_grammar_with_opts(
207 &input_grammar,
208 abi_version,
209 semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
210 report_symbol_name,
211 )?;
212
213 write_file(&src_path.join("parser.c"), c_code)?;
214 write_file(&src_path.join("node-types.json"), node_types_json)?;
215 write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
216 write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
217 write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
218
219 Ok(())
220}
221
222pub fn generate_parser_for_grammar(
223 grammar_json: &str,
224 semantic_version: Option<(u8, u8, u8)>,
225) -> GenerateResult<(String, String)> {
226 let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
227 let input_grammar = parse_grammar(&grammar_json)?;
228 let parser = generate_parser_for_grammar_with_opts(
229 &input_grammar,
230 tree_sitter::LANGUAGE_VERSION,
231 semantic_version,
232 None,
233 )?;
234 Ok((input_grammar.name, parser.c_code))
235}
236
237fn generate_parser_for_grammar_with_opts(
238 input_grammar: &InputGrammar,
239 abi_version: usize,
240 semantic_version: Option<(u8, u8, u8)>,
241 report_symbol_name: Option<&str>,
242) -> GenerateResult<GeneratedParser> {
243 let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
244 prepare_grammar(input_grammar)?;
245 let variable_info =
246 node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
247 let node_types_json = node_types::generate_node_types_json(
248 &syntax_grammar,
249 &lexical_grammar,
250 &simple_aliases,
251 &variable_info,
252 );
253 let supertype_symbol_map =
254 node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info);
255 let tables = build_tables(
256 &syntax_grammar,
257 &lexical_grammar,
258 &simple_aliases,
259 &variable_info,
260 &inlines,
261 report_symbol_name,
262 )?;
263 let c_code = render_c_code(
264 &input_grammar.name,
265 tables,
266 syntax_grammar,
267 lexical_grammar,
268 simple_aliases,
269 abi_version,
270 semantic_version,
271 supertype_symbol_map,
272 );
273 Ok(GeneratedParser {
274 c_code,
275 node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
276 })
277}
278
279fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
285 #[derive(Deserialize)]
286 struct TreeSitterJson {
287 metadata: Metadata,
288 }
289
290 #[derive(Deserialize)]
291 struct Metadata {
292 version: String,
293 }
294
295 let filename = "tree-sitter.json";
296 let mut path = repo_path.join(filename);
297
298 loop {
299 let json = path
300 .exists()
301 .then(|| {
302 let contents = fs::read_to_string(path.as_path()).map_err(|e| {
303 ParseVersionError::IO(format!("Failed to read `{}` -- {e}", path.display()))
304 })?;
305 serde_json::from_str::<TreeSitterJson>(&contents).map_err(|e| {
306 ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display()))
307 })
308 })
309 .transpose()?;
310 if let Some(json) = json {
311 return Version::parse(&json.metadata.version)
312 .map_err(|e| {
313 ParseVersionError::Version(format!(
314 "Failed to parse `{}` version as semver -- {e}",
315 path.display()
316 ))
317 })
318 .map(Some);
319 }
320 path.pop(); if !path.pop() {
322 return Ok(None);
323 }
324 path.push(filename);
325 }
326}
327
328pub fn load_grammar_file(
329 grammar_path: &Path,
330 js_runtime: Option<&str>,
331) -> LoadGrammarFileResult<String> {
332 if grammar_path.is_dir() {
333 Err(LoadGrammarError::InvalidPath)?;
334 }
335 match grammar_path.extension().and_then(|e| e.to_str()) {
336 Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)?),
337 Some("json") => Ok(fs::read_to_string(grammar_path)?),
338 _ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned()))?,
339 }
340}
341
342fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
343 let grammar_path = fs::canonicalize(grammar_path)?;
344
345 #[cfg(windows)]
346 let grammar_path = url::Url::from_file_path(grammar_path)
347 .expect("Failed to convert path to URL")
348 .to_string();
349
350 let js_runtime = js_runtime.unwrap_or("node");
351
352 let mut js_command = Command::new(js_runtime);
353 match js_runtime {
354 "node" => {
355 js_command.args(["--input-type=module", "-"]);
356 }
357 "bun" => {
358 js_command.arg("-");
359 }
360 "deno" => {
361 js_command.args(["run", "--allow-all", "-"]);
362 }
363 _ => {}
364 }
365
366 let mut js_process = js_command
367 .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
368 .stdin(Stdio::piped())
369 .stdout(Stdio::piped())
370 .spawn()
371 .map_err(|e| JSError::JSRuntimeSpawn {
372 runtime: js_runtime.to_string(),
373 error: e.to_string(),
374 })?;
375
376 let mut js_stdin = js_process
377 .stdin
378 .take()
379 .ok_or_else(|| JSError::IO(format!("Failed to open stdin for `{js_runtime}`")))?;
380
381 let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
382 write!(
383 js_stdin,
384 "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
385 globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
386 globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
387 cli_version.major, cli_version.minor, cli_version.patch,
388 )
389 .map_err(|e| {
390 JSError::IO(format!(
391 "Failed to write tree-sitter version to `{js_runtime}`'s stdin -- {e}"
392 ))
393 })?;
394 js_stdin.write(include_bytes!("./dsl.js")).map_err(|e| {
395 JSError::IO(format!(
396 "Failed to write grammar dsl to `{js_runtime}`'s stdin -- {e}"
397 ))
398 })?;
399 drop(js_stdin);
400
401 let output = js_process
402 .wait_with_output()
403 .map_err(|e| JSError::IO(format!("Failed to read output from `{js_runtime}` -- {e}")))?;
404 match output.status.code() {
405 None => panic!("`{js_runtime}` process was killed"),
406 Some(0) => {
407 let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
408 runtime: js_runtime.to_string(),
409 error: e.to_string(),
410 })?;
411
412 let mut grammar_json = &stdout[..];
413
414 if let Some(pos) = stdout.rfind('\n') {
415 let node_output = &stdout[..pos];
417 grammar_json = &stdout[pos + 1..];
418
419 let mut stdout = std::io::stdout().lock();
420 stdout.write_all(node_output.as_bytes())?;
421 stdout.write_all(b"\n")?;
422 stdout.flush()?;
423 }
424
425 Ok(serde_json::to_string_pretty(&serde_json::from_str::<
426 serde_json::Value,
427 >(grammar_json)?)?)
428 }
429 Some(code) => Err(JSError::JSRuntimeExit {
430 runtime: js_runtime.to_string(),
431 code,
432 }),
433 }
434}
435
436pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
437 fs::write(path, body)
438 .map_err(|e| GenerateError::IO(format!("Failed to write {:?} -- {e}", path.file_name())))
439}