1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14 fuzz::{
15 corpus_test::{
16 check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17 },
18 edits::{get_random_edit, invert_edit},
19 random::Rand,
20 },
21 parse::perform_edit,
22 test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
/// `true` when the `TREE_SITTER_LOG` environment variable is set to a
/// valid Unicode value (the value itself is not inspected).
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable is set to a
/// valid Unicode value (the value itself is not inspected).
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Contents of the `TREE_SITTER_LANGUAGE` environment variable, if it is set.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from `TREE_SITTER_EXAMPLE_INCLUDE`; `None` when the variable
/// is unset or does not hold a valid regular expression.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from `TREE_SITTER_EXAMPLE_EXCLUDE`; `None` when the variable
/// is unset or does not hold a valid regular expression.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed obtained from [`new_seed`] the first time this static is accessed.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Value of `TREE_SITTER_EDITS`, falling back to `3` when the variable is
/// unset or not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Value of `TREE_SITTER_ITERATIONS`, falling back to `10` when the variable
/// is unset or not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as a `usize`. A value that is present but unparsable is reported
/// on stderr so that a typo in a setting is not silently replaced by the
/// caller's default.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    match raw.parse::<usize>() {
        Ok(value) => Some(value),
        Err(err) => {
            eprintln!("Ignoring {name}: {raw:?} is not a valid unsigned integer ({err})");
            None
        }
    }
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52 env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57 int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58 let mut rng = rand::thread_rng();
59 let seed = rng.gen::<usize>();
60 eprintln!("Seed: {seed}");
61 seed
62 })
63}
64
/// Settings consumed by [`fuzz_language_corpus`].
pub struct FuzzOptions {
    /// Full test names (`"<language name> - <test name>"`) to skip.
    /// `fuzz_language_corpus` takes this value, leaving `None` behind, and
    /// panics if any entry never matches a test.
    pub skipped: Option<Vec<String>>,
    /// Optional path segment inserted between the grammar directory and
    /// `test/corpus`. Taken (left as `None`) by `fuzz_language_corpus`.
    pub subdir: Option<String>,
    /// Presumably the upper bound on random edits applied per trial.
    /// NOTE(review): confirm how `fuzz_language_corpus` consumes this field.
    pub edits: usize,
    /// Number of fuzzing trials run for each corpus test.
    pub iterations: usize,
    /// When set, only tests whose flattened name matches are fuzzed.
    pub include: Option<Regex>,
    /// When set (and `include` is not), tests whose flattened name matches
    /// are left out.
    pub exclude: Option<Regex>,
    /// When `true`, the test input is echoed to stderr before the edits,
    /// after the edits, and after the edits are undone.
    pub log_graphs: bool,
    /// Not read by the code in this file.
    /// NOTE(review): presumably enables parser logging elsewhere; confirm.
    pub log: bool,
}
75
76pub fn fuzz_language_corpus(
77 language: &Language,
78 language_name: &str,
79 start_seed: usize,
80 grammar_dir: &Path,
81 options: &mut FuzzOptions,
82) {
83 fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
84 match entry {
85 TestEntry::Example { attributes, .. } => {
86 attributes.languages[0].is_empty()
87 || attributes
88 .languages
89 .iter()
90 .any(|lang| lang.as_ref() == language_name)
91 }
92 TestEntry::Group {
93 ref mut children, ..
94 } => {
95 children.retain_mut(|child| retain(child, language_name));
96 !children.is_empty()
97 }
98 }
99 }
100
101 let subdir = options.subdir.take().unwrap_or_default();
102
103 let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
104
105 if !corpus_dir.exists() || !corpus_dir.is_dir() {
106 eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
107 return;
108 }
109
110 if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
111 eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
112 return;
113 }
114
115 let mut main_tests = parse_tests(&corpus_dir).unwrap();
116 match main_tests {
117 TestEntry::Group {
118 ref mut children, ..
119 } => {
120 children.retain_mut(|child| retain(child, language_name));
121 }
122 TestEntry::Example { .. } => unreachable!(),
123 }
124 let tests = flatten_tests(
125 main_tests,
126 options.include.as_ref(),
127 options.exclude.as_ref(),
128 );
129
130 let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
131
132 let mut skipped = options
133 .skipped
134 .take()
135 .unwrap_or_default()
136 .into_iter()
137 .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
138 .map(|x| (x, 0))
139 .collect::<HashMap<String, usize>>();
140
141 let mut failure_count = 0;
142
143 let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
144 let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
145
146 if log_seed {
147 println!(" start seed: {start_seed}");
148 }
149
150 println!();
151 for (test_index, test) in tests.iter().enumerate() {
152 let test_name = get_test_name(test);
153 if let Some(counter) = skipped.get_mut(test_name.as_str()) {
154 println!(" {test_index}. {test_name} - SKIPPED");
155 *counter += 1;
156 continue;
157 }
158
159 println!(" {test_index}. {test_name}");
160
161 let passed = allocations::record(|| {
162 let mut log_session = None;
163 let mut parser = get_parser(&mut log_session, "log.html");
164 parser.set_language(language).unwrap();
165 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
166
167 let tree = parser.parse(&test.input, None).unwrap();
168
169 if test.error {
170 return true;
171 }
172
173 let mut actual_output = tree.root_node().to_sexp();
174 if !test.has_fields {
175 actual_output = strip_sexp_fields(&actual_output);
176 }
177
178 if actual_output != test.output {
179 println!("Incorrect initial parse for {test_name}");
180 print_diff_key();
181 print_diff(&actual_output, &test.output, true);
182 println!();
183 return false;
184 }
185
186 true
187 })
188 .unwrap_or_else(|e| {
189 eprintln!("Error: {e}");
190 false
191 });
192
193 if !passed {
194 failure_count += 1;
195 continue;
196 }
197
198 let mut parser = Parser::new();
199 parser.set_language(language).unwrap();
200 let tree = parser.parse(&test.input, None).unwrap();
201 drop(parser);
202
203 for trial in 0..options.iterations {
204 let seed = start_seed + trial;
205 let passed = allocations::record(|| {
206 let mut rand = Rand::new(seed);
207 let mut log_session = None;
208 let mut parser = get_parser(&mut log_session, "log.html");
209 parser.set_language(language).unwrap();
210 let mut tree = tree.clone();
211 let mut input = test.input.clone();
212
213 if options.log_graphs {
214 eprintln!("{}\n", String::from_utf8_lossy(&input));
215 }
216
217 let mut undo_stack = Vec::new();
219 for _ in 0..=rand.unsigned(*EDIT_COUNT) {
220 let edit = get_random_edit(&mut rand, &input);
221 undo_stack.push(invert_edit(&input, &edit));
222 perform_edit(&mut tree, &mut input, &edit).unwrap();
223 }
224
225 if log_seed {
226 println!(" {test_index}.{trial:<2} seed: {seed}");
227 }
228
229 if dump_edits {
230 fs::create_dir_all("fuzz").unwrap();
231 fs::write(
232 Path::new("fuzz")
233 .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
234 &input,
235 )
236 .unwrap();
237 }
238
239 if options.log_graphs {
240 eprintln!("{}\n", String::from_utf8_lossy(&input));
241 }
242
243 set_included_ranges(&mut parser, &input, test.template_delimiters);
244 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
245
246 check_consistent_sizes(&tree2, &input);
248 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
249 println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
250 return false;
251 }
252
253 while let Some(edit) = undo_stack.pop() {
255 perform_edit(&mut tree2, &mut input, &edit).unwrap();
256 }
257 if options.log_graphs {
258 eprintln!("{}\n", String::from_utf8_lossy(&input));
259 }
260
261 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
262 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
263
264 let mut actual_output = tree3.root_node().to_sexp();
266 if !test.has_fields {
267 actual_output = strip_sexp_fields(&actual_output);
268 }
269
270 if actual_output != test.output && !test.error {
271 println!("Incorrect parse for {test_name} - seed {seed}");
272 print_diff_key();
273 print_diff(&actual_output, &test.output, true);
274 println!();
275 return false;
276 }
277
278 check_consistent_sizes(&tree3, &input);
280 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
281 println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
282 return false;
283 }
284
285 true
286 }).unwrap_or_else(|e| {
287 eprintln!("Error: {e}");
288 false
289 });
290
291 if !passed {
292 failure_count += 1;
293 break;
294 }
295 }
296 }
297
298 if failure_count != 0 {
299 eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
300 }
301
302 skipped.retain(|_, v| *v == 0);
303
304 if !skipped.is_empty() {
305 println!("Non matchable skip definitions:");
306 for k in skipped.keys() {
307 println!(" {k}");
308 }
309 panic!("Non matchable skip definitions needs to be removed");
310 }
311}
312
/// A single corpus example produced by [`flatten_tests`], detached from the
/// group hierarchy it was declared in.
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined
    /// by `" - "`.
    pub name: String,
    /// Source bytes to parse.
    pub input: Vec<u8>,
    /// Expected S-expression of the parse tree.
    pub output: String,
    /// Languages listed in the example's attributes.
    pub languages: Vec<Box<str>>,
    /// `error` flag from the example's attributes; when set, the fuzzer does
    /// not compare the parse result with `output`.
    pub error: bool,
    /// `skip` flag from the example's attributes.
    pub skip: bool,
    /// Whether `output` includes field names; when `false`, field names are
    /// stripped from the actual output before comparison.
    pub has_fields: bool,
    /// Delimiter pair handed to `set_included_ranges`; `flatten_tests` always
    /// sets this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
323
324#[must_use]
325pub fn flatten_tests(
326 test: TestEntry,
327 include: Option<&Regex>,
328 exclude: Option<&Regex>,
329) -> Vec<FlattenedTest> {
330 fn helper(
331 test: TestEntry,
332 include: Option<&Regex>,
333 exclude: Option<&Regex>,
334 is_root: bool,
335 prefix: &str,
336 result: &mut Vec<FlattenedTest>,
337 ) {
338 match test {
339 TestEntry::Example {
340 mut name,
341 input,
342 output,
343 has_fields,
344 attributes,
345 ..
346 } => {
347 if !prefix.is_empty() {
348 name.insert_str(0, " - ");
349 name.insert_str(0, prefix);
350 }
351
352 if let Some(include) = include {
353 if !include.is_match(&name) {
354 return;
355 }
356 } else if let Some(exclude) = exclude {
357 if exclude.is_match(&name) {
358 return;
359 }
360 }
361
362 result.push(FlattenedTest {
363 name,
364 input,
365 output,
366 has_fields,
367 languages: attributes.languages,
368 error: attributes.error,
369 skip: attributes.skip,
370 template_delimiters: None,
371 });
372 }
373 TestEntry::Group {
374 mut name, children, ..
375 } => {
376 if !is_root && !prefix.is_empty() {
377 name.insert_str(0, " - ");
378 name.insert_str(0, prefix);
379 }
380 for child in children {
381 helper(child, include, exclude, false, &name, result);
382 }
383 }
384 }
385 }
386 let mut result = Vec::new();
387 helper(test, include, exclude, true, "", &mut result);
388 result
389}