tree_sitter_cli/fuzz/
mod.rs

1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14    fuzz::{
15        corpus_test::{
16            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17        },
18        edits::{get_random_edit, invert_edit},
19        random::Rand,
20    },
21    parse::perform_edit,
22    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
25pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
26
27pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
28    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
29
30pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
31    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
32
33pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
34    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
35
36pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
37    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
38
39pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
40
41pub static EDIT_COUNT: LazyLock<usize> =
42    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));
43
44pub static ITERATION_COUNT: LazyLock<usize> =
45    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as an unsigned integer. A value that is present but unparsable
/// is reported on stderr so that a typo does not silently fall back to the
/// caller's default.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    match raw.parse() {
        Ok(value) => Some(value),
        Err(error) => {
            eprintln!("Ignoring invalid value {raw:?} for {name}: {error}");
            None
        }
    }
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58        let mut rng = rand::thread_rng();
59        let seed = rng.gen::<usize>();
60        eprintln!("Seed: {seed}");
61        seed
62    })
63}
64
65pub struct FuzzOptions {
66    pub skipped: Option<Vec<String>>,
67    pub subdir: Option<String>,
68    pub edits: usize,
69    pub iterations: usize,
70    pub include: Option<Regex>,
71    pub exclude: Option<Regex>,
72    pub log_graphs: bool,
73    pub log: bool,
74}
75
76pub fn fuzz_language_corpus(
77    language: &Language,
78    language_name: &str,
79    start_seed: usize,
80    grammar_dir: &Path,
81    options: &mut FuzzOptions,
82) {
83    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
84        match entry {
85            TestEntry::Example { attributes, .. } => {
86                attributes.languages[0].is_empty()
87                    || attributes
88                        .languages
89                        .iter()
90                        .any(|lang| lang.as_ref() == language_name)
91            }
92            TestEntry::Group {
93                ref mut children, ..
94            } => {
95                children.retain_mut(|child| retain(child, language_name));
96                !children.is_empty()
97            }
98        }
99    }
100
101    let subdir = options.subdir.take().unwrap_or_default();
102
103    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
104
105    if !corpus_dir.exists() || !corpus_dir.is_dir() {
106        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
107        return;
108    }
109
110    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
111        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
112        return;
113    }
114
115    let mut main_tests = parse_tests(&corpus_dir).unwrap();
116    match main_tests {
117        TestEntry::Group {
118            ref mut children, ..
119        } => {
120            children.retain_mut(|child| retain(child, language_name));
121        }
122        TestEntry::Example { .. } => unreachable!(),
123    }
124    let tests = flatten_tests(
125        main_tests,
126        options.include.as_ref(),
127        options.exclude.as_ref(),
128    );
129
130    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
131
132    let mut skipped = options
133        .skipped
134        .take()
135        .unwrap_or_default()
136        .into_iter()
137        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
138        .map(|x| (x, 0))
139        .collect::<HashMap<String, usize>>();
140
141    let mut failure_count = 0;
142
143    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
144    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
145
146    if log_seed {
147        println!("  start seed: {start_seed}");
148    }
149
150    println!();
151    for (test_index, test) in tests.iter().enumerate() {
152        let test_name = get_test_name(test);
153        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
154            println!("  {test_index}. {test_name} - SKIPPED");
155            *counter += 1;
156            continue;
157        }
158
159        println!("  {test_index}. {test_name}");
160
161        let passed = allocations::record(|| {
162            let mut log_session = None;
163            let mut parser = get_parser(&mut log_session, "log.html");
164            parser.set_language(language).unwrap();
165            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
166
167            let tree = parser.parse(&test.input, None).unwrap();
168
169            if test.error {
170                return true;
171            }
172
173            let mut actual_output = tree.root_node().to_sexp();
174            if !test.has_fields {
175                actual_output = strip_sexp_fields(&actual_output);
176            }
177
178            if actual_output != test.output {
179                println!("Incorrect initial parse for {test_name}");
180                print_diff_key();
181                print_diff(&actual_output, &test.output, true);
182                println!();
183                return false;
184            }
185
186            true
187        })
188        .unwrap_or_else(|e| {
189            eprintln!("Error: {e}");
190            false
191        });
192
193        if !passed {
194            failure_count += 1;
195            continue;
196        }
197
198        let mut parser = Parser::new();
199        parser.set_language(language).unwrap();
200        let tree = parser.parse(&test.input, None).unwrap();
201        drop(parser);
202
203        for trial in 0..options.iterations {
204            let seed = start_seed + trial;
205            let passed = allocations::record(|| {
206                let mut rand = Rand::new(seed);
207                let mut log_session = None;
208                let mut parser = get_parser(&mut log_session, "log.html");
209                parser.set_language(language).unwrap();
210                let mut tree = tree.clone();
211                let mut input = test.input.clone();
212
213                if options.log_graphs {
214                    eprintln!("{}\n", String::from_utf8_lossy(&input));
215                }
216
217                // Perform a random series of edits and reparse.
218                let mut undo_stack = Vec::new();
219                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
220                    let edit = get_random_edit(&mut rand, &input);
221                    undo_stack.push(invert_edit(&input, &edit));
222                    perform_edit(&mut tree, &mut input, &edit).unwrap();
223                }
224
225                if log_seed {
226                    println!("   {test_index}.{trial:<2} seed: {seed}");
227                }
228
229                if dump_edits {
230                    fs::create_dir_all("fuzz").unwrap();
231                    fs::write(
232                        Path::new("fuzz")
233                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
234                        &input,
235                    )
236                    .unwrap();
237                }
238
239                if options.log_graphs {
240                    eprintln!("{}\n", String::from_utf8_lossy(&input));
241                }
242
243                set_included_ranges(&mut parser, &input, test.template_delimiters);
244                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
245
246                // Check that the new tree is consistent.
247                check_consistent_sizes(&tree2, &input);
248                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
249                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
250                    return false;
251                }
252
253                // Undo all of the edits and re-parse again.
254                while let Some(edit) = undo_stack.pop() {
255                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
256                }
257                if options.log_graphs {
258                    eprintln!("{}\n", String::from_utf8_lossy(&input));
259                }
260
261                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
262                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
263
264                // Verify that the final tree matches the expectation from the corpus.
265                let mut actual_output = tree3.root_node().to_sexp();
266                if !test.has_fields {
267                    actual_output = strip_sexp_fields(&actual_output);
268                }
269
270                if actual_output != test.output && !test.error {
271                    println!("Incorrect parse for {test_name} - seed {seed}");
272                    print_diff_key();
273                    print_diff(&actual_output, &test.output, true);
274                    println!();
275                    return false;
276                }
277
278                // Check that the edited tree is consistent.
279                check_consistent_sizes(&tree3, &input);
280                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
281                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
282                    return false;
283                }
284
285                true
286            }).unwrap_or_else(|e| {
287                eprintln!("Error: {e}");
288                false
289            });
290
291            if !passed {
292                failure_count += 1;
293                break;
294            }
295        }
296    }
297
298    if failure_count != 0 {
299        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
300    }
301
302    skipped.retain(|_, v| *v == 0);
303
304    if !skipped.is_empty() {
305        println!("Non matchable skip definitions:");
306        for k in skipped.keys() {
307            println!("  {k}");
308        }
309        panic!("Non matchable skip definitions needs to be removed");
310    }
311}
312
/// A single corpus example, as produced by [`flatten_tests`].
#[derive(Debug, Clone)]
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined by `" - "`.
    pub name: String,
    /// Source text to parse.
    pub input: Vec<u8>,
    /// Expected S-expression of the parsed tree.
    pub output: String,
    /// Languages listed in the example's attributes.
    pub languages: Vec<Box<str>>,
    /// `error` attribute of the example; when set, the fuzzer does not compare
    /// the parse result against `output`.
    pub error: bool,
    /// `skip` attribute of the example; when set, the fuzzer reports the test as skipped.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual output before
    /// it is compared with `output`.
    pub has_fields: bool,
    /// Delimiter pair passed on when computing included ranges; `flatten_tests`
    /// always leaves this as `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
323
324#[must_use]
325pub fn flatten_tests(
326    test: TestEntry,
327    include: Option<&Regex>,
328    exclude: Option<&Regex>,
329) -> Vec<FlattenedTest> {
330    fn helper(
331        test: TestEntry,
332        include: Option<&Regex>,
333        exclude: Option<&Regex>,
334        is_root: bool,
335        prefix: &str,
336        result: &mut Vec<FlattenedTest>,
337    ) {
338        match test {
339            TestEntry::Example {
340                mut name,
341                input,
342                output,
343                has_fields,
344                attributes,
345                ..
346            } => {
347                if !prefix.is_empty() {
348                    name.insert_str(0, " - ");
349                    name.insert_str(0, prefix);
350                }
351
352                if let Some(include) = include {
353                    if !include.is_match(&name) {
354                        return;
355                    }
356                } else if let Some(exclude) = exclude {
357                    if exclude.is_match(&name) {
358                        return;
359                    }
360                }
361
362                result.push(FlattenedTest {
363                    name,
364                    input,
365                    output,
366                    has_fields,
367                    languages: attributes.languages,
368                    error: attributes.error,
369                    skip: attributes.skip,
370                    template_delimiters: None,
371                });
372            }
373            TestEntry::Group {
374                mut name, children, ..
375            } => {
376                if !is_root && !prefix.is_empty() {
377                    name.insert_str(0, " - ");
378                    name.insert_str(0, prefix);
379                }
380                for child in children {
381                    helper(child, include, exclude, false, &name, result);
382                }
383            }
384        }
385    }
386    let mut result = Vec::new();
387    helper(test, include, exclude, true, "", &mut result);
388    result
389}