// tree_sitter_cli/fuzz/corpus_test.rs

1use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
2
3use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
4use crate::util;
5
6pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
7    fn check(node: Node, line_offsets: &[usize]) {
8        let start_byte = node.start_byte();
9        let end_byte = node.end_byte();
10        let start_point = node.start_position();
11        let end_point = node.end_position();
12
13        assert!(start_byte <= end_byte);
14        assert!(start_point <= end_point);
15        assert_eq!(
16            start_byte,
17            line_offsets[start_point.row] + start_point.column
18        );
19        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
20
21        let mut last_child_end_byte = start_byte;
22        let mut last_child_end_point = start_point;
23        let mut some_child_has_changes = false;
24        let mut actual_named_child_count = 0;
25        for i in 0..node.child_count() {
26            let child = node.child(i).unwrap();
27            assert!(child.start_byte() >= last_child_end_byte);
28            assert!(child.start_position() >= last_child_end_point);
29            check(child, line_offsets);
30            if child.has_changes() {
31                some_child_has_changes = true;
32            }
33            if child.is_named() {
34                actual_named_child_count += 1;
35            }
36            last_child_end_byte = child.end_byte();
37            last_child_end_point = child.end_position();
38        }
39
40        assert_eq!(actual_named_child_count, node.named_child_count());
41
42        if node.child_count() > 0 {
43            assert!(end_byte >= last_child_end_byte);
44            assert!(end_point >= last_child_end_point);
45        }
46
47        if some_child_has_changes {
48            assert!(node.has_changes());
49        }
50    }
51
52    let mut line_offsets = vec![0];
53    for (i, c) in input.iter().enumerate() {
54        if *c == b'\n' {
55            line_offsets.push(i + 1);
56        }
57    }
58
59    check(tree.root_node(), &line_offsets);
60}
61
62pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
63    let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
64    let old_scope_sequence = ScopeSequence::new(old_tree);
65    let new_scope_sequence = ScopeSequence::new(new_tree);
66
67    let old_range = old_tree.root_node().range();
68    let new_range = new_tree.root_node().range();
69
70    let byte_range =
71        old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
72    let point_range = old_range.start_point.min(new_range.start_point)
73        ..old_range.end_point.max(new_range.end_point);
74
75    for range in &changed_ranges {
76        if range.end_byte > byte_range.end || range.end_point > point_range.end {
77            return Err(format!(
78                "changed range extends outside of the old and new trees {range:?}",
79            ));
80        }
81    }
82
83    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
84}
85
86pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
87    if let Some((start, end)) = delimiters {
88        let mut ranges = Vec::new();
89        let mut ix = 0;
90        while ix < input.len() {
91            let Some(mut start_ix) = input[ix..]
92                .windows(2)
93                .position(|win| win == start.as_bytes())
94            else {
95                break;
96            };
97            start_ix += ix + start.len();
98            let end_ix = input[start_ix..]
99                .windows(2)
100                .position(|win| win == end.as_bytes())
101                .map_or(input.len(), |ix| start_ix + ix);
102            ix = end_ix;
103            ranges.push(Range {
104                start_byte: start_ix,
105                end_byte: end_ix,
106                start_point: point_for_offset(input, start_ix),
107                end_point: point_for_offset(input, end_ix),
108            });
109        }
110
111        parser.set_included_ranges(&ranges).unwrap();
112    } else {
113        parser.set_included_ranges(&[]).unwrap();
114    }
115}
116
117fn point_for_offset(text: &[u8], offset: usize) -> Point {
118    let mut point = Point::default();
119    for byte in &text[..offset] {
120        if *byte == b'\n' {
121            point.row += 1;
122            point.column = 0;
123        } else {
124            point.column += 1;
125        }
126    }
127    point
128}
129
130pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
131    let mut parser = Parser::new();
132
133    if *LOG_ENABLED {
134        parser.set_logger(Some(Box::new(|log_type, msg| {
135            if log_type == LogType::Lex {
136                eprintln!("  {msg}");
137            } else {
138                eprintln!("{msg}");
139            }
140        })));
141    }
142    if *LOG_GRAPH_ENABLED {
143        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
144    }
145
146    parser
147}