1use std::{
2 cmp,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 mem::swap,
6};
7
8use indoc::indoc;
9
10use super::{
11 build_tables::Tables,
12 grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
13 nfa::CharacterSet,
14 node_types::ChildType,
15 rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
16 tables::{
17 AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
18 ParseTableEntry,
19 },
20};
21
/// Upper bound on the entry-count threshold that `Generator::init` uses when
/// counting the leading parse states that make up `large_state_count`.
const SMALL_STATE_THRESHOLD: usize = 64;
/// Lowest ABI version exported by this module.
pub const ABI_VERSION_MIN: usize = 14;
/// Highest ABI version exported by this module; mirrors
/// `tree_sitter::LANGUAGE_VERSION`.
pub const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
/// First ABI version for which the generator emits reserved-word sets, the
/// supertype map, and the `TSLexerMode` lex-mode type.
const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15;
/// Crate version captured at compile time; printed in the generated header.
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Value of the `BUILD_SHA` environment variable at compile time, if it was
/// set; appended to the version in the generated header.
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
28
/// Appends formatted text to `$generator.buffer` without indentation or a
/// trailing newline.
///
/// Panics if formatting fails.
macro_rules! add {
    ($generator: tt, $($fmt_args: tt)*) => {{
        write!($generator.buffer, $($fmt_args)*).unwrap();
    }}
}
34
/// Appends one indentation unit to `$generator.buffer` for every level in
/// `$generator.indent_level`.
macro_rules! add_whitespace {
    ($generator:tt) => {{
        $generator
            .buffer
            .extend(std::iter::repeat(" ").take($generator.indent_level));
    }};
}
42
/// Appends one complete line to `$generator.buffer`: the current indentation,
/// the formatted text, then a newline.
///
/// Panics if formatting fails.
macro_rules! add_line {
    ($generator: tt, $($fmt_args: tt)*) => {{
        add_whitespace!($generator);
        writeln!($generator.buffer, $($fmt_args)*).unwrap();
    }}
}
50
/// Increases `$generator.indent_level` by one.
macro_rules! indent {
    ($generator:tt) => {{
        $generator.indent_level += 1;
    }};
}
56
/// Decreases `$generator.indent_level` by one.
///
/// Panics if the level is already zero, which means an `indent!`/`dedent!`
/// pair is unbalanced.
macro_rules! dedent {
    ($generator:tt) => {{
        assert_ne!($generator.indent_level, 0);
        $generator.indent_level -= 1;
    }};
}
63
/// State for rendering a parse table and its lex tables as C source text.
///
/// `generate` consumes the value and returns the contents of `buffer`.
#[derive(Default)]
struct Generator {
    /// Output text accumulated so far; written by the `add!` family of macros.
    buffer: String,
    /// Current nesting depth, read by `add_whitespace!`.
    indent_level: usize,
    language_name: String,
    /// Source of the symbols, states and production infos that get rendered.
    parse_table: ParseTable,
    /// Rendered as the `ts_lex` function.
    main_lex_table: LexTable,
    /// Rendered as the `ts_lex_keywords` function when the grammar has a word token.
    keyword_lex_table: LexTable,
    /// Character sets, each paired with an optional symbol used to name it.
    large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
    /// One entry per element of `large_character_sets`, filled in by `init`.
    large_character_set_info: Vec<LargeCharacterSetInfo>,
    /// Computed by `init`; emitted as `LARGE_STATE_COUNT`.
    large_state_count: usize,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    /// Aliases that replace a symbol's own name and named-ness in the output.
    default_aliases: AliasMap,
    /// Numeric enum value given to each symbol by `add_symbol_enum`.
    symbol_order: HashMap<Symbol, usize>,
    /// C identifier used for each symbol.
    symbol_ids: HashMap<Symbol, String>,
    /// C identifier used for each alias.
    alias_ids: HashMap<Alias, String>,
    /// Sorted aliases for which `init` found no existing symbol to reuse.
    unique_aliases: Vec<Alias>,
    /// Maps every symbol to the symbol emitted for it in `ts_symbol_map`.
    symbol_map: HashMap<Symbol, Symbol>,
    /// Distinct reserved-word sets; `init` always places an empty set at index 0.
    reserved_word_sets: Vec<TokenSet>,
    /// Index into `reserved_word_sets` for each parse state, in state order.
    reserved_word_set_ids_by_parse_state: Vec<usize>,
    /// Sorted, de-duplicated field names collected from the production infos.
    field_names: Vec<String>,
    /// Supertypes keyed by symbol; converted into `supertype_map` and cleared
    /// by `init` when the ABI version supports it.
    supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
    /// Supertypes keyed by the supertype's C identifier.
    supertype_map: BTreeMap<String, Vec<ChildType>>,
    /// ABI version to target; emitted as `LANGUAGE_VERSION`.
    abi_version: usize,
    metadata: Option<Metadata>,
}
91
/// Bookkeeping for one entry of `Generator::large_character_sets`.
struct LargeCharacterSetInfo {
    /// Name of the C array that holds the set's ranges.
    constant_name: String,
    /// Set once a lex state references the set; unused sets are not emitted.
    is_used: bool,
}
96
/// A three-part version number.
// NOTE(review): nothing in this part of the file reads these fields; confirm
// where they are emitted before relying on this description.
struct Metadata {
    major_version: u8,
    minor_version: u8,
    patch_version: u8,
}
102
103impl Generator {
104 fn generate(mut self) -> String {
105 self.init();
106 self.add_header();
107 self.add_includes();
108 self.add_pragmas();
109 self.add_stats();
110 self.add_symbol_enum();
111 self.add_symbol_names_list();
112 self.add_unique_symbol_map();
113 self.add_symbol_metadata_list();
114
115 if !self.field_names.is_empty() {
116 self.add_field_name_enum();
117 self.add_field_name_names_list();
118 self.add_field_sequences();
119 }
120
121 if !self.parse_table.production_infos.is_empty() {
122 self.add_alias_sequences();
123 }
124
125 self.add_non_terminal_alias_map();
126 self.add_primary_state_id_list();
127
128 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && !self.supertype_map.is_empty() {
129 self.add_supertype_map();
130 }
131
132 let buffer_offset_before_lex_functions = self.buffer.len();
133
134 let mut main_lex_table = LexTable::default();
135 swap(&mut main_lex_table, &mut self.main_lex_table);
136 self.add_lex_function("ts_lex", main_lex_table);
137
138 if self.syntax_grammar.word_token.is_some() {
139 let mut keyword_lex_table = LexTable::default();
140 swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
141 self.add_lex_function("ts_lex_keywords", keyword_lex_table);
142 }
143
144 let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string();
148 self.buffer.truncate(buffer_offset_before_lex_functions);
149 for ix in 0..self.large_character_sets.len() {
150 self.add_character_set(ix);
151 }
152 self.buffer.push_str(&lex_functions);
153
154 self.add_lex_modes();
155
156 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
157 {
158 self.add_reserved_word_sets();
159 }
160
161 self.add_parse_table();
162
163 if !self.syntax_grammar.external_tokens.is_empty() {
164 self.add_external_token_enum();
165 self.add_external_scanner_symbol_map();
166 self.add_external_scanner_states_list();
167 }
168
169 self.add_parser_export();
170
171 self.buffer
172 }
173
    /// Computes all derived state that the `add_*` methods rely on.
    ///
    /// In order, this fills in: the C identifier of every symbol, the
    /// symbol-to-representative map, the sorted field names, the alias
    /// identifiers and unique aliases, the names of the large character sets,
    /// the reserved-word sets with their per-state ids, the supertype map
    /// keyed by identifier, and `large_state_count`.
    fn init(&mut self) {
        // Give every symbol in the parse table an identifier.
        let mut symbol_identifiers = HashSet::new();
        for i in 0..self.parse_table.symbols.len() {
            self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
        }
        // The end-of-nonterminal-extra symbol reuses the end symbol's identifier.
        self.symbol_ids.insert(
            Symbol::end_of_nonterminal_extra(),
            self.symbol_ids[&Symbol::end()].clone(),
        );

        self.symbol_map = HashMap::new();

        // Choose, for each symbol, the symbol that stands in for it. A symbol
        // maps to itself unless one of the branches below finds a substitute.
        for symbol in &self.parse_table.symbols {
            let mut mapping = symbol;

            if let Some(alias) = self.default_aliases.get(symbol) {
                // The symbol has a default alias. Prefer the smallest symbol
                // sharing that alias; a symbol without a default alias whose
                // own metadata equals the alias's value and kind wins
                // outright and ends the search.
                let kind = alias.kind();
                for other_symbol in &self.parse_table.symbols {
                    if let Some(other_alias) = self.default_aliases.get(other_symbol) {
                        if other_symbol < mapping && other_alias == alias {
                            mapping = other_symbol;
                        }
                    } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
                        mapping = other_symbol;
                        break;
                    }
                }
            } else if symbol.is_terminal() {
                // A terminal without a default alias maps to the first symbol
                // with identical metadata, unless that symbol is already
                // mapped to this one, in which case this symbol keeps itself.
                let metadata = self.metadata_for_symbol(*symbol);
                for other_symbol in &self.parse_table.symbols {
                    let other_metadata = self.metadata_for_symbol(*other_symbol);
                    if other_metadata == metadata {
                        if let Some(mapped) = self.symbol_map.get(other_symbol) {
                            if mapped == symbol {
                                break;
                            }
                        }
                        mapping = other_symbol;
                        break;
                    }
                }
            }

            self.symbol_map.insert(*symbol, *mapping);
        }

        for production_info in &self.parse_table.production_infos {
            // Keep `field_names` sorted and free of duplicates.
            for field_name in production_info.field_map.keys() {
                if let Err(i) = self.field_names.binary_search(field_name) {
                    self.field_names.insert(i, field_name.clone());
                }
            }

            for alias in &production_info.alias_sequence {
                if let Some(alias) = &alias {
                    let alias_id =
                        // Reuse the identifier of an existing symbol when
                        // `symbols_for_alias` finds one for this alias.
                        if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
                            self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
                        } else {
                            // Otherwise record the alias (sorted, no
                            // duplicates) and derive a fresh identifier.
                            if let Err(i) = self.unique_aliases.binary_search(alias) {
                                self.unique_aliases.insert(i, alias.clone());
                            }

                            if alias.is_named {
                                format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
                            } else {
                                format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
                            }
                        };

                    // The first identifier computed for an alias is kept.
                    self.alias_ids.entry(alias.clone()).or_insert(alias_id);
                }
            }
        }

        // Name each large character set after its symbol (or "extras" when it
        // has none) plus a 1-based counter over earlier sets with the same symbol.
        for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() {
            let count = self.large_character_sets[0..ix]
                .iter()
                .filter(|(sym, _)| sym == symbol)
                .count()
                + 1;
            let constant_name = if let Some(symbol) = symbol {
                format!("{}_character_set_{}", self.symbol_ids[symbol], count)
            } else {
                format!("extras_character_set_{count}")
            };
            self.large_character_set_info.push(LargeCharacterSetInfo {
                constant_name,
                is_used: false,
            });
        }

        // Set 0 is always the empty set. Each state is assigned the index of
        // the first stored set equal to its reserved words, adding a new set
        // when none matches.
        self.reserved_word_sets.push(TokenSet::new());
        for state in &self.parse_table.states {
            let id = if let Some(ix) = self
                .reserved_word_sets
                .iter()
                .position(|set| *set == state.reserved_words)
            {
                ix
            } else {
                self.reserved_word_sets.push(state.reserved_words.clone());
                self.reserved_word_sets.len() - 1
            };
            self.reserved_word_set_ids_by_parse_state.push(id);
        }

        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
            // Re-key the supertypes by identifier, skipping supertypes that
            // have no identifier, then drop the symbol-keyed map.
            for (supertype, subtypes) in &self.supertype_symbol_map {
                if let Some(supertype) = self.symbol_ids.get(supertype) {
                    self.supertype_map
                        .entry(supertype.clone())
                        .or_insert_with(|| subtypes.clone());
                }
            }

            self.supertype_symbol_map.clear();
        }

        // Count the leading run of states that are either among the first two
        // or hold more entries than the threshold.
        let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
        self.large_state_count = self
            .parse_table
            .states
            .iter()
            .enumerate()
            .take_while(|(i, s)| {
                *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
            })
            .count();
    }
323
324 fn add_header(&mut self) {
325 let version = BUILD_SHA.map_or_else(
326 || BUILD_VERSION.to_string(),
327 |build_sha| format!("{BUILD_VERSION} ({build_sha})"),
328 );
329 add_line!(
330 self,
331 "/* Automatically generated by tree-sitter v{version} */",
332 );
333 add_line!(self, "");
334 }
335
336 fn add_includes(&mut self) {
337 add_line!(self, "#include \"tree_sitter/parser.h\"");
338 add_line!(self, "");
339 }
340
341 fn add_pragmas(&mut self) {
342 add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
343 add_line!(
344 self,
345 "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
346 );
347 add_line!(self, "#endif");
348 add_line!(self, "");
349
350 if self.main_lex_table.states.len() > 300 {
354 add_line!(self, "#ifdef _MSC_VER");
355 add_line!(self, "#pragma optimize(\"\", off)");
356 add_line!(self, "#elif defined(__clang__)");
357 add_line!(self, "#pragma clang optimize off");
358 add_line!(self, "#elif defined(__GNUC__)");
359 add_line!(self, "#pragma GCC optimize (\"O0\")");
360 add_line!(self, "#endif");
361 add_line!(self, "");
362 }
363 }
364
365 fn add_stats(&mut self) {
366 let token_count = self
367 .parse_table
368 .symbols
369 .iter()
370 .filter(|symbol| {
371 if symbol.is_terminal() || symbol.is_eof() {
372 true
373 } else if symbol.is_external() {
374 self.syntax_grammar.external_tokens[symbol.index]
375 .corresponding_internal_token
376 .is_none()
377 } else {
378 false
379 }
380 })
381 .count();
382
383 add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version);
384 add_line!(
385 self,
386 "#define STATE_COUNT {}",
387 self.parse_table.states.len()
388 );
389 add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
390
391 add_line!(
392 self,
393 "#define SYMBOL_COUNT {}",
394 self.parse_table.symbols.len()
395 );
396 add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
397 add_line!(self, "#define TOKEN_COUNT {token_count}");
398 add_line!(
399 self,
400 "#define EXTERNAL_TOKEN_COUNT {}",
401 self.syntax_grammar.external_tokens.len()
402 );
403 add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
404 add_line!(
405 self,
406 "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
407 self.parse_table.max_aliased_production_length
408 );
409 add_line!(
410 self,
411 "#define MAX_RESERVED_WORD_SET_SIZE {}",
412 self.reserved_word_sets
413 .iter()
414 .map(TokenSet::len)
415 .max()
416 .unwrap()
417 );
418
419 add_line!(
420 self,
421 "#define PRODUCTION_ID_COUNT {}",
422 self.parse_table.production_infos.len()
423 );
424 add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len());
425 add_line!(self, "");
426 }
427
428 fn add_symbol_enum(&mut self) {
429 add_line!(self, "enum ts_symbol_identifiers {{");
430 indent!(self);
431 self.symbol_order.insert(Symbol::end(), 0);
432 let mut i = 1;
433 for symbol in &self.parse_table.symbols {
434 if *symbol != Symbol::end() {
435 self.symbol_order.insert(*symbol, i);
436 add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
437 i += 1;
438 }
439 }
440 for alias in &self.unique_aliases {
441 add_line!(self, "{} = {i},", self.alias_ids[alias]);
442 i += 1;
443 }
444 dedent!(self);
445 add_line!(self, "}};");
446 add_line!(self, "");
447 }
448
449 fn add_symbol_names_list(&mut self) {
450 add_line!(self, "static const char * const ts_symbol_names[] = {{");
451 indent!(self);
452 for symbol in &self.parse_table.symbols {
453 let name = self.sanitize_string(
454 self.default_aliases
455 .get(symbol)
456 .map_or(self.metadata_for_symbol(*symbol).0, |alias| {
457 alias.value.as_str()
458 }),
459 );
460 add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
461 }
462 for alias in &self.unique_aliases {
463 add_line!(
464 self,
465 "[{}] = \"{}\",",
466 self.alias_ids[alias],
467 self.sanitize_string(&alias.value)
468 );
469 }
470 dedent!(self);
471 add_line!(self, "}};");
472 add_line!(self, "");
473 }
474
475 fn add_unique_symbol_map(&mut self) {
476 add_line!(self, "static const TSSymbol ts_symbol_map[] = {{");
477 indent!(self);
478 for symbol in &self.parse_table.symbols {
479 add_line!(
480 self,
481 "[{}] = {},",
482 self.symbol_ids[symbol],
483 self.symbol_ids[&self.symbol_map[symbol]],
484 );
485 }
486
487 for alias in &self.unique_aliases {
488 add_line!(
489 self,
490 "[{}] = {},",
491 self.alias_ids[alias],
492 self.alias_ids[alias],
493 );
494 }
495
496 dedent!(self);
497 add_line!(self, "}};");
498 add_line!(self, "");
499 }
500
501 fn add_field_name_enum(&mut self) {
502 add_line!(self, "enum ts_field_identifiers {{");
503 indent!(self);
504 for (i, field_name) in self.field_names.iter().enumerate() {
505 add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
506 }
507 dedent!(self);
508 add_line!(self, "}};");
509 add_line!(self, "");
510 }
511
512 fn add_field_name_names_list(&mut self) {
513 add_line!(self, "static const char * const ts_field_names[] = {{");
514 indent!(self);
515 add_line!(self, "[0] = NULL,");
516 for field_name in &self.field_names {
517 add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
518 }
519 dedent!(self);
520 add_line!(self, "}};");
521 add_line!(self, "");
522 }
523
524 fn add_symbol_metadata_list(&mut self) {
525 add_line!(
526 self,
527 "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
528 );
529 indent!(self);
530 for symbol in &self.parse_table.symbols {
531 add_line!(self, "[{}] = {{", self.symbol_ids[symbol]);
532 indent!(self);
533 if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
534 add_line!(self, ".visible = true,");
535 add_line!(self, ".named = {is_named},");
536 } else {
537 match self.metadata_for_symbol(*symbol).1 {
538 VariableType::Named => {
539 add_line!(self, ".visible = true,");
540 add_line!(self, ".named = true,");
541 }
542 VariableType::Anonymous => {
543 add_line!(self, ".visible = true,");
544 add_line!(self, ".named = false,");
545 }
546 VariableType::Hidden => {
547 add_line!(self, ".visible = false,");
548 add_line!(self, ".named = true,");
549 if self.syntax_grammar.supertype_symbols.contains(symbol) {
550 add_line!(self, ".supertype = true,");
551 }
552 }
553 VariableType::Auxiliary => {
554 add_line!(self, ".visible = false,");
555 add_line!(self, ".named = false,");
556 }
557 }
558 }
559 dedent!(self);
560 add_line!(self, "}},");
561 }
562 for alias in &self.unique_aliases {
563 add_line!(self, "[{}] = {{", self.alias_ids[alias]);
564 indent!(self);
565 add_line!(self, ".visible = true,");
566 add_line!(self, ".named = {},", alias.is_named);
567 dedent!(self);
568 add_line!(self, "}},");
569 }
570 dedent!(self);
571 add_line!(self, "}};");
572 add_line!(self, "");
573 }
574
575 fn add_alias_sequences(&mut self) {
576 add_line!(
577 self,
578 "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
579 );
580 indent!(self);
581 for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
582 if production_info.alias_sequence.is_empty() {
583 if i == 0 {
586 add_line!(self, "[0] = {{0}},");
587 }
588 continue;
589 }
590
591 add_line!(self, "[{i}] = {{");
592 indent!(self);
593 for (j, alias) in production_info.alias_sequence.iter().enumerate() {
594 if let Some(alias) = alias {
595 add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
596 }
597 }
598 dedent!(self);
599 add_line!(self, "}},");
600 }
601 dedent!(self);
602 add_line!(self, "}};");
603 add_line!(self, "");
604 }
605
606 fn add_non_terminal_alias_map(&mut self) {
607 let mut alias_ids_by_symbol = HashMap::new();
608 for variable in &self.syntax_grammar.variables {
609 for production in &variable.productions {
610 for step in &production.steps {
611 if let Some(alias) = &step.alias {
612 if step.symbol.is_non_terminal()
613 && Some(alias) != self.default_aliases.get(&step.symbol)
614 && self.symbol_ids.contains_key(&step.symbol)
615 {
616 if let Some(alias_id) = self.alias_ids.get(alias) {
617 let alias_ids =
618 alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new());
619 if let Err(i) = alias_ids.binary_search(&alias_id) {
620 alias_ids.insert(i, alias_id);
621 }
622 }
623 }
624 }
625 }
626 }
627 }
628
629 let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
630 alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
631
632 add_line!(
633 self,
634 "static const uint16_t ts_non_terminal_alias_map[] = {{"
635 );
636 indent!(self);
637 for (symbol, alias_ids) in alias_ids_by_symbol {
638 let symbol_id = &self.symbol_ids[symbol];
639 let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]];
640 add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
641 indent!(self);
642 add_line!(self, "{public_symbol_id},");
643 for alias_id in alias_ids {
644 add_line!(self, "{alias_id},");
645 }
646 dedent!(self);
647 }
648 add_line!(self, "0,");
649 dedent!(self);
650 add_line!(self, "}};");
651 add_line!(self, "");
652 }
653
654 fn add_primary_state_id_list(&mut self) {
660 add_line!(
661 self,
662 "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{"
663 );
664 indent!(self);
665 let mut first_state_for_each_core_id = HashMap::new();
666 for (idx, state) in self.parse_table.states.iter().enumerate() {
667 let primary_state = first_state_for_each_core_id
668 .entry(state.core_id)
669 .or_insert(idx);
670 add_line!(self, "[{idx}] = {primary_state},");
671 }
672 dedent!(self);
673 add_line!(self, "}};");
674 add_line!(self, "");
675 }
676
677 fn add_field_sequences(&mut self) {
678 let mut flat_field_maps = vec![];
679 let mut next_flat_field_map_index = 0;
680 self.get_field_map_id(
681 Vec::new(),
682 &mut flat_field_maps,
683 &mut next_flat_field_map_index,
684 );
685
686 let mut field_map_ids = Vec::new();
687 for production_info in &self.parse_table.production_infos {
688 if production_info.field_map.is_empty() {
689 field_map_ids.push((0, 0));
690 } else {
691 let mut flat_field_map = Vec::new();
692 for (field_name, locations) in &production_info.field_map {
693 for location in locations {
694 flat_field_map.push((field_name.clone(), *location));
695 }
696 }
697 field_map_ids.push((
698 self.get_field_map_id(
699 flat_field_map.clone(),
700 &mut flat_field_maps,
701 &mut next_flat_field_map_index,
702 ),
703 flat_field_map.len(),
704 ));
705 }
706 }
707
708 add_line!(
709 self,
710 "static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
711 );
712 indent!(self);
713 for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
714 if length > 0 {
715 add_line!(
716 self,
717 "[{production_id}] = {{.index = {row_id}, .length = {length}}},",
718 );
719 }
720 }
721 dedent!(self);
722 add_line!(self, "}};");
723 add_line!(self, "");
724
725 add_line!(
726 self,
727 "static const TSFieldMapEntry ts_field_map_entries[] = {{",
728 );
729 indent!(self);
730 for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
731 add_line!(self, "[{row_index}] =");
732 indent!(self);
733 for (field_name, location) in field_pairs {
734 add_whitespace!(self);
735 add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
736 if location.inherited {
737 add!(self, ", .inherited = true");
738 }
739 add!(self, "}},\n");
740 }
741 dedent!(self);
742 }
743
744 dedent!(self);
745 add_line!(self, "}};");
746 add_line!(self, "");
747 }
748
749 fn add_supertype_map(&mut self) {
750 add_line!(
751 self,
752 "static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{"
753 );
754 indent!(self);
755 for supertype in self.supertype_map.keys() {
756 add_line!(self, "{supertype},");
757 }
758 dedent!(self);
759 add_line!(self, "}};\n");
760
761 add_line!(
762 self,
763 "static const TSMapSlice ts_supertype_map_slices[] = {{",
764 );
765 indent!(self);
766 let mut row_id = 0;
767 let mut supertype_ids = vec![0];
768 let mut supertype_string_map = BTreeMap::new();
769 for (supertype, subtypes) in &self.supertype_map {
770 supertype_string_map.insert(
771 supertype,
772 subtypes
773 .iter()
774 .flat_map(|s| match s {
775 ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
776 ChildType::Aliased(alias) => {
777 self.alias_ids.get(alias).cloned().map_or_else(
778 || {
779 self.symbols_for_alias(alias)
780 .into_iter()
781 .map(|s| self.symbol_ids.get(&s).cloned())
782 .collect()
783 },
784 |a| vec![Some(a)],
785 )
786 }
787 })
788 .flatten()
789 .collect::<BTreeSet<String>>(),
790 );
791 }
792 for (supertype, subtypes) in &supertype_string_map {
793 let length = subtypes.len();
794 add_line!(
795 self,
796 "[{supertype}] = {{.index = {row_id}, .length = {length}}},",
797 );
798 row_id += length;
799 supertype_ids.push(row_id);
800 }
801 dedent!(self);
802 add_line!(self, "}};");
803 add_line!(self, "");
804
805 add_line!(
806 self,
807 "static const TSSymbol ts_supertype_map_entries[] = {{",
808 );
809 indent!(self);
810 for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() {
811 let row_index = supertype_ids[i];
812 add_line!(self, "[{row_index}] =");
813 indent!(self);
814 for subtype in subtypes {
815 add_whitespace!(self);
816 add!(self, "{subtype},\n");
817 }
818 dedent!(self);
819 }
820
821 dedent!(self);
822 add_line!(self, "}};");
823 add_line!(self, "");
824 }
825
826 fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
827 add_line!(
828 self,
829 "static bool {name}(TSLexer *lexer, TSStateId state) {{",
830 );
831 indent!(self);
832
833 add_line!(self, "START_LEXER();");
834 add_line!(self, "eof = lexer->eof(lexer);");
835 add_line!(self, "switch (state) {{");
836
837 indent!(self);
838 for (i, state) in lex_table.states.into_iter().enumerate() {
839 add_line!(self, "case {i}:");
840 indent!(self);
841 self.add_lex_state(i, state);
842 dedent!(self);
843 }
844
845 add_line!(self, "default:");
846 indent!(self);
847 add_line!(self, "return false;");
848 dedent!(self);
849
850 dedent!(self);
851 add_line!(self, "}}");
852
853 dedent!(self);
854 add_line!(self, "}}");
855 add_line!(self, "");
856 }
857
    /// Writes the body of one `case` of a lex function.
    ///
    /// Output, in order: an optional `ACCEPT_TOKEN`, an optional EOF advance,
    /// an optional `ADVANCE_MAP` covering a leading run of simple
    /// transitions, one conditional `ADVANCE`/`SKIP` per remaining
    /// transition, and a closing `END_STATE()`.
    ///
    /// `_state_ix` is not used.
    fn add_lex_state(&mut self, _state_ix: usize, state: LexState) {
        if let Some(accept_action) = state.accept_action {
            add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
        }

        if let Some(eof_action) = state.eof_action {
            add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
        }

        // Scratch sets reused across iterations, plus the running set of
        // characters already handled by earlier transitions of this state.
        let mut chars_copy = CharacterSet::empty();
        let mut large_set = CharacterSet::empty();
        let mut ruled_out_chars = CharacterSet::empty();

        // Measure the leading run of transitions that stay in the main token
        // and whose ranges each span at most two characters with an end
        // point that fits in a `u16`.
        let mut leading_simple_transition_count = 0;
        let mut leading_simple_transition_range_count = 0;
        for (chars, action) in &state.advance_actions {
            if action.in_main_token
                && chars.ranges().all(|r| {
                    let start = *r.start() as u32;
                    let end = *r.end() as u32;
                    end <= start + 1 && u16::try_from(end).is_ok()
                })
            {
                leading_simple_transition_count += 1;
                leading_simple_transition_range_count += chars.range_count();
            } else {
                break;
            }
        }

        // The run is written as an `ADVANCE_MAP` only when it covers at
        // least 8 ranges; otherwise nothing is consumed here and every
        // transition goes through the conditional path below.
        if leading_simple_transition_range_count >= 8 {
            add_line!(self, "ADVANCE_MAP(");
            indent!(self);
            for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] {
                for range in chars.ranges() {
                    add_whitespace!(self);
                    self.add_character(*range.start());
                    add!(self, ", {},\n", action.state);
                    // A two-character range gets a second entry for its end.
                    if range.end() > range.start() {
                        add_whitespace!(self);
                        self.add_character(*range.end());
                        add!(self, ", {},\n", action.state);
                    }
                }
                ruled_out_chars = ruled_out_chars.add(chars);
            }
            dedent!(self);
            add_line!(self, ");");
        } else {
            leading_simple_transition_count = 0;
        }

        for (chars, action) in &state.advance_actions[leading_simple_transition_count..] {
            add_whitespace!(self);

            // Simplify the set given the characters already ruled out.
            // NOTE(review): the exact contract of `simplify_ignoring` is
            // defined elsewhere; confirm.
            let simplified_chars = chars.simplify_ignoring(&ruled_out_chars);

            // For sets with many ranges, look for the large character set
            // that lets the condition be written with the fewest ranges:
            // (index, characters to add, characters to exclude).
            let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None;
            if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT {
                for (ix, (_, set)) in self.large_character_sets.iter().enumerate() {
                    chars_copy.assign(&simplified_chars);
                    large_set.assign(set);
                    // NOTE(review): presumably leaves in each operand only
                    // what is absent from the other; verify against
                    // `CharacterSet::remove_intersection`.
                    let intersection = chars_copy.remove_intersection(&mut large_set);
                    if !intersection.is_empty() {
                        let additions = chars_copy.simplify_ignoring(&ruled_out_chars);
                        let removals = large_set.simplify_ignoring(&ruled_out_chars);
                        let total_range_count = additions.range_count() + removals.range_count();
                        // Skip candidates that are not strictly shorter than
                        // writing the ranges out directly.
                        if total_range_count >= simplified_chars.range_count() {
                            continue;
                        }
                        // Skip candidates worse than the current best; a tie
                        // replaces it.
                        if let Some((_, best_additions, best_removals)) = &best_large_char_set {
                            let best_range_count =
                                best_additions.range_count() + best_removals.range_count();
                            if best_range_count < total_range_count {
                                continue;
                            }
                        }
                        best_large_char_set = Some((ix, additions, removals));
                    }
                }
            }

            // Later transitions may ignore everything this one handles.
            ruled_out_chars = ruled_out_chars.add(chars);

            let mut large_char_set_ix = None;
            let mut asserted_chars = simplified_chars;
            let mut negated_chars = CharacterSet::empty();
            if let Some((char_set_ix, additions, removals)) = best_large_char_set {
                asserted_chars = additions;
                negated_chars = removals;
                large_char_set_ix = Some(char_set_ix);
            }

            // Line break plus indentation for continuation lines of the condition.
            let mut line_break = "\n".to_string();
            for _ in 0..self.indent_level + 2 {
                line_break.push_str(" ");
            }

            let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
            let has_negative_condition = !negated_chars.is_empty();
            let has_condition = has_positive_condition || has_negative_condition;
            if has_condition {
                add!(self, "if (");
                // The positive part needs its own parentheses when a negative
                // part is joined to it with `&&`.
                if has_positive_condition && has_negative_condition {
                    add!(self, "(");
                }
            }

            if let Some(large_char_set_ix) = large_char_set_ix {
                let large_set = &self.large_character_sets[large_char_set_ix].1;

                // A set containing NUL is guarded with `!eof`.
                let check_eof = large_set.contains('\0');
                if check_eof {
                    add!(self, "(!eof && ");
                }

                // Mark the set as used so `add_character_set` emits its table.
                let char_set_info = &mut self.large_character_set_info[large_char_set_ix];
                char_set_info.is_used = true;
                add!(
                    self,
                    "set_contains({}, {}, lookahead)",
                    char_set_info.constant_name,
                    large_set.range_count(),
                );
                if check_eof {
                    add!(self, ")");
                }
            }

            if !asserted_chars.is_empty() {
                if large_char_set_ix.is_some() {
                    add!(self, " ||{line_break}");
                }

                // A set containing `char::MAX` is written as the exclusion of
                // its negation, with NUL added to the excluded characters.
                let is_included = !asserted_chars.contains(char::MAX);
                if !is_included {
                    asserted_chars = asserted_chars.negate().add_char('\0');
                }

                self.add_character_range_conditions(&asserted_chars, is_included, &line_break);
            }

            if has_negative_condition {
                if has_positive_condition {
                    add!(self, ") &&{line_break}");
                }
                self.add_character_range_conditions(&negated_chars, false, &line_break);
            }

            if has_condition {
                add!(self, ") ");
            }

            self.add_advance_action(action);
            add!(self, "\n");
        }

        add_line!(self, "END_STATE();");
    }
1042
1043 fn add_character_range_conditions(
1044 &mut self,
1045 characters: &CharacterSet,
1046 is_included: bool,
1047 line_break: &str,
1048 ) {
1049 for (i, range) in characters.ranges().enumerate() {
1050 let start = *range.start();
1051 let end = *range.end();
1052 if is_included {
1053 if i > 0 {
1054 add!(self, " ||{line_break}");
1055 }
1056
1057 if start == '\0' {
1058 add!(self, "(!eof && ");
1059 if end == '\0' {
1060 add!(self, "lookahead == 0");
1061 } else {
1062 add!(self, "lookahead <= ");
1063 }
1064 self.add_character(end);
1065 add!(self, ")");
1066 } else if end == start {
1067 add!(self, "lookahead == ");
1068 self.add_character(start);
1069 } else if end as u32 == start as u32 + 1 {
1070 add!(self, "lookahead == ");
1071 self.add_character(start);
1072 add!(self, " ||{line_break}lookahead == ");
1073 self.add_character(end);
1074 } else {
1075 add!(self, "(");
1076 self.add_character(start);
1077 add!(self, " <= lookahead && lookahead <= ");
1078 self.add_character(end);
1079 add!(self, ")");
1080 }
1081 } else {
1082 if i > 0 {
1083 add!(self, " &&{line_break}");
1084 }
1085 if end == start {
1086 add!(self, "lookahead != ");
1087 self.add_character(start);
1088 } else if end as u32 == start as u32 + 1 {
1089 add!(self, "lookahead != ");
1090 self.add_character(start);
1091 add!(self, " &&{line_break}lookahead != ");
1092 self.add_character(end);
1093 } else if start != '\0' {
1094 add!(self, "(lookahead < ");
1095 self.add_character(start);
1096 add!(self, " || ");
1097 self.add_character(end);
1098 add!(self, " < lookahead)");
1099 } else {
1100 add!(self, "lookahead > ");
1101 self.add_character(end);
1102 }
1103 }
1104 }
1105 }
1106
1107 fn add_character_set(&mut self, ix: usize) {
1108 let characters = self.large_character_sets[ix].1.clone();
1109 let info = &self.large_character_set_info[ix];
1110 if !info.is_used {
1111 return;
1112 }
1113
1114 add_line!(
1115 self,
1116 "static const TSCharacterRange {}[] = {{",
1117 info.constant_name
1118 );
1119
1120 indent!(self);
1121 for (ix, range) in characters.ranges().enumerate() {
1122 let column = ix % 8;
1123 if column == 0 {
1124 if ix > 0 {
1125 add!(self, "\n");
1126 }
1127 add_whitespace!(self);
1128 } else {
1129 add!(self, " ");
1130 }
1131 add!(self, "{{");
1132 self.add_character(*range.start());
1133 add!(self, ", ");
1134 self.add_character(*range.end());
1135 add!(self, "}},");
1136 }
1137 add!(self, "\n");
1138 dedent!(self);
1139 add_line!(self, "}};");
1140 add_line!(self, "");
1141 }
1142
1143 fn add_advance_action(&mut self, action: &AdvanceAction) {
1144 if action.in_main_token {
1145 add!(self, "ADVANCE({});", action.state);
1146 } else {
1147 add!(self, "SKIP({});", action.state);
1148 }
1149 }
1150
1151 fn add_lex_modes(&mut self) {
1152 add_line!(
1153 self,
1154 "static const {} ts_lex_modes[STATE_COUNT] = {{",
1155 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1156 "TSLexerMode"
1157 } else {
1158 "TSLexMode"
1159 }
1160 );
1161 indent!(self);
1162 for (i, state) in self.parse_table.states.iter().enumerate() {
1163 add_whitespace!(self);
1164 add!(self, "[{i}] = {{");
1165 if state.is_end_of_non_terminal_extra() {
1166 add!(self, "(TSStateId)(-1),");
1167 } else {
1168 add!(self, ".lex_state = {}", state.lex_state_id);
1169
1170 if state.external_lex_state_id > 0 {
1171 add!(
1172 self,
1173 ", .external_lex_state = {}",
1174 state.external_lex_state_id
1175 );
1176 }
1177
1178 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1179 let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
1180 if reserved_word_set_id != 0 {
1181 add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
1182 }
1183 }
1184 }
1185
1186 add!(self, "}},\n");
1187 }
1188 dedent!(self);
1189 add_line!(self, "}};");
1190 add_line!(self, "");
1191 }
1192
1193 fn add_reserved_word_sets(&mut self) {
1194 add_line!(
1195 self,
1196 "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
1197 self.reserved_word_sets.len(),
1198 );
1199 indent!(self);
1200 for (id, set) in self.reserved_word_sets.iter().enumerate() {
1201 if id == 0 {
1202 continue;
1203 }
1204 add_line!(self, "[{id}] = {{");
1205 indent!(self);
1206 for token in set.iter() {
1207 add_line!(self, "{},", self.symbol_ids[&token]);
1208 }
1209 dedent!(self);
1210 add_line!(self, "}},");
1211 }
1212 dedent!(self);
1213 add_line!(self, "}};");
1214 add_line!(self, "");
1215 }
1216
1217 fn add_external_token_enum(&mut self) {
1218 add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
1219 indent!(self);
1220 for i in 0..self.syntax_grammar.external_tokens.len() {
1221 add_line!(
1222 self,
1223 "{} = {i},",
1224 self.external_token_id(&self.syntax_grammar.external_tokens[i]),
1225 );
1226 }
1227 dedent!(self);
1228 add_line!(self, "}};");
1229 add_line!(self, "");
1230 }
1231
1232 fn add_external_scanner_symbol_map(&mut self) {
1233 add_line!(
1234 self,
1235 "static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
1236 );
1237 indent!(self);
1238 for i in 0..self.syntax_grammar.external_tokens.len() {
1239 let token = &self.syntax_grammar.external_tokens[i];
1240 let id_token = token
1241 .corresponding_internal_token
1242 .unwrap_or_else(|| Symbol::external(i));
1243 add_line!(
1244 self,
1245 "[{}] = {},",
1246 self.external_token_id(token),
1247 self.symbol_ids[&id_token],
1248 );
1249 }
1250 dedent!(self);
1251 add_line!(self, "}};");
1252 add_line!(self, "");
1253 }
1254
1255 fn add_external_scanner_states_list(&mut self) {
1256 add_line!(
1257 self,
1258 "static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
1259 self.parse_table.external_lex_states.len(),
1260 );
1261 indent!(self);
1262 for i in 0..self.parse_table.external_lex_states.len() {
1263 if !self.parse_table.external_lex_states[i].is_empty() {
1264 add_line!(self, "[{i}] = {{");
1265 indent!(self);
1266 for token in self.parse_table.external_lex_states[i].iter() {
1267 add_line!(
1268 self,
1269 "[{}] = true,",
1270 self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
1271 );
1272 }
1273 dedent!(self);
1274 add_line!(self, "}},");
1275 }
1276 }
1277 dedent!(self);
1278 add_line!(self, "}};");
1279 add_line!(self, "");
1280 }
1281
    /// Emits the parse tables and, at the end, the parse action list.
    ///
    /// The first `large_state_count` states are written to the dense
    /// `ts_parse_table`, one row per state with a designated initializer per
    /// symbol. If there are further states, they are written to the flat
    /// `ts_small_parse_table`, together with `ts_small_parse_table_map`, which
    /// records the offset at which each of those states starts. Every distinct
    /// `ParseTableEntry` met along the way is assigned an offset through
    /// `get_parse_action_list_id`; the collected entries are finally handed to
    /// `add_parse_action_list`.
    fn add_parse_table(&mut self) {
        let mut parse_table_entries = HashMap::new();
        let mut next_parse_action_list_index = 0;

        // Register the empty, non-reusable entry first so that it receives
        // offset 0 in the action list.
        // NOTE(review): presumably offset 0 doubles as the "no action" default
        // for table cells that are not initialized explicitly; confirm.
        self.get_parse_action_list_id(
            &ParseTableEntry {
                actions: Vec::new(),
                reusable: false,
            },
            &mut parse_table_entries,
            &mut next_parse_action_list_index,
        );

        add_line!(
            self,
            "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
        );
        indent!(self);

        // Scratch vectors, cleared and refilled for every state.
        let mut terminal_entries = Vec::new();
        let mut nonterminal_entries = Vec::new();

        for (i, state) in self
            .parse_table
            .states
            .iter()
            .enumerate()
            .take(self.large_state_count)
        {
            add_line!(self, "[STATE({i})] = {{");
            indent!(self);

            // Sort the entries before emitting them: terminals by their
            // position in `symbol_order`, non-terminals by the symbol itself.
            // NOTE(review): presumably this keeps the generated output stable
            // between runs; confirm.
            terminal_entries.clear();
            nonterminal_entries.clear();
            terminal_entries.extend(state.terminal_entries.iter());
            nonterminal_entries.extend(state.nonterminal_entries.iter());
            terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
            nonterminal_entries.sort_unstable_by_key(|k| k.0);

            for (symbol, action) in &nonterminal_entries {
                add_line!(
                    self,
                    "[{}] = STATE({}),",
                    self.symbol_ids[symbol],
                    match action {
                        GotoAction::Goto(state) => *state,
                        // A `ShiftExtra` goto points back at the current state.
                        GotoAction::ShiftExtra => i,
                    }
                );
            }

            for (symbol, entry) in &terminal_entries {
                let entry_id = self.get_parse_action_list_id(
                    entry,
                    &mut parse_table_entries,
                    &mut next_parse_action_list_index,
                );
                add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
            }

            dedent!(self);
            add_line!(self, "}},");
        }

        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");

        if self.large_state_count < self.parse_table.states.len() {
            add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
            indent!(self);

            // `next_table_index` counts the array elements written so far, so
            // the value pushed for each state is the offset where it begins.
            let mut next_table_index = 0;
            let mut small_state_indices = Vec::new();
            let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
            for state in self.parse_table.states.iter().skip(self.large_state_count) {
                small_state_indices.push(next_table_index);
                symbols_by_value.clear();

                terminal_entries.clear();
                terminal_entries.extend(state.terminal_entries.iter());
                terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));

                // Group the symbols of this state by the value they map to
                // (action-list offset for terminals, target state for
                // non-terminals), so each distinct value is written only once.
                for (symbol, entry) in &terminal_entries {
                    let entry_id = self.get_parse_action_list_id(
                        entry,
                        &mut parse_table_entries,
                        &mut next_parse_action_list_index,
                    );
                    symbols_by_value
                        .entry((entry_id, SymbolType::Terminal))
                        .or_default()
                        .push(**symbol);
                }
                for (symbol, action) in &state.nonterminal_entries {
                    let state_id = match action {
                        GotoAction::Goto(i) => *i,
                        // One offset has been pushed per small state visited so
                        // far, including this one, so this expression is the
                        // index of the current state.
                        GotoAction::ShiftExtra => {
                            self.large_state_count + small_state_indices.len() - 1
                        }
                    };
                    symbols_by_value
                        .entry((state_id, SymbolType::NonTerminal))
                        .or_default()
                        .push(*symbol);
                }

                // Order the groups by size first, then by kind, value and the
                // first symbol of the group.
                let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
                values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
                    (symbols.len(), *kind, *value, symbols[0])
                });

                // Layout of one state: the number of groups, then for each
                // group its value, its symbol count and the symbols themselves.
                add_line!(
                    self,
                    "[{next_table_index}] = {},",
                    values_with_symbols.len()
                );
                indent!(self);
                next_table_index += 1;

                for ((value, kind), symbols) in &mut values_with_symbols {
                    // Two elements for value and count, plus one per symbol.
                    next_table_index += 2 + symbols.len();
                    if *kind == SymbolType::NonTerminal {
                        add_line!(self, "STATE({value}), {},", symbols.len());
                    } else {
                        add_line!(self, "ACTIONS({value}), {},", symbols.len());
                    }

                    symbols.sort_unstable();
                    indent!(self);
                    for symbol in symbols {
                        add_line!(self, "{},", self.symbol_ids[symbol]);
                    }
                    dedent!(self);
                }

                dedent!(self);
            }

            dedent!(self);
            add_line!(self, "}};");
            add_line!(self, "");

            // Map every small state to the offset recorded for it above.
            add_line!(
                self,
                "static const uint32_t ts_small_parse_table_map[] = {{"
            );
            indent!(self);
            for i in self.large_state_count..self.parse_table.states.len() {
                add_line!(
                    self,
                    "[SMALL_STATE({i})] = {},",
                    small_state_indices[i - self.large_state_count]
                );
            }
            dedent!(self);
            add_line!(self, "}};");
            add_line!(self, "");
        }

        // Turn the entry -> offset map into (offset, entry) pairs sorted by
        // offset before emitting the action list.
        let mut parse_table_entries = parse_table_entries
            .into_iter()
            .map(|(entry, i)| (i, entry))
            .collect::<Vec<_>>();
        parse_table_entries.sort_by_key(|(index, _)| *index);
        self.add_parse_action_list(parse_table_entries);
    }
1455
1456 fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
1457 add_line!(
1458 self,
1459 "static const TSParseActionEntry ts_parse_actions[] = {{"
1460 );
1461 indent!(self);
1462 for (i, entry) in parse_table_entries {
1463 add!(
1464 self,
1465 " [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
1466 entry.actions.len(),
1467 entry.reusable
1468 );
1469 for action in entry.actions {
1470 add!(self, " ");
1471 match action {
1472 ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
1473 ParseAction::Recover => add!(self, "RECOVER()"),
1474 ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
1475 ParseAction::Shift {
1476 state,
1477 is_repetition,
1478 } => {
1479 if is_repetition {
1480 add!(self, "SHIFT_REPEAT({state})");
1481 } else {
1482 add!(self, "SHIFT({state})");
1483 }
1484 }
1485 ParseAction::Reduce {
1486 symbol,
1487 child_count,
1488 dynamic_precedence,
1489 production_id,
1490 ..
1491 } => {
1492 add!(
1493 self,
1494 "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
1495 self.symbol_ids[&symbol]
1496 );
1497 }
1498 }
1499 add!(self, ",");
1500 }
1501 add!(self, "\n");
1502 }
1503 dedent!(self);
1504 add_line!(self, "}};");
1505 add_line!(self, "");
1506 }
1507
1508 fn add_parser_export(&mut self) {
1509 let language_function_name = format!("tree_sitter_{}", self.language_name);
1510 let external_scanner_name = format!("{language_function_name}_external_scanner");
1511
1512 add_line!(self, "#ifdef __cplusplus");
1513 add_line!(self, r#"extern "C" {{"#);
1514 add_line!(self, "#endif");
1515
1516 if !self.syntax_grammar.external_tokens.is_empty() {
1517 add_line!(self, "void *{external_scanner_name}_create(void);");
1518 add_line!(self, "void {external_scanner_name}_destroy(void *);");
1519 add_line!(
1520 self,
1521 "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);",
1522 );
1523 add_line!(
1524 self,
1525 "unsigned {external_scanner_name}_serialize(void *, char *);",
1526 );
1527 add_line!(
1528 self,
1529 "void {external_scanner_name}_deserialize(void *, const char *, unsigned);",
1530 );
1531 add_line!(self, "");
1532 }
1533
1534 add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS");
1535 add_line!(self, "#define TS_PUBLIC");
1536 add_line!(self, "#elif defined(_WIN32)");
1537 add_line!(self, "#define TS_PUBLIC __declspec(dllexport)");
1538 add_line!(self, "#else");
1539 add_line!(
1540 self,
1541 "#define TS_PUBLIC __attribute__((visibility(\"default\")))"
1542 );
1543 add_line!(self, "#endif");
1544 add_line!(self, "");
1545
1546 add_line!(
1547 self,
1548 "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
1549 );
1550 indent!(self);
1551 add_line!(self, "static const TSLanguage language = {{");
1552 indent!(self);
1553 add_line!(self, ".abi_version = LANGUAGE_VERSION,");
1554
1555 add_line!(self, ".symbol_count = SYMBOL_COUNT,");
1557 add_line!(self, ".alias_count = ALIAS_COUNT,");
1558 add_line!(self, ".token_count = TOKEN_COUNT,");
1559 add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
1560 add_line!(self, ".state_count = STATE_COUNT,");
1561 add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
1562 add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
1563 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1564 add_line!(self, ".supertype_count = SUPERTYPE_COUNT,");
1565 }
1566 add_line!(self, ".field_count = FIELD_COUNT,");
1567 add_line!(
1568 self,
1569 ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
1570 );
1571
1572 add_line!(self, ".parse_table = &ts_parse_table[0][0],");
1574 if self.large_state_count < self.parse_table.states.len() {
1575 add_line!(self, ".small_parse_table = ts_small_parse_table,");
1576 add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,");
1577 }
1578 add_line!(self, ".parse_actions = ts_parse_actions,");
1579
1580 add_line!(self, ".symbol_names = ts_symbol_names,");
1582 if !self.field_names.is_empty() {
1583 add_line!(self, ".field_names = ts_field_names,");
1584 add_line!(self, ".field_map_slices = ts_field_map_slices,");
1585 add_line!(self, ".field_map_entries = ts_field_map_entries,");
1586 }
1587 if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1588 add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,");
1589 add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,");
1590 add_line!(self, ".supertype_symbols = ts_supertype_symbols,");
1591 }
1592 add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
1593 add_line!(self, ".public_symbol_map = ts_symbol_map,");
1594 add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
1595 if !self.parse_table.production_infos.is_empty() {
1596 add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],");
1597 }
1598
1599 add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
1601 add_line!(self, ".lex_fn = ts_lex,");
1602 if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
1603 add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
1604 add_line!(
1605 self,
1606 ".keyword_capture_token = {},",
1607 self.symbol_ids[&keyword_capture_token]
1608 );
1609 }
1610
1611 if !self.syntax_grammar.external_tokens.is_empty() {
1612 add_line!(self, ".external_scanner = {{");
1613 indent!(self);
1614 add_line!(self, "&ts_external_scanner_states[0][0],");
1615 add_line!(self, "ts_external_scanner_symbol_map,");
1616 add_line!(self, "{external_scanner_name}_create,");
1617 add_line!(self, "{external_scanner_name}_destroy,");
1618 add_line!(self, "{external_scanner_name}_scan,");
1619 add_line!(self, "{external_scanner_name}_serialize,");
1620 add_line!(self, "{external_scanner_name}_deserialize,");
1621 dedent!(self);
1622 add_line!(self, "}},");
1623 }
1624
1625 add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
1626
1627 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1628 add_line!(self, ".name = \"{}\",", self.language_name);
1629
1630 if self.reserved_word_sets.len() > 1 {
1631 add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
1632 }
1633
1634 add_line!(
1635 self,
1636 ".max_reserved_word_set_size = {},",
1637 self.reserved_word_sets
1638 .iter()
1639 .map(TokenSet::len)
1640 .max()
1641 .unwrap()
1642 );
1643
1644 let Some(metadata) = &self.metadata else {
1645 panic!(
1646 indoc! {"
1647 Metadata is required to generate ABI version {}.
1648 This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
1649 "},
1650 self.abi_version
1651 );
1652 };
1653
1654 add_line!(self, ".metadata = {{");
1655 indent!(self);
1656 add_line!(self, ".major_version = {},", metadata.major_version);
1657 add_line!(self, ".minor_version = {},", metadata.minor_version);
1658 add_line!(self, ".patch_version = {},", metadata.patch_version);
1659 dedent!(self);
1660 add_line!(self, "}},");
1661 }
1662
1663 dedent!(self);
1664 add_line!(self, "}};");
1665 add_line!(self, "return &language;");
1666 dedent!(self);
1667 add_line!(self, "}}");
1668 add_line!(self, "#ifdef __cplusplus");
1669 add_line!(self, "}}");
1670 add_line!(self, "#endif");
1671 }
1672
1673 fn get_parse_action_list_id(
1674 &self,
1675 entry: &ParseTableEntry,
1676 parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
1677 next_parse_action_list_index: &mut usize,
1678 ) -> usize {
1679 if let Some(&index) = parse_table_entries.get(entry) {
1680 index
1681 } else {
1682 let result = *next_parse_action_list_index;
1683 parse_table_entries.insert(entry.clone(), result);
1684 *next_parse_action_list_index += 1 + entry.actions.len();
1685 result
1686 }
1687 }
1688
1689 fn get_field_map_id(
1690 &self,
1691 flat_field_map: Vec<(String, FieldLocation)>,
1692 flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
1693 next_flat_field_map_index: &mut usize,
1694 ) -> usize {
1695 if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
1696 return *index;
1697 }
1698
1699 let result = *next_flat_field_map_index;
1700 *next_flat_field_map_index += flat_field_map.len();
1701 flat_field_maps.push((result, flat_field_map));
1702 result
1703 }
1704
1705 fn external_token_id(&self, token: &ExternalToken) -> String {
1706 format!(
1707 "ts_external_token_{}",
1708 self.sanitize_identifier(&token.name)
1709 )
1710 }
1711
1712 fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
1713 let mut id;
1714 if symbol == Symbol::end() {
1715 id = "ts_builtin_sym_end".to_string();
1716 } else {
1717 let (name, kind) = self.metadata_for_symbol(symbol);
1718 id = match kind {
1719 VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
1720 VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
1721 VariableType::Hidden | VariableType::Named => {
1722 format!("sym_{}", self.sanitize_identifier(name))
1723 }
1724 };
1725
1726 let mut suffix_number = 1;
1727 let mut suffix = String::new();
1728 while used_identifiers.contains(&id) {
1729 id.drain(id.len() - suffix.len()..);
1730 suffix_number += 1;
1731 suffix = suffix_number.to_string();
1732 id += &suffix;
1733 }
1734 }
1735
1736 used_identifiers.insert(id.clone());
1737 self.symbol_ids.insert(symbol, id);
1738 }
1739
1740 fn field_id(&self, field_name: &str) -> String {
1741 format!("field_{field_name}")
1742 }
1743
1744 fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
1745 match symbol.kind {
1746 SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
1747 SymbolType::NonTerminal => {
1748 let variable = &self.syntax_grammar.variables[symbol.index];
1749 (&variable.name, variable.kind)
1750 }
1751 SymbolType::Terminal => {
1752 let variable = &self.lexical_grammar.variables[symbol.index];
1753 (&variable.name, variable.kind)
1754 }
1755 SymbolType::External => {
1756 let token = &self.syntax_grammar.external_tokens[symbol.index];
1757 (&token.name, token.kind)
1758 }
1759 }
1760 }
1761
1762 fn symbols_for_alias(&self, alias: &Alias) -> Vec<Symbol> {
1763 self.parse_table
1764 .symbols
1765 .iter()
1766 .copied()
1767 .filter(move |symbol| {
1768 self.default_aliases.get(symbol).map_or_else(
1769 || {
1770 let (name, kind) = self.metadata_for_symbol(*symbol);
1771 name == alias.value && kind == alias.kind()
1772 },
1773 |default_alias| default_alias == alias,
1774 )
1775 })
1776 .collect()
1777 }
1778
1779 fn sanitize_identifier(&self, name: &str) -> String {
1780 let mut result = String::with_capacity(name.len());
1781 for c in name.chars() {
1782 if c.is_ascii_alphanumeric() || c == '_' {
1783 result.push(c);
1784 } else {
1785 'special_chars: {
1786 let replacement = match c {
1787 ' ' if name.len() == 1 => "SPACE",
1788 '~' => "TILDE",
1789 '`' => "BQUOTE",
1790 '!' => "BANG",
1791 '@' => "AT",
1792 '#' => "POUND",
1793 '$' => "DOLLAR",
1794 '%' => "PERCENT",
1795 '^' => "CARET",
1796 '&' => "AMP",
1797 '*' => "STAR",
1798 '(' => "LPAREN",
1799 ')' => "RPAREN",
1800 '-' => "DASH",
1801 '+' => "PLUS",
1802 '=' => "EQ",
1803 '{' => "LBRACE",
1804 '}' => "RBRACE",
1805 '[' => "LBRACK",
1806 ']' => "RBRACK",
1807 '\\' => "BSLASH",
1808 '|' => "PIPE",
1809 ':' => "COLON",
1810 ';' => "SEMI",
1811 '"' => "DQUOTE",
1812 '\'' => "SQUOTE",
1813 '<' => "LT",
1814 '>' => "GT",
1815 ',' => "COMMA",
1816 '.' => "DOT",
1817 '?' => "QMARK",
1818 '/' => "SLASH",
1819 '\n' => "LF",
1820 '\r' => "CR",
1821 '\t' => "TAB",
1822 '\0' => "NULL",
1823 '\u{0001}' => "SOH",
1824 '\u{0002}' => "STX",
1825 '\u{0003}' => "ETX",
1826 '\u{0004}' => "EOT",
1827 '\u{0005}' => "ENQ",
1828 '\u{0006}' => "ACK",
1829 '\u{0007}' => "BEL",
1830 '\u{0008}' => "BS",
1831 '\u{000b}' => "VTAB",
1832 '\u{000c}' => "FF",
1833 '\u{000e}' => "SO",
1834 '\u{000f}' => "SI",
1835 '\u{0010}' => "DLE",
1836 '\u{0011}' => "DC1",
1837 '\u{0012}' => "DC2",
1838 '\u{0013}' => "DC3",
1839 '\u{0014}' => "DC4",
1840 '\u{0015}' => "NAK",
1841 '\u{0016}' => "SYN",
1842 '\u{0017}' => "ETB",
1843 '\u{0018}' => "CAN",
1844 '\u{0019}' => "EM",
1845 '\u{001a}' => "SUB",
1846 '\u{001b}' => "ESC",
1847 '\u{001c}' => "FS",
1848 '\u{001d}' => "GS",
1849 '\u{001e}' => "RS",
1850 '\u{001f}' => "US",
1851 '\u{007F}' => "DEL",
1852 '\u{FEFF}' => "BOM",
1853 '\u{0080}'..='\u{FFFF}' => {
1854 result.push_str(&format!("u{:04x}", c as u32));
1855 break 'special_chars;
1856 }
1857 '\u{10000}'..='\u{10FFFF}' => {
1858 result.push_str(&format!("U{:08x}", c as u32));
1859 break 'special_chars;
1860 }
1861 '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
1862 ' ' => break 'special_chars,
1863 };
1864 if !result.is_empty() && !result.ends_with('_') {
1865 result.push('_');
1866 }
1867 result += replacement;
1868 }
1869 }
1870 }
1871 result
1872 }
1873
1874 fn sanitize_string(&self, name: &str) -> String {
1875 let mut result = String::with_capacity(name.len());
1876 for c in name.chars() {
1877 match c {
1878 '\"' => result += "\\\"",
1879 '?' => result += "\\?",
1880 '\\' => result += "\\\\",
1881 '\u{0007}' => result += "\\a",
1882 '\u{0008}' => result += "\\b",
1883 '\u{000b}' => result += "\\v",
1884 '\u{000c}' => result += "\\f",
1885 '\n' => result += "\\n",
1886 '\r' => result += "\\r",
1887 '\t' => result += "\\t",
1888 '\0' => result += "\\0",
1889 '\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
1890 '\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
1891 '\u{10000}'..='\u{10FFFF}' => {
1892 result.push_str(&format!("\\U{:08x}", c as u32));
1893 }
1894 _ => result.push(c),
1895 }
1896 }
1897 result
1898 }
1899
1900 fn add_character(&mut self, c: char) {
1901 match c {
1902 '\'' => add!(self, "'\\''"),
1903 '\\' => add!(self, "'\\\\'"),
1904 '\u{000c}' => add!(self, "'\\f'"),
1905 '\n' => add!(self, "'\\n'"),
1906 '\t' => add!(self, "'\\t'"),
1907 '\r' => add!(self, "'\\r'"),
1908 _ => {
1909 if c == '\0' {
1910 add!(self, "0");
1911 } else if c == ' ' || c.is_ascii_graphic() {
1912 add!(self, "'{c}'");
1913 } else {
1914 add!(self, "0x{:02x}", c as u32);
1915 }
1916 }
1917 }
1918 }
1919}
1920
1921#[allow(clippy::too_many_arguments)]
1939pub fn render_c_code(
1940 name: &str,
1941 tables: Tables,
1942 syntax_grammar: SyntaxGrammar,
1943 lexical_grammar: LexicalGrammar,
1944 default_aliases: AliasMap,
1945 abi_version: usize,
1946 semantic_version: Option<(u8, u8, u8)>,
1947 supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
1948) -> String {
1949 assert!(
1950 (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
1951 "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
1952 );
1953
1954 Generator {
1955 language_name: name.to_string(),
1956 parse_table: tables.parse_table,
1957 main_lex_table: tables.main_lex_table,
1958 keyword_lex_table: tables.keyword_lex_table,
1959 large_character_sets: tables.large_character_sets,
1960 large_character_set_info: Vec::new(),
1961 syntax_grammar,
1962 lexical_grammar,
1963 default_aliases,
1964 abi_version,
1965 metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
1966 major_version,
1967 minor_version,
1968 patch_version,
1969 }),
1970 supertype_symbol_map,
1971 ..Default::default()
1972 }
1973 .generate()
1974}