
1use std::{
2    cmp,
3    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4    fmt::Write,
5    mem::swap,
8use indoc::indoc;
10use super::{
11    build_tables::Tables,
12    grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
13    nfa::CharacterSet,
14    node_types::ChildType,
15    rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
16    tables::{
17        AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
18        ParseTableEntry,
19    },
22const SMALL_STATE_THRESHOLD: usize = 64;
23pub const ABI_VERSION_MIN: usize = 14;
24pub const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
26const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
27const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
/// Appends formatted text to `$this.buffer` verbatim: no indentation, no newline.
macro_rules! add {
    ($this: tt, $($arg: tt)*) => {{
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
    }}
}

/// Appends two spaces to `$this.buffer` for every level in `$this.indent_level`.
macro_rules! add_whitespace {
    ($this:tt) => {{
        for _ in 0..$this.indent_level {
            write!(&mut $this.buffer, "  ").unwrap();
        }
    }};
}

/// Appends one complete line to `$this.buffer`: the current indentation, the
/// formatted text, then a newline.
macro_rules! add_line {
    ($this: tt, $($arg: tt)*) => {
        add_whitespace!($this);
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
        $this.buffer += "\n";
    }
}

/// Increases the indentation level by one.
macro_rules! indent {
    ($this:tt) => {
        $this.indent_level += 1;
    };
}

/// Decreases the indentation level by one; panics if it is already zero.
macro_rules! dedent {
    ($this:tt) => {
        assert_ne!($this.indent_level, 0);
        $this.indent_level -= 1;
    };
}
65struct Generator {
66    buffer: String,
67    indent_level: usize,
68    language_name: String,
69    parse_table: ParseTable,
70    main_lex_table: LexTable,
71    keyword_lex_table: LexTable,
72    large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
73    large_character_set_info: Vec<LargeCharacterSetInfo>,
74    large_state_count: usize,
75    syntax_grammar: SyntaxGrammar,
76    lexical_grammar: LexicalGrammar,
77    default_aliases: AliasMap,
78    symbol_order: HashMap<Symbol, usize>,
79    symbol_ids: HashMap<Symbol, String>,
80    alias_ids: HashMap<Alias, String>,
81    unique_aliases: Vec<Alias>,
82    symbol_map: HashMap<Symbol, Symbol>,
83    reserved_word_sets: Vec<TokenSet>,
84    reserved_word_set_ids_by_parse_state: Vec<usize>,
85    field_names: Vec<String>,
86    supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
87    supertype_map: BTreeMap<String, Vec<ChildType>>,
88    abi_version: usize,
89    metadata: Option<Metadata>,
/// Bookkeeping for one large character set constant.
struct LargeCharacterSetInfo {
    /// Name of the constant in the generated code.
    constant_name: String,
    /// Whether the constant is referenced; starts out `false` when created in `init`.
    is_used: bool,
}
/// A three-part version number (major, minor, patch).
struct Metadata {
    major_version: u8,
    minor_version: u8,
    patch_version: u8,
}
103impl Generator {
104    fn generate(mut self) -> String {
105        self.init();
106        self.add_header();
107        self.add_includes();
108        self.add_pragmas();
109        self.add_stats();
110        self.add_symbol_enum();
111        self.add_symbol_names_list();
112        self.add_unique_symbol_map();
113        self.add_symbol_metadata_list();
115        if !self.field_names.is_empty() {
116            self.add_field_name_enum();
117            self.add_field_name_names_list();
118            self.add_field_sequences();
119        }
121        if !self.parse_table.production_infos.is_empty() {
122            self.add_alias_sequences();
123        }
125        self.add_non_terminal_alias_map();
126        self.add_primary_state_id_list();
128        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && !self.supertype_map.is_empty() {
129            self.add_supertype_map();
130        }
132        let buffer_offset_before_lex_functions = self.buffer.len();
134        let mut main_lex_table = LexTable::default();
135        swap(&mut main_lex_table, &mut self.main_lex_table);
136        self.add_lex_function("ts_lex", main_lex_table);
138        if self.syntax_grammar.word_token.is_some() {
139            let mut keyword_lex_table = LexTable::default();
140            swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
141            self.add_lex_function("ts_lex_keywords", keyword_lex_table);
142        }
144        // Once the lex functions are generated, and we've determined which large
145        // character sets are actually used, we can generate the large character set
146        // constants. Insert them into the output buffer before the lex functions.
147        let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string();
148        self.buffer.truncate(buffer_offset_before_lex_functions);
149        for ix in 0..self.large_character_sets.len() {
150            self.add_character_set(ix);
151        }
152        self.buffer.push_str(&lex_functions);
154        self.add_lex_modes();
156        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
157        {
158            self.add_reserved_word_sets();
159        }
161        self.add_parse_table();
163        if !self.syntax_grammar.external_tokens.is_empty() {
164            self.add_external_token_enum();
165            self.add_external_scanner_symbol_map();
166            self.add_external_scanner_states_list();
167        }
169        self.add_parser_export();
171        self.buffer
172    }
174    fn init(&mut self) {
175        let mut symbol_identifiers = HashSet::new();
176        for i in 0..self.parse_table.symbols.len() {
177            self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
178        }
179        self.symbol_ids.insert(
180            Symbol::end_of_nonterminal_extra(),
181            self.symbol_ids[&Symbol::end()].clone(),
182        );
184        self.symbol_map = HashMap::new();
186        for symbol in &self.parse_table.symbols {
187            let mut mapping = symbol;
189            // There can be multiple symbols in the grammar that have the same name and kind,
190            // due to simple aliases. When that happens, ensure that they map to the same
191            // public-facing symbol. If one of the symbols is not aliased, choose that one
192            // to be the public-facing symbol. Otherwise, pick the symbol with the lowest
193            // numeric value.
194            if let Some(alias) = self.default_aliases.get(symbol) {
195                let kind = alias.kind();
196                for other_symbol in &self.parse_table.symbols {
197                    if let Some(other_alias) = self.default_aliases.get(other_symbol) {
198                        if other_symbol < mapping && other_alias == alias {
199                            mapping = other_symbol;
200                        }
201                    } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
202                        mapping = other_symbol;
203                        break;
204                    }
205                }
206            }
207            // Two anonymous tokens with different flags but the same string value
208            // should be represented with the same symbol in the public API. Examples:
209            // * "<" and token(prec(1, "<"))
210            // * "(" and token.immediate("(")
211            else if symbol.is_terminal() {
212                let metadata = self.metadata_for_symbol(*symbol);
213                for other_symbol in &self.parse_table.symbols {
214                    let other_metadata = self.metadata_for_symbol(*other_symbol);
215                    if other_metadata == metadata {
216                        if let Some(mapped) = self.symbol_map.get(other_symbol) {
217                            if mapped == symbol {
218                                break;
219                            }
220                        }
221                        mapping = other_symbol;
222                        break;
223                    }
224                }
225            }
227            self.symbol_map.insert(*symbol, *mapping);
228        }
230        for production_info in &self.parse_table.production_infos {
231            // Build a list of all field names
232            for field_name in production_info.field_map.keys() {
233                if let Err(i) = self.field_names.binary_search(field_name) {
234                    self.field_names.insert(i, field_name.clone());
235                }
236            }
238            for alias in &production_info.alias_sequence {
239                // Generate a mapping from aliases to C identifiers.
240                if let Some(alias) = &alias {
241                    // Some aliases match an existing symbol in the grammar.
242                    let alias_id =
243                        if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
244                            self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
245                        }
246                        // Other aliases don't match any existing symbol, and need their own
247                        // identifiers.
248                        else {
249                            if let Err(i) = self.unique_aliases.binary_search(alias) {
250                                self.unique_aliases.insert(i, alias.clone());
251                            }
253                            if alias.is_named {
254                                format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
255                            } else {
256                                format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
257                            }
258                        };
260                    self.alias_ids.entry(alias.clone()).or_insert(alias_id);
261                }
262            }
263        }
265        for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() {
266            let count = self.large_character_sets[0..ix]
267                .iter()
268                .filter(|(sym, _)| sym == symbol)
269                .count()
270                + 1;
271            let constant_name = if let Some(symbol) = symbol {
272                format!("{}_character_set_{}", self.symbol_ids[symbol], count)
273            } else {
274                format!("extras_character_set_{count}")
275            };
276            self.large_character_set_info.push(LargeCharacterSetInfo {
277                constant_name,
278                is_used: false,
279            });
280        }
282        // Assign an id to each unique reserved word set
283        self.reserved_word_sets.push(TokenSet::new());
284        for state in &self.parse_table.states {
285            let id = if let Some(ix) = self
286                .reserved_word_sets
287                .iter()
288                .position(|set| *set == state.reserved_words)
289            {
290                ix
291            } else {
292                self.reserved_word_sets.push(state.reserved_words.clone());
293                self.reserved_word_sets.len() - 1
294            };
295            self.reserved_word_set_ids_by_parse_state.push(id);
296        }
298        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
299            for (supertype, subtypes) in &self.supertype_symbol_map {
300                if let Some(supertype) = self.symbol_ids.get(supertype) {
301                    self.supertype_map
302                        .entry(supertype.clone())
303                        .or_insert_with(|| subtypes.clone());
304                }
305            }
307            self.supertype_symbol_map.clear();
308        }
310        // Determine which states should use the "small state" representation, and which should
311        // use the normal array representation.
312        let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
313        self.large_state_count = self
314            .parse_table
315            .states
316            .iter()
317            .enumerate()
318            .take_while(|(i, s)| {
319                *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
320            })
321            .count();
322    }
324    fn add_header(&mut self) {
325        let version = BUILD_SHA.map_or_else(
326            || BUILD_VERSION.to_string(),
327            |build_sha| format!("{BUILD_VERSION} ({build_sha})"),
328        );
329        add_line!(
330            self,
331            "/* Automatically generated by tree-sitter v{version} */",
332        );
333        add_line!(self, "");
334    }
336    fn add_includes(&mut self) {
337        add_line!(self, "#include \"tree_sitter/parser.h\"");
338        add_line!(self, "");
339    }
341    fn add_pragmas(&mut self) {
342        add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
343        add_line!(
344            self,
345            "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
346        );
347        add_line!(self, "#endif");
348        add_line!(self, "");
350        // Compiling large lexer functions can be very slow. Disabling optimizations
351        // is not ideal, but only a very small fraction of overall parse time is
352        // spent lexing, so the performance impact of this is negligible.
353        if self.main_lex_table.states.len() > 300 {
354            add_line!(self, "#ifdef _MSC_VER");
355            add_line!(self, "#pragma optimize(\"\", off)");
356            add_line!(self, "#elif defined(__clang__)");
357            add_line!(self, "#pragma clang optimize off");
358            add_line!(self, "#elif defined(__GNUC__)");
359            add_line!(self, "#pragma GCC optimize (\"O0\")");
360            add_line!(self, "#endif");
361            add_line!(self, "");
362        }
363    }
365    fn add_stats(&mut self) {
366        let token_count = self
367            .parse_table
368            .symbols
369            .iter()
370            .filter(|symbol| {
371                if symbol.is_terminal() || symbol.is_eof() {
372                    true
373                } else if symbol.is_external() {
374                    self.syntax_grammar.external_tokens[symbol.index]
375                        .corresponding_internal_token
376                        .is_none()
377                } else {
378                    false
379                }
380            })
381            .count();
383        add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version);
384        add_line!(
385            self,
386            "#define STATE_COUNT {}",
387            self.parse_table.states.len()
388        );
389        add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
391        add_line!(
392            self,
393            "#define SYMBOL_COUNT {}",
394            self.parse_table.symbols.len()
395        );
396        add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
397        add_line!(self, "#define TOKEN_COUNT {token_count}");
398        add_line!(
399            self,
400            "#define EXTERNAL_TOKEN_COUNT {}",
401            self.syntax_grammar.external_tokens.len()
402        );
403        add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
404        add_line!(
405            self,
406            "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
407            self.parse_table.max_aliased_production_length
408        );
409        add_line!(
410            self,
411            "#define MAX_RESERVED_WORD_SET_SIZE {}",
412            self.reserved_word_sets
413                .iter()
414                .map(TokenSet::len)
415                .max()
416                .unwrap()
417        );
419        add_line!(
420            self,
421            "#define PRODUCTION_ID_COUNT {}",
422            self.parse_table.production_infos.len()
423        );
424        add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len());
425        add_line!(self, "");
426    }
428    fn add_symbol_enum(&mut self) {
429        add_line!(self, "enum ts_symbol_identifiers {{");
430        indent!(self);
431        self.symbol_order.insert(Symbol::end(), 0);
432        let mut i = 1;
433        for symbol in &self.parse_table.symbols {
434            if *symbol != Symbol::end() {
435                self.symbol_order.insert(*symbol, i);
436                add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
437                i += 1;
438            }
439        }
440        for alias in &self.unique_aliases {
441            add_line!(self, "{} = {i},", self.alias_ids[alias]);
442            i += 1;
443        }
444        dedent!(self);
445        add_line!(self, "}};");
446        add_line!(self, "");
447    }
449    fn add_symbol_names_list(&mut self) {
450        add_line!(self, "static const char * const ts_symbol_names[] = {{");
451        indent!(self);
452        for symbol in &self.parse_table.symbols {
453            let name = self.sanitize_string(
454                self.default_aliases
455                    .get(symbol)
456                    .map_or(self.metadata_for_symbol(*symbol).0, |alias| {
457                        alias.value.as_str()
458                    }),
459            );
460            add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
461        }
462        for alias in &self.unique_aliases {
463            add_line!(
464                self,
465                "[{}] = \"{}\",",
466                self.alias_ids[alias],
467                self.sanitize_string(&alias.value)
468            );
469        }
470        dedent!(self);
471        add_line!(self, "}};");
472        add_line!(self, "");
473    }
475    fn add_unique_symbol_map(&mut self) {
476        add_line!(self, "static const TSSymbol ts_symbol_map[] = {{");
477        indent!(self);
478        for symbol in &self.parse_table.symbols {
479            add_line!(
480                self,
481                "[{}] = {},",
482                self.symbol_ids[symbol],
483                self.symbol_ids[&self.symbol_map[symbol]],
484            );
485        }
487        for alias in &self.unique_aliases {
488            add_line!(
489                self,
490                "[{}] = {},",
491                self.alias_ids[alias],
492                self.alias_ids[alias],
493            );
494        }
496        dedent!(self);
497        add_line!(self, "}};");
498        add_line!(self, "");
499    }
501    fn add_field_name_enum(&mut self) {
502        add_line!(self, "enum ts_field_identifiers {{");
503        indent!(self);
504        for (i, field_name) in self.field_names.iter().enumerate() {
505            add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
506        }
507        dedent!(self);
508        add_line!(self, "}};");
509        add_line!(self, "");
510    }
512    fn add_field_name_names_list(&mut self) {
513        add_line!(self, "static const char * const ts_field_names[] = {{");
514        indent!(self);
515        add_line!(self, "[0] = NULL,");
516        for field_name in &self.field_names {
517            add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
518        }
519        dedent!(self);
520        add_line!(self, "}};");
521        add_line!(self, "");
522    }
524    fn add_symbol_metadata_list(&mut self) {
525        add_line!(
526            self,
527            "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
528        );
529        indent!(self);
530        for symbol in &self.parse_table.symbols {
531            add_line!(self, "[{}] = {{", self.symbol_ids[symbol]);
532            indent!(self);
533            if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
534                add_line!(self, ".visible = true,");
535                add_line!(self, ".named = {is_named},");
536            } else {
537                match self.metadata_for_symbol(*symbol).1 {
538                    VariableType::Named => {
539                        add_line!(self, ".visible = true,");
540                        add_line!(self, ".named = true,");
541                    }
542                    VariableType::Anonymous => {
543                        add_line!(self, ".visible = true,");
544                        add_line!(self, ".named = false,");
545                    }
546                    VariableType::Hidden => {
547                        add_line!(self, ".visible = false,");
548                        add_line!(self, ".named = true,");
549                        if self.syntax_grammar.supertype_symbols.contains(symbol) {
550                            add_line!(self, ".supertype = true,");
551                        }
552                    }
553                    VariableType::Auxiliary => {
554                        add_line!(self, ".visible = false,");
555                        add_line!(self, ".named = false,");
556                    }
557                }
558            }
559            dedent!(self);
560            add_line!(self, "}},");
561        }
562        for alias in &self.unique_aliases {
563            add_line!(self, "[{}] = {{", self.alias_ids[alias]);
564            indent!(self);
565            add_line!(self, ".visible = true,");
566            add_line!(self, ".named = {},", alias.is_named);
567            dedent!(self);
568            add_line!(self, "}},");
569        }
570        dedent!(self);
571        add_line!(self, "}};");
572        add_line!(self, "");
573    }
575    fn add_alias_sequences(&mut self) {
576        add_line!(
577            self,
578            "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
579        );
580        indent!(self);
581        for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
582            if production_info.alias_sequence.is_empty() {
583                // Work around MSVC's intolerance of empty array initializers by
584                // explicitly zero-initializing the first element.
585                if i == 0 {
586                    add_line!(self, "[0] = {{0}},");
587                }
588                continue;
589            }
591            add_line!(self, "[{i}] = {{");
592            indent!(self);
593            for (j, alias) in production_info.alias_sequence.iter().enumerate() {
594                if let Some(alias) = alias {
595                    add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
596                }
597            }
598            dedent!(self);
599            add_line!(self, "}},");
600        }
601        dedent!(self);
602        add_line!(self, "}};");
603        add_line!(self, "");
604    }
606    fn add_non_terminal_alias_map(&mut self) {
607        let mut alias_ids_by_symbol = HashMap::new();
608        for variable in &self.syntax_grammar.variables {
609            for production in & {
610                for step in &production.steps {
611                    if let Some(alias) = &step.alias {
612                        if step.symbol.is_non_terminal()
613                            && Some(alias) != self.default_aliases.get(&step.symbol)
614                            && self.symbol_ids.contains_key(&step.symbol)
615                        {
616                            if let Some(alias_id) = self.alias_ids.get(alias) {
617                                let alias_ids =
618                                    alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new());
619                                if let Err(i) = alias_ids.binary_search(&alias_id) {
620                                    alias_ids.insert(i, alias_id);
621                                }
622                            }
623                        }
624                    }
625                }
626            }
627        }
629        let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
630        alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
632        add_line!(
633            self,
634            "static const uint16_t ts_non_terminal_alias_map[] = {{"
635        );
636        indent!(self);
637        for (symbol, alias_ids) in alias_ids_by_symbol {
638            let symbol_id = &self.symbol_ids[symbol];
639            let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]];
640            add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
641            indent!(self);
642            add_line!(self, "{public_symbol_id},");
643            for alias_id in alias_ids {
644                add_line!(self, "{alias_id},");
645            }
646            dedent!(self);
647        }
648        add_line!(self, "0,");
649        dedent!(self);
650        add_line!(self, "}};");
651        add_line!(self, "");
652    }
654    /// Produces a list of the "primary state" for every state in the grammar.
655    ///
656    /// The "primary state" for a given state is the first encountered state that behaves
657    /// identically with respect to query analysis. We derive this by keeping track of the `core_id`
658    /// for each state and treating the first state with a given `core_id` as primary.
659    fn add_primary_state_id_list(&mut self) {
660        add_line!(
661            self,
662            "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{"
663        );
664        indent!(self);
665        let mut first_state_for_each_core_id = HashMap::new();
666        for (idx, state) in self.parse_table.states.iter().enumerate() {
667            let primary_state = first_state_for_each_core_id
668                .entry(state.core_id)
669                .or_insert(idx);
670            add_line!(self, "[{idx}] = {primary_state},");
671        }
672        dedent!(self);
673        add_line!(self, "}};");
674        add_line!(self, "");
675    }
677    fn add_field_sequences(&mut self) {
678        let mut flat_field_maps = vec![];
679        let mut next_flat_field_map_index = 0;
680        self.get_field_map_id(
681            Vec::new(),
682            &mut flat_field_maps,
683            &mut next_flat_field_map_index,
684        );
686        let mut field_map_ids = Vec::new();
687        for production_info in &self.parse_table.production_infos {
688            if production_info.field_map.is_empty() {
689                field_map_ids.push((0, 0));
690            } else {
691                let mut flat_field_map = Vec::new();
692                for (field_name, locations) in &production_info.field_map {
693                    for location in locations {
694                        flat_field_map.push((field_name.clone(), *location));
695                    }
696                }
697                field_map_ids.push((
698                    self.get_field_map_id(
699                        flat_field_map.clone(),
700                        &mut flat_field_maps,
701                        &mut next_flat_field_map_index,
702                    ),
703                    flat_field_map.len(),
704                ));
705            }
706        }
708        add_line!(
709            self,
710            "static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
711        );
712        indent!(self);
713        for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
714            if length > 0 {
715                add_line!(
716                    self,
717                    "[{production_id}] = {{.index = {row_id}, .length = {length}}},",
718                );
719            }
720        }
721        dedent!(self);
722        add_line!(self, "}};");
723        add_line!(self, "");
725        add_line!(
726            self,
727            "static const TSFieldMapEntry ts_field_map_entries[] = {{",
728        );
729        indent!(self);
730        for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
731            add_line!(self, "[{row_index}] =");
732            indent!(self);
733            for (field_name, location) in field_pairs {
734                add_whitespace!(self);
735                add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
736                if location.inherited {
737                    add!(self, ", .inherited = true");
738                }
739                add!(self, "}},\n");
740            }
741            dedent!(self);
742        }
744        dedent!(self);
745        add_line!(self, "}};");
746        add_line!(self, "");
747    }
749    fn add_supertype_map(&mut self) {
750        add_line!(
751            self,
752            "static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{"
753        );
754        indent!(self);
755        for supertype in self.supertype_map.keys() {
756            add_line!(self, "{supertype},");
757        }
758        dedent!(self);
759        add_line!(self, "}};\n");
761        add_line!(
762            self,
763            "static const TSMapSlice ts_supertype_map_slices[] = {{",
764        );
765        indent!(self);
766        let mut row_id = 0;
767        let mut supertype_ids = vec![0];
768        let mut supertype_string_map = BTreeMap::new();
769        for (supertype, subtypes) in &self.supertype_map {
770            supertype_string_map.insert(
771                supertype,
772                subtypes
773                    .iter()
774                    .flat_map(|s| match s {
775                        ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
776                        ChildType::Aliased(alias) => {
777                            self.alias_ids.get(alias).cloned().map_or_else(
778                                || {
779                                    self.symbols_for_alias(alias)
780                                        .into_iter()
781                                        .map(|s| self.symbol_ids.get(&s).cloned())
782                                        .collect()
783                                },
784                                |a| vec![Some(a)],
785                            )
786                        }
787                    })
788                    .flatten()
789                    .collect::<BTreeSet<String>>(),
790            );
791        }
792        for (supertype, subtypes) in &supertype_string_map {
793            let length = subtypes.len();
794            add_line!(
795                self,
796                "[{supertype}] = {{.index = {row_id}, .length = {length}}},",
797            );
798            row_id += length;
799            supertype_ids.push(row_id);
800        }
801        dedent!(self);
802        add_line!(self, "}};");
803        add_line!(self, "");
805        add_line!(
806            self,
807            "static const TSSymbol ts_supertype_map_entries[] = {{",
808        );
809        indent!(self);
810        for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() {
811            let row_index = supertype_ids[i];
812            add_line!(self, "[{row_index}] =");
813            indent!(self);
814            for subtype in subtypes {
815                add_whitespace!(self);
816                add!(self, "{subtype},\n");
817            }
818            dedent!(self);
819        }
821        dedent!(self);
822        add_line!(self, "}};");
823        add_line!(self, "");
824    }
826    fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
827        add_line!(
828            self,
829            "static bool {name}(TSLexer *lexer, TSStateId state) {{",
830        );
831        indent!(self);
833        add_line!(self, "START_LEXER();");
834        add_line!(self, "eof = lexer->eof(lexer);");
835        add_line!(self, "switch (state) {{");
837        indent!(self);
838        for (i, state) in lex_table.states.into_iter().enumerate() {
839            add_line!(self, "case {i}:");
840            indent!(self);
841            self.add_lex_state(i, state);
842            dedent!(self);
843        }
845        add_line!(self, "default:");
846        indent!(self);
847        add_line!(self, "return false;");
848        dedent!(self);
850        dedent!(self);
851        add_line!(self, "}}");
853        dedent!(self);
854        add_line!(self, "}}");
855        add_line!(self, "");
856    }
858    fn add_lex_state(&mut self, _state_ix: usize, state: LexState) {
859        if let Some(accept_action) = state.accept_action {
860            add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
861        }
863        if let Some(eof_action) = state.eof_action {
864            add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
865        }
867        let mut chars_copy = CharacterSet::empty();
868        let mut large_set = CharacterSet::empty();
869        let mut ruled_out_chars = CharacterSet::empty();
871        // The transitions in a lex state are sorted with the single-character
872        // transitions first. If there are many single-character transitions,
873        // then implement them using an array of (lookahead character, state)
874        // pairs, instead of individual if statements, in order to reduce compile
875        // time.
876        let mut leading_simple_transition_count = 0;
877        let mut leading_simple_transition_range_count = 0;
878        for (chars, action) in &state.advance_actions {
879            if action.in_main_token
880                && chars.ranges().all(|r| {
881                    let start = *r.start() as u32;
882                    let end = *r.end() as u32;
883                    end <= start + 1 && u16::try_from(end).is_ok()
884                })
885            {
886                leading_simple_transition_count += 1;
887                leading_simple_transition_range_count += chars.range_count();
888            } else {
889                break;
890            }
891        }
893        if leading_simple_transition_range_count >= 8 {
894            add_line!(self, "ADVANCE_MAP(");
895            indent!(self);
896            for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] {
897                for range in chars.ranges() {
898                    add_whitespace!(self);
899                    self.add_character(*range.start());
900                    add!(self, ", {},\n", action.state);
901                    if range.end() > range.start() {
902                        add_whitespace!(self);
903                        self.add_character(*range.end());
904                        add!(self, ", {},\n", action.state);
905                    }
906                }
907                ruled_out_chars = ruled_out_chars.add(chars);
908            }
909            dedent!(self);
910            add_line!(self, ");");
911        } else {
912            leading_simple_transition_count = 0;
913        }
915        for (chars, action) in &state.advance_actions[leading_simple_transition_count..] {
916            add_whitespace!(self);
918            // The lex state's advance actions are represented with disjoint
919            // sets of characters. When translating these disjoint sets into a
920            // sequence of checks, we don't need to re-check conditions that
921            // have already been checked due to previous transitions.
922            //
923            // Note that this simplification may result in an empty character set.
924            // That means that the transition is guaranteed (nothing further needs to
925            // be checked), not that this transition is impossible.
926            let simplified_chars = chars.simplify_ignoring(&ruled_out_chars);
928            // For large character sets, find the best matching character set from
929            // a pre-selected list of large character sets, which are based on the
930            // state transitions for invidual tokens. This transition may not exactly
931            // match one of the pre-selected character sets. In that case, determine
932            // the additional checks that need to be performed to match this transition.
933            let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None;
934            if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT {
935                for (ix, (_, set)) in self.large_character_sets.iter().enumerate() {
936                    chars_copy.assign(&simplified_chars);
937                    large_set.assign(set);
938                    let intersection = chars_copy.remove_intersection(&mut large_set);
939                    if !intersection.is_empty() {
940                        let additions = chars_copy.simplify_ignoring(&ruled_out_chars);
941                        let removals = large_set.simplify_ignoring(&ruled_out_chars);
942                        let total_range_count = additions.range_count() + removals.range_count();
943                        if total_range_count >= simplified_chars.range_count() {
944                            continue;
945                        }
946                        if let Some((_, best_additions, best_removals)) = &best_large_char_set {
947                            let best_range_count =
948                                best_additions.range_count() + best_removals.range_count();
949                            if best_range_count < total_range_count {
950                                continue;
951                            }
952                        }
953                        best_large_char_set = Some((ix, additions, removals));
954                    }
955                }
956            }
958            // Add this transition's character set to the set of ruled out characters,
959            // which don't need to be checked for subsequent transitions in this state.
960            ruled_out_chars = ruled_out_chars.add(chars);
962            let mut large_char_set_ix = None;
963            let mut asserted_chars = simplified_chars;
964            let mut negated_chars = CharacterSet::empty();
965            if let Some((char_set_ix, additions, removals)) = best_large_char_set {
966                asserted_chars = additions;
967                negated_chars = removals;
968                large_char_set_ix = Some(char_set_ix);
969            }
971            let mut line_break = "\n".to_string();
972            for _ in 0..self.indent_level + 2 {
973                line_break.push_str("  ");
974            }
976            let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
977            let has_negative_condition = !negated_chars.is_empty();
978            let has_condition = has_positive_condition || has_negative_condition;
979            if has_condition {
980                add!(self, "if (");
981                if has_positive_condition && has_negative_condition {
982                    add!(self, "(");
983                }
984            }
986            if let Some(large_char_set_ix) = large_char_set_ix {
987                let large_set = &self.large_character_sets[large_char_set_ix].1;
989                // If the character set contains the null character, check that we
990                // are not at the end of the file.
991                let check_eof = large_set.contains('\0');
992                if check_eof {
993                    add!(self, "(!eof && ");
994                }
996                let char_set_info = &mut self.large_character_set_info[large_char_set_ix];
997                char_set_info.is_used = true;
998                add!(
999                    self,
1000                    "set_contains({}, {}, lookahead)",
1001                    char_set_info.constant_name,
1002                    large_set.range_count(),
1003                );
1004                if check_eof {
1005                    add!(self, ")");
1006                }
1007            }
1009            if !asserted_chars.is_empty() {
1010                if large_char_set_ix.is_some() {
1011                    add!(self, " ||{line_break}");
1012                }
1014                // If the character set contains the max character, than it probably
1015                // corresponds to a negated character class in a regex, so it will be more
1016                // concise and readable to express it in terms of negated ranges.
1017                let is_included = !asserted_chars.contains(char::MAX);
1018                if !is_included {
1019                    asserted_chars = asserted_chars.negate().add_char('\0');
1020                }
1022                self.add_character_range_conditions(&asserted_chars, is_included, &line_break);
1023            }
1025            if has_negative_condition {
1026                if has_positive_condition {
1027                    add!(self, ") &&{line_break}");
1028                }
1029                self.add_character_range_conditions(&negated_chars, false, &line_break);
1030            }
1032            if has_condition {
1033                add!(self, ") ");
1034            }
1036            self.add_advance_action(action);
1037            add!(self, "\n");
1038        }
1040        add_line!(self, "END_STATE();");
1041    }
1043    fn add_character_range_conditions(
1044        &mut self,
1045        characters: &CharacterSet,
1046        is_included: bool,
1047        line_break: &str,
1048    ) {
1049        for (i, range) in characters.ranges().enumerate() {
1050            let start = *range.start();
1051            let end = *range.end();
1052            if is_included {
1053                if i > 0 {
1054                    add!(self, " ||{line_break}");
1055                }
1057                if start == '\0' {
1058                    add!(self, "(!eof && ");
1059                    if end == '\0' {
1060                        add!(self, "lookahead == 0");
1061                    } else {
1062                        add!(self, "lookahead <= ");
1063                    }
1064                    self.add_character(end);
1065                    add!(self, ")");
1066                } else if end == start {
1067                    add!(self, "lookahead == ");
1068                    self.add_character(start);
1069                } else if end as u32 == start as u32 + 1 {
1070                    add!(self, "lookahead == ");
1071                    self.add_character(start);
1072                    add!(self, " ||{line_break}lookahead == ");
1073                    self.add_character(end);
1074                } else {
1075                    add!(self, "(");
1076                    self.add_character(start);
1077                    add!(self, " <= lookahead && lookahead <= ");
1078                    self.add_character(end);
1079                    add!(self, ")");
1080                }
1081            } else {
1082                if i > 0 {
1083                    add!(self, " &&{line_break}");
1084                }
1085                if end == start {
1086                    add!(self, "lookahead != ");
1087                    self.add_character(start);
1088                } else if end as u32 == start as u32 + 1 {
1089                    add!(self, "lookahead != ");
1090                    self.add_character(start);
1091                    add!(self, " &&{line_break}lookahead != ");
1092                    self.add_character(end);
1093                } else if start != '\0' {
1094                    add!(self, "(lookahead < ");
1095                    self.add_character(start);
1096                    add!(self, " || ");
1097                    self.add_character(end);
1098                    add!(self, " < lookahead)");
1099                } else {
1100                    add!(self, "lookahead > ");
1101                    self.add_character(end);
1102                }
1103            }
1104        }
1105    }
1107    fn add_character_set(&mut self, ix: usize) {
1108        let characters = self.large_character_sets[ix].1.clone();
1109        let info = &self.large_character_set_info[ix];
1110        if !info.is_used {
1111            return;
1112        }
1114        add_line!(
1115            self,
1116            "static const TSCharacterRange {}[] = {{",
1117            info.constant_name
1118        );
1120        indent!(self);
1121        for (ix, range) in characters.ranges().enumerate() {
1122            let column = ix % 8;
1123            if column == 0 {
1124                if ix > 0 {
1125                    add!(self, "\n");
1126                }
1127                add_whitespace!(self);
1128            } else {
1129                add!(self, " ");
1130            }
1131            add!(self, "{{");
1132            self.add_character(*range.start());
1133            add!(self, ", ");
1134            self.add_character(*range.end());
1135            add!(self, "}},");
1136        }
1137        add!(self, "\n");
1138        dedent!(self);
1139        add_line!(self, "}};");
1140        add_line!(self, "");
1141    }
1143    fn add_advance_action(&mut self, action: &AdvanceAction) {
1144        if action.in_main_token {
1145            add!(self, "ADVANCE({});", action.state);
1146        } else {
1147            add!(self, "SKIP({});", action.state);
1148        }
1149    }
1151    fn add_lex_modes(&mut self) {
1152        add_line!(
1153            self,
1154            "static const {} ts_lex_modes[STATE_COUNT] = {{",
1155            if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1156                "TSLexerMode"
1157            } else {
1158                "TSLexMode"
1159            }
1160        );
1161        indent!(self);
1162        for (i, state) in self.parse_table.states.iter().enumerate() {
1163            add_whitespace!(self);
1164            add!(self, "[{i}] = {{");
1165            if state.is_end_of_non_terminal_extra() {
1166                add!(self, "(TSStateId)(-1),");
1167            } else {
1168                add!(self, ".lex_state = {}", state.lex_state_id);
1170                if state.external_lex_state_id > 0 {
1171                    add!(
1172                        self,
1173                        ", .external_lex_state = {}",
1174                        state.external_lex_state_id
1175                    );
1176                }
1178                if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1179                    let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
1180                    if reserved_word_set_id != 0 {
1181                        add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
1182                    }
1183                }
1184            }
1186            add!(self, "}},\n");
1187        }
1188        dedent!(self);
1189        add_line!(self, "}};");
1190        add_line!(self, "");
1191    }
1193    fn add_reserved_word_sets(&mut self) {
1194        add_line!(
1195            self,
1196            "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
1197            self.reserved_word_sets.len(),
1198        );
1199        indent!(self);
1200        for (id, set) in self.reserved_word_sets.iter().enumerate() {
1201            if id == 0 {
1202                continue;
1203            }
1204            add_line!(self, "[{id}] = {{");
1205            indent!(self);
1206            for token in set.iter() {
1207                add_line!(self, "{},", self.symbol_ids[&token]);
1208            }
1209            dedent!(self);
1210            add_line!(self, "}},");
1211        }
1212        dedent!(self);
1213        add_line!(self, "}};");
1214        add_line!(self, "");
1215    }
1217    fn add_external_token_enum(&mut self) {
1218        add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
1219        indent!(self);
1220        for i in 0..self.syntax_grammar.external_tokens.len() {
1221            add_line!(
1222                self,
1223                "{} = {i},",
1224                self.external_token_id(&self.syntax_grammar.external_tokens[i]),
1225            );
1226        }
1227        dedent!(self);
1228        add_line!(self, "}};");
1229        add_line!(self, "");
1230    }
1232    fn add_external_scanner_symbol_map(&mut self) {
1233        add_line!(
1234            self,
1235            "static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
1236        );
1237        indent!(self);
1238        for i in 0..self.syntax_grammar.external_tokens.len() {
1239            let token = &self.syntax_grammar.external_tokens[i];
1240            let id_token = token
1241                .corresponding_internal_token
1242                .unwrap_or_else(|| Symbol::external(i));
1243            add_line!(
1244                self,
1245                "[{}] = {},",
1246                self.external_token_id(token),
1247                self.symbol_ids[&id_token],
1248            );
1249        }
1250        dedent!(self);
1251        add_line!(self, "}};");
1252        add_line!(self, "");
1253    }
1255    fn add_external_scanner_states_list(&mut self) {
1256        add_line!(
1257            self,
1258            "static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
1259            self.parse_table.external_lex_states.len(),
1260        );
1261        indent!(self);
1262        for i in 0..self.parse_table.external_lex_states.len() {
1263            if !self.parse_table.external_lex_states[i].is_empty() {
1264                add_line!(self, "[{i}] = {{");
1265                indent!(self);
1266                for token in self.parse_table.external_lex_states[i].iter() {
1267                    add_line!(
1268                        self,
1269                        "[{}] = true,",
1270                        self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
1271                    );
1272                }
1273                dedent!(self);
1274                add_line!(self, "}},");
1275            }
1276        }
1277        dedent!(self);
1278        add_line!(self, "}};");
1279        add_line!(self, "");
1280    }
1282    fn add_parse_table(&mut self) {
1283        let mut parse_table_entries = HashMap::new();
1284        let mut next_parse_action_list_index = 0;
1286        // Parse action lists zero is for the default value, when a symbol is not valid.
1287        self.get_parse_action_list_id(
1288            &ParseTableEntry {
1289                actions: Vec::new(),
1290                reusable: false,
1291            },
1292            &mut parse_table_entries,
1293            &mut next_parse_action_list_index,
1294        );
1296        add_line!(
1297            self,
1298            "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
1299        );
1300        indent!(self);
1302        let mut terminal_entries = Vec::new();
1303        let mut nonterminal_entries = Vec::new();
1305        for (i, state) in self
1306            .parse_table
1307            .states
1308            .iter()
1309            .enumerate()
1310            .take(self.large_state_count)
1311        {
1312            add_line!(self, "[STATE({i})] = {{");
1313            indent!(self);
1315            // Ensure the entries are in a deterministic order, since they are
1316            // internally represented as a hash map.
1317            terminal_entries.clear();
1318            nonterminal_entries.clear();
1319            terminal_entries.extend(state.terminal_entries.iter());
1320            nonterminal_entries.extend(state.nonterminal_entries.iter());
1321            terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1322            nonterminal_entries.sort_unstable_by_key(|k| k.0);
1324            for (symbol, action) in &nonterminal_entries {
1325                add_line!(
1326                    self,
1327                    "[{}] = STATE({}),",
1328                    self.symbol_ids[symbol],
1329                    match action {
1330                        GotoAction::Goto(state) => *state,
1331                        GotoAction::ShiftExtra => i,
1332                    }
1333                );
1334            }
1336            for (symbol, entry) in &terminal_entries {
1337                let entry_id = self.get_parse_action_list_id(
1338                    entry,
1339                    &mut parse_table_entries,
1340                    &mut next_parse_action_list_index,
1341                );
1342                add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
1343            }
1345            dedent!(self);
1346            add_line!(self, "}},");
1347        }
1349        dedent!(self);
1350        add_line!(self, "}};");
1351        add_line!(self, "");
1353        if self.large_state_count < self.parse_table.states.len() {
1354            add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
1355            indent!(self);
1357            let mut next_table_index = 0;
1358            let mut small_state_indices = Vec::new();
1359            let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
1360            for state in self.parse_table.states.iter().skip(self.large_state_count) {
1361                small_state_indices.push(next_table_index);
1362                symbols_by_value.clear();
1364                terminal_entries.clear();
1365                terminal_entries.extend(state.terminal_entries.iter());
1366                terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1368                // In a given parse state, many lookahead symbols have the same actions.
1369                // So in the "small state" representation, group symbols by their action
1370                // in order to avoid repeating the action.
1371                for (symbol, entry) in &terminal_entries {
1372                    let entry_id = self.get_parse_action_list_id(
1373                        entry,
1374                        &mut parse_table_entries,
1375                        &mut next_parse_action_list_index,
1376                    );
1377                    symbols_by_value
1378                        .entry((entry_id, SymbolType::Terminal))
1379                        .or_default()
1380                        .push(**symbol);
1381                }
1382                for (symbol, action) in &state.nonterminal_entries {
1383                    let state_id = match action {
1384                        GotoAction::Goto(i) => *i,
1385                        GotoAction::ShiftExtra => {
1386                            self.large_state_count + small_state_indices.len() - 1
1387                        }
1388                    };
1389                    symbols_by_value
1390                        .entry((state_id, SymbolType::NonTerminal))
1391                        .or_default()
1392                        .push(*symbol);
1393                }
1395                let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
1396                values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
1397                    (symbols.len(), *kind, *value, symbols[0])
1398                });
1400                add_line!(
1401                    self,
1402                    "[{next_table_index}] = {},",
1403                    values_with_symbols.len()
1404                );
1405                indent!(self);
1406                next_table_index += 1;
1408                for ((value, kind), symbols) in &mut values_with_symbols {
1409                    next_table_index += 2 + symbols.len();
1410                    if *kind == SymbolType::NonTerminal {
1411                        add_line!(self, "STATE({value}), {},", symbols.len());
1412                    } else {
1413                        add_line!(self, "ACTIONS({value}), {},", symbols.len());
1414                    }
1416                    symbols.sort_unstable();
1417                    indent!(self);
1418                    for symbol in symbols {
1419                        add_line!(self, "{},", self.symbol_ids[symbol]);
1420                    }
1421                    dedent!(self);
1422                }
1424                dedent!(self);
1425            }
1427            dedent!(self);
1428            add_line!(self, "}};");
1429            add_line!(self, "");
1431            add_line!(
1432                self,
1433                "static const uint32_t ts_small_parse_table_map[] = {{"
1434            );
1435            indent!(self);
1436            for i in self.large_state_count..self.parse_table.states.len() {
1437                add_line!(
1438                    self,
1439                    "[SMALL_STATE({i})] = {},",
1440                    small_state_indices[i - self.large_state_count]
1441                );
1442            }
1443            dedent!(self);
1444            add_line!(self, "}};");
1445            add_line!(self, "");
1446        }
1448        let mut parse_table_entries = parse_table_entries
1449            .into_iter()
1450            .map(|(entry, i)| (i, entry))
1451            .collect::<Vec<_>>();
1452        parse_table_entries.sort_by_key(|(index, _)| *index);
1453        self.add_parse_action_list(parse_table_entries);
1454    }
1456    fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
1457        add_line!(
1458            self,
1459            "static const TSParseActionEntry ts_parse_actions[] = {{"
1460        );
1461        indent!(self);
1462        for (i, entry) in parse_table_entries {
1463            add!(
1464                self,
1465                "  [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
1466                entry.actions.len(),
1467                entry.reusable
1468            );
1469            for action in entry.actions {
1470                add!(self, " ");
1471                match action {
1472                    ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
1473                    ParseAction::Recover => add!(self, "RECOVER()"),
1474                    ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
1475                    ParseAction::Shift {
1476                        state,
1477                        is_repetition,
1478                    } => {
1479                        if is_repetition {
1480                            add!(self, "SHIFT_REPEAT({state})");
1481                        } else {
1482                            add!(self, "SHIFT({state})");
1483                        }
1484                    }
1485                    ParseAction::Reduce {
1486                        symbol,
1487                        child_count,
1488                        dynamic_precedence,
1489                        production_id,
1490                        ..
1491                    } => {
1492                        add!(
1493                            self,
1494                            "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
1495                            self.symbol_ids[&symbol]
1496                        );
1497                    }
1498                }
1499                add!(self, ",");
1500            }
1501            add!(self, "\n");
1502        }
1503        dedent!(self);
1504        add_line!(self, "}};");
1505        add_line!(self, "");
1506    }
1508    fn add_parser_export(&mut self) {
1509        let language_function_name = format!("tree_sitter_{}", self.language_name);
1510        let external_scanner_name = format!("{language_function_name}_external_scanner");
1512        add_line!(self, "#ifdef __cplusplus");
1513        add_line!(self, r#"extern "C" {{"#);
1514        add_line!(self, "#endif");
1516        if !self.syntax_grammar.external_tokens.is_empty() {
1517            add_line!(self, "void *{external_scanner_name}_create(void);");
1518            add_line!(self, "void {external_scanner_name}_destroy(void *);");
1519            add_line!(
1520                self,
1521                "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);",
1522            );
1523            add_line!(
1524                self,
1525                "unsigned {external_scanner_name}_serialize(void *, char *);",
1526            );
1527            add_line!(
1528                self,
1529                "void {external_scanner_name}_deserialize(void *, const char *, unsigned);",
1530            );
1531            add_line!(self, "");
1532        }
1534        add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS");
1535        add_line!(self, "#define TS_PUBLIC");
1536        add_line!(self, "#elif defined(_WIN32)");
1537        add_line!(self, "#define TS_PUBLIC __declspec(dllexport)");
1538        add_line!(self, "#else");
1539        add_line!(
1540            self,
1541            "#define TS_PUBLIC __attribute__((visibility(\"default\")))"
1542        );
1543        add_line!(self, "#endif");
1544        add_line!(self, "");
1546        add_line!(
1547            self,
1548            "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
1549        );
1550        indent!(self);
1551        add_line!(self, "static const TSLanguage language = {{");
1552        indent!(self);
1553        add_line!(self, ".abi_version = LANGUAGE_VERSION,");
1555        // Quantities
1556        add_line!(self, ".symbol_count = SYMBOL_COUNT,");
1557        add_line!(self, ".alias_count = ALIAS_COUNT,");
1558        add_line!(self, ".token_count = TOKEN_COUNT,");
1559        add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
1560        add_line!(self, ".state_count = STATE_COUNT,");
1561        add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
1562        add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
1563        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1564            add_line!(self, ".supertype_count = SUPERTYPE_COUNT,");
1565        }
1566        add_line!(self, ".field_count = FIELD_COUNT,");
1567        add_line!(
1568            self,
1569            ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
1570        );
1572        // Parse table
1573        add_line!(self, ".parse_table = &ts_parse_table[0][0],");
1574        if self.large_state_count < self.parse_table.states.len() {
1575            add_line!(self, ".small_parse_table = ts_small_parse_table,");
1576            add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,");
1577        }
1578        add_line!(self, ".parse_actions = ts_parse_actions,");
1580        // Metadata
1581        add_line!(self, ".symbol_names = ts_symbol_names,");
1582        if !self.field_names.is_empty() {
1583            add_line!(self, ".field_names = ts_field_names,");
1584            add_line!(self, ".field_map_slices = ts_field_map_slices,");
1585            add_line!(self, ".field_map_entries = ts_field_map_entries,");
1586        }
1587        if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1588            add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,");
1589            add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,");
1590            add_line!(self, ".supertype_symbols = ts_supertype_symbols,");
1591        }
1592        add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
1593        add_line!(self, ".public_symbol_map = ts_symbol_map,");
1594        add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
1595        if !self.parse_table.production_infos.is_empty() {
1596            add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],");
1597        }
1599        // Lexing
1600        add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
1601        add_line!(self, ".lex_fn = ts_lex,");
1602        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
1603            add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
1604            add_line!(
1605                self,
1606                ".keyword_capture_token = {},",
1607                self.symbol_ids[&keyword_capture_token]
1608            );
1609        }
1611        if !self.syntax_grammar.external_tokens.is_empty() {
1612            add_line!(self, ".external_scanner = {{");
1613            indent!(self);
1614            add_line!(self, "&ts_external_scanner_states[0][0],");
1615            add_line!(self, "ts_external_scanner_symbol_map,");
1616            add_line!(self, "{external_scanner_name}_create,");
1617            add_line!(self, "{external_scanner_name}_destroy,");
1618            add_line!(self, "{external_scanner_name}_scan,");
1619            add_line!(self, "{external_scanner_name}_serialize,");
1620            add_line!(self, "{external_scanner_name}_deserialize,");
1621            dedent!(self);
1622            add_line!(self, "}},");
1623        }
1625        add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
1627        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1628            add_line!(self, ".name = \"{}\",", self.language_name);
1630            if self.reserved_word_sets.len() > 1 {
1631                add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
1632            }
1634            add_line!(
1635                self,
1636                ".max_reserved_word_set_size = {},",
1637                self.reserved_word_sets
1638                    .iter()
1639                    .map(TokenSet::len)
1640                    .max()
1641                    .unwrap()
1642            );
1644            let Some(metadata) = &self.metadata else {
1645                panic!(
1646                    indoc! {"
1647                        Metadata is required to generate ABI version {}.
1648                        This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
1649                    "},
1650                    self.abi_version
1651                );
1652            };
1654            add_line!(self, ".metadata = {{");
1655            indent!(self);
1656            add_line!(self, ".major_version = {},", metadata.major_version);
1657            add_line!(self, ".minor_version = {},", metadata.minor_version);
1658            add_line!(self, ".patch_version = {},", metadata.patch_version);
1659            dedent!(self);
1660            add_line!(self, "}},");
1661        }
1663        dedent!(self);
1664        add_line!(self, "}};");
1665        add_line!(self, "return &language;");
1666        dedent!(self);
1667        add_line!(self, "}}");
1668        add_line!(self, "#ifdef __cplusplus");
1669        add_line!(self, "}}");
1670        add_line!(self, "#endif");
1671    }
1673    fn get_parse_action_list_id(
1674        &self,
1675        entry: &ParseTableEntry,
1676        parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
1677        next_parse_action_list_index: &mut usize,
1678    ) -> usize {
1679        if let Some(&index) = parse_table_entries.get(entry) {
1680            index
1681        } else {
1682            let result = *next_parse_action_list_index;
1683            parse_table_entries.insert(entry.clone(), result);
1684            *next_parse_action_list_index += 1 + entry.actions.len();
1685            result
1686        }
1687    }
1689    fn get_field_map_id(
1690        &self,
1691        flat_field_map: Vec<(String, FieldLocation)>,
1692        flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
1693        next_flat_field_map_index: &mut usize,
1694    ) -> usize {
1695        if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
1696            return *index;
1697        }
1699        let result = *next_flat_field_map_index;
1700        *next_flat_field_map_index += flat_field_map.len();
1701        flat_field_maps.push((result, flat_field_map));
1702        result
1703    }
1705    fn external_token_id(&self, token: &ExternalToken) -> String {
1706        format!(
1707            "ts_external_token_{}",
1708            self.sanitize_identifier(&
1709        )
1710    }
1712    fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
1713        let mut id;
1714        if symbol == Symbol::end() {
1715            id = "ts_builtin_sym_end".to_string();
1716        } else {
1717            let (name, kind) = self.metadata_for_symbol(symbol);
1718            id = match kind {
1719                VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
1720                VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
1721                VariableType::Hidden | VariableType::Named => {
1722                    format!("sym_{}", self.sanitize_identifier(name))
1723                }
1724            };
1726            let mut suffix_number = 1;
1727            let mut suffix = String::new();
1728            while used_identifiers.contains(&id) {
1729                id.drain(id.len() - suffix.len()..);
1730                suffix_number += 1;
1731                suffix = suffix_number.to_string();
1732                id += &suffix;
1733            }
1734        }
1736        used_identifiers.insert(id.clone());
1737        self.symbol_ids.insert(symbol, id);
1738    }
1740    fn field_id(&self, field_name: &str) -> String {
1741        format!("field_{field_name}")
1742    }
1744    fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
1745        match symbol.kind {
1746            SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
1747            SymbolType::NonTerminal => {
1748                let variable = &self.syntax_grammar.variables[symbol.index];
1749                (&, variable.kind)
1750            }
1751            SymbolType::Terminal => {
1752                let variable = &self.lexical_grammar.variables[symbol.index];
1753                (&, variable.kind)
1754            }
1755            SymbolType::External => {
1756                let token = &self.syntax_grammar.external_tokens[symbol.index];
1757                (&, token.kind)
1758            }
1759        }
1760    }
1762    fn symbols_for_alias(&self, alias: &Alias) -> Vec<Symbol> {
1763        self.parse_table
1764            .symbols
1765            .iter()
1766            .copied()
1767            .filter(move |symbol| {
1768                self.default_aliases.get(symbol).map_or_else(
1769                    || {
1770                        let (name, kind) = self.metadata_for_symbol(*symbol);
1771                        name == alias.value && kind == alias.kind()
1772                    },
1773                    |default_alias| default_alias == alias,
1774                )
1775            })
1776            .collect()
1777    }
1779    fn sanitize_identifier(&self, name: &str) -> String {
1780        let mut result = String::with_capacity(name.len());
1781        for c in name.chars() {
1782            if c.is_ascii_alphanumeric() || c == '_' {
1783                result.push(c);
1784            } else {
1785                'special_chars: {
1786                    let replacement = match c {
1787                        ' ' if name.len() == 1 => "SPACE",
1788                        '~' => "TILDE",
1789                        '`' => "BQUOTE",
1790                        '!' => "BANG",
1791                        '@' => "AT",
1792                        '#' => "POUND",
1793                        '$' => "DOLLAR",
1794                        '%' => "PERCENT",
1795                        '^' => "CARET",
1796                        '&' => "AMP",
1797                        '*' => "STAR",
1798                        '(' => "LPAREN",
1799                        ')' => "RPAREN",
1800                        '-' => "DASH",
1801                        '+' => "PLUS",
1802                        '=' => "EQ",
1803                        '{' => "LBRACE",
1804                        '}' => "RBRACE",
1805                        '[' => "LBRACK",
1806                        ']' => "RBRACK",
1807                        '\\' => "BSLASH",
1808                        '|' => "PIPE",
1809                        ':' => "COLON",
1810                        ';' => "SEMI",
1811                        '"' => "DQUOTE",
1812                        '\'' => "SQUOTE",
1813                        '<' => "LT",
1814                        '>' => "GT",
1815                        ',' => "COMMA",
1816                        '.' => "DOT",
1817                        '?' => "QMARK",
1818                        '/' => "SLASH",
1819                        '\n' => "LF",
1820                        '\r' => "CR",
1821                        '\t' => "TAB",
1822                        '\0' => "NULL",
1823                        '\u{0001}' => "SOH",
1824                        '\u{0002}' => "STX",
1825                        '\u{0003}' => "ETX",
1826                        '\u{0004}' => "EOT",
1827                        '\u{0005}' => "ENQ",
1828                        '\u{0006}' => "ACK",
1829                        '\u{0007}' => "BEL",
1830                        '\u{0008}' => "BS",
1831                        '\u{000b}' => "VTAB",
1832                        '\u{000c}' => "FF",
1833                        '\u{000e}' => "SO",
1834                        '\u{000f}' => "SI",
1835                        '\u{0010}' => "DLE",
1836                        '\u{0011}' => "DC1",
1837                        '\u{0012}' => "DC2",
1838                        '\u{0013}' => "DC3",
1839                        '\u{0014}' => "DC4",
1840                        '\u{0015}' => "NAK",
1841                        '\u{0016}' => "SYN",
1842                        '\u{0017}' => "ETB",
1843                        '\u{0018}' => "CAN",
1844                        '\u{0019}' => "EM",
1845                        '\u{001a}' => "SUB",
1846                        '\u{001b}' => "ESC",
1847                        '\u{001c}' => "FS",
1848                        '\u{001d}' => "GS",
1849                        '\u{001e}' => "RS",
1850                        '\u{001f}' => "US",
1851                        '\u{007F}' => "DEL",
1852                        '\u{FEFF}' => "BOM",
1853                        '\u{0080}'..='\u{FFFF}' => {
1854                            result.push_str(&format!("u{:04x}", c as u32));
1855                            break 'special_chars;
1856                        }
1857                        '\u{10000}'..='\u{10FFFF}' => {
1858                            result.push_str(&format!("U{:08x}", c as u32));
1859                            break 'special_chars;
1860                        }
1861                        '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
1862                        ' ' => break 'special_chars,
1863                    };
1864                    if !result.is_empty() && !result.ends_with('_') {
1865                        result.push('_');
1866                    }
1867                    result += replacement;
1868                }
1869            }
1870        }
1871        result
1872    }
1874    fn sanitize_string(&self, name: &str) -> String {
1875        let mut result = String::with_capacity(name.len());
1876        for c in name.chars() {
1877            match c {
1878                '\"' => result += "\\\"",
1879                '?' => result += "\\?",
1880                '\\' => result += "\\\\",
1881                '\u{0007}' => result += "\\a",
1882                '\u{0008}' => result += "\\b",
1883                '\u{000b}' => result += "\\v",
1884                '\u{000c}' => result += "\\f",
1885                '\n' => result += "\\n",
1886                '\r' => result += "\\r",
1887                '\t' => result += "\\t",
1888                '\0' => result += "\\0",
1889                '\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
1890                '\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
1891                '\u{10000}'..='\u{10FFFF}' => {
1892                    result.push_str(&format!("\\U{:08x}", c as u32));
1893                }
1894                _ => result.push(c),
1895            }
1896        }
1897        result
1898    }
1900    fn add_character(&mut self, c: char) {
1901        match c {
1902            '\'' => add!(self, "'\\''"),
1903            '\\' => add!(self, "'\\\\'"),
1904            '\u{000c}' => add!(self, "'\\f'"),
1905            '\n' => add!(self, "'\\n'"),
1906            '\t' => add!(self, "'\\t'"),
1907            '\r' => add!(self, "'\\r'"),
1908            _ => {
1909                if c == '\0' {
1910                    add!(self, "0");
1911                } else if c == ' ' || c.is_ascii_graphic() {
1912                    add!(self, "'{c}'");
1913                } else {
1914                    add!(self, "0x{:02x}", c as u32);
1915                }
1916            }
1917        }
1918    }
1921/// Returns a String of C code for the given components of a parser.
1923/// # Arguments
/// * `name` - A string slice containing the name of the language
/// * `tables` - The generated tables for the language: the parse table, the main lexing table,
///   the keyword lexing table, and the large character sets
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar. Its
///   `word_token`, if any, is the token used for keyword capture.
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `default_aliases` - A map describing the global rename rules that should apply. The keys are
///   symbols that are *always* aliased in the same way, and the values are the aliases that are
///   applied to those symbols.
/// * `abi_version` - The language ABI version that should be generated. Usually you want
///   Tree-sitter's current version, but right after making an ABI change, it may be useful to
///   generate code with the previous ABI.
/// * `semantic_version` - An optional `(major, minor, patch)` version triple that is emitted as
///   the language's metadata.
/// * `supertype_symbol_map` - A map from symbols to lists of child types (presumably each
///   supertype symbol and its subtypes; confirm against the caller).
1939pub fn render_c_code(
1940    name: &str,
1941    tables: Tables,
1942    syntax_grammar: SyntaxGrammar,
1943    lexical_grammar: LexicalGrammar,
1944    default_aliases: AliasMap,
1945    abi_version: usize,
1946    semantic_version: Option<(u8, u8, u8)>,
1947    supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
1948) -> String {
1949    assert!(
1950        (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
1951        "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
1952    );
1954    Generator {
1955        language_name: name.to_string(),
1956        parse_table: tables.parse_table,
1957        main_lex_table: tables.main_lex_table,
1958        keyword_lex_table: tables.keyword_lex_table,
1959        large_character_sets: tables.large_character_sets,
1960        large_character_set_info: Vec::new(),
1961        syntax_grammar,
1962        lexical_grammar,
1963        default_aliases,
1964        abi_version,
1965        metadata:|(major_version, minor_version, patch_version)| Metadata {
1966            major_version,
1967            minor_version,
1968            patch_version,
1969        }),
1970        supertype_symbol_map,
1971        ..Default::default()
1972    }
1973    .generate()