tree_sitter_loader/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
5use std::ops::Range;
6#[cfg(feature = "tree-sitter-highlight")]
7use std::sync::Mutex;
8use std::{
9    collections::HashMap,
10    env,
11    ffi::{OsStr, OsString},
12    fs,
13    io::{BufRead, BufReader},
14    mem,
15    path::{Path, PathBuf},
16    process::Command,
17    sync::LazyLock,
18    time::SystemTime,
19};
20
21#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
22use anyhow::Error;
23use anyhow::{anyhow, Context, Result};
24use etcetera::BaseStrategy as _;
25use fs4::fs_std::FileExt;
26use indoc::indoc;
27use libloading::{Library, Symbol};
28use once_cell::unsync::OnceCell;
29use path_slash::PathBufExt as _;
30use regex::{Regex, RegexBuilder};
31use semver::Version;
32use serde::{Deserialize, Deserializer, Serialize};
33use tree_sitter::Language;
34#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
35use tree_sitter::QueryError;
36#[cfg(feature = "tree-sitter-highlight")]
37use tree_sitter::QueryErrorKind;
38#[cfg(feature = "tree-sitter-highlight")]
39use tree_sitter_highlight::HighlightConfiguration;
40#[cfg(feature = "tree-sitter-tags")]
41use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
42use url::Url;
43
/// Matches a `"name": "<value>"` pair in JSON text and captures the value
/// (non-greedily) in group 1. The regex is compiled lazily on first use.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
46
/// Docker image reference for the Emscripten SDK. The tag is the value of the
/// `EMSCRIPTEN_VERSION` environment variable at compile time.
pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
48
49#[derive(Default, Deserialize, Serialize)]
50pub struct Config {
51    #[serde(default)]
52    #[serde(
53        rename = "parser-directories",
54        deserialize_with = "deserialize_parser_directories"
55    )]
56    pub parser_directories: Vec<PathBuf>,
57}
58
/// A JSON value that is either absent, a single path, or a list of paths.
///
/// The enum is untagged, so the JSON shape alone selects the variant.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No path was given; this is the default.
    #[default]
    Empty,
    /// Exactly one path, written as a bare value.
    Single(PathBuf),
    /// Several paths, written as an array.
    Multiple(Vec<PathBuf>),
}
67
68impl PathsJSON {
69    fn into_vec(self) -> Option<Vec<PathBuf>> {
70        match self {
71            Self::Empty => None,
72            Self::Single(s) => Some(vec![s]),
73            Self::Multiple(s) => Some(s),
74        }
75    }
76
77    const fn is_empty(&self) -> bool {
78        matches!(self, Self::Empty)
79    }
80}
81
/// An author entry, accepted either as a plain string or as an object.
///
/// The enum is untagged, so the JSON shape alone selects the variant.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// The author written as a single string.
    String(String),
    /// The author written as an object with a required name.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
92
/// A repository entry, accepted either as a plain string or as an object
/// carrying a `url`.
///
/// The enum is untagged, so the JSON shape alone selects the variant.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// The repository written as a single string.
    String(String),
    /// The repository written as an object.
    Object { url: String },
}
99
100#[derive(Serialize, Deserialize)]
101pub struct PackageJSON {
102    pub name: String,
103    pub version: Version,
104    pub description: Option<String>,
105    pub author: Option<PackageJSONAuthor>,
106    pub maintainers: Option<Vec<PackageJSONAuthor>>,
107    pub license: Option<String>,
108    pub repository: Option<PackageJSONRepository>,
109    #[serde(default)]
110    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
111    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
112}
113
/// Serde default for a language configuration's `path`: the current directory.
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
117
/// One language configuration entry as stored in JSON (the element type of
/// `PackageJSON::tree_sitter`).
///
/// Field names are converted to kebab-case in JSON (e.g. `file-types`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Path of the entry; defaults to `.` when the key is missing.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // Each of the following accepts a single path or a list of paths, defaults
    // to empty when missing, and is omitted from the output when empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
139
/// In-memory form of a `tree-sitter.json` file (see `TreeSitterJSON::from_file`).
///
/// Field names are converted to kebab-case in JSON.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Value of the `$schema` key, if present.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    /// The grammars declared by the file.
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Binding flags; the whole section falls back to `Bindings::default()`
    /// when the key is missing.
    #[serde(default)]
    pub bindings: Bindings,
}
150
151impl TreeSitterJSON {
152    pub fn from_file(path: &Path) -> Result<Self> {
153        Ok(serde_json::from_str(&fs::read_to_string(
154            path.join("tree-sitter.json"),
155        )?)?)
156    }
157
158    #[must_use]
159    pub fn has_multiple_language_configs(&self) -> bool {
160        self.grammars.len() > 1
161    }
162}
163
/// One grammar entry of a `TreeSitterJSON`.
///
/// Field names are converted to kebab-case in JSON. Optional fields that are
/// `None`, and path lists that are empty, are left out when serializing
/// (except `file_types`, which has no skip attribute).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
195
/// The `metadata` section of a `TreeSitterJSON`.
///
/// Optional fields that are `None` are left out when serializing.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON; always `None` after deserializing.
    #[serde(skip)]
    pub namespace: Option<String>,
}
210
/// An author entry of `Metadata::authors`: a required name plus optional
/// contact details, which are left out of the output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
219
/// The `links` entry of `Metadata`.
///
/// `repository` is required and must parse as a URL; `funding` must parse as a
/// URL when present, while `homepage` is kept as a plain string.
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: Url,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<Url>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub homepage: Option<String>,
}
228
/// Per-language binding flags.
///
/// Because of the container-level `#[serde(default)]`, any key missing from
/// the JSON takes its value from `Bindings::default()`.
#[derive(Serialize, Deserialize)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    /// Skipped by serde: never read from or written to JSON.
    #[serde(skip)]
    pub java: bool,
    /// Skipped by serde: never read from or written to JSON.
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
244
245impl Default for Bindings {
246    fn default() -> Self {
247        Self {
248            c: true,
249            go: true,
250            java: false,
251            kotlin: false,
252            node: true,
253            python: true,
254            rust: true,
255            swift: true,
256            zig: false,
257        }
258    }
259}
260
261// Replace `~` or `$HOME` with home path string.
262// (While paths like "~/.tree-sitter/config.json" can be deserialized,
263// they're not valid path for I/O modules.)
264fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
265where
266    D: Deserializer<'de>,
267{
268    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
269    let Ok(home) = etcetera::home_dir() else {
270        return Ok(paths);
271    };
272    let standardized = paths
273        .into_iter()
274        .map(|path| standardize_path(path, &home))
275        .collect();
276    Ok(standardized)
277}
278
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Only a whole first component is replaced (`~user/x` is left alone), and
/// paths without such a prefix are returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for home_marker in ["~", "$HOME"] {
        if let Ok(remainder) = path.strip_prefix(home_marker) {
            return home.join(remainder);
        }
    }
    path
}
288
289impl Config {
290    #[must_use]
291    pub fn initial() -> Self {
292        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
293        Self {
294            parser_directories: vec![
295                home_dir.join("github"),
296                home_dir.join("src"),
297                home_dir.join("source"),
298                home_dir.join("projects"),
299                home_dir.join("dev"),
300                home_dir.join("git"),
301            ],
302        }
303    }
304}
305
/// Value of the `BUILD_TARGET` environment variable at compile time; passed to
/// the C compiler configuration as the target when building parsers.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
/// Value of the `BUILD_HOST` environment variable at compile time; passed to
/// the C compiler configuration as the host when building parsers.
const BUILD_HOST: &str = env!("BUILD_HOST");
308
/// A loaded language configuration: how to recognize files of a language and
/// which query files belong to it.
pub struct LanguageConfiguration<'a> {
    /// Scope string; compared for equality by
    /// `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Used to choose between several configurations that match the same file
    /// name, by matching against the file's contents.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings by
    /// `Loader::language_configuration_for_injection_string`.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index of this configuration's language in `Loader::languages_by_id`.
    language_id: usize,
    // NOTE(review): the two cells below presumably cache lazily built
    // configurations; their initialization is outside this view.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    // NOTE(review): presumably refers to the owning `Loader`'s
    // `highlight_names`; confirm where the configuration is constructed.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
}
331
/// Finds language configurations on disk and compiles and loads their parsers
/// on demand.
pub struct Loader {
    /// Directory where compiled parser libraries are written when no explicit
    /// output path is requested.
    pub parser_lib_path: PathBuf,
    /// One entry per language: the grammar's root directory (its `src`
    /// subdirectory is compiled), the lazily loaded language, and optional
    /// external files that are passed to the compile configuration.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or dotted extension string to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    // NOTE(review): not used in the code visible here; presumably the index of
    // the configuration found in the current directory. Confirm.
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex pattern string to indices into
    /// `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Highlight names, replaced wholesale by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; set to `false` by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Compile parsers with debug info and without optimization.
    debug_build: bool,
    /// Marks compile configurations as sanitized builds.
    sanitize_build: bool,
    /// Recompile even when timestamps indicate the library is up to date.
    force_rebuild: bool,

    /// When it holds a store, parsers are compiled to and loaded from Wasm.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
350
/// Everything needed to compile one parser.
pub struct CompileConfig<'a> {
    /// Directory that contains the generated parser sources.
    pub src_path: &'a Path,
    /// Include directories handed to the compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source file.
    pub parser_path: PathBuf,
    /// Path of the external scanner source, if any.
    pub scanner_path: Option<PathBuf>,
    /// Extra files, relative to `src_path`, whose timestamps are checked when
    /// deciding whether a rebuild is needed.
    pub external_files: Option<&'a [PathBuf]>,
    /// Where to write the compiled library; `None` lets the loader choose.
    pub output_path: Option<PathBuf>,
    /// Preprocessor symbols to define for the compilation.
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; empty until the loader fills it in.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// The parser is expected at `src_path/parser.c`, `src_path` is the only
    /// include directory, and there is no scanner, no flags, no sanitizing and
    /// an empty name.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = Vec::from([src_path]);
        Self {
            name: String::new(),
            sanitize: false,
            flags: &[],
            output_path,
            external_files: externals,
            scanner_path: None,
            parser_path,
            header_paths,
            src_path,
        }
    }
}
383
// `Loader` stores `once_cell::unsync::OnceCell` values, which are not `Sync`,
// so it is only `Sync` because of this manual impl.
// NOTE(review): nothing visible here prevents two threads from initializing
// the same cell through `&Loader` concurrently; confirm callers serialize
// access before relying on this.
unsafe impl Sync for Loader {}
385
386impl Loader {
387    pub fn new() -> Result<Self> {
388        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
389            PathBuf::from(path)
390        } else {
391            if cfg!(target_os = "macos") {
392                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
393                    .cache_dir() // `$HOME/Library/Caches/`
394                    .join("tree-sitter");
395                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
396                    std::fs::remove_dir_all(legacy_apple_path)?;
397                }
398            }
399
400            etcetera::choose_base_strategy()?
401                .cache_dir()
402                .join("tree-sitter")
403                .join("lib")
404        };
405        Ok(Self::with_parser_lib_path(parser_lib_path))
406    }
407
408    #[must_use]
409    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
410        Self {
411            parser_lib_path,
412            languages_by_id: Vec::new(),
413            language_configurations: Vec::new(),
414            language_configuration_ids_by_file_type: HashMap::new(),
415            language_configuration_in_current_path: None,
416            language_configuration_ids_by_first_line_regex: HashMap::new(),
417            #[cfg(feature = "tree-sitter-highlight")]
418            highlight_names: Box::new(Mutex::new(Vec::new())),
419            #[cfg(feature = "tree-sitter-highlight")]
420            use_all_highlight_names: true,
421            debug_build: false,
422            sanitize_build: false,
423            force_rebuild: false,
424
425            #[cfg(feature = "wasm")]
426            wasm_store: Mutex::default(),
427        }
428    }
429
430    #[cfg(feature = "tree-sitter-highlight")]
431    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
432    pub fn configure_highlights(&mut self, names: &[String]) {
433        self.use_all_highlight_names = false;
434        let mut highlights = self.highlight_names.lock().unwrap();
435        highlights.clear();
436        highlights.extend(names.iter().cloned());
437    }
438
439    #[must_use]
440    #[cfg(feature = "tree-sitter-highlight")]
441    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
442    pub fn highlight_names(&self) -> Vec<String> {
443        self.highlight_names.lock().unwrap().clone()
444    }
445
446    pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
447        if config.parser_directories.is_empty() {
448            eprintln!("Warning: You have not configured any parser directories!");
449            eprintln!("Please run `tree-sitter init-config` and edit the resulting");
450            eprintln!("configuration file to indicate where we should look for");
451            eprintln!("language grammars.\n");
452        }
453        for parser_container_dir in &config.parser_directories {
454            if let Ok(entries) = fs::read_dir(parser_container_dir) {
455                for entry in entries {
456                    let entry = entry?;
457                    if let Some(parser_dir_name) = entry.file_name().to_str() {
458                        if parser_dir_name.starts_with("tree-sitter-") {
459                            self.find_language_configurations_at_path(
460                                &parser_container_dir.join(parser_dir_name),
461                                false,
462                            )
463                            .ok();
464                        }
465                    }
466                }
467            }
468        }
469        Ok(())
470    }
471
472    pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<(Language, String)>> {
473        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
474            let mut language_ids = configurations
475                .iter()
476                .map(|c| (c.language_id, c.language_name.clone()))
477                .collect::<Vec<_>>();
478            language_ids.sort_unstable();
479            language_ids.dedup();
480            language_ids
481                .into_iter()
482                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
483                .collect::<Result<Vec<_>>>()
484        } else {
485            Ok(Vec::new())
486        }
487    }
488
489    #[must_use]
490    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
491        self.language_configurations
492            .iter()
493            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
494            .collect()
495    }
496
497    pub fn language_configuration_for_scope(
498        &self,
499        scope: &str,
500    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
501        for configuration in &self.language_configurations {
502            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
503                let language = self.language_for_id(configuration.language_id)?;
504                return Ok(Some((language, configuration)));
505            }
506        }
507        Ok(None)
508    }
509
510    pub fn language_configuration_for_first_line_regex(
511        &self,
512        path: &Path,
513    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
514        self.language_configuration_ids_by_first_line_regex
515            .iter()
516            .try_fold(None, |_, (regex, ids)| {
517                if let Some(regex) = Self::regex(Some(regex)) {
518                    let file = fs::File::open(path)?;
519                    let reader = BufReader::new(file);
520                    let first_line = reader.lines().next().transpose()?;
521                    if let Some(first_line) = first_line {
522                        if regex.is_match(&first_line) && !ids.is_empty() {
523                            let configuration = &self.language_configurations[ids[0]];
524                            let language = self.language_for_id(configuration.language_id)?;
525                            return Ok(Some((language, configuration)));
526                        }
527                    }
528                }
529
530                Ok(None)
531            })
532    }
533
534    pub fn language_configuration_for_file_name(
535        &self,
536        path: &Path,
537    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
538        // Find all the language configurations that match this file name
539        // or a suffix of the file name.
540        let configuration_ids = path
541            .file_name()
542            .and_then(|n| n.to_str())
543            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
544            .or_else(|| {
545                let mut path = path.to_owned();
546                let mut extensions = Vec::with_capacity(2);
547                while let Some(extension) = path.extension() {
548                    extensions.push(extension.to_str()?.to_string());
549                    path = PathBuf::from(path.file_stem()?.to_os_string());
550                }
551                extensions.reverse();
552                self.language_configuration_ids_by_file_type
553                    .get(&extensions.join("."))
554            });
555
556        if let Some(configuration_ids) = configuration_ids {
557            if !configuration_ids.is_empty() {
558                let configuration = if configuration_ids.len() == 1 {
559                    &self.language_configurations[configuration_ids[0]]
560                }
561                // If multiple language configurations match, then determine which
562                // one to use by applying the configurations' content regexes.
563                else {
564                    let file_contents =
565                        fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?;
566                    let file_contents = String::from_utf8_lossy(&file_contents);
567                    let mut best_score = -2isize;
568                    let mut best_configuration_id = None;
569                    for configuration_id in configuration_ids {
570                        let config = &self.language_configurations[*configuration_id];
571
572                        // If the language configuration has a content regex, assign
573                        // a score based on the length of the first match.
574                        let score;
575                        if let Some(content_regex) = &config.content_regex {
576                            if let Some(mat) = content_regex.find(&file_contents) {
577                                score = (mat.end() - mat.start()) as isize;
578                            }
579                            // If the content regex does not match, then *penalize* this
580                            // language configuration, so that language configurations
581                            // without content regexes are preferred over those with
582                            // non-matching content regexes.
583                            else {
584                                score = -1;
585                            }
586                        } else {
587                            score = 0;
588                        }
589                        if score > best_score {
590                            best_configuration_id = Some(*configuration_id);
591                            best_score = score;
592                        }
593                    }
594
595                    &self.language_configurations[best_configuration_id.unwrap()]
596                };
597
598                let language = self.language_for_id(configuration.language_id)?;
599                return Ok(Some((language, configuration)));
600            }
601        }
602
603        Ok(None)
604    }
605
606    pub fn language_configuration_for_injection_string(
607        &self,
608        string: &str,
609    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
610        let mut best_match_length = 0;
611        let mut best_match_position = None;
612        for (i, configuration) in self.language_configurations.iter().enumerate() {
613            if let Some(injection_regex) = &configuration.injection_regex {
614                if let Some(mat) = injection_regex.find(string) {
615                    let length = mat.end() - mat.start();
616                    if length > best_match_length {
617                        best_match_position = Some(i);
618                        best_match_length = length;
619                    }
620                }
621            }
622        }
623
624        if let Some(i) = best_match_position {
625            let configuration = &self.language_configurations[i];
626            let language = self.language_for_id(configuration.language_id)?;
627            Ok(Some((language, configuration)))
628        } else {
629            Ok(None)
630        }
631    }
632
633    pub fn language_for_configuration(
634        &self,
635        configuration: &LanguageConfiguration,
636    ) -> Result<Language> {
637        self.language_for_id(configuration.language_id)
638    }
639
640    fn language_for_id(&self, id: usize) -> Result<Language> {
641        let (path, language, externals) = &self.languages_by_id[id];
642        language
643            .get_or_try_init(|| {
644                let src_path = path.join("src");
645                self.load_language_at_path(CompileConfig::new(
646                    &src_path,
647                    externals.as_deref(),
648                    None,
649                ))
650            })
651            .cloned()
652    }
653
654    pub fn compile_parser_at_path(
655        &self,
656        grammar_path: &Path,
657        output_path: PathBuf,
658        flags: &[&str],
659    ) -> Result<()> {
660        let src_path = grammar_path.join("src");
661        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
662        config.flags = flags;
663        self.load_language_at_path(config).map(|_| ())
664    }
665
666    pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> {
667        let grammar_path = config.src_path.join("grammar.json");
668        config.name = Self::grammar_json_name(&grammar_path)?;
669        self.load_language_at_path_with_name(config)
670    }
671
672    pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result<Language> {
673        let mut lib_name = config.name.to_string();
674        let language_fn_name = format!(
675            "tree_sitter_{}",
676            replace_dashes_with_underscores(&config.name)
677        );
678        if self.debug_build {
679            lib_name.push_str(".debug._");
680        }
681
682        if self.sanitize_build {
683            lib_name.push_str(".sanitize._");
684            config.sanitize = true;
685        }
686
687        if config.output_path.is_none() {
688            fs::create_dir_all(&self.parser_lib_path)?;
689        }
690
691        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
692
693        let output_path = config.output_path.unwrap_or_else(|| {
694            let mut path = self.parser_lib_path.join(lib_name);
695            path.set_extension(env::consts::DLL_EXTENSION);
696            #[cfg(feature = "wasm")]
697            if self.wasm_store.lock().unwrap().is_some() {
698                path.set_extension("wasm");
699            }
700            path
701        });
702        config.output_path = Some(output_path.clone());
703
704        let parser_path = config.src_path.join("parser.c");
705        config.scanner_path = self.get_scanner_path(config.src_path);
706
707        let mut paths_to_check = vec![parser_path];
708
709        if let Some(scanner_path) = config.scanner_path.as_ref() {
710            paths_to_check.push(scanner_path.clone());
711        }
712
713        paths_to_check.extend(
714            config
715                .external_files
716                .unwrap_or_default()
717                .iter()
718                .map(|p| config.src_path.join(p)),
719        );
720
721        if !recompile {
722            recompile = needs_recompile(&output_path, &paths_to_check)
723                .with_context(|| "Failed to compare source and binary timestamps")?;
724        }
725
726        #[cfg(feature = "wasm")]
727        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
728            if recompile {
729                self.compile_parser_to_wasm(
730                    &config.name,
731                    None,
732                    config.src_path,
733                    config
734                        .scanner_path
735                        .as_ref()
736                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
737                    &output_path,
738                    false,
739                )?;
740            }
741
742            let wasm_bytes = fs::read(&output_path)?;
743            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
744        }
745
746        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
747            tempfile::tempdir()
748                .unwrap()
749                .path()
750                .join("tree-sitter")
751                .join("lock")
752                .join(format!("{}.lock", config.name))
753        } else {
754            etcetera::choose_base_strategy()?
755                .cache_dir()
756                .join("tree-sitter")
757                .join("lock")
758                .join(format!("{}.lock", config.name))
759        };
760
761        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
762            recompile = false;
763            if lock_file.try_lock_exclusive().is_err() {
764                // if we can't acquire the lock, another process is compiling the parser, wait for
765                // it and don't recompile
766                lock_file.lock_exclusive()?;
767                recompile = false;
768            } else {
769                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
770                // run that was interrupted and left the lock file behind should not block
771                // subsequent runs
772                let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs();
773                if time > 30 {
774                    fs::remove_file(&lock_path)?;
775                    recompile = true;
776                }
777            }
778        }
779
780        if recompile {
781            fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| {
782                format!(
783                    "Failed to create directory {:?}",
784                    lock_path.parent().unwrap()
785                )
786            })?;
787            let lock_file = fs::OpenOptions::new()
788                .create(true)
789                .truncate(true)
790                .write(true)
791                .open(&lock_path)?;
792            lock_file.lock_exclusive()?;
793
794            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
795
796            if config.scanner_path.is_some() {
797                self.check_external_scanner(&config.name, &output_path)?;
798            }
799        }
800
801        let library = unsafe { Library::new(&output_path) }
802            .with_context(|| format!("Error opening dynamic library {output_path:?}"))?;
803        let language = unsafe {
804            let language_fn = library
805                .get::<Symbol<unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes())
806                .with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
807            language_fn()
808        };
809        mem::forget(library);
810        Ok(language)
811    }
812
813    fn compile_parser_to_dylib(
814        &self,
815        config: &CompileConfig,
816        lock_file: &fs::File,
817        lock_path: &Path,
818    ) -> Result<(), Error> {
819        let mut cc_config = cc::Build::new();
820        cc_config
821            .cargo_metadata(false)
822            .cargo_warnings(false)
823            .target(BUILD_TARGET)
824            .host(BUILD_HOST)
825            .debug(self.debug_build)
826            .file(&config.parser_path)
827            .includes(&config.header_paths)
828            .std("c11");
829
830        if let Some(scanner_path) = config.scanner_path.as_ref() {
831            cc_config.file(scanner_path);
832        }
833
834        if self.debug_build {
835            cc_config.opt_level(0).extra_warnings(true);
836        } else {
837            cc_config.opt_level(2).extra_warnings(false);
838        }
839
840        for flag in config.flags {
841            cc_config.define(flag, None);
842        }
843
844        let compiler = cc_config.get_compiler();
845        let mut command = Command::new(compiler.path());
846        command.args(compiler.args());
847        for (key, value) in compiler.env() {
848            command.env(key, value);
849        }
850
851        let output_path = config.output_path.as_ref().unwrap();
852
853        if compiler.is_like_msvc() {
854            let out = format!("-out:{}", output_path.to_str().unwrap());
855            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
856            command.arg("-utf-8");
857            command.args(cc_config.get_files());
858            command.arg("-link").arg(out);
859        } else {
860            command.arg("-Werror=implicit-function-declaration");
861            if cfg!(any(target_os = "macos", target_os = "ios")) {
862                command.arg("-dynamiclib");
863                // TODO: remove when supported
864                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
865            } else {
866                command.arg("-shared");
867            }
868            command.args(cc_config.get_files());
869            command.arg("-o").arg(output_path);
870        }
871
872        let output = command.output().with_context(|| {
873            format!("Failed to execute the C compiler with the following command:\n{command:?}")
874        })?;
875
876        FileExt::unlock(lock_file)?;
877        fs::remove_file(lock_path)?;
878
879        if output.status.success() {
880            Ok(())
881        } else {
882            Err(anyhow!(
883                "Parser compilation failed.\nStdout: {}\nStderr: {}",
884                String::from_utf8_lossy(&output.stdout),
885                String::from_utf8_lossy(&output.stderr)
886            ))
887        }
888    }
889
890    #[cfg(unix)]
891    fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> {
892        let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
893            "_"
894        } else {
895            ""
896        };
897        let mut must_have = vec![
898            format!("{prefix}tree_sitter_{name}_external_scanner_create"),
899            format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
900            format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
901            format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
902            format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
903        ];
904
905        let command = Command::new("nm")
906            .arg("-W")
907            .arg("-U")
908            .arg(library_path)
909            .output();
910        if let Ok(output) = command {
911            if output.status.success() {
912                let mut found_non_static = false;
913                for line in String::from_utf8_lossy(&output.stdout).lines() {
914                    if line.contains(" T ") {
915                        if let Some(function_name) =
916                            line.split_whitespace().collect::<Vec<_>>().get(2)
917                        {
918                            if !line.contains("tree_sitter_") {
919                                if !found_non_static {
920                                    found_non_static = true;
921                                    eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner");
922                                }
923                                eprintln!("  `{function_name}`");
924                            } else {
925                                must_have.retain(|f| f != function_name);
926                            }
927                        }
928                    }
929                }
930                if found_non_static {
931                    eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
932                }
933
934                if !must_have.is_empty() {
935                    let missing = must_have
936                        .iter()
937                        .map(|f| format!("  `{f}`"))
938                        .collect::<Vec<_>>()
939                        .join("\n");
940
941                    return Err(anyhow!(format!(
942                        indoc! {"
943                            Missing required functions in the external scanner, parsing won't work without these!
944
945                            {}
946
947                            You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
948                        "},
949                        missing,
950                    )));
951                }
952            }
953        }
954
955        Ok(())
956    }
957
/// Windows counterpart of the external scanner check; currently a no-op that
/// always returns `Ok(())`.
///
/// TODO: there's no `nm` command on Windows, so no symbols are inspected here.
/// An implementation should verify that the library exports
/// `tree_sitter_{name}_external_scanner_create`, `_destroy`, `_serialize`,
/// `_deserialize`, and `_scan`.
#[cfg(windows)]
fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> {
    Ok(())
}
972
973    pub fn compile_parser_to_wasm(
974        &self,
975        language_name: &str,
976        root_path: Option<&Path>,
977        src_path: &Path,
978        scanner_filename: Option<&Path>,
979        output_path: &Path,
980        force_docker: bool,
981    ) -> Result<(), Error> {
982        #[derive(PartialEq, Eq)]
983        enum EmccSource {
984            Native,
985            Docker,
986            Podman,
987        }
988
989        let root_path = root_path.unwrap_or(src_path);
990        let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" };
991
992        // Order of preference: emscripten > docker > podman > error
993        let source = if !force_docker && Command::new(emcc_name).output().is_ok() {
994            EmccSource::Native
995        } else if Command::new("docker")
996            .output()
997            .is_ok_and(|out| out.status.success())
998        {
999            EmccSource::Docker
1000        } else if Command::new("podman")
1001            .arg("--version")
1002            .output()
1003            .is_ok_and(|out| out.status.success())
1004        {
1005            EmccSource::Podman
1006        } else {
1007            return Err(anyhow!(
1008                "You must have either emcc, docker, or podman on your PATH to run this command"
1009            ));
1010        };
1011
1012        let mut command = match source {
1013            EmccSource::Native => {
1014                let mut command = Command::new(emcc_name);
1015                command.current_dir(src_path);
1016                command
1017            }
1018
1019            EmccSource::Docker | EmccSource::Podman => {
1020                let mut command = match source {
1021                    EmccSource::Docker => Command::new("docker"),
1022                    EmccSource::Podman => Command::new("podman"),
1023                    EmccSource::Native => unreachable!(),
1024                };
1025                command.args(["run", "--rm"]);
1026
1027                // The working directory is the directory containing the parser itself
1028                let workdir = if root_path == src_path {
1029                    PathBuf::from("/src")
1030                } else {
1031                    let mut path = PathBuf::from("/src");
1032                    path.push(src_path.strip_prefix(root_path).unwrap());
1033                    path
1034                };
1035                command.args(["--workdir", &workdir.to_slash_lossy()]);
1036
1037                // Mount the root directory as a volume, which is the repo root
1038                let mut volume_string = OsString::from(&root_path);
1039                volume_string.push(":/src:Z");
1040                command.args([OsStr::new("--volume"), &volume_string]);
1041
1042                // In case `docker` is an alias to `podman`, ensure that podman
1043                // mounts the current directory as writable by the container
1044                // user which has the same uid as the host user. Setting the
1045                // podman-specific variable is more reliable than attempting to
1046                // detect whether `docker` is an alias for `podman`.
1047                // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode
1048                command.env("PODMAN_USERNS", "keep-id");
1049
1050                // Get the current user id so that files created in the docker container will have
1051                // the same owner.
1052                #[cfg(unix)]
1053                {
1054                    #[link(name = "c")]
1055                    extern "C" {
1056                        fn getuid() -> u32;
1057                    }
1058                    // don't need to set user for podman since PODMAN_USERNS=keep-id is already set
1059                    if source == EmccSource::Docker {
1060                        let user_id = unsafe { getuid() };
1061                        command.args(["--user", &user_id.to_string()]);
1062                    }
1063                };
1064
1065                // Run `emcc` in a container using the `emscripten-slim` image
1066                command.args([EMSCRIPTEN_TAG, "emcc"]);
1067                command
1068            }
1069        };
1070
1071        let output_name = "output.wasm";
1072
1073        command.args([
1074            "-o",
1075            output_name,
1076            "-Os",
1077            "-s",
1078            "WASM=1",
1079            "-s",
1080            "SIDE_MODULE=2",
1081            "-s",
1082            "TOTAL_MEMORY=33554432",
1083            "-s",
1084            "NODEJS_CATCH_EXIT=0",
1085            "-s",
1086            &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"),
1087            "-fno-exceptions",
1088            "-fvisibility=hidden",
1089            "-I",
1090            ".",
1091        ]);
1092
1093        if let Some(scanner_filename) = scanner_filename {
1094            command.arg(scanner_filename);
1095        }
1096
1097        command.arg("parser.c");
1098        let status = command
1099            .spawn()
1100            .with_context(|| "Failed to run emcc command")?
1101            .wait()?;
1102        if !status.success() {
1103            return Err(anyhow!("emcc command failed"));
1104        }
1105
1106        fs::rename(src_path.join(output_name), output_path)
1107            .context("failed to rename wasm output file")?;
1108
1109        Ok(())
1110    }
1111
/// Looks up the highlight configuration of the language selected by the
/// injection string `string`.
///
/// Returns `None` when no language matches, when the matching language has no
/// highlight configuration, or when either lookup fails; failures are
/// reported on stderr instead of being returned.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_config_for_injection_string<'a>(
    &'a self,
    string: &str,
) -> Option<&'a HighlightConfiguration> {
    let lookup = match self.language_configuration_for_injection_string(string) {
        Ok(lookup) => lookup,
        Err(e) => {
            eprintln!("Failed to load language for injection string '{string}': {e}",);
            return None;
        }
    };
    let (language, configuration) = lookup?;

    match configuration.highlight_config(language, None) {
        Ok(config) => config,
        Err(e) => {
            eprintln!(
                "Failed to load property sheet for injection string '{string}': {e}",
            );
            None
        }
    }
}
1138
1139    #[must_use]
1140    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1141        self.language_configuration_in_current_path
1142            .map(|i| &self.language_configurations[i])
1143    }
1144
1145    pub fn find_language_configurations_at_path(
1146        &mut self,
1147        parser_path: &Path,
1148        set_current_path_config: bool,
1149    ) -> Result<&[LanguageConfiguration]> {
1150        let initial_language_configuration_count = self.language_configurations.len();
1151
1152        let ts_json = TreeSitterJSON::from_file(parser_path);
1153        if let Ok(config) = ts_json {
1154            let language_count = self.languages_by_id.len();
1155            for grammar in config.grammars {
1156                // Determine the path to the parser directory. This can be specified in
1157                // the tree-sitter.json, but defaults to the directory containing the
1158                // tree-sitter.json.
1159                let language_path = parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));
1160
1161                // Determine if a previous language configuration in this package.json file
1162                // already uses the same language.
1163                let mut language_id = None;
1164                for (id, (path, _, _)) in
1165                    self.languages_by_id.iter().enumerate().skip(language_count)
1166                {
1167                    if language_path == *path {
1168                        language_id = Some(id);
1169                    }
1170                }
1171
1172                // If not, add a new language path to the list.
1173                let language_id = if let Some(language_id) = language_id {
1174                    language_id
1175                } else {
1176                    self.languages_by_id.push((
1177                            language_path,
1178                            OnceCell::new(),
1179                            grammar.external_files.clone().into_vec().map(|files| {
1180                                files.into_iter()
1181                                    .map(|path| {
1182                                       let path = parser_path.join(path);
1183                                        // prevent p being above/outside of parser_path
1184                                        if path.starts_with(parser_path) {
1185                                            Ok(path)
1186                                        } else {
1187                                            Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
1188                                        }
1189                                    })
1190                                    .collect::<Result<Vec<_>>>()
1191                            }).transpose()?,
1192                        ));
1193                    self.languages_by_id.len() - 1
1194                };
1195
1196                let configuration = LanguageConfiguration {
1197                    root_path: parser_path.to_path_buf(),
1198                    language_name: grammar.name,
1199                    scope: Some(grammar.scope),
1200                    language_id,
1201                    file_types: grammar.file_types.unwrap_or_default(),
1202                    content_regex: Self::regex(grammar.content_regex.as_deref()),
1203                    first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
1204                    injection_regex: Self::regex(grammar.injection_regex.as_deref()),
1205                    injections_filenames: grammar.injections.into_vec(),
1206                    locals_filenames: grammar.locals.into_vec(),
1207                    tags_filenames: grammar.tags.into_vec(),
1208                    highlights_filenames: grammar.highlights.into_vec(),
1209                    #[cfg(feature = "tree-sitter-highlight")]
1210                    highlight_config: OnceCell::new(),
1211                    #[cfg(feature = "tree-sitter-tags")]
1212                    tags_config: OnceCell::new(),
1213                    #[cfg(feature = "tree-sitter-highlight")]
1214                    highlight_names: &self.highlight_names,
1215                    #[cfg(feature = "tree-sitter-highlight")]
1216                    use_all_highlight_names: self.use_all_highlight_names,
1217                };
1218
1219                for file_type in &configuration.file_types {
1220                    self.language_configuration_ids_by_file_type
1221                        .entry(file_type.to_string())
1222                        .or_default()
1223                        .push(self.language_configurations.len());
1224                }
1225                if let Some(first_line_regex) = &configuration.first_line_regex {
1226                    self.language_configuration_ids_by_first_line_regex
1227                        .entry(first_line_regex.to_string())
1228                        .or_default()
1229                        .push(self.language_configurations.len());
1230                }
1231
1232                self.language_configurations.push(unsafe {
1233                    mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1234                        configuration,
1235                    )
1236                });
1237
1238                if set_current_path_config && self.language_configuration_in_current_path.is_none()
1239                {
1240                    self.language_configuration_in_current_path =
1241                        Some(self.language_configurations.len() - 1);
1242                }
1243            }
1244        } else if let Err(e) = ts_json {
1245            match e.downcast_ref::<std::io::Error>() {
1246                // This is noisy, and not really an issue.
1247                Some(e) if e.kind() == std::io::ErrorKind::NotFound => {}
1248                _ => {
1249                    eprintln!(
1250                        "Warning: Failed to parse {} -- {e}",
1251                        parser_path.join("tree-sitter.json").display()
1252                    );
1253                }
1254            }
1255        }
1256
1257        // If we didn't find any language configurations in the tree-sitter.json file,
1258        // but there is a grammar.json file, then use the grammar file to form a simple
1259        // language configuration.
1260        if self.language_configurations.len() == initial_language_configuration_count
1261            && parser_path.join("src").join("grammar.json").exists()
1262        {
1263            let grammar_path = parser_path.join("src").join("grammar.json");
1264            let language_name = Self::grammar_json_name(&grammar_path)?;
1265            let configuration = LanguageConfiguration {
1266                root_path: parser_path.to_owned(),
1267                language_name,
1268                language_id: self.languages_by_id.len(),
1269                file_types: Vec::new(),
1270                scope: None,
1271                content_regex: None,
1272                first_line_regex: None,
1273                injection_regex: None,
1274                injections_filenames: None,
1275                locals_filenames: None,
1276                highlights_filenames: None,
1277                tags_filenames: None,
1278                #[cfg(feature = "tree-sitter-highlight")]
1279                highlight_config: OnceCell::new(),
1280                #[cfg(feature = "tree-sitter-tags")]
1281                tags_config: OnceCell::new(),
1282                #[cfg(feature = "tree-sitter-highlight")]
1283                highlight_names: &self.highlight_names,
1284                #[cfg(feature = "tree-sitter-highlight")]
1285                use_all_highlight_names: self.use_all_highlight_names,
1286            };
1287            self.language_configurations.push(unsafe {
1288                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1289                    configuration,
1290                )
1291            });
1292            self.languages_by_id
1293                .push((parser_path.to_owned(), OnceCell::new(), None));
1294        }
1295
1296        Ok(&self.language_configurations[initial_language_configuration_count..])
1297    }
1298
1299    fn regex(pattern: Option<&str>) -> Option<Regex> {
1300        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1301    }
1302
1303    fn grammar_json_name(grammar_path: &Path) -> Result<String> {
1304        let file = fs::File::open(grammar_path).with_context(|| {
1305            format!("Failed to open grammar.json at {}", grammar_path.display())
1306        })?;
1307
1308        let first_three_lines = BufReader::new(file)
1309            .lines()
1310            .take(3)
1311            .collect::<Result<Vec<_>, _>>()
1312            .with_context(|| {
1313                format!(
1314                    "Failed to read the first three lines of grammar.json at {}",
1315                    grammar_path.display()
1316                )
1317            })?
1318            .join("\n");
1319
1320        let name = GRAMMAR_NAME_REGEX
1321            .captures(&first_three_lines)
1322            .and_then(|c| c.get(1))
1323            .ok_or_else(|| {
1324                anyhow!(
1325                    "Failed to parse the language name from grammar.json at {}",
1326                    grammar_path.display()
1327                )
1328            })?;
1329
1330        Ok(name.as_str().to_string())
1331    }
1332
1333    pub fn select_language(
1334        &mut self,
1335        path: &Path,
1336        current_dir: &Path,
1337        scope: Option<&str>,
1338    ) -> Result<Language> {
1339        if let Some(scope) = scope {
1340            if let Some(config) = self
1341                .language_configuration_for_scope(scope)
1342                .with_context(|| format!("Failed to load language for scope '{scope}'"))?
1343            {
1344                Ok(config.0)
1345            } else {
1346                Err(anyhow!("Unknown scope '{scope}'"))
1347            }
1348        } else if let Some((lang, _)) = self
1349            .language_configuration_for_file_name(path)
1350            .with_context(|| {
1351                format!(
1352                    "Failed to load language for file name {}",
1353                    path.file_name().unwrap().to_string_lossy()
1354                )
1355            })?
1356        {
1357            Ok(lang)
1358        } else if let Some(id) = self.language_configuration_in_current_path {
1359            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1360        } else if let Some(lang) = self
1361            .languages_at_path(current_dir)
1362            .with_context(|| "Failed to load language in current directory")?
1363            .first()
1364            .cloned()
1365        {
1366            Ok(lang.0)
1367        } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)? {
1368            Ok(lang.0)
1369        } else {
1370            Err(anyhow!("No language found"))
1371        }
1372    }
1373
1374    pub fn debug_build(&mut self, flag: bool) {
1375        self.debug_build = flag;
1376    }
1377
1378    pub fn sanitize_build(&mut self, flag: bool) {
1379        self.sanitize_build = flag;
1380    }
1381
1382    pub fn force_rebuild(&mut self, rebuild: bool) {
1383        self.force_rebuild = rebuild;
1384    }
1385
/// Creates a new `WasmStore` for `engine` and installs it as the loader's
/// wasm store, replacing any previous one.
///
/// # Panics
///
/// Panics if the store cannot be created or if the mutex guarding the
/// loader's wasm store is poisoned.
#[cfg(feature = "wasm")]
#[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
    let store = tree_sitter::WasmStore::new(engine).unwrap();
    let mut slot = self.wasm_store.lock().unwrap();
    *slot = Some(store);
}
1391
1392    #[must_use]
1393    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1394        let path = src_path.join("scanner.c");
1395        path.exists().then_some(path)
1396    }
1397}
1398
1399impl LanguageConfiguration<'_> {
1400    #[cfg(feature = "tree-sitter-highlight")]
1401    pub fn highlight_config(
1402        &self,
1403        language: Language,
1404        paths: Option<&[PathBuf]>,
1405    ) -> Result<Option<&HighlightConfiguration>> {
1406        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1407            Some(paths) => (
1408                Some(
1409                    paths
1410                        .iter()
1411                        .filter(|p| p.ends_with("highlights.scm"))
1412                        .cloned()
1413                        .collect::<Vec<_>>(),
1414                ),
1415                Some(
1416                    paths
1417                        .iter()
1418                        .filter(|p| p.ends_with("tags.scm"))
1419                        .cloned()
1420                        .collect::<Vec<_>>(),
1421                ),
1422                Some(
1423                    paths
1424                        .iter()
1425                        .filter(|p| p.ends_with("locals.scm"))
1426                        .cloned()
1427                        .collect::<Vec<_>>(),
1428                ),
1429            ),
1430            None => (None, None, None),
1431        };
1432        self.highlight_config
1433            .get_or_try_init(|| {
1434                let (highlights_query, highlight_ranges) = self.read_queries(
1435                    if highlights_filenames.is_some() {
1436                        highlights_filenames.as_deref()
1437                    } else {
1438                        self.highlights_filenames.as_deref()
1439                    },
1440                    "highlights.scm",
1441                )?;
1442                let (injections_query, injection_ranges) = self.read_queries(
1443                    if injections_filenames.is_some() {
1444                        injections_filenames.as_deref()
1445                    } else {
1446                        self.injections_filenames.as_deref()
1447                    },
1448                    "injections.scm",
1449                )?;
1450                let (locals_query, locals_ranges) = self.read_queries(
1451                    if locals_filenames.is_some() {
1452                        locals_filenames.as_deref()
1453                    } else {
1454                        self.locals_filenames.as_deref()
1455                    },
1456                    "locals.scm",
1457                )?;
1458
1459                if highlights_query.is_empty() {
1460                    Ok(None)
1461                } else {
1462                    let mut result = HighlightConfiguration::new(
1463                        language,
1464                        &self.language_name,
1465                        &highlights_query,
1466                        &injections_query,
1467                        &locals_query,
1468                    )
1469                    .map_err(|error| match error.kind {
1470                        QueryErrorKind::Language => Error::from(error),
1471                        _ => {
1472                            if error.offset < injections_query.len() {
1473                                Self::include_path_in_query_error(
1474                                    error,
1475                                    &injection_ranges,
1476                                    &injections_query,
1477                                    0,
1478                                )
1479                            } else if error.offset < injections_query.len() + locals_query.len() {
1480                                Self::include_path_in_query_error(
1481                                    error,
1482                                    &locals_ranges,
1483                                    &locals_query,
1484                                    injections_query.len(),
1485                                )
1486                            } else {
1487                                Self::include_path_in_query_error(
1488                                    error,
1489                                    &highlight_ranges,
1490                                    &highlights_query,
1491                                    injections_query.len() + locals_query.len(),
1492                                )
1493                            }
1494                        }
1495                    })?;
1496                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1497                    if self.use_all_highlight_names {
1498                        for capture_name in result.query.capture_names() {
1499                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1500                                all_highlight_names.push((*capture_name).to_string());
1501                            }
1502                        }
1503                    }
1504                    result.configure(all_highlight_names.as_slice());
1505                    drop(all_highlight_names);
1506                    Ok(Some(result))
1507                }
1508            })
1509            .map(Option::as_ref)
1510    }
1511
1512    #[cfg(feature = "tree-sitter-tags")]
1513    pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
1514        self.tags_config
1515            .get_or_try_init(|| {
1516                let (tags_query, tags_ranges) =
1517                    self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
1518                let (locals_query, locals_ranges) =
1519                    self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
1520                if tags_query.is_empty() {
1521                    Ok(None)
1522                } else {
1523                    TagsConfiguration::new(language, &tags_query, &locals_query)
1524                        .map(Some)
1525                        .map_err(|error| {
1526                            if let TagsError::Query(error) = error {
1527                                if error.offset < locals_query.len() {
1528                                    Self::include_path_in_query_error(
1529                                        error,
1530                                        &locals_ranges,
1531                                        &locals_query,
1532                                        0,
1533                                    )
1534                                } else {
1535                                    Self::include_path_in_query_error(
1536                                        error,
1537                                        &tags_ranges,
1538                                        &tags_query,
1539                                        locals_query.len(),
1540                                    )
1541                                }
1542                            } else {
1543                                error.into()
1544                            }
1545                        })
1546                }
1547            })
1548            .map(Option::as_ref)
1549    }
1550
1551    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1552    fn include_path_in_query_error(
1553        mut error: QueryError,
1554        ranges: &[(PathBuf, Range<usize>)],
1555        source: &str,
1556        start_offset: usize,
1557    ) -> Error {
1558        let offset_within_section = error.offset - start_offset;
1559        let (path, range) = ranges
1560            .iter()
1561            .find(|(_, range)| range.contains(&offset_within_section))
1562            .unwrap_or_else(|| ranges.last().unwrap());
1563        error.offset = offset_within_section - range.start;
1564        error.row = source[range.start..offset_within_section]
1565            .matches('\n')
1566            .count();
1567        Error::from(error).context(format!("Error in query file {path:?}"))
1568    }
1569
1570    #[allow(clippy::type_complexity)]
1571    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1572    fn read_queries(
1573        &self,
1574        paths: Option<&[PathBuf]>,
1575        default_path: &str,
1576    ) -> Result<(String, Vec<(PathBuf, Range<usize>)>)> {
1577        let mut query = String::new();
1578        let mut path_ranges = Vec::new();
1579        if let Some(paths) = paths {
1580            for path in paths {
1581                let abs_path = self.root_path.join(path);
1582                let prev_query_len = query.len();
1583                query += &fs::read_to_string(&abs_path)
1584                    .with_context(|| format!("Failed to read query file {path:?}"))?;
1585                path_ranges.push((path.clone(), prev_query_len..query.len()));
1586            }
1587        } else {
1588            // highlights.scm is needed to test highlights, and tags.scm to test tags
1589            if default_path == "highlights.scm" || default_path == "tags.scm" {
1590                eprintln!(
1591                    indoc! {"
1592                        Warning: you should add a `{}` entry pointing to the highlights path in the `tree-sitter` object in the grammar's tree-sitter.json file.
1593                        See more here: https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths
1594                    "},
1595                    default_path.replace(".scm", "")
1596                );
1597            }
1598            let queries_path = self.root_path.join("queries");
1599            let path = queries_path.join(default_path);
1600            if path.exists() {
1601                query = fs::read_to_string(&path)
1602                    .with_context(|| format!("Failed to read query file {path:?}"))?;
1603                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1604            }
1605        }
1606
1607        Ok((query, path_ranges))
1608    }
1609}
1610
1611fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
1612    if !lib_path.exists() {
1613        return Ok(true);
1614    }
1615    let lib_mtime =
1616        mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?;
1617    for path in paths_to_check {
1618        if mtime(path)? > lib_mtime {
1619            return Ok(true);
1620        }
1621    }
1622    Ok(false)
1623}
1624
/// Returns the last-modification time of the file system entry at `path`.
///
/// # Errors
///
/// Fails if the metadata for `path` cannot be queried or does not provide a modification time.
fn mtime(path: &Path) -> Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}
1628
/// Returns a copy of `name` in which every `-` has been turned into `_`; all other characters
/// are kept as they are.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.replace('-', "_")
}