tree_sitter_highlight/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod c_lib;
4use core::slice;
5use std::{
6    collections::HashSet,
7    iter,
8    marker::PhantomData,
9    mem::{self, MaybeUninit},
10    ops, str,
11    sync::{
12        atomic::{AtomicUsize, Ordering},
13        LazyLock,
14    },
15};
16
17pub use c_lib as c;
18use streaming_iterator::StreamingIterator;
19use thiserror::Error;
20use tree_sitter::{
21    ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture,
22    QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree,
23};
24
/// Number of loop iterations in `HighlightIter::next` between two reads of the
/// cancellation flag.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
// NOTE(review): the two capacities below are not referenced in the visible part of this file;
// presumably they pre-size the `html` and `line_offsets` buffers of `HtmlRenderer` — confirm.
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
28
/// The predefined set of standard capture names.
///
/// `HighlightConfiguration::nonconformant_capture_names` compares against this set when the
/// caller passes an empty set of names.
static STANDARD_CAPTURE_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Build the set directly from an array so that no temporary `Vec` is allocated.
    HashSet::from([
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "comment.documentation",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "error",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "markup",
        "markup.bold",
        "markup.heading",
        "markup.italic",
        "markup.link",
        "markup.link.url",
        "markup.list",
        "markup.list.checked",
        "markup.list.numbered",
        "markup.list.unchecked",
        "markup.list.unnumbered",
        "markup.quote",
        "markup.raw",
        "markup.raw.block",
        "markup.raw.inline",
        "markup.strikethrough",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.escape",
        "string.regexp",
        "string.special",
        "string.special.symbol",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.member",
        "variable.parameter",
    ])
});
87
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped `usize` is an index into the list of recognized highlight names that was
/// passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
91
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// Highlighting was interrupted: either the cancellation flag was observed to be non-zero
    /// while iterating, or parsing a layer produced no tree.
    #[error("Cancelled")]
    Cancelled,
    /// The parser rejected the `Language` of a `HighlightConfiguration`.
    #[error("Invalid language")]
    InvalidLanguage,
    // NOTE(review): nothing in the visible part of this file constructs this variant.
    #[error("Unknown error")]
    Unknown,
}
102
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A stretch of the source, given as the byte offsets `start..end`.
    Source { start: usize, end: usize },
    /// The given highlight starts applying at the current position.
    HighlightStart(Highlight),
    /// A highlight stops applying at the current position.
    // NOTE(review): presumably this closes the most recently opened `HighlightStart` — confirm
    // against the iterator implementation.
    HighlightEnd,
}
110
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// The language handed to the parser for layers that use this configuration.
    pub language: Language,
    /// The name given to `HighlightConfiguration::new`.
    pub language_name: String,
    /// The combined query: injection, locals and highlights patterns, concatenated in that
    /// order.
    pub query: Query,
    /// A query built from the injection patterns alone in which only the patterns carrying the
    /// `injection.combined` property stay enabled; `None` when there is no such pattern.
    combined_injections_query: Option<Query>,
    /// Index of the first locals pattern in `query` (i.e. the number of injection patterns).
    locals_pattern_index: usize,
    /// Index of the first highlights pattern in `query`.
    highlights_pattern_index: usize,
    /// For each capture of `query`, the highlight chosen by `configure` (`None` when no
    /// recognized name matched, and for every capture before `configure` is called).
    highlight_indices: Vec<Option<Highlight>>,
    /// For each pattern of `query`, whether it has a negated `local` property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Capture index of `injection.content`, if the query uses that capture.
    injection_content_capture_index: Option<u32>,
    /// Capture index of `injection.language`, if the query uses that capture.
    injection_language_capture_index: Option<u32>,
    /// Capture index of `local.scope`, if the query uses that capture.
    local_scope_capture_index: Option<u32>,
    /// Capture index of `local.definition`, if the query uses that capture.
    local_def_capture_index: Option<u32>,
    /// Capture index of `local.definition-value`, if the query uses that capture.
    local_def_value_capture_index: Option<u32>,
    /// Capture index of `local.reference`, if the query uses that capture.
    local_ref_capture_index: Option<u32>,
}
130
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// The parser used for every layer; its included ranges and language are reset per layer.
    pub parser: Parser,
    /// Pool of query cursors: layers take a cursor from here when they are created, and
    /// finished layers hand theirs back.
    cursors: Vec<QueryCursor>,
}
140
/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
// NOTE(review): the code that fills these fields is outside the visible part of the file; the
// field notes below are inferred from the names only — confirm before relying on them.
pub struct HtmlRenderer {
    // Presumably the rendered HTML bytes.
    pub html: Vec<u8>,
    // Presumably the offset in `html` at which each rendered line starts.
    pub line_offsets: Vec<u32>,
    // Presumably the highlight associated with the `carriage-return` capture name, if any.
    carriage_return_highlight: Option<Highlight>,
}
147
// A local definition recorded inside a `LocalScope`.
// NOTE(review): the code that creates and reads these values is outside the visible part of
// the file; field meanings are inferred from names and types — confirm.
#[derive(Debug)]
struct LocalDef<'a> {
    // Name of the definition, borrowed rather than copied.
    name: &'a str,
    // Presumably the byte range of the definition's value.
    value_range: ops::Range<usize>,
    // Presumably the highlight assigned to the definition, if any.
    highlight: Option<Highlight>,
}
154
// A scope used for tracking local definitions. Every layer starts with one root scope that
// spans `0..usize::MAX`, has `inherits: false` and holds no definitions.
#[derive(Debug)]
struct LocalScope<'a> {
    // NOTE(review): presumably controls whether lookups continue into enclosing scopes — confirm.
    inherits: bool,
    // Byte range covered by the scope.
    range: ops::Range<usize>,
    // Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}
161
// The iterator returned by `Highlighter::highlight`; it yields `HighlightEvent`s for `source`.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    // The document being highlighted.
    source: &'a [u8],
    // `language_name` of the root configuration.
    language_name: &'a str,
    // Byte offset up to which `Source` events have already been emitted.
    byte_offset: usize,
    // The highlighter that owns the parser and the pool of reusable query cursors.
    highlighter: &'a mut Highlighter,
    // Maps an injected language's name to its configuration (or `None` if unavailable).
    injection_callback: F,
    // Optional flag; a non-zero value makes the iterator yield `Error::Cancelled`.
    cancellation_flag: Option<&'a AtomicUsize>,
    // Active layers; `sort_layers` keeps the layer with the smallest sort key at index 0.
    layers: Vec<HighlightIterLayer<'a>>,
    // Iterations since the cancellation flag was last checked.
    iter_count: usize,
    // An event that was held back and must be returned by the next call to `next`.
    next_event: Option<HighlightEvent>,
    // NOTE(review): usage is outside the visible part of the file; presumably
    // (start byte, end byte, layer depth) of the last highlighted range — confirm.
    last_highlight_range: Option<(usize, usize, usize)>,
}
177
// One parsed document (the root document or an injection) together with its capture iterator.
struct HighlightIterLayer<'a> {
    // The syntax tree that `captures` refers to; stored here so that it stays alive alongside it.
    _tree: Tree,
    // The query cursor that `captures` refers to; handed back to the highlighter's pool when
    // the layer is finished.
    cursor: QueryCursor,
    // Captures of `config.query` over `_tree`, peekable so that the next boundary can be
    // inspected without consuming it.
    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    // The configuration this layer was parsed and queried with.
    config: &'a HighlightConfiguration,
    // Stack of byte offsets; its top is the next highlight end used by `sort_key`.
    highlight_end_stack: Vec<usize>,
    // Stack of local scopes; starts with a single root scope.
    scope_stack: Vec<LocalScope<'a>>,
    // The ranges of the document that were parsed for this layer.
    ranges: Vec<Range>,
    // Injection nesting depth; the root document has depth 0.
    depth: usize,
}
188
/// Local stand-in for `tree_sitter::QueryCaptures` that can be driven through a regular
/// `Iterator` implementation.
///
/// Values of this type are produced by transmuting a `tree_sitter::QueryCaptures` (see
/// `HighlightIterLayer::new`), so the fields here must not be added to, removed or reordered.
// NOTE(review): the transmute relies on this declaration having the same layout as the
// library type — confirm against the `tree_sitter` version in use.
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
    // Raw query cursor that captures are pulled from.
    ptr: *mut ffi::TSQueryCursor,
    // The query whose text predicates each match is checked against.
    query: &'query Query,
    // Supplies node text when checking text predicates.
    text_provider: T,
    // Scratch buffers handed to the text-predicate check.
    buffer1: Vec<u8>,
    buffer2: Vec<u8>,
    // Not read by the code in this file; present only to mirror the library type.
    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
    // Not read by the code in this file; present only to mirror the library type.
    _options: Option<*mut ffi::TSQueryCursorOptions>,
    _phantom: PhantomData<(&'tree (), I)>,
}
199
// Local stand-in for `tree_sitter::QueryMatch`. Values built with `_QueryMatch::new` are
// transmuted into `QueryMatch` by the `_QueryCaptures` iterator, so the fields here must not
// be added to, removed or reordered.
// NOTE(review): the transmute relies on this declaration having the same layout as the
// library type — confirm against the `tree_sitter` version in use.
struct _QueryMatch<'cursor, 'tree> {
    // Index of the matched pattern within the query.
    pub _pattern_index: usize,
    // The captures belonging to the match.
    pub _captures: &'cursor [QueryCapture<'tree>],
    // Id of the match as reported by the raw cursor.
    _id: u32,
    // The raw cursor that produced the match.
    _cursor: *mut ffi::TSQueryCursor,
}
206
207impl<'tree> _QueryMatch<'_, 'tree> {
208    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
209        _QueryMatch {
210            _cursor: cursor,
211            _id: m.id,
212            _pattern_index: m.pattern_index as usize,
213            _captures: (m.capture_count > 0)
214                .then(|| unsafe {
215                    slice::from_raw_parts(
216                        m.captures.cast::<QueryCapture<'tree>>(),
217                        m.capture_count as usize,
218                    )
219                })
220                .unwrap_or_default(),
221        }
222    }
223}
224
225impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
226    for _QueryCaptures<'query, 'tree, T, I>
227{
228    type Item = (QueryMatch<'query, 'tree>, usize);
229
230    fn next(&mut self) -> Option<Self::Item> {
231        unsafe {
232            loop {
233                let mut capture_index = 0u32;
234                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
235                if ffi::ts_query_cursor_next_capture(
236                    self.ptr,
237                    m.as_mut_ptr(),
238                    core::ptr::addr_of_mut!(capture_index),
239                ) {
240                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
241                        &m.assume_init(),
242                        self.ptr,
243                    ));
244                    if result.satisfies_text_predicates(
245                        self.query,
246                        &mut self.buffer1,
247                        &mut self.buffer2,
248                        &mut self.text_provider,
249                    ) {
250                        return Some((result, capture_index as usize));
251                    }
252                    result.remove();
253                } else {
254                    return None;
255                }
256            }
257        }
258    }
259}
260
impl Default for Highlighter {
    /// Equivalent to `Highlighter::new`.
    fn default() -> Self {
        Self::new()
    }
}
266
267impl Highlighter {
268    #[must_use]
269    pub fn new() -> Self {
270        Self {
271            parser: Parser::new(),
272            cursors: Vec::new(),
273        }
274    }
275
276    pub fn parser(&mut self) -> &mut Parser {
277        &mut self.parser
278    }
279
280    /// Iterate over the highlighted regions for a given slice of source code.
281    pub fn highlight<'a>(
282        &'a mut self,
283        config: &'a HighlightConfiguration,
284        source: &'a [u8],
285        cancellation_flag: Option<&'a AtomicUsize>,
286        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
287    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
288        let layers = HighlightIterLayer::new(
289            source,
290            None,
291            self,
292            cancellation_flag,
293            &mut injection_callback,
294            config,
295            0,
296            vec![Range {
297                start_byte: 0,
298                end_byte: usize::MAX,
299                start_point: Point::new(0, 0),
300                end_point: Point::new(usize::MAX, usize::MAX),
301            }],
302        )?;
303        assert_ne!(layers.len(), 0);
304        let mut result = HighlightIter {
305            source,
306            language_name: &config.language_name,
307            byte_offset: 0,
308            injection_callback,
309            cancellation_flag,
310            highlighter: self,
311            iter_count: 0,
312            layers,
313            next_event: None,
314            last_highlight_range: None,
315        };
316        result.sort_layers();
317        Ok(result)
318    }
319}
320
321impl HighlightConfiguration {
322    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
323    /// queries.
324    ///
325    /// # Parameters
326    ///
327    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
328    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
329    ///   should be non-empty, otherwise no syntax highlights will be added.
330    /// * `injections_query` -  A string containing tree patterns for injecting other languages into
331    ///   the document. This can be empty if no injections are desired.
332    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
333    ///   and references. This can be empty if local variable tracking is not needed.
334    ///
335    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
336    pub fn new(
337        language: Language,
338        name: impl Into<String>,
339        highlights_query: &str,
340        injection_query: &str,
341        locals_query: &str,
342    ) -> Result<Self, QueryError> {
343        // Concatenate the query strings, keeping track of the start offset of each section.
344        let mut query_source = String::new();
345        query_source.push_str(injection_query);
346        let locals_query_offset = query_source.len();
347        query_source.push_str(locals_query);
348        let highlights_query_offset = query_source.len();
349        query_source.push_str(highlights_query);
350
351        // Construct a single query by concatenating the three query strings, but record the
352        // range of pattern indices that belong to each individual string.
353        let mut query = Query::new(&language, &query_source)?;
354        let mut locals_pattern_index = 0;
355        let mut highlights_pattern_index = 0;
356        for i in 0..(query.pattern_count()) {
357            let pattern_offset = query.start_byte_for_pattern(i);
358            if pattern_offset < highlights_query_offset {
359                if pattern_offset < highlights_query_offset {
360                    highlights_pattern_index += 1;
361                }
362                if pattern_offset < locals_query_offset {
363                    locals_pattern_index += 1;
364                }
365            }
366        }
367
368        // Construct a separate query just for dealing with the 'combined injections'.
369        // Disable the combined injection patterns in the main query.
370        let mut combined_injections_query = Query::new(&language, injection_query)?;
371        let mut has_combined_queries = false;
372        for pattern_index in 0..locals_pattern_index {
373            let settings = query.property_settings(pattern_index);
374            if settings.iter().any(|s| &*s.key == "injection.combined") {
375                has_combined_queries = true;
376                query.disable_pattern(pattern_index);
377            } else {
378                combined_injections_query.disable_pattern(pattern_index);
379            }
380        }
381        let combined_injections_query = if has_combined_queries {
382            Some(combined_injections_query)
383        } else {
384            None
385        };
386
387        // Find all of the highlighting patterns that are disabled for nodes that
388        // have been identified as local variables.
389        let non_local_variable_patterns = (0..query.pattern_count())
390            .map(|i| {
391                query
392                    .property_predicates(i)
393                    .iter()
394                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
395            })
396            .collect();
397
398        // Store the numeric ids for all of the special captures.
399        let mut injection_content_capture_index = None;
400        let mut injection_language_capture_index = None;
401        let mut local_def_capture_index = None;
402        let mut local_def_value_capture_index = None;
403        let mut local_ref_capture_index = None;
404        let mut local_scope_capture_index = None;
405        for (i, name) in query.capture_names().iter().enumerate() {
406            let i = Some(i as u32);
407            match *name {
408                "injection.content" => injection_content_capture_index = i,
409                "injection.language" => injection_language_capture_index = i,
410                "local.definition" => local_def_capture_index = i,
411                "local.definition-value" => local_def_value_capture_index = i,
412                "local.reference" => local_ref_capture_index = i,
413                "local.scope" => local_scope_capture_index = i,
414                _ => {}
415            }
416        }
417
418        let highlight_indices = vec![None; query.capture_names().len()];
419        Ok(Self {
420            language,
421            language_name: name.into(),
422            query,
423            combined_injections_query,
424            locals_pattern_index,
425            highlights_pattern_index,
426            highlight_indices,
427            non_local_variable_patterns,
428            injection_content_capture_index,
429            injection_language_capture_index,
430            local_def_capture_index,
431            local_def_value_capture_index,
432            local_ref_capture_index,
433            local_scope_capture_index,
434        })
435    }
436
    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the combined query, so the slice also contains the
    /// special `injection.*` and `local.*` captures when the queries use them.
    #[must_use]
    pub const fn names(&self) -> &[&str] {
        self.query.capture_names()
    }
442
443    /// Set the list of recognized highlight names.
444    ///
445    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
446    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
447    /// these queries can choose to recognize highlights with different levels of specificity.
448    /// For example, the string `function.builtin` will match against `function.method.builtin`
449    /// and `function.builtin.constructor`, but will not match `function.method`.
450    ///
451    /// When highlighting, results are returned as `Highlight` values, which contain the index
452    /// of the matched highlight this list of highlight names.
453    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
454        let mut capture_parts = Vec::new();
455        self.highlight_indices.clear();
456        self.highlight_indices
457            .extend(self.query.capture_names().iter().map(move |capture_name| {
458                capture_parts.clear();
459                capture_parts.extend(capture_name.split('.'));
460
461                let mut best_index = None;
462                let mut best_match_len = 0;
463                for (i, recognized_name) in recognized_names.iter().enumerate() {
464                    let mut len = 0;
465                    let mut matches = true;
466                    for part in recognized_name.as_ref().split('.') {
467                        len += 1;
468                        if !capture_parts.contains(&part) {
469                            matches = false;
470                            break;
471                        }
472                    }
473                    if matches && len > best_match_len {
474                        best_index = Some(i);
475                        best_match_len = len;
476                    }
477                }
478                best_index.map(Highlight)
479            }));
480    }
481
482    // Return the list of this configuration's capture names that are neither present in the
483    // list of predefined 'canonical' names nor start with an underscore (denoting 'private'
484    // captures used as part of capture internals).
485    #[must_use]
486    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
487        let capture_names = if capture_names.is_empty() {
488            &*STANDARD_CAPTURE_NAMES
489        } else {
490            capture_names
491        };
492        self.names()
493            .iter()
494            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
495            .copied()
496            .collect()
497    }
498}
499
500impl<'a> HighlightIterLayer<'a> {
    /// Create a new 'layer' of highlighting for this document.
    ///
    /// In the event that the new layer contains "combined injections" (injections where multiple
    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
    /// added to the returned vector.
    ///
    /// A layer whose `ranges` the parser refuses is skipped, so the returned vector can be
    /// empty.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidLanguage` if the parser rejects a layer's language and
    /// `Error::Cancelled` if parsing a layer yields no tree.
    #[allow(clippy::too_many_arguments)]
    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
        source: &'a [u8],
        parent_name: Option<&str>,
        highlighter: &mut Highlighter,
        cancellation_flag: Option<&'a AtomicUsize>,
        injection_callback: &mut F,
        mut config: &'a HighlightConfiguration,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<Vec<Self>, Error> {
        let mut result = Vec::with_capacity(1);
        // Combined injections discovered so far that still have to be turned into layers.
        let mut queue = Vec::new();
        // Each iteration builds the layer for the current `(config, depth, ranges)` and then
        // moves on to the next queued combined injection, if any.
        loop {
            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                highlighter
                    .parser
                    .set_language(&config.language)
                    .map_err(|_| Error::InvalidLanguage)?;

                let tree = highlighter
                    .parser
                    .parse_with_options(
                        // Feed the parser the rest of the document from byte offset `i`, or an
                        // empty slice once `i` is at or past the end.
                        &mut |i, _| {
                            if i < source.len() {
                                &source[i..]
                            } else {
                                &[]
                            }
                        },
                        None,
                        // Progress callback: reports `true` once the cancellation flag is non-zero.
                        // NOTE(review): presumably `true` makes the parser stop and return no
                        // tree, which is then mapped to `Error::Cancelled` below — confirm.
                        Some(ParseOptions::new().progress_callback(&mut |_| {
                            if let Some(cancellation_flag) = cancellation_flag {
                                cancellation_flag.load(Ordering::SeqCst) != 0
                            } else {
                                false
                            }
                        })),
                    )
                    .ok_or(Error::Cancelled)?;
                // Reuse a pooled cursor when one is available.
                let mut cursor = highlighter.cursors.pop().unwrap_or_default();

                // Process combined injections.
                if let Some(combined_injections_query) = &config.combined_injections_query {
                    // One `(language name, content nodes, include children)` entry per pattern.
                    let mut injections_by_pattern_index =
                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
                    let mut matches =
                        cursor.matches(combined_injections_query, tree.root_node(), source);
                    while let Some(mat) = matches.next() {
                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
                        let (language_name, content_node, include_children) = injection_for_match(
                            config,
                            parent_name,
                            combined_injections_query,
                            mat,
                            source,
                        );
                        // Keep the most recent language name any match of this pattern provided.
                        if language_name.is_some() {
                            entry.0 = language_name;
                        }
                        // Content nodes of all matches of the pattern are accumulated.
                        if let Some(content_node) = content_node {
                            entry.1.push(content_node);
                        }
                        // Overwritten by every match, so the last match of a pattern decides.
                        entry.2 = include_children;
                    }
                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
                    {
                        // Only patterns that produced both a language name and content nodes
                        // result in an injection.
                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
                            if let Some(next_config) = (injection_callback)(lang_name) {
                                let ranges = Self::intersect_ranges(
                                    &ranges,
                                    &content_nodes,
                                    includes_children,
                                );
                                if !ranges.is_empty() {
                                    queue.push((next_config, depth + 1, ranges));
                                }
                            }
                        }
                    }
                }

                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
                // prevents them from being moved. But both of these values are really just
                // pointers, so it's actually ok to move them.
                // SAFETY (review note): the lifetimes are erased here; `tree` and `cursor` are
                // stored in the same `HighlightIterLayer` as `captures` below, which is what
                // keeps them alive while `captures` is in use.
                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
                let cursor_ref = unsafe {
                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
                };
                // Reinterpret the library's capture iterator as the local `_QueryCaptures`
                // mirror type; this relies on both types having the same layout.
                let captures = unsafe {
                    std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
                    )
                }
                .peekable();

                result.push(HighlightIterLayer {
                    highlight_end_stack: Vec::new(),
                    // Every layer starts with a root scope that spans everything.
                    scope_stack: vec![LocalScope {
                        inherits: false,
                        range: 0..usize::MAX,
                        local_defs: Vec::new(),
                    }],
                    cursor,
                    depth,
                    _tree: tree,
                    captures,
                    config,
                    ranges,
                });
            }

            if queue.is_empty() {
                break;
            }

            // Take the oldest queued injection (first in, first out).
            let (next_config, next_depth, next_ranges) = queue.remove(0);
            config = next_config;
            depth = next_depth;
            ranges = next_ranges;
        }

        Ok(result)
    }
630
631    // Compute the ranges that should be included when parsing an injection.
632    // This takes into account three things:
633    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
634    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
635    //   ranges of those nodes.
636    // * `includes_children` - For some injections, the content nodes' children should be excluded
637    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
638    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
639    //   of their children.
640    fn intersect_ranges(
641        parent_ranges: &[Range],
642        nodes: &[Node],
643        includes_children: bool,
644    ) -> Vec<Range> {
645        let mut cursor = nodes[0].walk();
646        let mut result = Vec::new();
647        let mut parent_range_iter = parent_ranges.iter();
648        let mut parent_range = parent_range_iter
649            .next()
650            .expect("Layers should only be constructed with non-empty ranges vectors");
651        for node in nodes {
652            let mut preceding_range = Range {
653                start_byte: 0,
654                start_point: Point::new(0, 0),
655                end_byte: node.start_byte(),
656                end_point: node.start_position(),
657            };
658            let following_range = Range {
659                start_byte: node.end_byte(),
660                start_point: node.end_position(),
661                end_byte: usize::MAX,
662                end_point: Point::new(usize::MAX, usize::MAX),
663            };
664
665            for excluded_range in node
666                .children(&mut cursor)
667                .filter_map(|child| {
668                    if includes_children {
669                        None
670                    } else {
671                        Some(child.range())
672                    }
673                })
674                .chain(std::iter::once(following_range))
675            {
676                let mut range = Range {
677                    start_byte: preceding_range.end_byte,
678                    start_point: preceding_range.end_point,
679                    end_byte: excluded_range.start_byte,
680                    end_point: excluded_range.start_point,
681                };
682                preceding_range = excluded_range;
683
684                if range.end_byte < parent_range.start_byte {
685                    continue;
686                }
687
688                while parent_range.start_byte <= range.end_byte {
689                    if parent_range.end_byte > range.start_byte {
690                        if range.start_byte < parent_range.start_byte {
691                            range.start_byte = parent_range.start_byte;
692                            range.start_point = parent_range.start_point;
693                        }
694
695                        if parent_range.end_byte < range.end_byte {
696                            if range.start_byte < parent_range.end_byte {
697                                result.push(Range {
698                                    start_byte: range.start_byte,
699                                    start_point: range.start_point,
700                                    end_byte: parent_range.end_byte,
701                                    end_point: parent_range.end_point,
702                                });
703                            }
704                            range.start_byte = parent_range.end_byte;
705                            range.start_point = parent_range.end_point;
706                        } else {
707                            if range.start_byte < range.end_byte {
708                                result.push(range);
709                            }
710                            break;
711                        }
712                    }
713
714                    if let Some(next_range) = parent_range_iter.next() {
715                        parent_range = next_range;
716                    } else {
717                        return result;
718                    }
719                }
720            }
721        }
722        result
723    }
724
725    // First, sort scope boundaries by their byte offset in the document. At a
726    // given position, emit scope endings before scope beginnings. Finally, emit
727    // scope boundaries from deeper layers first.
728    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
729        let depth = -(self.depth as isize);
730        let next_start = self
731            .captures
732            .peek()
733            .map(|(m, i)| m.captures[*i].node.start_byte());
734        let next_end = self.highlight_end_stack.last().copied();
735        match (next_start, next_end) {
736            (Some(start), Some(end)) => {
737                if start < end {
738                    Some((start, true, depth))
739                } else {
740                    Some((end, false, depth))
741                }
742            }
743            (Some(i), None) => Some((i, true, depth)),
744            (None, Some(j)) => Some((j, false, depth)),
745            _ => None,
746        }
747    }
748}
749
750impl<'a, F> HighlightIter<'a, F>
751where
752    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
753{
754    fn emit_event(
755        &mut self,
756        offset: usize,
757        event: Option<HighlightEvent>,
758    ) -> Option<Result<HighlightEvent, Error>> {
759        let result;
760        if self.byte_offset < offset {
761            result = Some(Ok(HighlightEvent::Source {
762                start: self.byte_offset,
763                end: offset,
764            }));
765            self.byte_offset = offset;
766            self.next_event = event;
767        } else {
768            result = event.map(Ok);
769        }
770        self.sort_layers();
771        result
772    }
773
774    fn sort_layers(&mut self) {
775        while !self.layers.is_empty() {
776            if let Some(sort_key) = self.layers[0].sort_key() {
777                let mut i = 0;
778                while i + 1 < self.layers.len() {
779                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
780                        if next_offset < sort_key {
781                            i += 1;
782                            continue;
783                        }
784                    }
785                    break;
786                }
787                if i > 0 {
788                    self.layers[0..=i].rotate_left(1);
789                }
790                break;
791            }
792            let layer = self.layers.remove(0);
793            self.highlighter.cursors.push(layer.cursor);
794        }
795    }
796
797    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
798        if let Some(sort_key) = layer.sort_key() {
799            let mut i = 1;
800            while i < self.layers.len() {
801                if let Some(sort_key_i) = self.layers[i].sort_key() {
802                    if sort_key_i > sort_key {
803                        self.layers.insert(i, layer);
804                        return;
805                    }
806                    i += 1;
807                } else {
808                    self.layers.remove(i);
809                }
810            }
811            self.layers.push(layer);
812        }
813    }
814}
815
816impl<'a, F> Iterator for HighlightIter<'a, F>
817where
818    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
819{
820    type Item = Result<HighlightEvent, Error>;
821
822    fn next(&mut self) -> Option<Self::Item> {
823        'main: loop {
824            // If we've already determined the next highlight boundary, just return it.
825            if let Some(e) = self.next_event.take() {
826                return Some(Ok(e));
827            }
828
829            // Periodically check for cancellation, returning `Cancelled` error if the
830            // cancellation flag was flipped.
831            if let Some(cancellation_flag) = self.cancellation_flag {
832                self.iter_count += 1;
833                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
834                    self.iter_count = 0;
835                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
836                        return Some(Err(Error::Cancelled));
837                    }
838                }
839            }
840
841            // If none of the layers have any more highlight boundaries, terminate.
842            if self.layers.is_empty() {
843                return if self.byte_offset < self.source.len() {
844                    let result = Some(Ok(HighlightEvent::Source {
845                        start: self.byte_offset,
846                        end: self.source.len(),
847                    }));
848                    self.byte_offset = self.source.len();
849                    result
850                } else {
851                    None
852                };
853            }
854
855            // Get the next capture from whichever layer has the earliest highlight boundary.
856            let range;
857            let layer = &mut self.layers[0];
858            if let Some((next_match, capture_index)) = layer.captures.peek() {
859                let next_capture = next_match.captures[*capture_index];
860                range = next_capture.node.byte_range();
861
862                // If any previous highlight ends before this node starts, then before
863                // processing this capture, emit the source code up until the end of the
864                // previous highlight, and an end event for that highlight.
865                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
866                    if end_byte <= range.start {
867                        layer.highlight_end_stack.pop();
868                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
869                    }
870                }
871            }
872            // If there are no more captures, then emit any remaining highlight end events.
873            // And if there are none of those, then just advance to the end of the document.
874            else {
875                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
876                    layer.highlight_end_stack.pop();
877                    return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
878                }
879                return self.emit_event(self.source.len(), None);
880            }
881
882            let (mut match_, capture_index) = layer.captures.next().unwrap();
883            let mut capture = match_.captures[capture_index];
884
885            // If this capture represents an injection, then process the injection.
886            if match_.pattern_index < layer.config.locals_pattern_index {
887                let (language_name, content_node, include_children) = injection_for_match(
888                    layer.config,
889                    Some(self.language_name),
890                    &layer.config.query,
891                    &match_,
892                    self.source,
893                );
894
895                // Explicitly remove this match so that none of its other captures will remain
896                // in the stream of captures.
897                match_.remove();
898
899                // If a language is found with the given name, then add a new language layer
900                // to the highlighted document.
901                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
902                    if let Some(config) = (self.injection_callback)(language_name) {
903                        let ranges = HighlightIterLayer::intersect_ranges(
904                            &self.layers[0].ranges,
905                            &[content_node],
906                            include_children,
907                        );
908                        if !ranges.is_empty() {
909                            match HighlightIterLayer::new(
910                                self.source,
911                                Some(self.language_name),
912                                self.highlighter,
913                                self.cancellation_flag,
914                                &mut self.injection_callback,
915                                config,
916                                self.layers[0].depth + 1,
917                                ranges,
918                            ) {
919                                Ok(layers) => {
920                                    for layer in layers {
921                                        self.insert_layer(layer);
922                                    }
923                                }
924                                Err(e) => return Some(Err(e)),
925                            }
926                        }
927                    }
928                }
929
930                self.sort_layers();
931                continue 'main;
932            }
933
934            // Remove from the local scope stack any local scopes that have already ended.
935            while range.start > layer.scope_stack.last().unwrap().range.end {
936                layer.scope_stack.pop();
937            }
938
939            // If this capture is for tracking local variables, then process the
940            // local variable info.
941            let mut reference_highlight = None;
942            let mut definition_highlight = None;
943            while match_.pattern_index < layer.config.highlights_pattern_index {
944                // If the node represents a local scope, push a new local scope onto
945                // the scope stack.
946                if Some(capture.index) == layer.config.local_scope_capture_index {
947                    definition_highlight = None;
948                    let mut scope = LocalScope {
949                        inherits: true,
950                        range: range.clone(),
951                        local_defs: Vec::new(),
952                    };
953                    for prop in layer.config.query.property_settings(match_.pattern_index) {
954                        if prop.key.as_ref() == "local.scope-inherits" {
955                            scope.inherits =
956                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
957                        }
958                    }
959                    layer.scope_stack.push(scope);
960                }
961                // If the node represents a definition, add a new definition to the
962                // local scope at the top of the scope stack.
963                else if Some(capture.index) == layer.config.local_def_capture_index {
964                    reference_highlight = None;
965                    definition_highlight = None;
966                    let scope = layer.scope_stack.last_mut().unwrap();
967
968                    let mut value_range = 0..0;
969                    for capture in match_.captures {
970                        if Some(capture.index) == layer.config.local_def_value_capture_index {
971                            value_range = capture.node.byte_range();
972                        }
973                    }
974
975                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
976                        scope.local_defs.push(LocalDef {
977                            name,
978                            value_range,
979                            highlight: None,
980                        });
981                        definition_highlight =
982                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
983                    }
984                }
985                // If the node represents a reference, then try to find the corresponding
986                // definition in the scope stack.
987                else if Some(capture.index) == layer.config.local_ref_capture_index
988                    && definition_highlight.is_none()
989                {
990                    definition_highlight = None;
991                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
992                        for scope in layer.scope_stack.iter().rev() {
993                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
994                                if def.name == name && range.start >= def.value_range.end {
995                                    Some(def.highlight)
996                                } else {
997                                    None
998                                }
999                            }) {
1000                                reference_highlight = highlight;
1001                                break;
1002                            }
1003                            if !scope.inherits {
1004                                break;
1005                            }
1006                        }
1007                    }
1008                }
1009
1010                // Continue processing any additional matches for the same node.
1011                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
1012                    let next_capture = next_match.captures[*next_capture_index];
1013                    if next_capture.node == capture.node {
1014                        capture = next_capture;
1015                        match_ = layer.captures.next().unwrap().0;
1016                        continue;
1017                    }
1018                }
1019
1020                self.sort_layers();
1021                continue 'main;
1022            }
1023
1024            // Otherwise, this capture must represent a highlight.
1025            // If this exact range has already been highlighted by an earlier pattern, or by
1026            // a different layer, then skip over this one.
1027            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
1028                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
1029                    self.sort_layers();
1030                    continue 'main;
1031                }
1032            }
1033
1034            // Once a highlighting pattern is found for the current node, keep iterating over
1035            // any later highlighting patterns that also match this node and set the match to it.
1036            // Captures for a given node are ordered by pattern index, so these subsequent
1037            // captures are guaranteed to be for highlighting, not injections or
1038            // local variables.
1039            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
1040                let next_capture = next_match.captures[*next_capture_index];
1041                if next_capture.node == capture.node {
1042                    let following_match = layer.captures.next().unwrap().0;
1043                    // If the current node was found to be a local variable, then ignore
1044                    // the following match if it's a highlighting pattern that is disabled
1045                    // for local variables.
1046                    if (definition_highlight.is_some() || reference_highlight.is_some())
1047                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
1048                    {
1049                        continue;
1050                    }
1051                    match_.remove();
1052                    capture = next_capture;
1053                    match_ = following_match;
1054                } else {
1055                    break;
1056                }
1057            }
1058
1059            let current_highlight = layer.config.highlight_indices[capture.index as usize];
1060
1061            // If this node represents a local definition, then store the current
1062            // highlight value on the local scope entry representing this node.
1063            if let Some(definition_highlight) = definition_highlight {
1064                *definition_highlight = current_highlight;
1065            }
1066
1067            // Emit a scope start event and push the node's end position to the stack.
1068            if let Some(highlight) = reference_highlight.or(current_highlight) {
1069                self.last_highlight_range = Some((range.start, range.end, layer.depth));
1070                layer.highlight_end_stack.push(range.end);
1071                return self
1072                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
1073            }
1074
1075            self.sort_layers();
1076        }
1077    }
1078}
1079
1080impl Default for HtmlRenderer {
1081    fn default() -> Self {
1082        Self::new()
1083    }
1084}
1085
1086impl HtmlRenderer {
1087    #[must_use]
1088    pub fn new() -> Self {
1089        let mut result = Self {
1090            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
1091            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
1092            carriage_return_highlight: None,
1093        };
1094        result.line_offsets.push(0);
1095        result
1096    }
1097
1098    pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
1099        self.carriage_return_highlight = highlight;
1100    }
1101
1102    pub fn reset(&mut self) {
1103        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
1104        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
1105        self.line_offsets.push(0);
1106    }
1107
1108    pub fn render<F>(
1109        &mut self,
1110        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
1111        source: &[u8],
1112        attribute_callback: &F,
1113    ) -> Result<(), Error>
1114    where
1115        F: Fn(Highlight, &mut Vec<u8>),
1116    {
1117        let mut highlights = Vec::new();
1118        for event in highlighter {
1119            match event {
1120                Ok(HighlightEvent::HighlightStart(s)) => {
1121                    highlights.push(s);
1122                    self.start_highlight(s, &attribute_callback);
1123                }
1124                Ok(HighlightEvent::HighlightEnd) => {
1125                    highlights.pop();
1126                    self.end_highlight();
1127                }
1128                Ok(HighlightEvent::Source { start, end }) => {
1129                    self.add_text(&source[start..end], &highlights, &attribute_callback);
1130                }
1131                Err(a) => return Err(a),
1132            }
1133        }
1134        if self.html.last() != Some(&b'\n') {
1135            self.html.push(b'\n');
1136        }
1137        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
1138            self.line_offsets.pop();
1139        }
1140        Ok(())
1141    }
1142
1143    pub fn lines(&self) -> impl Iterator<Item = &str> {
1144        self.line_offsets
1145            .iter()
1146            .enumerate()
1147            .map(move |(i, line_start)| {
1148                let line_start = *line_start as usize;
1149                let line_end = if i + 1 == self.line_offsets.len() {
1150                    self.html.len()
1151                } else {
1152                    self.line_offsets[i + 1] as usize
1153                };
1154                str::from_utf8(&self.html[line_start..line_end]).unwrap()
1155            })
1156    }
1157
1158    fn add_carriage_return<F>(&mut self, attribute_callback: &F)
1159    where
1160        F: Fn(Highlight, &mut Vec<u8>),
1161    {
1162        if let Some(highlight) = self.carriage_return_highlight {
1163            self.html.extend(b"<span ");
1164            (attribute_callback)(highlight, &mut self.html);
1165            self.html.extend(b"></span>");
1166        }
1167    }
1168
1169    fn start_highlight<F>(&mut self, h: Highlight, attribute_callback: &F)
1170    where
1171        F: Fn(Highlight, &mut Vec<u8>),
1172    {
1173        self.html.extend(b"<span ");
1174        (attribute_callback)(h, &mut self.html);
1175        self.html.extend(b">");
1176    }
1177
1178    fn end_highlight(&mut self) {
1179        self.html.extend(b"</span>");
1180    }
1181
1182    fn add_text<F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
1183    where
1184        F: Fn(Highlight, &mut Vec<u8>),
1185    {
1186        pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
1187            match c as char {
1188                '>' => Some(b"&gt;"),
1189                '<' => Some(b"&lt;"),
1190                '&' => Some(b"&amp;"),
1191                '\'' => Some(b"&#39;"),
1192                '"' => Some(b"&quot;"),
1193                _ => None,
1194            }
1195        }
1196
1197        let mut last_char_was_cr = false;
1198        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
1199            // Don't render carriage return characters, but allow lone carriage returns (not
1200            // followed by line feeds) to be styled via the attribute callback.
1201            if c == b'\r' {
1202                last_char_was_cr = true;
1203                continue;
1204            }
1205            if last_char_was_cr {
1206                if c != b'\n' {
1207                    self.add_carriage_return(attribute_callback);
1208                }
1209                last_char_was_cr = false;
1210            }
1211
1212            // At line boundaries, close and re-open all of the open tags.
1213            if c == b'\n' {
1214                highlights.iter().for_each(|_| self.end_highlight());
1215                self.html.push(c);
1216                self.line_offsets.push(self.html.len() as u32);
1217                highlights
1218                    .iter()
1219                    .for_each(|scope| self.start_highlight(*scope, attribute_callback));
1220            } else if let Some(escape) = html_escape(c) {
1221                self.html.extend_from_slice(escape);
1222            } else {
1223                self.html.push(c);
1224            }
1225        }
1226    }
1227}
1228
1229fn injection_for_match<'a>(
1230    config: &'a HighlightConfiguration,
1231    parent_name: Option<&'a str>,
1232    query: &'a Query,
1233    query_match: &QueryMatch<'a, 'a>,
1234    source: &'a [u8],
1235) -> (Option<&'a str>, Option<Node<'a>>, bool) {
1236    let content_capture_index = config.injection_content_capture_index;
1237    let language_capture_index = config.injection_language_capture_index;
1238
1239    let mut language_name = None;
1240    let mut content_node = None;
1241
1242    for capture in query_match.captures {
1243        let index = Some(capture.index);
1244        if index == language_capture_index {
1245            language_name = capture.node.utf8_text(source).ok();
1246        } else if index == content_capture_index {
1247            content_node = Some(capture.node);
1248        }
1249    }
1250
1251    let mut include_children = false;
1252    for prop in query.property_settings(query_match.pattern_index) {
1253        match prop.key.as_ref() {
1254            // In addition to specifying the language name via the text of a
1255            // captured node, it can also be hard-coded via a `#set!` predicate
1256            // that sets the injection.language key.
1257            "injection.language" => {
1258                if language_name.is_none() {
1259                    language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
1260                }
1261            }
1262
1263            // Setting the `injection.self` key can be used to specify that the
1264            // language name should be the same as the language of the current
1265            // layer.
1266            "injection.self" => {
1267                if language_name.is_none() {
1268                    language_name = Some(config.language_name.as_str());
1269                }
1270            }
1271
1272            // Setting the `injection.parent` key can be used to specify that
1273            // the language name should be the same as the language of the
1274            // parent layer
1275            "injection.parent" => {
1276                if language_name.is_none() {
1277                    language_name = parent_name;
1278                }
1279            }
1280
1281            // By default, injections do not include the *children* of an
1282            // `injection.content` node - only the ranges that belong to the
1283            // node itself. This can be changed using a `#set!` predicate that
1284            // sets the `injection.include-children` key.
1285            "injection.include-children" => include_children = true,
1286            _ => {}
1287        }
1288    }
1289
1290    (language_name, content_node, include_children)
1291}
1292
/// Empties `vec` and caps the allocation it keeps for reuse at `capacity` elements.
///
/// The decision is based on the allocated capacity, not on the number of elements held:
/// an allocation larger than `capacity` is shrunk down to (about) `capacity`, even if the
/// vector currently holds only a few elements, while an allocation of `capacity` elements
/// or fewer is left untouched. The allocator may keep slightly more than requested.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    // Clear first so that the length never limits how far the allocation can shrink.
    vec.clear();
    vec.shrink_to(capacity);
}