jellybean_tree_sitter_highlight/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod c_lib;
4pub mod util;
5pub use c_lib as c;
6
7use lazy_static::lazy_static;
8use std::collections::HashSet;
9use std::sync::atomic::{AtomicUsize, Ordering};
10use std::{iter, mem, ops, str, usize};
11use thiserror::Error;
12use tree_sitter::{
13    Language, LossyUtf8, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
14    QueryMatch, Range, SerializableQuery, SerializationError, Tree,
15};
16
// Number of iterations of the highlight iterator's main loop between two checks of the
// cancellation flag.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
// NOTE(review): not referenced in the visible part of this file; presumably the initial
// capacity (in bytes) reserved for `HtmlRenderer::html` — confirm against the renderer.
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
// NOTE(review): not referenced in the visible part of this file; presumably the initial
// capacity reserved for `HtmlRenderer::line_offsets` — confirm against the renderer.
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
20
21lazy_static! {
22    static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![
23        "attribute",
24        "boolean",
25        "carriage-return",
26        "comment",
27        "comment.documentation",
28        "constant",
29        "constant.builtin",
30        "constructor",
31        "constructor.builtin",
32        "embedded",
33        "error",
34        "escape",
35        "function",
36        "function.builtin",
37        "keyword",
38        "markup",
39        "markup.bold",
40        "markup.heading",
41        "markup.italic",
42        "markup.link",
43        "markup.link.url",
44        "markup.list",
45        "markup.list.checked",
46        "markup.list.numbered",
47        "markup.list.unchecked",
48        "markup.list.unnumbered",
49        "markup.quote",
50        "markup.raw",
51        "markup.raw.block",
52        "markup.raw.inline",
53        "markup.strikethrough",
54        "module",
55        "number",
56        "operator",
57        "property",
58        "property.builtin",
59        "punctuation",
60        "punctuation.bracket",
61        "punctuation.delimiter",
62        "punctuation.special",
63        "string",
64        "string.escape",
65        "string.regexp",
66        "string.special",
67        "string.special.symbol",
68        "tag",
69        "type",
70        "type.builtin",
71        "variable",
72        "variable.builtin",
73        "variable.member",
74        "variable.parameter",
75    ]
76    .into_iter()
77    .collect();
78}
79
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is the position of the matched name within the list of recognized
/// names that was passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Highlight(pub usize);
84
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// The parser returned no tree while a layer was being built, or the cancellation
    /// flag was found to be non-zero while iterating.
    #[error("Cancelled")]
    Cancelled,
    /// The parser refused the `Language` of a highlight configuration.
    #[error("Invalid language")]
    InvalidLanguage,
    // NOTE(review): nothing in the visible part of this file produces this variant.
    #[error("Unknown error")]
    Unknown,
}
95
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A chunk of the source, identified by the byte offsets `start..end`.
    Source { start: usize, end: usize },
    /// The given highlight begins at the current position.
    // NOTE(review): the code emitting this variant lies outside the visible part of the file.
    HighlightStart(Highlight),
    /// A previously started highlight ends at the current position.
    HighlightEnd,
}
103
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// The language used for parsing and for compiling the queries.
    pub language: Language,
    /// One query built from the injection, locals and highlights query strings,
    /// concatenated in that order.
    pub query: Query,
    /// The injection query on its own, with every pattern that is not marked
    /// `injection.combined` disabled. `None` when no injection pattern carries that
    /// property.
    combined_injections_query: Option<Query>,
    /// Plain data derived from the queries; also reachable through `Deref`/`DerefMut`.
    metadata: HighlightConfigurationMetadata,
}
113
/// The serializable, query-independent part of a `HighlightConfiguration`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct HighlightConfigurationMetadata {
    /// The name passed to `HighlightConfiguration::new`.
    pub language_name: String,
    /// The flag passed to `HighlightConfiguration::new`; copied into the highlight iterator.
    pub apply_all_captures: bool,
    /// Number of patterns that start before the locals section of the combined query,
    /// i.e. the index of the first locals pattern.
    locals_pattern_index: usize,
    /// Number of patterns that start before the highlights section of the combined query,
    /// i.e. the index of the first highlights pattern.
    highlights_pattern_index: usize,
    /// For each capture index of the combined query, the highlight chosen by `configure`,
    /// or `None` when no recognized name matched (or `configure` was never called).
    highlight_indices: Vec<Option<Highlight>>,
    /// For each pattern of the combined query, whether it carries a negated `local`
    /// property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Index of the `injection.content` capture, if the query defines it.
    injection_content_capture_index: Option<u32>,
    /// Index of the `injection.language` capture, if the query defines it.
    injection_language_capture_index: Option<u32>,
    /// Index of the `local.scope` capture, if the query defines it.
    local_scope_capture_index: Option<u32>,
    /// Index of the `local.definition` capture, if the query defines it.
    local_def_capture_index: Option<u32>,
    /// Index of the `local.definition-value` capture, if the query defines it.
    local_def_value_capture_index: Option<u32>,
    /// Index of the `local.reference` capture, if the query defines it.
    local_ref_capture_index: Option<u32>,
}
130
131impl std::ops::Deref for HighlightConfiguration {
132    type Target = HighlightConfigurationMetadata;
133
134    fn deref(&self) -> &Self::Target {
135        &self.metadata
136    }
137}
138
139impl std::ops::DerefMut for HighlightConfiguration {
140    fn deref_mut(&mut self) -> &mut Self::Target {
141        &mut self.metadata
142    }
143}
144
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// The parser used to build the syntax tree of every layer.
    parser: Parser,
    /// Pool of query cursors: a layer takes one when it is created and the iterator
    /// puts it back once the layer has no more events.
    cursors: Vec<QueryCursor>,
}
154
/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
// NOTE(review): the implementation is outside the visible part of this file, so the field
// notes below are assumptions to be confirmed against it.
pub struct HtmlRenderer {
    // Presumably the rendered HTML output, as raw bytes.
    pub html: Vec<u8>,
    // Presumably the offset within `html` at which each rendered line begins.
    pub line_offsets: Vec<u32>,
    // Presumably the highlight to apply to carriage-return characters, if one is configured.
    carriage_return_highlight: Option<Highlight>,
}
161
// A local definition recorded inside a `LocalScope`.
// NOTE(review): the code that fills and reads these fields is outside the visible part of
// this file; the field notes are assumptions based on the names and the `local.*` captures.
#[derive(Debug)]
struct LocalDef<'a> {
    // Presumably the defined identifier's text, borrowed from the source.
    name: &'a str,
    // Presumably the byte range of the `local.definition-value` node.
    value_range: ops::Range<usize>,
    // Presumably the highlight assigned to this definition, if any.
    highlight: Option<Highlight>,
}
168
// One entry of a layer's scope stack. Every layer starts with a single scope that has
// `inherits: false`, the range `0..usize::MAX` and no definitions.
#[derive(Debug)]
struct LocalScope<'a> {
    // NOTE(review): presumably whether lookups may continue into the enclosing scope —
    // the lookup code is outside the visible part of this file.
    inherits: bool,
    // Byte range covered by the scope.
    range: ops::Range<usize>,
    // Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}
175
// The iterator returned by `Highlighter::highlight`. It merges the events of all layers
// (the root document plus injected languages) into one stream of `HighlightEvent`s.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    // The bytes being highlighted.
    source: &'a [u8],
    // Language name of the root configuration.
    language_name: &'a str,
    // Every byte before this offset has already been reported in a `Source` event.
    byte_offset: usize,
    // Borrowed so that cursors of finished layers can be returned to its pool.
    highlighter: &'a mut Highlighter,
    // Maps a language name to the configuration to use for an injection, if any.
    injection_callback: F,
    // When present, polled periodically; a non-zero value aborts the iteration.
    cancellation_flag: Option<&'a AtomicUsize>,
    // Active layers; `sort_layers` keeps the one with the smallest sort key at index 0.
    layers: Vec<HighlightIterLayer<'a>>,
    // Main-loop iterations since the cancellation flag was last checked.
    iter_count: usize,
    // An event that was deferred by `emit_event` and is returned by the next call to `next`.
    next_event: Option<HighlightEvent>,
    // NOTE(review): not used in the visible part of this file; presumably
    // (start byte, end byte, layer depth) of the most recent highlight — confirm.
    last_highlight_range: Option<(usize, usize, usize)>,
    // Copied from the root configuration's `apply_all_captures`.
    apply_all_captures: bool,
}
192
// One parsed document (the root or an injection) together with its capture iterator.
struct HighlightIterLayer<'a> {
    // Owns the syntax tree that `captures` iterates over.
    _tree: Tree,
    // The cursor `captures` was created from; handed back to the highlighter's pool when
    // the layer is removed by `sort_layers`.
    cursor: QueryCursor,
    // Captures of `config.query` over the tree's root node, peekable so the next capture
    // can be inspected without consuming it.
    captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    // The configuration this layer was parsed and queried with.
    config: &'a HighlightConfiguration,
    // End byte offsets of highlights whose `HighlightEnd` has not been emitted yet; the
    // last element is the next one to end.
    highlight_end_stack: Vec<usize>,
    // Stack of local scopes; starts with one scope spanning the whole document.
    scope_stack: Vec<LocalScope<'a>>,
    // The included ranges this layer was parsed with.
    ranges: Vec<Range>,
    // Nesting depth: 0 for the root document, parent depth + 1 for an injection.
    depth: usize,
}
203
204impl Highlighter {
205    pub fn new() -> Self {
206        Highlighter {
207            parser: Parser::new(),
208            cursors: Vec::new(),
209        }
210    }
211
212    pub fn parser(&mut self) -> &mut Parser {
213        &mut self.parser
214    }
215
216    /// Iterate over the highlighted regions for a given slice of source code.
217    pub fn highlight<'a>(
218        &'a mut self,
219        config: &'a HighlightConfiguration,
220        source: &'a [u8],
221        cancellation_flag: Option<&'a AtomicUsize>,
222        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
223    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
224        let layers = HighlightIterLayer::new(
225            source,
226            None,
227            self,
228            cancellation_flag,
229            &mut injection_callback,
230            config,
231            0,
232            vec![Range {
233                start_byte: 0,
234                end_byte: usize::MAX,
235                start_point: Point::new(0, 0),
236                end_point: Point::new(usize::MAX, usize::MAX),
237            }],
238        )?;
239        assert_ne!(layers.len(), 0);
240        let mut result = HighlightIter {
241            source,
242            language_name: &config.language_name,
243            byte_offset: 0,
244            injection_callback,
245            cancellation_flag,
246            highlighter: self,
247            iter_count: 0,
248            layers,
249            next_event: None,
250            last_highlight_range: None,
251            apply_all_captures: config.apply_all_captures,
252        };
253        result.sort_layers();
254        Ok(result)
255    }
256}
257
/// A form of `HighlightConfiguration` whose queries have been converted to
/// `SerializableQuery`. Produced by `HighlightConfiguration::serializable` and turned back
/// into a configuration by `HighlightConfiguration::deserialize`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SerializableHighlightConfig {
    /// Serializable form of the main (combined) query.
    query: SerializableQuery,
    /// Serializable form of the combined-injections query, if the configuration had one.
    combined_injections_query: Option<SerializableQuery>,
    /// The configuration's metadata, carried over unchanged.
    pub metadata: HighlightConfigurationMetadata,
}
265
266impl HighlightConfiguration {
267    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
268    /// queries.
269    ///
270    /// # Parameters
271    ///
272    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
273    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
274    ///   should be non-empty, otherwise no syntax highlights will be added.
275    /// * `injections_query` -  A string containing tree patterns for injecting other languages
276    ///   into the document. This can be empty if no injections are desired.
277    /// * `locals_query` - A string containing tree patterns for tracking local variable
278    ///   definitions and references. This can be empty if local variable tracking is not needed.
279    ///
280    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
281    pub fn new(
282        language: Language,
283        name: impl Into<String>,
284        highlights_query: &str,
285        injection_query: &str,
286        locals_query: &str,
287        apply_all_captures: bool,
288    ) -> Result<Self, QueryError> {
289        // Concatenate the query strings, keeping track of the start offset of each section.
290        let mut query_source = String::new();
291        query_source.push_str(injection_query);
292        let locals_query_offset = query_source.len();
293        query_source.push_str(locals_query);
294        let highlights_query_offset = query_source.len();
295        query_source.push_str(highlights_query);
296
297        // Construct a single query by concatenating the three query strings, but record the
298        // range of pattern indices that belong to each individual string.
299        let mut query = Query::new(language, &query_source)?;
300        let mut locals_pattern_index = 0;
301        let mut highlights_pattern_index = 0;
302        for i in 0..(query.pattern_count()) {
303            let pattern_offset = query.start_byte_for_pattern(i);
304            if pattern_offset < highlights_query_offset {
305                if pattern_offset < highlights_query_offset {
306                    highlights_pattern_index += 1;
307                }
308                if pattern_offset < locals_query_offset {
309                    locals_pattern_index += 1;
310                }
311            }
312        }
313
314        // Construct a separate query just for dealing with the 'combined injections'.
315        // Disable the combined injection patterns in the main query.
316        let mut combined_injections_query = Query::new(language, injection_query)?;
317        let mut has_combined_queries = false;
318        for pattern_index in 0..locals_pattern_index {
319            let settings = query.property_settings(pattern_index);
320            if settings.iter().any(|s| &*s.key == "injection.combined") {
321                has_combined_queries = true;
322                query.disable_pattern(pattern_index);
323            } else {
324                combined_injections_query.disable_pattern(pattern_index);
325            }
326        }
327        let combined_injections_query = if has_combined_queries {
328            Some(combined_injections_query)
329        } else {
330            None
331        };
332
333        // Find all of the highlighting patterns that are disabled for nodes that
334        // have been identified as local variables.
335        let non_local_variable_patterns = (0..query.pattern_count())
336            .map(|i| {
337                query
338                    .property_predicates(i)
339                    .iter()
340                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
341            })
342            .collect();
343
344        // Store the numeric ids for all of the special captures.
345        let mut injection_content_capture_index = None;
346        let mut injection_language_capture_index = None;
347        let mut local_def_capture_index = None;
348        let mut local_def_value_capture_index = None;
349        let mut local_ref_capture_index = None;
350        let mut local_scope_capture_index = None;
351        for (i, name) in query.capture_names().iter().enumerate() {
352            let i = Some(i as u32);
353            match name.as_str() {
354                "injection.content" => injection_content_capture_index = i,
355                "injection.language" => injection_language_capture_index = i,
356                "local.definition" => local_def_capture_index = i,
357                "local.definition-value" => local_def_value_capture_index = i,
358                "local.reference" => local_ref_capture_index = i,
359                "local.scope" => local_scope_capture_index = i,
360                _ => {}
361            }
362        }
363
364        let highlight_indices = vec![None; query.capture_names().len()];
365        Ok(HighlightConfiguration {
366            language,
367            query,
368            combined_injections_query,
369            metadata: HighlightConfigurationMetadata {
370                language_name: name.into(),
371                apply_all_captures,
372                locals_pattern_index,
373                highlights_pattern_index,
374                highlight_indices,
375                non_local_variable_patterns,
376                injection_content_capture_index,
377                injection_language_capture_index,
378                local_def_capture_index,
379                local_def_value_capture_index,
380                local_ref_capture_index,
381                local_scope_capture_index,
382            },
383        })
384    }
385
    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the combined query, so the list also contains the
    /// special `injection.*` and `local.*` captures when the queries define them.
    pub fn names(&self) -> &[String] {
        self.query.capture_names()
    }
390
391    /// Set the list of recognized highlight names.
392    ///
393    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
394    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
395    /// these queries can choose to recognize highlights with different levels of specificity.
396    /// For example, the string `function.builtin` will match against `function.method.builtin`
397    /// and `function.builtin.constructor`, but will not match `function.method`.
398    ///
399    /// When highlighting, results are returned as `Highlight` values, which contain the index
400    /// of the matched highlight this list of highlight names.
401    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
402        let mut capture_parts = Vec::new();
403        self.highlight_indices.clear();
404        self.metadata
405            .highlight_indices
406            .extend(self.query.capture_names().iter().map(move |capture_name| {
407                capture_parts.clear();
408                capture_parts.extend(capture_name.split('.'));
409
410                let mut best_index = None;
411                let mut best_match_len = 0;
412                for (i, recognized_name) in recognized_names.into_iter().enumerate() {
413                    let mut len = 0;
414                    let mut matches = true;
415                    for part in recognized_name.as_ref().split('.') {
416                        len += 1;
417                        if !capture_parts.contains(&part) {
418                            matches = false;
419                            break;
420                        }
421                    }
422                    if matches && len > best_match_len {
423                        best_index = Some(i);
424                        best_match_len = len;
425                    }
426                }
427                best_index.map(Highlight)
428            }));
429    }
430
431    // Return the list of this configuration's capture names that are neither present in the
432    // list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures
433    // used as part of capture internals).
434    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
435        let capture_names = if capture_names.is_empty() {
436            &*STANDARD_CAPTURE_NAMES
437        } else {
438            &capture_names
439        };
440        self.names()
441            .iter()
442            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n.as_str())))
443            .map(|n| n.as_str())
444            .collect()
445    }
446
447    /// Convert `self` into a serializable version.
448    pub fn serializable(self) -> Result<SerializableHighlightConfig, SerializationError> {
449        Ok(SerializableHighlightConfig {
450            query: self.query.serializable()?,
451            combined_injections_query: self
452                .combined_injections_query
453                .map(|q| q.serializable())
454                .transpose()?,
455            metadata: self.metadata,
456        })
457    }
458
459    /// Deserialize the serializable version of a highlight configuration.
460    pub fn deserialize(
461        serialized: SerializableHighlightConfig,
462        language: Language,
463    ) -> Result<Self, SerializationError> {
464        Ok(HighlightConfiguration {
465            query: Query::deserialize(serialized.query, language)?,
466            combined_injections_query: serialized
467                .combined_injections_query
468                .map(|data| Query::deserialize(data, language))
469                .transpose()?,
470            metadata: serialized.metadata,
471            language,
472        })
473    }
474}
475
476impl<'a> HighlightIterLayer<'a> {
477    /// Create a new 'layer' of highlighting for this document.
478    ///
479    /// In the even that the new layer contains "combined injections" (injections where multiple
480    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
481    /// added to the returned vector.
482    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
483        source: &'a [u8],
484        parent_name: Option<&str>,
485        highlighter: &mut Highlighter,
486        cancellation_flag: Option<&'a AtomicUsize>,
487        injection_callback: &mut F,
488        mut config: &'a HighlightConfiguration,
489        mut depth: usize,
490        mut ranges: Vec<Range>,
491    ) -> Result<Vec<Self>, Error> {
492        let mut result = Vec::with_capacity(1);
493        let mut queue = Vec::new();
494        loop {
495            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
496                highlighter
497                    .parser
498                    .set_language(config.language)
499                    .map_err(|_| Error::InvalidLanguage)?;
500
501                unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) };
502                let tree = highlighter
503                    .parser
504                    .parse(source, None)
505                    .ok_or(Error::Cancelled)?;
506                unsafe { highlighter.parser.set_cancellation_flag(None) };
507                let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new());
508
509                // Process combined injections.
510                if let Some(combined_injections_query) = &config.combined_injections_query {
511                    let mut injections_by_pattern_index =
512                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
513                    let matches =
514                        cursor.matches(combined_injections_query, tree.root_node(), source);
515                    for mat in matches {
516                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
517                        let (language_name, content_node, include_children) = injection_for_match(
518                            config,
519                            parent_name,
520                            combined_injections_query,
521                            &mat,
522                            source,
523                        );
524                        if language_name.is_some() {
525                            entry.0 = language_name;
526                        }
527                        if let Some(content_node) = content_node {
528                            entry.1.push(content_node);
529                        }
530                        entry.2 = include_children;
531                    }
532                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
533                    {
534                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
535                            if let Some(next_config) = (injection_callback)(lang_name) {
536                                let ranges = Self::intersect_ranges(
537                                    &ranges,
538                                    &content_nodes,
539                                    includes_children,
540                                );
541                                if !ranges.is_empty() {
542                                    queue.push((next_config, depth + 1, ranges));
543                                }
544                            }
545                        }
546                    }
547                }
548
549                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
550                // prevents them from being moved. But both of these values are really just
551                // pointers, so it's actually ok to move them.
552                let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) };
553                let cursor_ref =
554                    unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
555                let captures = cursor_ref
556                    .captures(&config.query, tree_ref.root_node(), source)
557                    .peekable();
558
559                result.push(HighlightIterLayer {
560                    highlight_end_stack: Vec::new(),
561                    scope_stack: vec![LocalScope {
562                        inherits: false,
563                        range: 0..usize::MAX,
564                        local_defs: Vec::new(),
565                    }],
566                    cursor,
567                    depth,
568                    _tree: tree,
569                    captures,
570                    config,
571                    ranges,
572                });
573            }
574
575            if queue.is_empty() {
576                break;
577            } else {
578                let (next_config, next_depth, next_ranges) = queue.remove(0);
579                config = next_config;
580                depth = next_depth;
581                ranges = next_ranges;
582            }
583        }
584
585        Ok(result)
586    }
587
588    // Compute the ranges that should be included when parsing an injection.
589    // This takes into account three things:
590    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
591    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges
592    //   are the ranges of those nodes.
593    // * `includes_children` - For some injections, the content nodes' children should be
594    //   excluded from the nested document, so that only the content nodes' *own* content
595    //   is reparsed. For other injections, the content nodes' entire ranges should be
596    //   reparsed, including the ranges of their children.
597    fn intersect_ranges(
598        parent_ranges: &[Range],
599        nodes: &[Node],
600        includes_children: bool,
601    ) -> Vec<Range> {
602        let mut cursor = nodes[0].walk();
603        let mut result = Vec::new();
604        let mut parent_range_iter = parent_ranges.iter();
605        let mut parent_range = parent_range_iter
606            .next()
607            .expect("Layers should only be constructed with non-empty ranges vectors");
608        for node in nodes.iter() {
609            let mut preceding_range = Range {
610                start_byte: 0,
611                start_point: Point::new(0, 0),
612                end_byte: node.start_byte(),
613                end_point: node.start_position(),
614            };
615            let following_range = Range {
616                start_byte: node.end_byte(),
617                start_point: node.end_position(),
618                end_byte: usize::MAX,
619                end_point: Point::new(usize::MAX, usize::MAX),
620            };
621
622            for excluded_range in node
623                .children(&mut cursor)
624                .filter_map(|child| {
625                    if includes_children {
626                        None
627                    } else {
628                        Some(child.range())
629                    }
630                })
631                .chain([following_range].iter().cloned())
632            {
633                let mut range = Range {
634                    start_byte: preceding_range.end_byte,
635                    start_point: preceding_range.end_point,
636                    end_byte: excluded_range.start_byte,
637                    end_point: excluded_range.start_point,
638                };
639                preceding_range = excluded_range;
640
641                if range.end_byte < parent_range.start_byte {
642                    continue;
643                }
644
645                while parent_range.start_byte <= range.end_byte {
646                    if parent_range.end_byte > range.start_byte {
647                        if range.start_byte < parent_range.start_byte {
648                            range.start_byte = parent_range.start_byte;
649                            range.start_point = parent_range.start_point;
650                        }
651
652                        if parent_range.end_byte < range.end_byte {
653                            if range.start_byte < parent_range.end_byte {
654                                result.push(Range {
655                                    start_byte: range.start_byte,
656                                    start_point: range.start_point,
657                                    end_byte: parent_range.end_byte,
658                                    end_point: parent_range.end_point,
659                                });
660                            }
661                            range.start_byte = parent_range.end_byte;
662                            range.start_point = parent_range.end_point;
663                        } else {
664                            if range.start_byte < range.end_byte {
665                                result.push(range);
666                            }
667                            break;
668                        }
669                    }
670
671                    if let Some(next_range) = parent_range_iter.next() {
672                        parent_range = next_range;
673                    } else {
674                        return result;
675                    }
676                }
677            }
678        }
679        result
680    }
681
682    // First, sort scope boundaries by their byte offset in the document. At a
683    // given position, emit scope endings before scope beginnings. Finally, emit
684    // scope boundaries from deeper layers first.
685    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
686        let depth = -(self.depth as isize);
687        let next_start = self
688            .captures
689            .peek()
690            .map(|(m, i)| m.captures[*i].node.start_byte());
691        let next_end = self.highlight_end_stack.last().cloned();
692        match (next_start, next_end) {
693            (Some(start), Some(end)) => {
694                if start < end {
695                    Some((start, true, depth))
696                } else {
697                    Some((end, false, depth))
698                }
699            }
700            (Some(i), None) => Some((i, true, depth)),
701            (None, Some(j)) => Some((j, false, depth)),
702            _ => None,
703        }
704    }
705}
706
707impl<'a, F> HighlightIter<'a, F>
708where
709    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
710{
711    fn emit_event(
712        &mut self,
713        offset: usize,
714        event: Option<HighlightEvent>,
715    ) -> Option<Result<HighlightEvent, Error>> {
716        let result;
717        if self.byte_offset < offset {
718            result = Some(Ok(HighlightEvent::Source {
719                start: self.byte_offset,
720                end: offset,
721            }));
722            self.byte_offset = offset;
723            self.next_event = event;
724        } else {
725            result = event.map(Ok);
726        }
727        self.sort_layers();
728        result
729    }
730
731    fn sort_layers(&mut self) {
732        while !self.layers.is_empty() {
733            if let Some(sort_key) = self.layers[0].sort_key() {
734                let mut i = 0;
735                while i + 1 < self.layers.len() {
736                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
737                        if next_offset < sort_key {
738                            i += 1;
739                            continue;
740                        }
741                    }
742                    break;
743                }
744                if i > 0 {
745                    self.layers[0..(i + 1)].rotate_left(1);
746                }
747                break;
748            } else {
749                let layer = self.layers.remove(0);
750                self.highlighter.cursors.push(layer.cursor);
751            }
752        }
753    }
754
755    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
756        if let Some(sort_key) = layer.sort_key() {
757            let mut i = 1;
758            while i < self.layers.len() {
759                if let Some(sort_key_i) = self.layers[i].sort_key() {
760                    if sort_key_i > sort_key {
761                        self.layers.insert(i, layer);
762                        return;
763                    }
764                    i += 1;
765                } else {
766                    self.layers.remove(i);
767                }
768            }
769            self.layers.push(layer);
770        }
771    }
772}
773
/// Iterator over the highlight events of a document.
///
/// Each item is either the next [`HighlightEvent`] or an [`Error`]. Captures are pulled from
/// `self.layers[0]`; injections discovered along the way add further layers via
/// `insert_layer`, and `sort_layers` is called after each processed capture.
impl<'a, F> Iterator for HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    /// A highlight event, or the error that interrupted highlighting.
    type Item = Result<HighlightEvent, Error>;

    /// Advances the highlighter and returns the next event.
    ///
    /// Each pass through the main loop:
    /// 1. returns a pending `next_event`, if one was queued;
    /// 2. when a cancellation flag was supplied, inspects it once every
    ///    `CANCELLATION_CHECK_INTERVAL` passes and returns `Error::Cancelled` if it is non-zero;
    /// 3. once no layers remain, returns any remaining source as a final `Source` event and
    ///    `None` after that;
    /// 4. otherwise takes the next capture from `self.layers[0]` and treats it as an
    ///    injection, a local-variable capture, or a highlight, depending on where its pattern
    ///    index falls relative to `locals_pattern_index` and `highlights_pattern_index`.
    fn next(&mut self) -> Option<Self::Item> {
        'main: loop {
            // If we've already determined the next highlight boundary, just return it.
            if let Some(e) = self.next_event.take() {
                return Some(Ok(e));
            }

            // Periodically check for cancellation, returning `Cancelled` error if the
            // cancellation flag was flipped.
            if let Some(cancellation_flag) = self.cancellation_flag {
                self.iter_count += 1;
                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
                    self.iter_count = 0;
                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
                        return Some(Err(Error::Cancelled));
                    }
                }
            }

            // If none of the layers have any more highlight boundaries, terminate.
            if self.layers.is_empty() {
                return if self.byte_offset < self.source.len() {
                    let result = Some(Ok(HighlightEvent::Source {
                        start: self.byte_offset,
                        end: self.source.len(),
                    }));
                    self.byte_offset = self.source.len();
                    result
                } else {
                    None
                };
            }

            // Get the next capture from whichever layer has the earliest highlight boundary.
            // `range` is only assigned on the path that falls through to the code below; the
            // other two branches return.
            let range;
            let layer = &mut self.layers[0];
            if let Some((next_match, capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*capture_index];
                range = next_capture.node.byte_range();

                // If any previous highlight ends before this node starts, then before
                // processing this capture, emit the source code up until the end of the
                // previous highlight, and an end event for that highlight.
                if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
                    if end_byte <= range.start {
                        layer.highlight_end_stack.pop();
                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
                    }
                }
            }
            // If there are no more captures, then emit any remaining highlight end events.
            // And if there are none of those, then just advance to the end of the document.
            else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
                layer.highlight_end_stack.pop();
                return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
            } else {
                return self.emit_event(self.source.len(), None);
            };

            // The peek above succeeded, so consuming the capture cannot fail.
            let (mut match_, capture_index) = layer.captures.next().unwrap();
            let mut capture = match_.captures[capture_index];

            // If this capture represents an injection, then process the injection.
            if match_.pattern_index < layer.config.locals_pattern_index {
                let (language_name, content_node, include_children) = injection_for_match(
                    layer.config,
                    Some(self.language_name),
                    &layer.config.query,
                    &match_,
                    self.source,
                );

                // Explicitly remove this match so that none of its other captures will remain
                // in the stream of captures.
                match_.remove();

                // If a language is found with the given name, then add a new language layer
                // to the highlighted document.
                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
                    if let Some(config) = (self.injection_callback)(language_name) {
                        // Restrict the injected content to the ranges of the current layer.
                        let ranges = HighlightIterLayer::intersect_ranges(
                            &self.layers[0].ranges,
                            &[content_node],
                            include_children,
                        );
                        if !ranges.is_empty() {
                            // The new layers are created one level deeper than the current one.
                            match HighlightIterLayer::new(
                                self.source,
                                Some(self.language_name),
                                self.highlighter,
                                self.cancellation_flag,
                                &mut self.injection_callback,
                                config,
                                self.layers[0].depth + 1,
                                ranges,
                            ) {
                                Ok(layers) => {
                                    for layer in layers {
                                        self.insert_layer(layer);
                                    }
                                }
                                Err(e) => return Some(Err(e)),
                            }
                        }
                    }
                }

                self.sort_layers();
                continue 'main;
            }

            // Remove from the local scope stack any local scopes that have already ended.
            // NOTE(review): the `unwrap` assumes the stack never becomes empty, presumably
            // because its bottom entry spans the whole document; confirm where the stack is
            // initialised.
            while range.start > layer.scope_stack.last().unwrap().range.end {
                layer.scope_stack.pop();
            }

            // If this capture is for tracking local variables, then process the
            // local variable info.
            let mut reference_highlight = None;
            let mut definition_highlight = None;
            // This loop repeats only through the `continue` near its end (another match on the
            // same node); every other path leaves via `continue 'main`.
            while match_.pattern_index < layer.config.highlights_pattern_index {
                // If the node represents a local scope, push a new local scope onto
                // the scope stack.
                if Some(capture.index) == layer.config.local_scope_capture_index {
                    // Drop any `&mut` into `scope_stack` left by a definition capture on an
                    // earlier pass of this loop before the stack is modified below.
                    definition_highlight = None;
                    let mut scope = LocalScope {
                        inherits: true,
                        range: range.clone(),
                        local_defs: Vec::new(),
                    };
                    // A `local.scope-inherits` property with a value other than "true" turns
                    // inheritance off; a property without a value leaves it on.
                    for prop in layer.config.query.property_settings(match_.pattern_index) {
                        match prop.key.as_ref() {
                            "local.scope-inherits" => {
                                scope.inherits =
                                    prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
                            }
                            _ => {}
                        }
                    }
                    layer.scope_stack.push(scope);
                }
                // If the node represents a definition, add a new definition to the
                // local scope at the top of the scope stack.
                else if Some(capture.index) == layer.config.local_def_capture_index {
                    reference_highlight = None;
                    definition_highlight = None;
                    let scope = layer.scope_stack.last_mut().unwrap();

                    // Stays empty (`0..0`) when the match has no definition-value capture.
                    let mut value_range = 0..0;
                    for capture in match_.captures {
                        if Some(capture.index) == layer.config.local_def_value_capture_index {
                            value_range = capture.node.byte_range();
                        }
                    }

                    // Definitions whose text is not valid UTF-8 are not recorded.
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        scope.local_defs.push(LocalDef {
                            name,
                            value_range,
                            highlight: None,
                        });
                        // Keep a handle to the new entry so its highlight can be filled in
                        // once a highlight pattern for this node has been chosen.
                        definition_highlight =
                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
                    }
                }
                // If the node represents a reference, then try to find the corresponding
                // definition in the scope stack.
                else if Some(capture.index) == layer.config.local_ref_capture_index {
                    if definition_highlight.is_none() {
                        // No-op at runtime (the value is already `None`).
                        // NOTE(review): likely kept so the borrow checker treats any earlier
                        // `&mut` into `scope_stack` as dead before the shared iteration
                        // below; confirm before removing.
                        definition_highlight = None;
                        if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                            // Search innermost scope first, newest definition first.
                            for scope in layer.scope_stack.iter().rev() {
                                if let Some(highlight) =
                                    scope.local_defs.iter().rev().find_map(|def| {
                                        // A definition only applies to references that start at
                                        // or after the end of its value range.
                                        if def.name == name && range.start >= def.value_range.end {
                                            Some(def.highlight)
                                        } else {
                                            None
                                        }
                                    })
                                {
                                    reference_highlight = highlight;
                                    break;
                                }
                                // Stop at a scope that does not inherit from its parents.
                                if !scope.inherits {
                                    break;
                                }
                            }
                        }
                    }
                }

                // Continue processing any additional matches for the same node.
                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                    let next_capture = next_match.captures[*next_capture_index];
                    if next_capture.node == capture.node {
                        capture = next_capture;
                        match_ = layer.captures.next().unwrap().0;
                        continue;
                    }
                }

                self.sort_layers();
                continue 'main;
            }

            // Otherwise, this capture must represent a highlight.
            // If this exact range has already been highlighted by an earlier pattern, or by
            // a different layer, then skip over this one.
            // As written, the skip applies only when the current layer's depth is smaller than
            // the depth recorded for the previous highlight of the same range.
            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
                    self.sort_layers();
                    continue 'main;
                }
            }

            // If the current node was found to be a local variable, then skip over any
            // highlighting patterns that are disabled for local variables.
            if definition_highlight.is_some() || reference_highlight.is_some() {
                // Repeats only through the inner `continue` (next capture on the same node).
                while layer.config.non_local_variable_patterns[match_.pattern_index] {
                    match_.remove();
                    if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                        let next_capture = next_match.captures[*next_capture_index];
                        if next_capture.node == capture.node {
                            capture = next_capture;
                            match_ = layer.captures.next().unwrap().0;
                            continue;
                        }
                    }

                    self.sort_layers();
                    continue 'main;
                }
            }

            // Once a highlighting pattern is found for the current node, skip over
            // any later highlighting patterns that also match this node. Captures
            // for a given node are ordered by pattern index, so these subsequent
            // captures are guaranteed to be for highlighting, not injections or
            // local variables.
            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*next_capture_index];
                if next_capture.node == capture.node {
                    if self.apply_all_captures {
                        // Advance to the later capture instead of discarding it, so the last
                        // capture on this node is the one used below.
                        match_.remove();
                        capture = next_capture;
                        match_ = layer.captures.next().unwrap().0;
                    } else {
                        // Keep the first capture and discard the later one.
                        layer.captures.next();
                    }
                } else {
                    break;
                }
            }

            let current_highlight = layer.config.highlight_indices[capture.index as usize];

            // If this node represents a local definition, then store the current
            // highlight value on the local scope entry representing this node.
            if let Some(definition_highlight) = definition_highlight {
                *definition_highlight = current_highlight;
            }

            // Emit a scope start event and push the node's end position to the stack.
            // A highlight resolved from a local definition takes precedence over the
            // pattern's own highlight.
            if let Some(highlight) = reference_highlight.or(current_highlight) {
                self.last_highlight_range = Some((range.start, range.end, layer.depth));
                layer.highlight_end_stack.push(range.end);
                return self
                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
            }

            // The capture maps to no highlight: move on to the next one.
            self.sort_layers();
        }
    }
}
1055
1056impl HtmlRenderer {
1057    pub fn new() -> Self {
1058        let mut result = HtmlRenderer {
1059            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
1060            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
1061            carriage_return_highlight: None,
1062        };
1063        result.line_offsets.push(0);
1064        result
1065    }
1066
1067    pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
1068        self.carriage_return_highlight = highlight;
1069    }
1070
1071    pub fn reset(&mut self) {
1072        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
1073        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
1074        self.line_offsets.push(0);
1075    }
1076
1077    pub fn render<'a, F>(
1078        &mut self,
1079        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
1080        source: &'a [u8],
1081        attribute_callback: &F,
1082    ) -> Result<(), Error>
1083    where
1084        F: Fn(Highlight) -> &'a [u8],
1085    {
1086        let mut highlights = Vec::new();
1087        for event in highlighter {
1088            match event {
1089                Ok(HighlightEvent::HighlightStart(s)) => {
1090                    highlights.push(s);
1091                    self.start_highlight(s, attribute_callback);
1092                }
1093                Ok(HighlightEvent::HighlightEnd) => {
1094                    highlights.pop();
1095                    self.end_highlight();
1096                }
1097                Ok(HighlightEvent::Source { start, end }) => {
1098                    self.add_text(&source[start..end], &highlights, attribute_callback);
1099                }
1100                Err(a) => return Err(a),
1101            }
1102        }
1103        if self.html.last() != Some(&b'\n') {
1104            self.html.push(b'\n');
1105        }
1106        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
1107            self.line_offsets.pop();
1108        }
1109        Ok(())
1110    }
1111
1112    pub fn lines(&self) -> impl Iterator<Item = &str> {
1113        self.line_offsets
1114            .iter()
1115            .enumerate()
1116            .map(move |(i, line_start)| {
1117                let line_start = *line_start as usize;
1118                let line_end = if i + 1 == self.line_offsets.len() {
1119                    self.html.len()
1120                } else {
1121                    self.line_offsets[i + 1] as usize
1122                };
1123                str::from_utf8(&self.html[line_start..line_end]).unwrap()
1124            })
1125    }
1126
1127    fn add_carriage_return<'a, F>(&mut self, attribute_callback: &F)
1128    where
1129        F: Fn(Highlight) -> &'a [u8],
1130    {
1131        if let Some(highlight) = self.carriage_return_highlight {
1132            let attribute_string = (attribute_callback)(highlight);
1133            if !attribute_string.is_empty() {
1134                self.html.extend(b"<span ");
1135                self.html.extend(attribute_string);
1136                self.html.extend(b"></span>");
1137            }
1138        }
1139    }
1140
1141    fn start_highlight<'a, F>(&mut self, h: Highlight, attribute_callback: &F)
1142    where
1143        F: Fn(Highlight) -> &'a [u8],
1144    {
1145        let attribute_string = (attribute_callback)(h);
1146        self.html.extend(b"<span");
1147        if !attribute_string.is_empty() {
1148            self.html.extend(b" ");
1149            self.html.extend(attribute_string);
1150        }
1151        self.html.extend(b">");
1152    }
1153
1154    fn end_highlight(&mut self) {
1155        self.html.extend(b"</span>");
1156    }
1157
1158    fn add_text<'a, F>(&mut self, src: &[u8], highlights: &Vec<Highlight>, attribute_callback: &F)
1159    where
1160        F: Fn(Highlight) -> &'a [u8],
1161    {
1162        let mut last_char_was_cr = false;
1163        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
1164            // Don't render carriage return characters, but allow lone carriage returns (not
1165            // followed by line feeds) to be styled via the attribute callback.
1166            if c == b'\r' {
1167                last_char_was_cr = true;
1168                continue;
1169            }
1170            if last_char_was_cr {
1171                if c != b'\n' {
1172                    self.add_carriage_return(attribute_callback);
1173                }
1174                last_char_was_cr = false;
1175            }
1176
1177            // At line boundaries, close and re-open all of the open tags.
1178            if c == b'\n' {
1179                highlights.iter().for_each(|_| self.end_highlight());
1180                self.html.push(c);
1181                self.line_offsets.push(self.html.len() as u32);
1182                highlights
1183                    .iter()
1184                    .for_each(|scope| self.start_highlight(*scope, attribute_callback));
1185            } else if let Some(escape) = util::html_escape(c) {
1186                self.html.extend_from_slice(escape);
1187            } else {
1188                self.html.push(c);
1189            }
1190        }
1191    }
1192}
1193
1194fn injection_for_match<'a>(
1195    config: &'a HighlightConfiguration,
1196    parent_name: Option<&'a str>,
1197    query: &'a Query,
1198    query_match: &QueryMatch<'a, 'a>,
1199    source: &'a [u8],
1200) -> (Option<&'a str>, Option<Node<'a>>, bool) {
1201    let content_capture_index = config.injection_content_capture_index;
1202    let language_capture_index = config.injection_language_capture_index;
1203
1204    let mut language_name = None;
1205    let mut content_node = None;
1206
1207    for capture in query_match.captures {
1208        let index = Some(capture.index);
1209        if index == language_capture_index {
1210            language_name = capture.node.utf8_text(source).ok();
1211        } else if index == content_capture_index {
1212            content_node = Some(capture.node);
1213        }
1214    }
1215
1216    let mut include_children = false;
1217    for prop in query.property_settings(query_match.pattern_index) {
1218        match prop.key.as_ref() {
1219            // In addition to specifying the language name via the text of a
1220            // captured node, it can also be hard-coded via a `#set!` predicate
1221            // that sets the injection.language key.
1222            "injection.language" => {
1223                if language_name.is_none() {
1224                    language_name = prop.value.as_ref().map(|s| s.as_ref());
1225                }
1226            }
1227
1228            // Setting the `injection.self` key can be used to specify that the
1229            // language name should be the same as the language of the current
1230            // layer.
1231            "injection.self" => {
1232                if language_name.is_none() {
1233                    language_name = Some(config.language_name.as_str());
1234                }
1235            }
1236
1237            // Setting the `injection.parent` key can be used to specify that
1238            // the language name should be the same as the language of the
1239            // parent layer
1240            "injection.parent" => {
1241                if language_name.is_none() {
1242                    language_name = parent_name;
1243                }
1244            }
1245
1246            // By default, injections do not include the *children* of an
1247            // `injection.content` node - only the ranges that belong to the
1248            // node itself. This can be changed using a `#set!` predicate that
1249            // sets the `injection.include-children` key.
1250            "injection.include-children" => include_children = true,
1251            _ => {}
1252        }
1253    }
1254
1255    (language_name, content_node, include_children)
1256}
1257
/// Empties `vec` and makes sure it does not keep more than `capacity` elements of storage.
///
/// After the call `vec` is empty. If its allocation had grown beyond `capacity` elements,
/// the buffer is replaced by a fresh one sized for `capacity` elements; otherwise the
/// existing allocation is kept for reuse.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    // Drop the contents first so that nothing has to be copied when the buffer is swapped.
    vec.clear();
    // Decide by allocated capacity rather than length: a short vector can still own a
    // large buffer.
    if vec.capacity() > capacity {
        *vec = Vec::with_capacity(capacity);
    }
}