tree_sitter_highlight/
c_lib.rs

1use std::{
2    collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str,
3    sync::atomic::AtomicUsize,
4};
5
6use regex::Regex;
7use tree_sitter::Language;
8
9use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer};
10
/// Highlighting state shared by every language registered through the C API.
///
/// Instances are heap-allocated by [`ts_highlighter_new`] and handed to callers as a raw pointer.
pub struct TSHighlighter {
    /// Registered languages keyed by scope name. Each entry holds the optional injection regex
    /// and the highlight configuration supplied to [`ts_highlighter_add_language`].
    pub languages: HashMap<String, (Option<Regex>, HighlightConfiguration)>,
    /// Bytes written to the output for each highlight, indexed by highlight index. These borrow
    /// the caller-provided C strings, which is why the caller must keep them alive.
    pub attribute_strings: Vec<&'static [u8]>,
    /// Highlight names, used to configure every language that gets added.
    pub highlight_names: Vec<String>,
    /// Position of the `"carriage-return"` entry in `highlight_names`, if there is one.
    pub carriage_return_index: Option<usize>,
}
17
/// Reusable working state and output storage for one highlighting run.
///
/// Instances are heap-allocated by [`ts_highlight_buffer_new`] and handed to callers as a raw
/// pointer.
pub struct TSHighlightBuffer {
    /// Highlighter that produces the highlight events for the source code.
    highlighter: Highlighter,
    /// Renderer holding the generated HTML bytes and line offsets that the
    /// `ts_highlight_buffer_*` accessors expose.
    renderer: HtmlRenderer,
}
22
/// Status codes returned across the C API boundary.
///
/// The enum is `#[repr(C)]` and declares no explicit discriminants, so the numeric values follow
/// declaration order starting at zero. Reordering or inserting variants changes the C ABI.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCode {
    /// The operation completed successfully.
    Ok,
    /// No language has been registered under the requested scope name.
    UnknownScope,
    /// Highlighting was cancelled or otherwise did not run to completion.
    Timeout,
    /// The highlighter reported an invalid language.
    InvalidLanguage,
    /// A string argument was not valid UTF-8.
    InvalidUtf8,
    /// The injection regex could not be compiled.
    InvalidRegex,
    /// The highlight configuration could not be built from the supplied queries.
    InvalidQuery,
    /// The language name was not valid UTF-8.
    InvalidLanguageName,
}
34
35/// Create a new [`TSHighlighter`] instance.
36///
37/// # Safety
38///
39/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for
40/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null.
41#[no_mangle]
42pub unsafe extern "C" fn ts_highlighter_new(
43    highlight_names: *const *const c_char,
44    attribute_strings: *const *const c_char,
45    highlight_count: u32,
46) -> *mut TSHighlighter {
47    let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize);
48    let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize);
49    let highlight_names = highlight_names
50        .iter()
51        .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string())
52        .collect::<Vec<_>>();
53    let attribute_strings = attribute_strings
54        .iter()
55        .map(|s| CStr::from_ptr(*s).to_bytes())
56        .collect();
57    let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return");
58    Box::into_raw(Box::new(TSHighlighter {
59        languages: HashMap::new(),
60        attribute_strings,
61        highlight_names,
62        carriage_return_index,
63    }))
64}
65
66/// Add a language to a [`TSHighlighter`] instance.
67///
68/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not.
69///
70/// # Safety
71///
72/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
73/// created by [`ts_highlighter_new`].
74///
75/// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the
76/// lifetime of the [`TSHighlighter`] instance, and are non-null.
77#[no_mangle]
78pub unsafe extern "C" fn ts_highlighter_add_language(
79    this: *mut TSHighlighter,
80    language_name: *const c_char,
81    scope_name: *const c_char,
82    injection_regex: *const c_char,
83    language: Language,
84    highlight_query: *const c_char,
85    injection_query: *const c_char,
86    locals_query: *const c_char,
87    highlight_query_len: u32,
88    injection_query_len: u32,
89    locals_query_len: u32,
90) -> ErrorCode {
91    let f = move || {
92        let this = unwrap_mut_ptr(this);
93        let scope_name = CStr::from_ptr(scope_name);
94        let scope_name = scope_name
95            .to_str()
96            .or(Err(ErrorCode::InvalidUtf8))?
97            .to_string();
98        let injection_regex = if injection_regex.is_null() {
99            None
100        } else {
101            let pattern = CStr::from_ptr(injection_regex);
102            let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
103            Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
104        };
105
106        let highlight_query =
107            slice::from_raw_parts(highlight_query.cast::<u8>(), highlight_query_len as usize);
108
109        let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?;
110
111        let injection_query = if injection_query_len > 0 {
112            let query =
113                slice::from_raw_parts(injection_query.cast::<u8>(), injection_query_len as usize);
114            str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
115        } else {
116            ""
117        };
118
119        let locals_query = if locals_query_len > 0 {
120            let query = slice::from_raw_parts(locals_query.cast::<u8>(), locals_query_len as usize);
121            str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
122        } else {
123            ""
124        };
125
126        let lang = CStr::from_ptr(language_name)
127            .to_str()
128            .or(Err(ErrorCode::InvalidLanguageName))?;
129
130        let mut config = HighlightConfiguration::new(
131            language,
132            lang,
133            highlight_query,
134            injection_query,
135            locals_query,
136        )
137        .or(Err(ErrorCode::InvalidQuery))?;
138        config.configure(this.highlight_names.as_slice());
139        this.languages.insert(scope_name, (injection_regex, config));
140
141        Ok(())
142    };
143
144    match f() {
145        Ok(()) => ErrorCode::Ok,
146        Err(e) => e,
147    }
148}
149
150#[no_mangle]
151pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
152    Box::into_raw(Box::new(TSHighlightBuffer {
153        highlighter: Highlighter::new(),
154        renderer: HtmlRenderer::new(),
155    }))
156}
157
158/// Deletes a [`TSHighlighter`] instance.
159///
160/// # Safety
161///
162/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
163/// created by [`ts_highlighter_new`].
164///
165/// It cannot be used after this function is called.
166#[no_mangle]
167pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
168    drop(Box::from_raw(this));
169}
170
171/// Deletes a [`TSHighlightBuffer`] instance.
172///
173/// # Safety
174///
175/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
176/// created by [`ts_highlight_buffer_new`]
177///
178/// It cannot be used after this function is called.
179#[no_mangle]
180pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
181    drop(Box::from_raw(this));
182}
183
184/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer.
185///
186/// # Safety
187///
188/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
189/// created by [`ts_highlight_buffer_new`].
190///
191/// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance,
192/// else the data will point to garbage.
193///
194/// To get the length of the HTML content, use [`ts_highlight_buffer_len`].
195#[no_mangle]
196pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
197    let this = unwrap_ptr(this);
198    this.renderer.html.as_slice().as_ptr()
199}
200
201/// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array.
202///
203/// # Safety
204///
205/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
206/// created by [`ts_highlight_buffer_new`].
207///
208/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`]
209/// instance, else the data will point to garbage.
210///
211/// To get the length of the array, use [`ts_highlight_buffer_line_count`].
212#[no_mangle]
213pub unsafe extern "C" fn ts_highlight_buffer_line_offsets(
214    this: *const TSHighlightBuffer,
215) -> *const u32 {
216    let this = unwrap_ptr(this);
217    this.renderer.line_offsets.as_slice().as_ptr()
218}
219
220/// Get the length of the HTML content of a [`TSHighlightBuffer`] instance.
221///
222/// # Safety
223///
224/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
225/// created by [`ts_highlight_buffer_new`].
226#[no_mangle]
227pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
228    let this = unwrap_ptr(this);
229    this.renderer.html.len() as u32
230}
231
232/// Get the number of lines in a [`TSHighlightBuffer`] instance.
233///
234/// # Safety
235///
236/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
237/// created by [`ts_highlight_buffer_new`].
238#[no_mangle]
239pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
240    let this = unwrap_ptr(this);
241    this.renderer.line_offsets.len() as u32
242}
243
244/// Highlight a string of source code.
245///
246/// # Safety
247///
248/// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are
249/// valid for the lifetime of the [`TSHighlighter`] instance, and are non-null.
250///
251/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by
252/// [`ts_highlighter_new`]
253#[no_mangle]
254pub unsafe extern "C" fn ts_highlighter_highlight(
255    this: *const TSHighlighter,
256    scope_name: *const c_char,
257    source_code: *const c_char,
258    source_code_len: u32,
259    output: *mut TSHighlightBuffer,
260    cancellation_flag: *const AtomicUsize,
261) -> ErrorCode {
262    let this = unwrap_ptr(this);
263    let output = unwrap_mut_ptr(output);
264    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
265    let source_code = slice::from_raw_parts(source_code.cast::<u8>(), source_code_len as usize);
266    let cancellation_flag = cancellation_flag.as_ref();
267    this.highlight(source_code, scope_name, output, cancellation_flag)
268}
269
270impl TSHighlighter {
271    fn highlight(
272        &self,
273        source_code: &[u8],
274        scope_name: &str,
275        output: &mut TSHighlightBuffer,
276        cancellation_flag: Option<&AtomicUsize>,
277    ) -> ErrorCode {
278        let entry = self.languages.get(scope_name);
279        if entry.is_none() {
280            return ErrorCode::UnknownScope;
281        }
282        let (_, configuration) = entry.unwrap();
283        let languages = &self.languages;
284
285        let highlights = output.highlighter.highlight(
286            configuration,
287            source_code,
288            cancellation_flag,
289            move |injection_string| {
290                languages.values().find_map(|(injection_regex, config)| {
291                    injection_regex.as_ref().and_then(|regex| {
292                        if regex.is_match(injection_string) {
293                            Some(config)
294                        } else {
295                            None
296                        }
297                    })
298                })
299            },
300        );
301
302        if let Ok(highlights) = highlights {
303            output.renderer.reset();
304            output
305                .renderer
306                .set_carriage_return_highlight(self.carriage_return_index.map(Highlight));
307            let result = output.renderer.render(highlights, source_code, &|s, out| {
308                out.extend(self.attribute_strings[s.0]);
309            });
310            match result {
311                Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout,
312                Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage,
313                Ok(()) => ErrorCode::Ok,
314            }
315        } else {
316            ErrorCode::Timeout
317        }
318    }
319}
320
/// Turn a raw const pointer into a shared reference, aborting the process if it is null.
///
/// The diagnostic printed before aborting names the location of the *caller* (through
/// `#[track_caller]`), so it identifies which API entry point received the null pointer.
///
/// # Safety
///
/// A non-null `result` must point to a valid, properly aligned `T` that stays alive and is not
/// mutated for the lifetime `'a` chosen by the caller.
#[track_caller]
unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
    match result.as_ref() {
        Some(reference) => reference,
        None => {
            let caller = std::panic::Location::caller();
            eprintln!("{}:{} - pointer must not be null", caller.file(), caller.line());
            abort()
        }
    }
}
327
/// Turn a raw mutable pointer into an exclusive reference, aborting the process if it is null.
///
/// The diagnostic printed before aborting names the location of the *caller* (through
/// `#[track_caller]`), so it identifies which API entry point received the null pointer.
///
/// # Safety
///
/// A non-null `result` must point to a valid, properly aligned `T` that stays alive and is not
/// accessed through any other pointer or reference for the lifetime `'a` chosen by the caller.
#[track_caller]
unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
    match result.as_mut() {
        Some(reference) => reference,
        None => {
            let caller = std::panic::Location::caller();
            eprintln!("{}:{} - pointer must not be null", caller.file(), caller.line());
            abort()
        }
    }
}
334
/// Extract the `Ok` value of `result`, or print the error to stderr and abort the process.
///
/// Aborting (instead of panicking) avoids unwinding out of the `extern "C"` entry points that
/// call this helper.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter highlight error: {error}");
            abort()
        }
    }
}