1use std::ffi::OsStr;
2use std::fs;
3use std::path::Path;
4
5use once_cell::unsync::OnceCell;
6
7use syntect::highlighting::Theme;
8use syntect::parsing::{SyntaxReference, SyntaxSet};
9
10use path_abs::PathAbs;
11
12use crate::error::*;
13use crate::input::{InputReader, OpenedInput};
14use crate::syntax_mapping::ignored_suffixes::IgnoredSuffixes;
15use crate::syntax_mapping::MappingTarget;
16use crate::{bat_warning, SyntaxMapping};
17
18use lazy_theme_set::LazyThemeSet;
19
20use serialized_syntax_set::*;
21
22#[cfg(feature = "build-assets")]
23pub use crate::assets::build_assets::*;
24
25pub(crate) mod assets_metadata;
26#[cfg(feature = "build-assets")]
27mod build_assets;
28mod lazy_theme_set;
29mod serialized_syntax_set;
30
/// The syntax definitions and themes used for highlighting.
///
/// The syntax set is held in serialized form and only deserialized the first
/// time `get_syntax_set` is called; the result is then cached in
/// `syntax_set_cell`.
#[derive(Debug)]
pub struct HighlightingAssets {
    /// Cache for the deserialized syntax set; empty until first use.
    syntax_set_cell: OnceCell<SyntaxSet>,
    /// Source of the serialized syntax set (a cache file or the data
    /// embedded in the binary).
    serialized_syntax_set: SerializedSyntaxSet,

    /// The available themes.
    theme_set: LazyThemeSet,
    /// Theme name `get_theme` falls back to for unknown themes. When `None`,
    /// `default_theme()` is used instead.
    fallback_theme: Option<&'static str>,
}
39
/// A syntax definition paired with the syntax set it was looked up in.
#[derive(Debug)]
pub struct SyntaxReferenceInSet<'a> {
    /// The syntax definition that was found.
    pub syntax: &'a SyntaxReference,
    /// The set that `syntax` was taken from.
    pub syntax_set: &'a SyntaxSet,
}
45
/// Compression flag for the serialized syntax set.
/// NOTE(review): not referenced in this file; presumably consumed by the
/// `serialized_syntax_set` / `build_assets` modules — confirm.
pub(crate) const COMPRESS_SYNTAXES: bool = false;

/// Passed as the `compressed` flag when `themes.bin` is loaded, both from a
/// cache directory and from the data embedded in the binary.
pub(crate) const COMPRESS_THEMES: bool = false;

/// Compression flag for lazily loaded themes.
/// NOTE(review): not referenced in this file; presumably consumed by the
/// `lazy_theme_set` module — confirm.
pub(crate) const COMPRESS_LAZY_THEMES: bool = true;

/// Passed as the `compressed` flag when the embedded `acknowledgements.bin`
/// is loaded by `get_acknowledgements`.
pub(crate) const COMPRESS_ACKNOWLEDGEMENTS: bool = true;
61
62impl HighlightingAssets {
63 fn new(serialized_syntax_set: SerializedSyntaxSet, theme_set: LazyThemeSet) -> Self {
64 HighlightingAssets {
65 syntax_set_cell: OnceCell::new(),
66 serialized_syntax_set,
67 theme_set,
68 fallback_theme: None,
69 }
70 }
71
72 pub fn default_theme() -> &'static str {
94 #[cfg(not(target_os = "macos"))]
95 {
96 Self::default_dark_theme()
97 }
98 #[cfg(target_os = "macos")]
99 {
100 if macos_dark_mode_active() {
101 Self::default_dark_theme()
102 } else {
103 Self::default_light_theme()
104 }
105 }
106 }
107
108 fn default_dark_theme() -> &'static str {
112 "Monokai Extended"
113 }
114
115 #[cfg(target_os = "macos")]
119 fn default_light_theme() -> &'static str {
120 "Monokai Extended Light"
121 }
122
123 pub fn from_cache(cache_path: &Path) -> Result<Self> {
124 Ok(HighlightingAssets::new(
125 SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
126 asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
127 ))
128 }
129
130 pub fn from_binary() -> Self {
131 HighlightingAssets::new(
132 SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
133 get_integrated_themeset(),
134 )
135 }
136
137 pub fn set_fallback_theme(&mut self, theme: &'static str) {
138 self.fallback_theme = Some(theme);
139 }
140
141 pub fn get_syntax_set(&self) -> Result<&SyntaxSet> {
143 self.syntax_set_cell
144 .get_or_try_init(|| self.serialized_syntax_set.deserialize())
145 }
146
147 #[deprecated]
149 pub fn syntaxes(&self) -> &[SyntaxReference] {
150 self.get_syntax_set()
151 .expect(".syntaxes() is deprecated, use .get_syntaxes() instead")
152 .syntaxes()
153 }
154
155 pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
156 Ok(self.get_syntax_set()?.syntaxes())
157 }
158
159 fn get_theme_set(&self) -> &LazyThemeSet {
160 &self.theme_set
161 }
162
163 pub fn themes(&self) -> impl Iterator<Item = &str> {
164 self.get_theme_set().themes()
165 }
166
167 #[deprecated]
169 pub fn syntax_for_file_name(
170 &self,
171 file_name: impl AsRef<Path>,
172 mapping: &SyntaxMapping,
173 ) -> Option<&SyntaxReference> {
174 self.get_syntax_for_path(file_name, mapping)
175 .ok()
176 .map(|syntax_in_set| syntax_in_set.syntax)
177 }
178
179 pub fn get_syntax_for_path(
202 &self,
203 path: impl AsRef<Path>,
204 mapping: &SyntaxMapping,
205 ) -> Result<SyntaxReferenceInSet> {
206 let path = path.as_ref();
207
208 let syntax_match = mapping.get_syntax_for(path);
209
210 if let Some(MappingTarget::MapToUnknown) = syntax_match {
211 return Err(Error::UndetectedSyntax(path.to_string_lossy().into()));
212 }
213
214 if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match {
215 return self
216 .find_syntax_by_name(syntax_name)?
217 .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned()));
218 }
219
220 let file_name = path.file_name().unwrap_or_default();
221
222 match (
223 self.get_syntax_for_file_name(file_name, &mapping.ignored_suffixes)?,
224 syntax_match,
225 ) {
226 (Some(syntax), _) => Ok(syntax),
227
228 (_, Some(MappingTarget::MapExtensionToUnknown)) => {
229 Err(Error::UndetectedSyntax(path.to_string_lossy().into()))
230 }
231
232 _ => self
233 .get_syntax_for_file_extension(file_name, &mapping.ignored_suffixes)?
234 .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())),
235 }
236 }
237
238 pub fn get_theme(&self, theme: &str) -> &Theme {
240 match self.get_theme_set().get(theme) {
241 Some(theme) => theme,
242 None => {
243 if theme == "ansi-light" || theme == "ansi-dark" {
244 bat_warning!("Theme '{}' is deprecated, using 'ansi' instead.", theme);
245 return self.get_theme("ansi");
246 }
247 if !theme.is_empty() {
248 bat_warning!("Unknown theme '{}', using default.", theme)
249 }
250 self.get_theme_set()
251 .get(self.fallback_theme.unwrap_or_else(Self::default_theme))
252 .expect("something is very wrong if the default theme is missing")
253 }
254 }
255 }
256
257 pub(crate) fn get_syntax(
258 &self,
259 language: Option<&str>,
260 input: &mut OpenedInput,
261 mapping: &SyntaxMapping,
262 ) -> Result<SyntaxReferenceInSet> {
263 if let Some(language) = language {
264 let syntax_set = self.get_syntax_set()?;
265 return syntax_set
266 .find_syntax_by_token(language)
267 .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })
268 .ok_or_else(|| Error::UnknownSyntax(language.to_owned()));
269 }
270
271 let path = input.path();
272 let path_syntax = if let Some(path) = path {
273 self.get_syntax_for_path(
274 PathAbs::new(path).map_or_else(|_| path.to_owned(), |p| p.as_path().to_path_buf()),
275 mapping,
276 )
277 } else {
278 Err(Error::UndetectedSyntax("[unknown]".into()))
279 };
280
281 match path_syntax {
282 Err(Error::UndetectedSyntax(path)) => self
285 .get_first_line_syntax(&mut input.reader)?
286 .ok_or(Error::UndetectedSyntax(path)),
287 _ => path_syntax,
288 }
289 }
290
291 pub(crate) fn find_syntax_by_name(
292 &self,
293 syntax_name: &str,
294 ) -> Result<Option<SyntaxReferenceInSet>> {
295 let syntax_set = self.get_syntax_set()?;
296 Ok(syntax_set
297 .find_syntax_by_name(syntax_name)
298 .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
299 }
300
301 fn find_syntax_by_extension(&self, e: Option<&OsStr>) -> Result<Option<SyntaxReferenceInSet>> {
302 let syntax_set = self.get_syntax_set()?;
303 let extension = e.and_then(|x| x.to_str()).unwrap_or_default();
304 Ok(syntax_set
305 .find_syntax_by_extension(extension)
306 .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
307 }
308
309 fn get_syntax_for_file_name(
310 &self,
311 file_name: &OsStr,
312 ignored_suffixes: &IgnoredSuffixes,
313 ) -> Result<Option<SyntaxReferenceInSet>> {
314 let mut syntax = self.find_syntax_by_extension(Some(file_name))?;
315 if syntax.is_none() {
316 syntax =
317 ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| {
318 self.get_syntax_for_file_name(stripped_file_name, ignored_suffixes)
320 })?;
321 }
322 Ok(syntax)
323 }
324
325 fn get_syntax_for_file_extension(
326 &self,
327 file_name: &OsStr,
328 ignored_suffixes: &IgnoredSuffixes,
329 ) -> Result<Option<SyntaxReferenceInSet>> {
330 let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
331 if syntax.is_none() {
332 syntax =
333 ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| {
334 self.get_syntax_for_file_extension(stripped_file_name, ignored_suffixes)
336 })?;
337 }
338 Ok(syntax)
339 }
340
341 fn get_first_line_syntax(
342 &self,
343 reader: &mut InputReader,
344 ) -> Result<Option<SyntaxReferenceInSet>> {
345 let syntax_set = self.get_syntax_set()?;
346 Ok(String::from_utf8(reader.first_line.clone())
347 .ok()
348 .and_then(|l| syntax_set.find_syntax_by_first_line(&l))
349 .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
350 }
351}
352
353pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
354 include_bytes!("../assets/syntaxes.bin")
355}
356
357pub(crate) fn get_integrated_themeset() -> LazyThemeSet {
358 from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
359}
360
361pub fn get_acknowledgements() -> String {
362 from_binary(
363 include_bytes!("../assets/acknowledgements.bin"),
364 COMPRESS_ACKNOWLEDGEMENTS,
365 )
366}
367
368pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
369 asset_from_contents(v, "n/a", compressed)
370 .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
371}
372
373fn asset_from_contents<T: serde::de::DeserializeOwned>(
374 contents: &[u8],
375 description: &str,
376 compressed: bool,
377) -> Result<T> {
378 if compressed {
379 bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents))
380 } else {
381 bincode::deserialize_from(contents)
382 }
383 .map_err(|_| format!("Could not parse {}", description).into())
384}
385
386fn asset_from_cache<T: serde::de::DeserializeOwned>(
387 path: &Path,
388 description: &str,
389 compressed: bool,
390) -> Result<T> {
391 let contents = fs::read(path).map_err(|_| {
392 format!(
393 "Could not load cached {} '{}'",
394 description,
395 path.to_string_lossy()
396 )
397 })?;
398 asset_from_contents(&contents[..], description, compressed)
399 .map_err(|_| format!("Could not parse cached {}", description).into())
400}
401
/// Reports whether macOS is in dark mode.
///
/// Runs `defaults read -globalDomain AppleInterfaceStyle` and returns `true`
/// only if its standard output is exactly `Dark\n`. If the command cannot be
/// run at all, dark mode is assumed.
#[cfg(target_os = "macos")]
fn macos_dark_mode_active() -> bool {
    let command_result = std::process::Command::new("defaults")
        .args(&["read", "-globalDomain", "AppleInterfaceStyle"])
        .output();
    command_result.map_or(true, |output| output.stdout == b"Dark\n")
}
411
412#[cfg(test)]
413mod tests {
414 use super::*;
415
416 use std::ffi::OsStr;
417
418 use std::fs::File;
419 use std::io::{BufReader, Write};
420 use tempfile::TempDir;
421
422 use crate::input::Input;
423
    /// Test fixture bundling everything the syntax-detection tests need.
    struct SyntaxDetectionTest<'a> {
        /// The highlighting assets under test.
        assets: HighlightingAssets,
        /// Syntax mapping used for detection; tests may add custom entries.
        pub syntax_mapping: SyntaxMapping<'a>,
        /// Temporary directory in which test files are created.
        pub temp_dir: TempDir,
    }
429
430 impl<'a> SyntaxDetectionTest<'a> {
431 fn new() -> Self {
432 SyntaxDetectionTest {
433 assets: HighlightingAssets::from_binary(),
434 syntax_mapping: SyntaxMapping::builtin(),
435 temp_dir: TempDir::new().expect("creation of temporary directory"),
436 }
437 }
438
439 fn get_syntax_name(
440 &self,
441 language: Option<&str>,
442 input: &mut OpenedInput,
443 mapping: &SyntaxMapping,
444 ) -> String {
445 self.assets
446 .get_syntax(language, input, mapping)
447 .map(|syntax_in_set| syntax_in_set.syntax.name.clone())
448 .unwrap_or_else(|_| "!no syntax!".to_owned())
449 }
450
451 fn syntax_for_real_file_with_content_os(
452 &self,
453 file_name: &OsStr,
454 first_line: &str,
455 ) -> String {
456 let file_path = self.temp_dir.path().join(file_name);
457 {
458 let mut temp_file = File::create(&file_path).unwrap();
459 writeln!(temp_file, "{}", first_line).unwrap();
460 }
461
462 let input = Input::ordinary_file(&file_path);
463 let dummy_stdin: &[u8] = &[];
464 let mut opened_input = input.open(dummy_stdin, None).unwrap();
465
466 self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
467 }
468
469 fn syntax_for_file_with_content_os(&self, file_name: &OsStr, first_line: &str) -> String {
470 let file_path = self.temp_dir.path().join(file_name);
471 let input = Input::from_reader(Box::new(BufReader::new(first_line.as_bytes())))
472 .with_name(Some(&file_path));
473 let dummy_stdin: &[u8] = &[];
474 let mut opened_input = input.open(dummy_stdin, None).unwrap();
475
476 self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
477 }
478
479 #[cfg(unix)]
480 fn syntax_for_file_os(&self, file_name: &OsStr) -> String {
481 self.syntax_for_file_with_content_os(file_name, "")
482 }
483
484 fn syntax_for_file_with_content(&self, file_name: &str, first_line: &str) -> String {
485 self.syntax_for_file_with_content_os(OsStr::new(file_name), first_line)
486 }
487
488 fn syntax_for_file(&self, file_name: &str) -> String {
489 self.syntax_for_file_with_content(file_name, "")
490 }
491
492 fn syntax_for_stdin_with_content(&self, file_name: &str, content: &[u8]) -> String {
493 let input = Input::stdin().with_name(Some(file_name));
494 let mut opened_input = input.open(content, None).unwrap();
495
496 self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
497 }
498
499 fn syntax_is_same_for_inputkinds(&self, file_name: &str, content: &str) -> bool {
500 let as_file = self.syntax_for_real_file_with_content_os(file_name.as_ref(), content);
501 let as_reader = self.syntax_for_file_with_content_os(file_name.as_ref(), content);
502 let consistent = as_file == as_reader;
503 if !consistent {
506 eprintln!(
507 "Inconsistent syntax detection:\nFor File: {}\nFor Reader: {}",
508 as_file, as_reader
509 )
510 }
511
512 consistent
513 }
514 }
515
516 #[test]
517 fn syntax_detection_basic() {
518 let test = SyntaxDetectionTest::new();
519
520 assert_eq!(test.syntax_for_file("test.rs"), "Rust");
521 assert_eq!(test.syntax_for_file("test.cpp"), "C++");
522 assert_eq!(test.syntax_for_file("test.build"), "NAnt Build File");
523 assert_eq!(
524 test.syntax_for_file("PKGBUILD"),
525 "Bourne Again Shell (bash)"
526 );
527 assert_eq!(test.syntax_for_file(".bashrc"), "Bourne Again Shell (bash)");
528 assert_eq!(test.syntax_for_file("Makefile"), "Makefile");
529 }
530
531 #[cfg(unix)]
532 #[test]
533 fn syntax_detection_invalid_utf8() {
534 use std::os::unix::ffi::OsStrExt;
535
536 let test = SyntaxDetectionTest::new();
537
538 assert_eq!(
539 test.syntax_for_file_os(OsStr::from_bytes(b"invalid_\xFEutf8_filename.rs")),
540 "Rust"
541 );
542 }
543
544 #[test]
545 fn syntax_detection_same_for_inputkinds() {
546 let mut test = SyntaxDetectionTest::new();
547
548 test.syntax_mapping
549 .insert("*.myext", MappingTarget::MapTo("C"))
550 .ok();
551 test.syntax_mapping
552 .insert("MY_FILE", MappingTarget::MapTo("Markdown"))
553 .ok();
554
555 assert!(test.syntax_is_same_for_inputkinds("Test.md", ""));
556 assert!(test.syntax_is_same_for_inputkinds("Test.txt", "#!/bin/bash"));
557 assert!(test.syntax_is_same_for_inputkinds(".bashrc", ""));
558 assert!(test.syntax_is_same_for_inputkinds("test.h", ""));
559 assert!(test.syntax_is_same_for_inputkinds("test.js", "#!/bin/bash"));
560 assert!(test.syntax_is_same_for_inputkinds("test.myext", ""));
561 assert!(test.syntax_is_same_for_inputkinds("MY_FILE", ""));
562 assert!(test.syntax_is_same_for_inputkinds("MY_FILE", "<?php"));
563 }
564
565 #[test]
566 fn syntax_detection_well_defined_mapping_for_duplicate_extensions() {
567 let test = SyntaxDetectionTest::new();
568
569 assert_eq!(test.syntax_for_file("test.h"), "C++");
570 assert_eq!(test.syntax_for_file("test.sass"), "Sass");
571 assert_eq!(test.syntax_for_file("test.js"), "JavaScript (Babel)");
572 assert_eq!(test.syntax_for_file("test.fs"), "F#");
573 assert_eq!(test.syntax_for_file("test.v"), "Verilog");
574 }
575
576 #[test]
577 fn syntax_detection_first_line() {
578 let test = SyntaxDetectionTest::new();
579
580 assert_eq!(
581 test.syntax_for_file_with_content("my_script", "#!/bin/bash"),
582 "Bourne Again Shell (bash)"
583 );
584 assert_eq!(
585 test.syntax_for_file_with_content("build", "#!/bin/bash"),
586 "Bourne Again Shell (bash)"
587 );
588 assert_eq!(
589 test.syntax_for_file_with_content("my_script", "<?php"),
590 "PHP"
591 );
592 }
593
594 #[test]
595 fn syntax_detection_with_custom_mapping() {
596 let mut test = SyntaxDetectionTest::new();
597
598 assert_eq!(test.syntax_for_file("test.h"), "C++");
599 test.syntax_mapping
600 .insert("*.h", MappingTarget::MapTo("C"))
601 .ok();
602 assert_eq!(test.syntax_for_file("test.h"), "C");
603 }
604
605 #[test]
606 fn syntax_detection_with_extension_mapping_to_unknown() {
607 let mut test = SyntaxDetectionTest::new();
608
609 assert_eq!(
612 test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"),
613 "CMake"
614 );
615
616 assert_eq!(
618 test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"),
619 "Plain Text"
620 );
621
622 test.syntax_mapping
626 .insert("*.txt", MappingTarget::MapExtensionToUnknown)
627 .ok();
628 assert_eq!(
629 test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"),
630 "CMake"
631 );
632
633 assert_eq!(
636 test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"),
637 "Bourne Again Shell (bash)"
638 );
639 }
640
641 #[test]
642 fn syntax_detection_is_case_insensitive() {
643 let mut test = SyntaxDetectionTest::new();
644
645 assert_eq!(test.syntax_for_file("README.md"), "Markdown");
646 assert_eq!(test.syntax_for_file("README.mD"), "Markdown");
647 assert_eq!(test.syntax_for_file("README.Md"), "Markdown");
648 assert_eq!(test.syntax_for_file("README.MD"), "Markdown");
649
650 test.syntax_mapping
652 .insert("*.MD", MappingTarget::MapTo("Markdown"))
653 .ok();
654
655 assert_eq!(test.syntax_for_file("README.md"), "Markdown");
656 assert_eq!(test.syntax_for_file("README.mD"), "Markdown");
657 assert_eq!(test.syntax_for_file("README.Md"), "Markdown");
658 assert_eq!(test.syntax_for_file("README.MD"), "Markdown");
659 }
660
661 #[test]
662 fn syntax_detection_stdin_filename() {
663 let test = SyntaxDetectionTest::new();
664
665 assert_eq!(test.syntax_for_stdin_with_content("test.cpp", b"a"), "C++");
667 assert_eq!(
669 test.syntax_for_stdin_with_content("my_script", b"#!/bin/bash"),
670 "Bourne Again Shell (bash)"
671 );
672 }
673
674 #[cfg(unix)]
675 #[test]
676 fn syntax_detection_for_symlinked_file() {
677 use std::os::unix::fs::symlink;
678
679 let test = SyntaxDetectionTest::new();
680 let file_path = test.temp_dir.path().join("my_ssh_config_filename");
681 {
682 File::create(&file_path).unwrap();
683 }
684 let file_path_symlink = test.temp_dir.path().join(".ssh").join("config");
685
686 std::fs::create_dir(test.temp_dir.path().join(".ssh"))
687 .expect("creation of directory succeeds");
688 symlink(&file_path, &file_path_symlink).expect("creation of symbolic link succeeds");
689
690 let input = Input::ordinary_file(&file_path_symlink);
691 let dummy_stdin: &[u8] = &[];
692 let mut opened_input = input.open(dummy_stdin, None).unwrap();
693
694 assert_eq!(
695 test.get_syntax_name(None, &mut opened_input, &test.syntax_mapping),
696 "SSH Config"
697 );
698 }
699}