tree_sitter_cli/
test.rs

1use std::{
2    collections::BTreeMap,
3    ffi::OsStr,
4    fmt::Write as _,
5    fs,
6    io::{self, Write},
7    path::{Path, PathBuf},
8    str,
9    sync::LazyLock,
10    time::Duration,
11};
12
13use anstyle::{AnsiColor, Color, Style};
14use anyhow::{anyhow, Context, Result};
15use clap::ValueEnum;
16use indoc::indoc;
17use regex::{
18    bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder},
19    Regex,
20};
21use similar::{ChangeTag, TextDiff};
22use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree};
23use walkdir::WalkDir;
24
25use super::util;
26use crate::parse::Stats;
27
// Matches one test header in a corpus file (byte-oriented, multi-line, verbose mode):
//   * `equals`  - an opening line made of three or more `=` characters,
//   * `suffix1` - optional trailing text on that line (must not start with `=`),
//   * `test_name_and_markers` - one or more following lines, each starting either with a
//     character other than `=` or with whitespace followed by `:`,
//   * a closing line of three or more `=`, followed by the optional `suffix2`.
// `\r?\n` is accepted everywhere, so both LF and CRLF line endings match.
28static HEADER_REGEX: LazyLock<ByteRegex> = LazyLock::new(|| {
29    ByteRegexBuilder::new(
30        r"^(?x)
31           (?P<equals>(?:=+){3,})
32           (?P<suffix1>[^=\r\n][^\r\n]*)?
33           \r?\n
34           (?P<test_name_and_markers>(?:([^=\r\n]|\s+:)[^\r\n]*\r?\n)+)
35           ===+
36           (?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n",
37    )
38    .multi_line(true)
39    .build()
40    .unwrap()
41});
42
// Matches a divider line (byte-oriented, multi-line): three or more `-` characters captured
// as `hyphens`, an optional `suffix` that must not start with `-`, then the line ending
// (LF or CRLF).
43static DIVIDER_REGEX: LazyLock<ByteRegex> = LazyLock::new(|| {
44    ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
45        .multi_line(true)
46        .build()
47        .unwrap()
48});
49
// Matches, in multi-line mode, optional leading whitespace followed by `;` and the rest of
// that line.
50static COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^\s*;.*$").unwrap());
51
// Matches any run of one or more whitespace characters.
52static WHITESPACE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
53
// Matches a ` name: (` field label in front of an opening parenthesis; `strip_sexp_fields`
// replaces every match with ` (`.
54static SEXP_FIELD_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" \w+: \(").unwrap());
55
// Matches a bracketed pair of decimal integers such as `[1, 2]`, together with any
// surrounding whitespace; `strip_points` deletes every match.
56static POINT_REGEX: LazyLock<Regex> =
57    LazyLock::new(|| Regex::new(r"\s*\[\s*\d+\s*,\s*\d+\s*\]\s*").unwrap());
58
/// A node in the tree of tests produced by `parse_tests`: either a group of nested entries
/// or a single example to parse and compare.
59#[derive(Debug, PartialEq, Eq)]
60pub enum TestEntry {
    /// A collection of child entries.
61    Group {
        /// Name printed as the group heading.
62        name: String,
        /// Nested groups and examples, run in order.
63        children: Vec<TestEntry>,
        /// When present, the file that corrected entries are written back to in update
        /// mode. `parse_tests` sets this to `None` for directories.
64        file_path: Option<PathBuf>,
65    },
    /// A single test case.
66    Example {
        /// Test name, used in the report and matched against the include/exclude filters.
67        name: String,
        /// Source bytes handed to the parser.
68        input: Vec<u8>,
        /// Expected S-expression that the parsed tree is compared against.
69        output: String,
        /// Number of `=` characters used for the header lines when the test is rewritten.
70        header_delim_len: usize,
        /// Number of `-` characters used for the divider line when the test is rewritten.
71        divider_delim_len: usize,
        /// When `false` (and `show_fields` is off), field names are stripped from the
        /// actual output before it is compared with `output`.
72        has_fields: bool,
        /// Attribute text that is written back after the test name when the test is
        /// rewritten.
73        attributes_str: String,
        /// Parsed form of the test's attributes.
74        attributes: TestAttributes,
        /// Compared with `TestOptions::file_name` when that filter is set.
75        file_name: Option<String>,
76    },
77}
78
/// Per-test switches parsed from the `:marker` lines of a test header.
79#[derive(Debug, Clone, PartialEq, Eq)]
80pub struct TestAttributes {
    /// The test is listed in the report but never parsed.
81    pub skip: bool,
    /// `false` when the test must not run on the current OS; such tests are listed but
    /// never parsed. Defaults to `true`.
82    pub platform: bool,
    /// Asks the runner to stop running further tests (see `run_tests`).
83    pub fail_fast: bool,
    /// The test passes when the parsed tree contains an error, instead of comparing
    /// S-expressions.
84    pub error: bool,
    /// Names of the languages to run the test with, looked up in `TestOptions::languages`.
    /// An empty name means the parser's current language is left unchanged.
85    pub languages: Vec<Box<str>>,
86}
87
88impl Default for TestEntry {
89    fn default() -> Self {
90        Self::Group {
91            name: String::new(),
92            children: Vec::new(),
93            file_path: None,
94        }
95    }
96}
97
98impl Default for TestAttributes {
99    fn default() -> Self {
100        Self {
101            skip: false,
102            platform: true,
103            fail_fast: false,
104            error: false,
105            languages: vec!["".into()],
106        }
107    }
108}
109
// Selects how much parse-rate information `run_tests_at_path` appends to each result line.
// NOTE(review): plain `//` comments are used on purpose; `///` on the variants of a
// `ValueEnum` would change the CLI help text.
110#[derive(ValueEnum, Default, Copy, Clone, PartialEq, Eq)]
111pub enum TestStats {
    // Append the measured parse rate to every test line, plus slow-rate warnings.
112    All,
    // Append only slow-rate warnings to the affected test lines (the default).
113    #[default]
114    OutliersAndTotal,
    // Append nothing to individual test lines.
115    TotalOnly,
116}
117
/// Settings and shared mutable state for one invocation of `run_tests_at_path`.
118pub struct TestOptions<'a> {
    /// Buffer that receives one report line per group heading / test result; printed line
    /// by line once all tests have run.
119    pub output: &'a mut String,
    /// File or directory the tests are loaded from.
120    pub path: PathBuf,
    /// Write the parser's log messages to stderr (only used when `debug_graph` is off).
121    pub debug: bool,
    /// Log parse graphs to `log.html`.
122    pub debug_graph: bool,
    /// Only run examples whose name matches; when set, `exclude` is not consulted.
123    pub include: Option<Regex>,
    /// Skip examples whose name matches.
124    pub exclude: Option<Regex>,
    /// Only run examples whose `file_name` equals this value; examples without a
    /// `file_name` are not affected by this filter.
125    pub file_name: Option<String>,
    /// Rewrite test files with corrected expected output instead of only reporting.
126    pub update: bool,
    /// Forwarded to `util::log_graphs` when `debug_graph` is set.
127    pub open_log: bool,
    /// Languages available to tests by name; the first value is what the parser is reset
    /// to after an example has run.
128    pub languages: BTreeMap<&'a str, &'a Language>,
    /// Emit ANSI colors in the report.
129    pub color: bool,
    /// Running test number printed in front of each result line.
130    pub test_num: usize,
131    /// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted
132    /// parse rate
133    pub parse_rates: &'a mut Vec<(bool, Option<(f64, f64)>)>,
    /// How much parse-rate information to append to the report lines.
134    pub stat_display: TestStats,
    /// Aggregate counters (parses, successes, duration, bytes) updated for every parse.
135    pub stats: &'a mut Stats,
    /// Keep field names in the actual output even when the expected output has none.
136    pub show_fields: bool,
    /// Suppress the detailed per-failure listing.
137    pub overview_only: bool,
138}
139
140pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<()> {
141    let test_entry = parse_tests(&opts.path)?;
142    let mut _log_session = None;
143
144    if opts.debug_graph {
145        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
146    } else if opts.debug {
147        parser.set_logger(Some(Box::new(|log_type, message| {
148            if log_type == LogType::Lex {
149                io::stderr().write_all(b"  ").unwrap();
150            }
151            writeln!(&mut io::stderr(), "{message}").unwrap();
152        })));
153    }
154
155    let mut failures = Vec::new();
156    let mut corrected_entries = Vec::new();
157    let mut has_parse_errors = false;
158    run_tests(
159        parser,
160        test_entry,
161        opts,
162        0,
163        &mut failures,
164        &mut corrected_entries,
165        &mut has_parse_errors,
166    )?;
167
168    let (count, total_adj_parse_time) = opts
169        .parse_rates
170        .iter()
171        .flat_map(|(_, rates)| rates)
172        .fold((0usize, 0.0f64), |(count, rate_accum), (_, adj_rate)| {
173            (count + 1, rate_accum + adj_rate)
174        });
175
176    let avg = total_adj_parse_time / count as f64;
177    let std_dev = {
178        let variance = opts
179            .parse_rates
180            .iter()
181            .flat_map(|(_, rates)| rates)
182            .map(|(_, rate_i)| (rate_i - avg).powi(2))
183            .sum::<f64>()
184            / count as f64;
185        variance.sqrt()
186    };
187
188    for ((is_test, rates), out_line) in opts.parse_rates.iter().zip(opts.output.lines()) {
189        let stat_display = if !is_test {
190            // Test group, no actual parsing took place
191            String::new()
192        } else {
193            match (opts.stat_display, rates) {
194                (TestStats::TotalOnly, _) | (_, None) => String::new(),
195                (display, Some((true_rate, adj_rate))) => {
196                    let mut stats = if display == TestStats::All {
197                        format!(" ({true_rate:.3} bytes/ms)")
198                    } else {
199                        String::new()
200                    };
201                    // 3 standard deviations below the mean, aka the "Empirical Rule"
202                    if *adj_rate < 3.0f64.mul_add(-std_dev, avg) {
203                        stats += &paint(
204                            opts.color.then_some(AnsiColor::Red),
205                            &format!(" -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)"),
206                        );
207                    }
208                    stats
209                }
210            }
211        };
212        println!("{out_line}{stat_display}");
213    }
214
215    parser.stop_printing_dot_graphs();
216
217    if failures.is_empty() {
218        Ok(())
219    } else {
220        println!();
221
222        if opts.update && !has_parse_errors {
223            if failures.len() == 1 {
224                println!("1 update:\n");
225            } else {
226                println!("{} updates:\n", failures.len());
227            }
228
229            for (i, (name, ..)) in failures.iter().enumerate() {
230                println!("  {}. {name}", i + 1);
231            }
232
233            Ok(())
234        } else {
235            has_parse_errors = opts.update && has_parse_errors;
236
237            if !opts.overview_only {
238                if !has_parse_errors {
239                    if failures.len() == 1 {
240                        println!("1 failure:");
241                    } else {
242                        println!("{} failures:", failures.len());
243                    }
244                }
245
246                if opts.color {
247                    print_diff_key();
248                }
249                for (i, (name, actual, expected)) in failures.iter().enumerate() {
250                    if expected == "NO ERROR" {
251                        println!("\n  {}. {name}:\n", i + 1);
252                        println!("  Expected an ERROR node, but got:");
253                        println!(
254                            "  {}",
255                            paint(
256                                opts.color.then_some(AnsiColor::Red),
257                                &format_sexp(actual, 2)
258                            )
259                        );
260                    } else {
261                        println!("\n  {}. {name}:", i + 1);
262                        let actual = format_sexp(actual, 2);
263                        let expected = format_sexp(expected, 2);
264                        print_diff(&actual, &expected, opts.color);
265                    }
266                }
267            }
268
269            if has_parse_errors {
270                Err(anyhow!(indoc! {"
271                    Some tests failed to parse with unexpected `ERROR` or `MISSING` nodes, as shown above, and cannot be updated automatically.
272                    Either fix the grammar or manually update the tests if this is expected."}))
273            } else {
274                Err(anyhow!(""))
275            }
276        }
277    }
278}
279
280pub fn check_queries_at_path(language: &Language, path: &Path) -> Result<()> {
281    if path.exists() {
282        for entry in WalkDir::new(path)
283            .into_iter()
284            .filter_map(std::result::Result::ok)
285            .filter(|e| {
286                e.file_type().is_file()
287                    && e.path().extension().and_then(OsStr::to_str) == Some("scm")
288                    && !e.path().starts_with(".")
289            })
290        {
291            let filepath = entry.file_name().to_str().unwrap_or("");
292            let content = fs::read_to_string(entry.path())
293                .with_context(|| format!("Error reading query file {filepath:?}"))?;
294            Query::new(language, &content)
295                .with_context(|| format!("Error in query file {filepath:?}"))?;
296        }
297    }
298    Ok(())
299}
300
301pub fn print_diff_key() {
302    println!(
303        "\ncorrect / {} / {}",
304        paint(Some(AnsiColor::Green), "expected"),
305        paint(Some(AnsiColor::Red), "unexpected")
306    );
307}
308
309pub fn print_diff(actual: &str, expected: &str, use_color: bool) {
310    let diff = TextDiff::from_lines(actual, expected);
311    for diff in diff.iter_all_changes() {
312        match diff.tag() {
313            ChangeTag::Equal => {
314                if use_color {
315                    print!("{diff}");
316                } else {
317                    print!(" {diff}");
318                }
319            }
320            ChangeTag::Insert => {
321                if use_color {
322                    print!("{}", paint(Some(AnsiColor::Green), diff.as_str().unwrap()));
323                } else {
324                    print!("+{diff}");
325                }
326                if diff.missing_newline() {
327                    println!();
328                }
329            }
330            ChangeTag::Delete => {
331                if use_color {
332                    print!("{}", paint(Some(AnsiColor::Red), diff.as_str().unwrap()));
333                } else {
334                    print!("-{diff}");
335                }
336                if diff.missing_newline() {
337                    println!();
338                }
339            }
340        }
341    }
342
343    println!();
344}
345
346pub fn paint(color: Option<impl Into<Color>>, text: &str) -> String {
347    let style = Style::new().fg_color(color.map(Into::into));
348    format!("{style}{text}{style:#}")
349}
350
351/// This will return false if we want to "fail fast". It will bail and not parse any more tests.
352#[allow(clippy::too_many_arguments)]
353fn run_tests(
354    parser: &mut Parser,
355    test_entry: TestEntry,
356    opts: &mut TestOptions,
357    mut indent_level: u32,
358    failures: &mut Vec<(String, String, String)>,
359    corrected_entries: &mut Vec<(String, String, String, String, usize, usize)>,
360    has_parse_errors: &mut bool,
361) -> Result<bool> {
362    match test_entry {
363        TestEntry::Example {
364            name,
365            input,
366            output,
367            header_delim_len,
368            divider_delim_len,
369            has_fields,
370            attributes_str,
371            attributes,
372            ..
373        } => {
374            write!(opts.output, "{}", "  ".repeat(indent_level as usize))?;
375
376            if attributes.skip {
377                writeln!(
378                    opts.output,
379                    "{:>3}. ⌀ {}",
380                    opts.test_num,
381                    paint(opts.color.then_some(AnsiColor::Yellow), &name),
382                )?;
383                opts.parse_rates.push((true, None));
384                opts.test_num += 1;
385                return Ok(true);
386            }
387
388            if !attributes.platform {
389                writeln!(
390                    opts.output,
391                    "{:>3}. ⌀ {}",
392                    opts.test_num,
393                    paint(opts.color.then_some(AnsiColor::Magenta), &name),
394                )?;
395                opts.parse_rates.push((true, None));
396                opts.test_num += 1;
397                return Ok(true);
398            }
399
400            for (i, language_name) in attributes.languages.iter().enumerate() {
401                if !language_name.is_empty() {
402                    let language = opts
403                        .languages
404                        .get(language_name.as_ref())
405                        .ok_or_else(|| anyhow!("Language not found: {language_name}"))?;
406                    parser.set_language(language)?;
407                }
408                let start = std::time::Instant::now();
409                let tree = parser.parse(&input, None).unwrap();
410                {
411                    let parse_time = start.elapsed();
412                    let true_parse_rate = tree.root_node().byte_range().len() as f64
413                        / (parse_time.as_nanos() as f64 / 1_000_000.0);
414                    let adj_parse_rate = adjusted_parse_rate(&tree, parse_time);
415
416                    opts.parse_rates
417                        .push((true, Some((true_parse_rate, adj_parse_rate))));
418                    opts.stats.total_parses += 1;
419                    opts.stats.total_duration += parse_time;
420                    opts.stats.total_bytes += tree.root_node().byte_range().len();
421                }
422
423                if attributes.error {
424                    if tree.root_node().has_error() {
425                        writeln!(
426                            opts.output,
427                            "{:>3}. ✓ {}",
428                            opts.test_num,
429                            paint(opts.color.then_some(AnsiColor::Green), &name),
430                        )?;
431                        opts.stats.successful_parses += 1;
432                        if opts.update {
433                            let input = String::from_utf8(input.clone()).unwrap();
434                            let output = format_sexp(&output, 0);
435                            corrected_entries.push((
436                                name.clone(),
437                                input,
438                                output,
439                                attributes_str.clone(),
440                                header_delim_len,
441                                divider_delim_len,
442                            ));
443                        }
444                    } else {
445                        if opts.update {
446                            let input = String::from_utf8(input.clone()).unwrap();
447                            // Keep the original `expected` output if the actual output has no error
448                            let output = format_sexp(&output, 0);
449                            corrected_entries.push((
450                                name.clone(),
451                                input,
452                                output,
453                                attributes_str.clone(),
454                                header_delim_len,
455                                divider_delim_len,
456                            ));
457                        }
458                        writeln!(
459                            opts.output,
460                            "{:>3}. ✗ {}",
461                            opts.test_num,
462                            paint(opts.color.then_some(AnsiColor::Red), &name),
463                        )?;
464                        failures.push((
465                            name.clone(),
466                            tree.root_node().to_sexp(),
467                            "NO ERROR".to_string(),
468                        ));
469                    }
470
471                    if attributes.fail_fast {
472                        return Ok(false);
473                    }
474                } else {
475                    let mut actual = tree.root_node().to_sexp();
476                    if !(opts.show_fields || has_fields) {
477                        actual = strip_sexp_fields(&actual);
478                    }
479
480                    if actual == output {
481                        writeln!(
482                            opts.output,
483                            "{:>3}. ✓ {}",
484                            opts.test_num,
485                            paint(opts.color.then_some(AnsiColor::Green), &name),
486                        )?;
487                        opts.stats.successful_parses += 1;
488                        if opts.update {
489                            let input = String::from_utf8(input.clone()).unwrap();
490                            let output = format_sexp(&output, 0);
491                            corrected_entries.push((
492                                name.clone(),
493                                input,
494                                output,
495                                attributes_str.clone(),
496                                header_delim_len,
497                                divider_delim_len,
498                            ));
499                        }
500                    } else {
501                        if opts.update {
502                            let input = String::from_utf8(input.clone()).unwrap();
503                            let expected_output = format_sexp(&output, 0);
504                            let actual_output = format_sexp(&actual, 0);
505
506                            // Only bail early before updating if the actual is not the output,
507                            // sometimes users want to test cases that
508                            // are intended to have errors, hence why this
509                            // check isn't shown above
510                            if actual.contains("ERROR") || actual.contains("MISSING") {
511                                *has_parse_errors = true;
512
513                                // keep the original `expected` output if the actual output has an
514                                // error
515                                corrected_entries.push((
516                                    name.clone(),
517                                    input,
518                                    expected_output,
519                                    attributes_str.clone(),
520                                    header_delim_len,
521                                    divider_delim_len,
522                                ));
523                            } else {
524                                corrected_entries.push((
525                                    name.clone(),
526                                    input,
527                                    actual_output,
528                                    attributes_str.clone(),
529                                    header_delim_len,
530                                    divider_delim_len,
531                                ));
532                                writeln!(
533                                    opts.output,
534                                    "{:>3}. ✓ {}",
535                                    opts.test_num,
536                                    paint(opts.color.then_some(AnsiColor::Blue), &name),
537                                )?;
538                            }
539                        } else {
540                            writeln!(
541                                opts.output,
542                                "{:>3}. ✗ {}",
543                                opts.test_num,
544                                paint(opts.color.then_some(AnsiColor::Red), &name),
545                            )?;
546                        }
547                        failures.push((name.clone(), actual, output.clone()));
548
549                        if attributes.fail_fast {
550                            return Ok(false);
551                        }
552                    }
553                }
554
555                if i == attributes.languages.len() - 1 {
556                    // reset to the first language
557                    parser.set_language(opts.languages.values().next().unwrap())?;
558                }
559            }
560            opts.test_num += 1;
561        }
562        TestEntry::Group {
563            name,
564            children,
565            file_path,
566        } => {
567            if children.is_empty() {
568                return Ok(true);
569            }
570
571            indent_level += 1;
572            let failure_count = failures.len();
573            let mut has_printed = false;
574
575            let matches_filter = |name: &str, file_name: &Option<String>, opts: &TestOptions| {
576                if let (Some(test_file_path), Some(filter_file_name)) = (file_name, &opts.file_name)
577                {
578                    if !filter_file_name.eq(test_file_path) {
579                        return false;
580                    }
581                }
582                if let Some(include) = &opts.include {
583                    include.is_match(name)
584                } else if let Some(exclude) = &opts.exclude {
585                    !exclude.is_match(name)
586                } else {
587                    true
588                }
589            };
590
591            let should_skip = |entry: &TestEntry, opts: &TestOptions| match entry {
592                TestEntry::Example {
593                    name, file_name, ..
594                } => !matches_filter(name, file_name, opts),
595                TestEntry::Group { .. } => false,
596            };
597
598            for child in children {
599                if let TestEntry::Example {
600                    ref name,
601                    ref input,
602                    ref output,
603                    ref attributes_str,
604                    header_delim_len,
605                    divider_delim_len,
606                    ..
607                } = child
608                {
609                    if should_skip(&child, opts) {
610                        let input = String::from_utf8(input.clone()).unwrap();
611                        let output = format_sexp(output, 0);
612                        corrected_entries.push((
613                            name.clone(),
614                            input,
615                            output,
616                            attributes_str.clone(),
617                            header_delim_len,
618                            divider_delim_len,
619                        ));
620
621                        opts.test_num += 1;
622
623                        continue;
624                    }
625                }
626                if !has_printed && indent_level > 1 {
627                    has_printed = true;
628                    writeln!(
629                        opts.output,
630                        "{}{name}:",
631                        "  ".repeat((indent_level - 1) as usize)
632                    )?;
633                    opts.parse_rates.push((false, None));
634                }
635                if !run_tests(
636                    parser,
637                    child,
638                    opts,
639                    indent_level,
640                    failures,
641                    corrected_entries,
642                    has_parse_errors,
643                )? {
644                    // fail fast
645                    return Ok(false);
646                }
647            }
648
649            if let Some(file_path) = file_path {
650                if opts.update && failures.len() - failure_count > 0 {
651                    write_tests(&file_path, corrected_entries)?;
652                }
653                corrected_entries.clear();
654            }
655        }
656    }
657    Ok(true)
658}
659
660// Parse time is interpreted in ns before converting to ms to avoid truncation issues
661// Parse rates often have several outliers, leading to a large standard deviation. Taking
662// the log of these rates serves to "flatten" out the distribution, yielding a more
663// usable standard deviation for finding statistically significant slow parse rates
664// NOTE: This is just a heuristic
665#[must_use]
666pub fn adjusted_parse_rate(tree: &Tree, parse_time: Duration) -> f64 {
667    f64::ln(
668        tree.root_node().byte_range().len() as f64 / (parse_time.as_nanos() as f64 / 1_000_000.0),
669    )
670}
671
672fn write_tests(
673    file_path: &Path,
674    corrected_entries: &[(String, String, String, String, usize, usize)],
675) -> Result<()> {
676    let mut buffer = fs::File::create(file_path)?;
677    write_tests_to_buffer(&mut buffer, corrected_entries)
678}
679
/// Serializes `corrected_entries` to `buffer` in corpus format.
///
/// Each entry is a tuple of `(name, input, output, attributes, header length, divider
/// length)`. It is written as a header rule of `=`, the name, the attribute text (if any),
/// the header rule again, the input, a divider rule of `-`, a blank line, and the trimmed
/// output. Consecutive entries are separated by one blank line.
fn write_tests_to_buffer(
    buffer: &mut impl Write,
    corrected_entries: &[(String, String, String, String, usize, usize)],
) -> Result<()> {
    let mut is_first = true;
    for (name, input, output, attributes_str, header_delim_len, divider_delim_len) in
        corrected_entries
    {
        if !is_first {
            writeln!(buffer)?;
        }
        is_first = false;

        let header_rule = "=".repeat(*header_delim_len);
        let divider_rule = "-".repeat(*divider_delim_len);
        // Attributes occupy their own line(s) between the name and the closing header rule.
        let attributes_block = if attributes_str.is_empty() {
            String::new()
        } else {
            format!("{attributes_str}\n")
        };

        writeln!(
            buffer,
            "{}\n{name}\n{}{}\n{input}\n{}\n\n{}",
            header_rule,
            attributes_block,
            header_rule,
            divider_rule,
            output.trim()
        )?;
    }
    Ok(())
}
706
707pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
708    let name = path
709        .file_stem()
710        .and_then(|s| s.to_str())
711        .unwrap_or("")
712        .to_string();
713    if path.is_dir() {
714        let mut children = Vec::new();
715        for entry in fs::read_dir(path)? {
716            let entry = entry?;
717            let hidden = entry.file_name().to_str().unwrap_or("").starts_with('.');
718            if !hidden {
719                children.push(entry.path());
720            }
721        }
722        children.sort_by(|a, b| {
723            a.file_name()
724                .unwrap_or_default()
725                .cmp(b.file_name().unwrap_or_default())
726        });
727        let children = children
728            .iter()
729            .map(|path| parse_tests(path))
730            .collect::<io::Result<Vec<TestEntry>>>()?;
731        Ok(TestEntry::Group {
732            name,
733            children,
734            file_path: None,
735        })
736    } else {
737        let content = fs::read_to_string(path)?;
738        Ok(parse_test_content(name, &content, Some(path.to_path_buf())))
739    }
740}
741
742#[must_use]
743pub fn strip_sexp_fields(sexp: &str) -> String {
744    SEXP_FIELD_REGEX.replace_all(sexp, " (").to_string()
745}
746
747#[must_use]
748pub fn strip_points(sexp: &str) -> String {
749    POINT_REGEX.replace_all(sexp, "").to_string()
750}
751
752fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -> TestEntry {
753    let mut children = Vec::new();
754    let bytes = content.as_bytes();
755    let mut prev_name = String::new();
756    let mut prev_attributes_str = String::new();
757    let mut prev_header_end = 0;
758
759    // Find the first test header in the file, and determine if it has a
760    // custom suffix. If so, then this suffix will be used to identify
761    // all subsequent headers and divider lines in the file.
762    let first_suffix = HEADER_REGEX
763        .captures(bytes)
764        .and_then(|c| c.name("suffix1"))
765        .map(|m| String::from_utf8_lossy(m.as_bytes()));
766
767    // Find all of the `===` test headers, which contain the test names.
768    // Ignore any matches whose suffix does not match the first header
769    // suffix in the file.
770    let header_matches = HEADER_REGEX.captures_iter(bytes).filter_map(|c| {
771        let header_delim_len = c.name("equals").map_or(80, |m| m.as_bytes().len());
772        let suffix1 = c
773            .name("suffix1")
774            .map(|m| String::from_utf8_lossy(m.as_bytes()));
775        let suffix2 = c
776            .name("suffix2")
777            .map(|m| String::from_utf8_lossy(m.as_bytes()));
778
779        let (mut skip, mut platform, mut fail_fast, mut error, mut languages) =
780            (false, None, false, false, vec![]);
781
782        let test_name_and_markers = c
783            .name("test_name_and_markers")
784            .map_or("".as_bytes(), |m| m.as_bytes());
785
786        let mut test_name = String::new();
787        let mut attributes_str = String::new();
788
789        let mut seen_marker = false;
790
791        let test_name_and_markers = str::from_utf8(test_name_and_markers).unwrap();
792        for line in test_name_and_markers
793            .split_inclusive('\n')
794            .filter(|s| !s.is_empty())
795        {
796            let trimmed_line = line.trim();
797            match trimmed_line.split('(').next().unwrap() {
798                ":skip" => (seen_marker, skip) = (true, true),
799                ":platform" => {
800                    if let Some(platforms) = trimmed_line.strip_prefix(':').and_then(|s| {
801                        s.strip_prefix("platform(")
802                            .and_then(|s| s.strip_suffix(')'))
803                    }) {
804                        seen_marker = true;
805                        platform = Some(
806                            platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS,
807                        );
808                    }
809                }
810                ":fail-fast" => (seen_marker, fail_fast) = (true, true),
811                ":error" => (seen_marker, error) = (true, true),
812                ":language" => {
813                    if let Some(lang) = trimmed_line.strip_prefix(':').and_then(|s| {
814                        s.strip_prefix("language(")
815                            .and_then(|s| s.strip_suffix(')'))
816                    }) {
817                        seen_marker = true;
818                        languages.push(lang.into());
819                    }
820                }
821                _ if !seen_marker => {
822                    test_name.push_str(line);
823                }
824                _ => {}
825            }
826        }
827        attributes_str.push_str(test_name_and_markers.strip_prefix(&test_name).unwrap());
828
829        // prefer skip over error, both shouldn't be set
830        if skip {
831            error = false;
832        }
833
834        // add a default language if none are specified, will defer to the first language
835        if languages.is_empty() {
836            languages.push("".into());
837        }
838
839        if suffix1 == first_suffix && suffix2 == first_suffix {
840            let header_range = c.get(0).unwrap().range();
841            let test_name = if test_name.is_empty() {
842                None
843            } else {
844                Some(test_name.trim_end().to_string())
845            };
846            let attributes_str = if attributes_str.is_empty() {
847                None
848            } else {
849                Some(attributes_str.trim_end().to_string())
850            };
851            Some((
852                header_delim_len,
853                header_range,
854                test_name,
855                attributes_str,
856                TestAttributes {
857                    skip,
858                    platform: platform.unwrap_or(true),
859                    fail_fast,
860                    error,
861                    languages,
862                },
863            ))
864        } else {
865            None
866        }
867    });
868
869    let (mut prev_header_len, mut prev_attributes) = (80, TestAttributes::default());
870    for (header_delim_len, header_range, test_name, attributes_str, attributes) in header_matches
871        .chain(Some((
872            80,
873            bytes.len()..bytes.len(),
874            None,
875            None,
876            TestAttributes::default(),
877        )))
878    {
879        // Find the longest line of dashes following each test description. That line
880        // separates the input from the expected output. Ignore any matches whose suffix
881        // does not match the first suffix in the file.
882        if prev_header_end > 0 {
883            let divider_range = DIVIDER_REGEX
884                .captures_iter(&bytes[prev_header_end..header_range.start])
885                .filter_map(|m| {
886                    let divider_delim_len = m.name("hyphens").map_or(80, |m| m.as_bytes().len());
887                    let suffix = m
888                        .name("suffix")
889                        .map(|m| String::from_utf8_lossy(m.as_bytes()));
890                    if suffix == first_suffix {
891                        let range = m.get(0).unwrap().range();
892                        Some((
893                            divider_delim_len,
894                            (prev_header_end + range.start)..(prev_header_end + range.end),
895                        ))
896                    } else {
897                        None
898                    }
899                })
900                .max_by_key(|(_, range)| range.len());
901
902            if let Some((divider_delim_len, divider_range)) = divider_range {
903                if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
904                    let mut input = bytes[prev_header_end..divider_range.start].to_vec();
905
906                    // Remove trailing newline from the input.
907                    input.pop();
908                    #[cfg(target_os = "windows")]
909                    if input.last() == Some(&b'\r') {
910                        input.pop();
911                    }
912
913                    // Remove all comments
914                    let output = COMMENT_REGEX.replace_all(output, "").to_string();
915
916                    // Normalize the whitespace in the expected output.
917                    let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
918                    let output = output.replace(" )", ")");
919
920                    // Identify if the expected output has fields indicated. If not, then
921                    // fields will not be checked.
922                    let has_fields = SEXP_FIELD_REGEX.is_match(&output);
923
924                    let file_name = if let Some(ref path) = file_path {
925                        path.file_name().map(|n| n.to_string_lossy().to_string())
926                    } else {
927                        None
928                    };
929
930                    let t = TestEntry::Example {
931                        name: prev_name,
932                        input,
933                        output,
934                        header_delim_len: prev_header_len,
935                        divider_delim_len,
936                        has_fields,
937                        attributes_str: prev_attributes_str,
938                        attributes: prev_attributes,
939                        file_name,
940                    };
941
942                    children.push(t);
943                }
944            }
945        }
946        prev_attributes = attributes;
947        prev_name = test_name.unwrap_or_default();
948        prev_attributes_str = attributes_str.unwrap_or_default();
949        prev_header_len = header_delim_len;
950        prev_header_end = header_range.end;
951    }
952    TestEntry::Group {
953        name,
954        children,
955        file_path,
956    }
957}
958
959#[cfg(test)]
960mod tests {
961    use super::*;
962
963    #[test]
964    fn test_parse_test_content_simple() {
965        let entry = parse_test_content(
966            "the-filename".to_string(),
967            r"
968===============
969The first test
970===============
971
972a b c
973
974---
975
976(a
977    (b c))
978
979================
980The second test
981================
982d
983---
984(d)
985        "
986            .trim(),
987            None,
988        );
989
990        assert_eq!(
991            entry,
992            TestEntry::Group {
993                name: "the-filename".to_string(),
994                children: vec![
995                    TestEntry::Example {
996                        name: "The first test".to_string(),
997                        input: b"\na b c\n".to_vec(),
998                        output: "(a (b c))".to_string(),
999                        header_delim_len: 15,
1000                        divider_delim_len: 3,
1001                        has_fields: false,
1002                        attributes_str: String::new(),
1003                        attributes: TestAttributes::default(),
1004                        file_name: None,
1005                    },
1006                    TestEntry::Example {
1007                        name: "The second test".to_string(),
1008                        input: b"d".to_vec(),
1009                        output: "(d)".to_string(),
1010                        header_delim_len: 16,
1011                        divider_delim_len: 3,
1012                        has_fields: false,
1013                        attributes_str: String::new(),
1014                        attributes: TestAttributes::default(),
1015                        file_name: None,
1016                    },
1017                ],
1018                file_path: None,
1019            }
1020        );
1021    }
1022
1023    #[test]
1024    fn test_parse_test_content_with_dashes_in_source_code() {
1025        let entry = parse_test_content(
1026            "the-filename".to_string(),
1027            r"
1028==================
1029Code with dashes
1030==================
1031abc
1032---
1033defg
1034----
1035hijkl
1036-------
1037
1038(a (b))
1039
1040=========================
1041Code ending with dashes
1042=========================
1043abc
1044-----------
1045-------------------
1046
1047(c (d))
1048        "
1049            .trim(),
1050            None,
1051        );
1052
1053        assert_eq!(
1054            entry,
1055            TestEntry::Group {
1056                name: "the-filename".to_string(),
1057                children: vec![
1058                    TestEntry::Example {
1059                        name: "Code with dashes".to_string(),
1060                        input: b"abc\n---\ndefg\n----\nhijkl".to_vec(),
1061                        output: "(a (b))".to_string(),
1062                        header_delim_len: 18,
1063                        divider_delim_len: 7,
1064                        has_fields: false,
1065                        attributes_str: String::new(),
1066                        attributes: TestAttributes::default(),
1067                        file_name: None,
1068                    },
1069                    TestEntry::Example {
1070                        name: "Code ending with dashes".to_string(),
1071                        input: b"abc\n-----------".to_vec(),
1072                        output: "(c (d))".to_string(),
1073                        header_delim_len: 25,
1074                        divider_delim_len: 19,
1075                        has_fields: false,
1076                        attributes_str: String::new(),
1077                        attributes: TestAttributes::default(),
1078                        file_name: None,
1079                    },
1080                ],
1081                file_path: None,
1082            }
1083        );
1084    }
1085
1086    #[test]
1087    fn test_format_sexp() {
1088        assert_eq!(format_sexp("", 0), "");
1089        assert_eq!(
1090            format_sexp("(a b: (c) (d) e: (f (g (h (MISSING i)))))", 0),
1091            r"
1092(a
1093  b: (c)
1094  (d)
1095  e: (f
1096    (g
1097      (h
1098        (MISSING i)))))
1099"
1100            .trim()
1101        );
1102        assert_eq!(
1103            format_sexp("(program (ERROR (UNEXPECTED ' ')) (identifier))", 0),
1104            r"
1105(program
1106  (ERROR
1107    (UNEXPECTED ' '))
1108  (identifier))
1109"
1110            .trim()
1111        );
1112        assert_eq!(
1113            format_sexp(r#"(source_file (MISSING ")"))"#, 0),
1114            r#"
1115(source_file
1116  (MISSING ")"))
1117        "#
1118            .trim()
1119        );
1120        assert_eq!(
1121            format_sexp(
1122                r"(source_file (ERROR (UNEXPECTED 'f') (UNEXPECTED '+')))",
1123                0
1124            ),
1125            r"
1126(source_file
1127  (ERROR
1128    (UNEXPECTED 'f')
1129    (UNEXPECTED '+')))
1130"
1131            .trim()
1132        );
1133    }
1134
1135    #[test]
1136    fn test_write_tests_to_buffer() {
1137        let mut buffer = Vec::new();
1138        let corrected_entries = vec![
1139            (
1140                "title 1".to_string(),
1141                "input 1".to_string(),
1142                "output 1".to_string(),
1143                String::new(),
1144                80,
1145                80,
1146            ),
1147            (
1148                "title 2".to_string(),
1149                "input 2".to_string(),
1150                "output 2".to_string(),
1151                String::new(),
1152                80,
1153                80,
1154            ),
1155        ];
1156        write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
1157        assert_eq!(
1158            String::from_utf8(buffer).unwrap(),
1159            r"
1160================================================================================
1161title 1
1162================================================================================
1163input 1
1164--------------------------------------------------------------------------------
1165
1166output 1
1167
1168================================================================================
1169title 2
1170================================================================================
1171input 2
1172--------------------------------------------------------------------------------
1173
1174output 2
1175"
1176            .trim_start()
1177            .to_string()
1178        );
1179    }
1180
1181    #[test]
1182    fn test_parse_test_content_with_comments_in_sexp() {
1183        let entry = parse_test_content(
1184            "the-filename".to_string(),
1185            r#"
1186==================
1187sexp with comment
1188==================
1189code
1190---
1191
1192; Line start comment
1193(a (b))
1194
1195==================
1196sexp with comment between
1197==================
1198code
1199---
1200
1201; Line start comment
1202(a
1203; ignore this
1204    (b)
1205    ; also ignore this
1206)
1207
1208=========================
1209sexp with ';'
1210=========================
1211code
1212---
1213
1214(MISSING ";")
1215        "#
1216            .trim(),
1217            None,
1218        );
1219
1220        assert_eq!(
1221            entry,
1222            TestEntry::Group {
1223                name: "the-filename".to_string(),
1224                children: vec![
1225                    TestEntry::Example {
1226                        name: "sexp with comment".to_string(),
1227                        input: b"code".to_vec(),
1228                        output: "(a (b))".to_string(),
1229                        header_delim_len: 18,
1230                        divider_delim_len: 3,
1231                        has_fields: false,
1232                        attributes_str: String::new(),
1233                        attributes: TestAttributes::default(),
1234                        file_name: None,
1235                    },
1236                    TestEntry::Example {
1237                        name: "sexp with comment between".to_string(),
1238                        input: b"code".to_vec(),
1239                        output: "(a (b))".to_string(),
1240                        header_delim_len: 18,
1241                        divider_delim_len: 3,
1242                        has_fields: false,
1243                        attributes_str: String::new(),
1244                        attributes: TestAttributes::default(),
1245                        file_name: None,
1246                    },
1247                    TestEntry::Example {
1248                        name: "sexp with ';'".to_string(),
1249                        input: b"code".to_vec(),
1250                        output: "(MISSING \";\")".to_string(),
1251                        header_delim_len: 25,
1252                        divider_delim_len: 3,
1253                        has_fields: false,
1254                        attributes_str: String::new(),
1255                        attributes: TestAttributes::default(),
1256                        file_name: None,
1257                    }
1258                ],
1259                file_path: None,
1260            }
1261        );
1262    }
1263
1264    #[test]
1265    fn test_parse_test_content_with_suffixes() {
1266        let entry = parse_test_content(
1267            "the-filename".to_string(),
1268            r"
1269==================asdf\()[]|{}*+?^$.-
1270First test
1271==================asdf\()[]|{}*+?^$.-
1272
1273=========================
1274NOT A TEST HEADER
1275=========================
1276-------------------------
1277
1278---asdf\()[]|{}*+?^$.-
1279
1280(a)
1281
1282==================asdf\()[]|{}*+?^$.-
1283Second test
1284==================asdf\()[]|{}*+?^$.-
1285
1286=========================
1287NOT A TEST HEADER
1288=========================
1289-------------------------
1290
1291---asdf\()[]|{}*+?^$.-
1292
1293(a)
1294
1295=========================asdf\()[]|{}*+?^$.-
1296Test name with = symbol
1297=========================asdf\()[]|{}*+?^$.-
1298
1299=========================
1300NOT A TEST HEADER
1301=========================
1302-------------------------
1303
1304---asdf\()[]|{}*+?^$.-
1305
1306(a)
1307
1308==============================asdf\()[]|{}*+?^$.-
1309Test containing equals
1310==============================asdf\()[]|{}*+?^$.-
1311
1312===
1313
1314------------------------------asdf\()[]|{}*+?^$.-
1315
1316(a)
1317
1318==============================asdf\()[]|{}*+?^$.-
1319Subsequent test containing equals
1320==============================asdf\()[]|{}*+?^$.-
1321
1322===
1323
1324------------------------------asdf\()[]|{}*+?^$.-
1325
1326(a)
1327"
1328            .trim(),
1329            None,
1330        );
1331
1332        let expected_input = b"\n=========================\n\
1333            NOT A TEST HEADER\n\
1334            =========================\n\
1335            -------------------------\n"
1336            .to_vec();
1337        pretty_assertions::assert_eq!(
1338            entry,
1339            TestEntry::Group {
1340                name: "the-filename".to_string(),
1341                children: vec![
1342                    TestEntry::Example {
1343                        name: "First test".to_string(),
1344                        input: expected_input.clone(),
1345                        output: "(a)".to_string(),
1346                        header_delim_len: 18,
1347                        divider_delim_len: 3,
1348                        has_fields: false,
1349                        attributes_str: String::new(),
1350                        attributes: TestAttributes::default(),
1351                        file_name: None,
1352                    },
1353                    TestEntry::Example {
1354                        name: "Second test".to_string(),
1355                        input: expected_input.clone(),
1356                        output: "(a)".to_string(),
1357                        header_delim_len: 18,
1358                        divider_delim_len: 3,
1359                        has_fields: false,
1360                        attributes_str: String::new(),
1361                        attributes: TestAttributes::default(),
1362                        file_name: None,
1363                    },
1364                    TestEntry::Example {
1365                        name: "Test name with = symbol".to_string(),
1366                        input: expected_input,
1367                        output: "(a)".to_string(),
1368                        header_delim_len: 25,
1369                        divider_delim_len: 3,
1370                        has_fields: false,
1371                        attributes_str: String::new(),
1372                        attributes: TestAttributes::default(),
1373                        file_name: None,
1374                    },
1375                    TestEntry::Example {
1376                        name: "Test containing equals".to_string(),
1377                        input: "\n===\n".into(),
1378                        output: "(a)".into(),
1379                        header_delim_len: 30,
1380                        divider_delim_len: 30,
1381                        has_fields: false,
1382                        attributes_str: String::new(),
1383                        attributes: TestAttributes::default(),
1384                        file_name: None,
1385                    },
1386                    TestEntry::Example {
1387                        name: "Subsequent test containing equals".to_string(),
1388                        input: "\n===\n".into(),
1389                        output: "(a)".into(),
1390                        header_delim_len: 30,
1391                        divider_delim_len: 30,
1392                        has_fields: false,
1393                        attributes_str: String::new(),
1394                        attributes: TestAttributes::default(),
1395                        file_name: None,
1396                    }
1397                ],
1398                file_path: None,
1399            }
1400        );
1401    }
1402
1403    #[test]
1404    fn test_parse_test_content_with_newlines_in_test_names() {
1405        let entry = parse_test_content(
1406            "the-filename".to_string(),
1407            r"
1408===============
1409name
1410with
1411newlines
1412===============
1413a
1414---
1415(b)
1416
1417====================
1418name with === signs
1419====================
1420code with ----
1421---
1422(d)
1423",
1424            None,
1425        );
1426
1427        assert_eq!(
1428            entry,
1429            TestEntry::Group {
1430                name: "the-filename".to_string(),
1431                file_path: None,
1432                children: vec![
1433                    TestEntry::Example {
1434                        name: "name\nwith\nnewlines".to_string(),
1435                        input: b"a".to_vec(),
1436                        output: "(b)".to_string(),
1437                        header_delim_len: 15,
1438                        divider_delim_len: 3,
1439                        has_fields: false,
1440                        attributes_str: String::new(),
1441                        attributes: TestAttributes::default(),
1442                        file_name: None,
1443                    },
1444                    TestEntry::Example {
1445                        name: "name with === signs".to_string(),
1446                        input: b"code with ----".to_vec(),
1447                        output: "(d)".to_string(),
1448                        header_delim_len: 20,
1449                        divider_delim_len: 3,
1450                        has_fields: false,
1451                        attributes_str: String::new(),
1452                        attributes: TestAttributes::default(),
1453                        file_name: None,
1454                    }
1455                ]
1456            }
1457        );
1458    }
1459
1460    #[test]
1461    fn test_parse_test_with_markers() {
1462        // do one with :skip, we should not see it in the entry output
1463
1464        let entry = parse_test_content(
1465            "the-filename".to_string(),
1466            r"
1467=====================
1468Test with skip marker
1469:skip
1470=====================
1471a
1472---
1473(b)
1474",
1475            None,
1476        );
1477
1478        assert_eq!(
1479            entry,
1480            TestEntry::Group {
1481                name: "the-filename".to_string(),
1482                file_path: None,
1483                children: vec![TestEntry::Example {
1484                    name: "Test with skip marker".to_string(),
1485                    input: b"a".to_vec(),
1486                    output: "(b)".to_string(),
1487                    header_delim_len: 21,
1488                    divider_delim_len: 3,
1489                    has_fields: false,
1490                    attributes_str: ":skip".to_string(),
1491                    attributes: TestAttributes {
1492                        skip: true,
1493                        platform: true,
1494                        fail_fast: false,
1495                        error: false,
1496                        languages: vec!["".into()]
1497                    },
1498                    file_name: None,
1499                }]
1500            }
1501        );
1502
1503        let entry = parse_test_content(
1504            "the-filename".to_string(),
1505            &format!(
1506                r"
1507=========================
1508Test with platform marker
1509:platform({})
1510:fail-fast
1511=========================
1512a
1513---
1514(b)
1515
1516=============================
1517Test with bad platform marker
1518:platform({})
1519
1520:language(foo)
1521=============================
1522a
1523---
1524(b)
1525",
1526                std::env::consts::OS,
1527                if std::env::consts::OS == "linux" {
1528                    "macos"
1529                } else {
1530                    "linux"
1531                }
1532            ),
1533            None,
1534        );
1535
1536        assert_eq!(
1537            entry,
1538            TestEntry::Group {
1539                name: "the-filename".to_string(),
1540                file_path: None,
1541                children: vec![
1542                    TestEntry::Example {
1543                        name: "Test with platform marker".to_string(),
1544                        input: b"a".to_vec(),
1545                        output: "(b)".to_string(),
1546                        header_delim_len: 25,
1547                        divider_delim_len: 3,
1548                        has_fields: false,
1549                        attributes_str: format!(":platform({})\n:fail-fast", std::env::consts::OS),
1550                        attributes: TestAttributes {
1551                            skip: false,
1552                            platform: true,
1553                            fail_fast: true,
1554                            error: false,
1555                            languages: vec!["".into()]
1556                        },
1557                        file_name: None,
1558                    },
1559                    TestEntry::Example {
1560                        name: "Test with bad platform marker".to_string(),
1561                        input: b"a".to_vec(),
1562                        output: "(b)".to_string(),
1563                        header_delim_len: 29,
1564                        divider_delim_len: 3,
1565                        has_fields: false,
1566                        attributes_str: if std::env::consts::OS == "linux" {
1567                            ":platform(macos)\n\n:language(foo)".to_string()
1568                        } else {
1569                            ":platform(linux)\n\n:language(foo)".to_string()
1570                        },
1571                        attributes: TestAttributes {
1572                            skip: false,
1573                            platform: false,
1574                            fail_fast: false,
1575                            error: false,
1576                            languages: vec!["foo".into()]
1577                        },
1578                        file_name: None,
1579                    }
1580                ]
1581            }
1582        );
1583    }
1584}