1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{BufRead, BufReader};
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::linted_dir::LintedDir;
8use crate::cli::formatters::Formatter;
9use crate::core::config::FluffConfig;
10use crate::core::linter::common::{ParsedString, RenderedFile};
11use crate::core::linter::linted_file::LintedFile;
12use crate::core::linter::linting_result::LintingResult;
13use crate::core::rules::base::{ErasedRule, LintPhase, RulePack};
14use crate::core::rules::noqa::IgnoreMask;
15use crate::rules::get_ruleset;
16use crate::templaters::raw::RawTemplater;
17use crate::templaters::{TEMPLATERS, Templater};
18use ahash::{AHashMap, AHashSet};
19use itertools::Itertools;
20use rayon::iter::{IntoParallelRefIterator as _, ParallelIterator as _};
21use smol_str::{SmolStr, ToSmolStr};
22use sqruff_lib_core::dialects::base::Dialect;
23use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
24use sqruff_lib_core::errors::{
25 SQLBaseError, SQLFluffUserError, SQLLexError, SQLLintError, SQLParseError, SqlError,
26};
27use sqruff_lib_core::helpers;
28use sqruff_lib_core::lint_fix::LintFix;
29use sqruff_lib_core::linter::compute_anchor_edit_info;
30use sqruff_lib_core::parser::lexer::StringOrTemplate;
31use sqruff_lib_core::parser::parser::Parser;
32use sqruff_lib_core::parser::segments::base::{ErasedSegment, Tables};
33use sqruff_lib_core::parser::segments::fix::SourceFix;
34use sqruff_lib_core::templaters::base::TemplatedFile;
35use walkdir::WalkDir;
36
/// The core linter: renders (templates), lexes, parses and lints SQL input
/// according to a [`FluffConfig`], optionally applying rule fixes.
pub struct Linter {
    // Effective configuration driving dialect, rule and templater selection.
    config: FluffConfig,
    // Optional sink for progress / violation reporting.
    formatter: Option<Arc<dyn Formatter>>,
    // Templater used to render raw SQL before lexing.
    templater: &'static dyn Templater,
    // Lazily-initialised rule set; reset when the config is mutated
    // (see `config_mut`).
    rules: OnceLock<Vec<ErasedRule>>,

    // When true, unparsable sections are surfaced as parse errors.
    include_parse_errors: bool,
}
46
47impl Linter {
48 pub fn new(
49 config: FluffConfig,
50 formatter: Option<Arc<dyn Formatter>>,
51 templater: Option<&'static dyn Templater>,
52 include_parse_errors: bool,
53 ) -> Linter {
54 let templater: &'static dyn Templater = match templater {
55 Some(templater) => templater,
56 None => Linter::get_templater(&config),
57 };
58 Linter {
59 config,
60 formatter,
61 templater,
62 rules: OnceLock::new(),
63 include_parse_errors,
64 }
65 }
66
67 pub fn get_templater(config: &FluffConfig) -> &'static dyn Templater {
68 let templater_name = config.get("templater", "core").as_string();
69 match templater_name {
70 Some(name) => match TEMPLATERS.into_iter().find(|t| t.name() == name) {
71 Some(t) => t,
72 None => panic!("Unknown templater: {}", name),
73 },
74 None => &RawTemplater,
75 }
76 }
77
78 pub fn lint_string_wrapped(
80 &mut self,
81 sql: &str,
82 filename: Option<String>,
83 fix: bool,
84 ) -> LintingResult {
85 let filename = filename.unwrap_or_else(|| "<string input>".into());
86
87 let linted_path = LintedDir::new(filename.clone());
88 linted_path.add(self.lint_string(sql, Some(filename), fix));
89
90 let mut result = LintingResult::new();
91 result.add(linted_path);
92 result.stop_timer();
93 result
94 }
95
96 pub fn parse_string(
98 &self,
99 tables: &Tables,
100 sql: &str,
101 filename: Option<String>,
102 ) -> Result<ParsedString, SQLFluffUserError> {
103 let f_name = filename.unwrap_or_else(|| "<string>".to_string());
104
105 let mut violations: Vec<Box<dyn SqlError>> = vec![];
106
107 self.config.process_raw_file_for_config(sql);
109 let rendered = self.render_string(sql, f_name.clone(), &self.config)?;
110
111 for violation in &rendered.templater_violations {
112 violations.push(Box::new(violation.clone()));
113 }
114
115 if let Some(formatter) = &self.formatter {
117 formatter.dispatch_parse_header(f_name.clone());
118 }
119
120 Ok(self.parse_rendered(tables, rendered))
121 }
122
    /// Lint (and optionally fix) a single SQL string, returning the
    /// [`LintedFile`].
    ///
    /// # Panics
    /// Panics if rendering fails (e.g. unspecified dialect or templater
    /// error), since this entry point has no error channel.
    pub fn lint_string(&self, sql: &str, filename: Option<String>, fix: bool) -> LintedFile {
        let tables = Tables::default();
        let parsed = self.parse_string(&tables, sql, filename).unwrap();

        self.lint_parsed(&tables, parsed, fix)
    }
131
    /// Lint (and optionally fix) a set of files and/or directories in
    /// parallel.
    ///
    /// `ignorer` is a predicate returning `true` for paths that should be
    /// skipped. An empty `paths` list means "lint the current directory".
    pub fn lint_paths(
        &mut self,
        mut paths: Vec<PathBuf>,
        fix: bool,
        ignorer: &(dyn Fn(&Path) -> bool + Send + Sync),
    ) -> LintingResult {
        let mut result = LintingResult::new();

        if paths.is_empty() {
            paths.push(std::env::current_dir().unwrap());
        }

        // Flat list of every file to lint, plus a map from file path back to
        // the index of its `LintedDir` in `result`, so results produced in
        // parallel can be routed to the right bucket afterwards.
        let mut expanded_paths = Vec::new();
        let mut expanded_path_to_linted_dir = AHashMap::default();

        for path in paths {
            let linted_dir = LintedDir::new(path.display().to_string());
            let key = result.add(linted_dir);

            // A file is used as-is; a directory is expanded to the SQL files
            // discovered beneath it.
            let paths = if path.is_file() {
                vec![path.to_string_lossy().to_string()]
            } else {
                self.paths_from_path(path, None, None, None, None)
            };

            expanded_paths.reserve(paths.len());
            expanded_path_to_linted_dir.reserve(paths.len());

            for path in paths {
                expanded_paths.push(path.clone());
                expanded_path_to_linted_dir.insert(path, key);
            }
        }

        // Render + lint each file in parallel; `LintedDir::add` takes `&self`
        // (interior mutability), so collecting into `result` is safe here.
        expanded_paths
            .par_iter()
            .filter(|path| !ignorer(Path::new(path)))
            .map(|path| {
                let rendered = self.render_file(path.clone());
                self.lint_rendered(rendered, fix)
            })
            .for_each(|linted_file| {
                let path = expanded_path_to_linted_dir[&linted_file.path];
                result.paths[path].add(linted_file);
            });

        result
    }
182
183 pub fn get_rulepack(&self) -> RulePack {
184 let rs = get_ruleset();
185 rs.get_rulepack(&self.config)
186 }
187
188 pub fn render_file(&self, fname: String) -> RenderedFile {
189 let in_str = std::fs::read_to_string(&fname).unwrap();
190 self.render_string(&in_str, fname, &self.config).unwrap()
191 }
192
    /// Parse an already-rendered file and lint (and optionally fix) it.
    pub fn lint_rendered(&self, rendered: RenderedFile, fix: bool) -> LintedFile {
        let tables = Tables::default();
        let parsed = self.parse_rendered(&tables, rendered);
        self.lint_parsed(&tables, parsed, fix)
    }
198
    /// Lint an already-parsed string, optionally applying fixes, and
    /// assemble the final [`LintedFile`] (violations filtered through the
    /// inline-`noqa` ignore mask).
    pub fn lint_parsed(
        &self,
        tables: &Tables,
        parsed_string: ParsedString,
        fix: bool,
    ) -> LintedFile {
        let mut violations = parsed_string.violations;

        // Only lint when parsing produced a tree; otherwise there are no
        // patches, no ignore mask and no rule errors to add.
        let (patches, ignore_mask, initial_linting_errors) =
            parsed_string
                .tree
                .map_or((Vec::new(), None, Vec::new()), |erased_segment| {
                    let (tree, ignore_mask, initial_linting_errors) = self.lint_fix_parsed(
                        tables,
                        erased_segment,
                        &parsed_string.templated_file,
                        fix,
                    );
                    // Derive source patches from the (possibly fixed) tree.
                    let patches = tree.iter_patches(&parsed_string.templated_file);
                    (patches, ignore_mask, initial_linting_errors)
                });
        violations.extend(initial_linting_errors.into_iter().map_into());

        // Drop any violations suppressed by inline `noqa` comments.
        let violations = violations
            .into_iter()
            .filter(|violation| {
                ignore_mask
                    .as_ref()
                    .is_none_or(|ignore_mask| !ignore_mask.is_masked(violation))
            })
            .collect();

        let linted_file = LintedFile {
            path: parsed_string.filename,
            patches,
            templated_file: parsed_string.templated_file,
            violations,
            ignore_mask,
        };

        if let Some(formatter) = &self.formatter {
            formatter.dispatch_file_violations(&linted_file, false);
        }

        linted_file
    }
247
    /// Run the rules over a parsed tree, optionally applying their fixes.
    ///
    /// Returns the (possibly rewritten) tree, the `noqa` ignore mask derived
    /// from inline comments, and the lint errors observed on the first pass.
    /// When fixing, rules are re-run (up to a loop limit) until the tree
    /// stops changing or a previously-seen tree state recurs.
    pub fn lint_fix_parsed(
        &self,
        tables: &Tables,
        mut tree: ErasedSegment,
        templated_file: &TemplatedFile,
        fix: bool,
    ) -> (ErasedSegment, Option<IgnoreMask>, Vec<SQLLintError>) {
        // Scratch storage for a per-phase rule subset; declared here so a
        // slice borrowed from it lives long enough below.
        let mut tmp;
        let mut initial_linting_errors = Vec::new();
        // Fixing runs a post-phase after the main phase; plain linting does
        // only the main phase.
        let phases: &[_] = if fix {
            &[LintPhase::Main, LintPhase::Post]
        } else {
            &[LintPhase::Main]
        };
        // Every (raw source, source fixes) state seen so far, used to detect
        // fix cycles that would otherwise loop forever.
        let mut previous_versions: AHashSet<(SmolStr, Vec<SourceFix>)> =
            [(tree.raw().to_smolstr(), vec![])].into_iter().collect();

        // When fixing, the main phase may iterate up to this many times.
        let loop_limit = if fix { 10 } else { 1 };
        // Build the noqa ignore mask up front (unless disabled by config);
        // mask-parsing errors become lint errors themselves.
        let (ignore_mask, violations): (Option<IgnoreMask>, Vec<SQLBaseError>) = {
            let disable_noqa = self
                .config
                .get("disable_noqa", "core")
                .as_bool()
                .unwrap_or(false);
            if disable_noqa {
                (None, Vec::new())
            } else {
                let (ignore_mask, errors) = IgnoreMask::from_tree(&tree);
                (Some(ignore_mask), errors)
            }
        };
        initial_linting_errors.extend(violations.into_iter().map_into());

        for phase in phases {
            // With multiple phases, restrict to the rules of this phase;
            // with a single phase, all rules apply.
            let mut rules_this_phase = if phases.len() > 1 {
                tmp = self
                    .rules()
                    .iter()
                    .filter(|rule| rule.lint_phase() == *phase)
                    .cloned()
                    .collect_vec();

                &tmp
            } else {
                self.rules()
            };

            // Main phase loops up to `loop_limit`; post phase at most twice.
            for loop_ in 0..(if *phase == LintPhase::Main {
                loop_limit
            } else {
                2
            }) {
                let is_first_linter_pass = *phase == phases[0] && loop_ == 0;
                let mut changed = false;

                // The very first pass always runs every rule so that all
                // violations are observed at least once.
                if is_first_linter_pass {
                    rules_this_phase = self.rules();
                }

                // NOTE(review): `last_fixes` is never reassigned, so the
                // "same fix would re-cause the error" guard below can never
                // trigger (fixes are only compared when non-empty). Confirm
                // whether it was meant to be updated after each application.
                let last_fixes = Vec::new();
                for rule in rules_this_phase {
                    // After the first pass, only fix-compatible rules are
                    // worth re-running while fixing.
                    if fix && !is_first_linter_pass && !rule.is_fix_compatible() {
                        continue;
                    }

                    let linting_errors = rule.crawl(
                        tables,
                        &self.config.dialect,
                        templated_file,
                        tree.clone(),
                        &self.config,
                    );
                    // Filter out errors masked by inline noqa comments.
                    let linting_errors: Vec<SQLLintError> = linting_errors
                        .into_iter()
                        .filter(|error| {
                            !ignore_mask
                                .clone()
                                .is_some_and(|ignore_mask: IgnoreMask| ignore_mask.is_masked(error))
                        })
                        .collect();

                    // Only first-pass errors are reported; later passes just
                    // drive fixing towards a stable tree.
                    if is_first_linter_pass {
                        initial_linting_errors.extend(linting_errors.clone());
                    }

                    let fixes: Vec<LintFix> = linting_errors
                        .into_iter()
                        .flat_map(|linting_error| linting_error.clone().fixes.clone())
                        .collect();

                    if fix && !fixes.is_empty() {
                        if fixes == last_fixes {
                            eprintln!(
                                "One fix for {} not applied, it would re-cause the same error.",
                                rule.code()
                            );
                            continue;
                        }

                        let mut anchor_info = compute_anchor_edit_info(fixes.into_iter());
                        let (new_tree, _, _, _valid) = tree.apply_fixes(&mut anchor_info);

                        // NOTE(review): dead branch — presumably intended to
                        // be gated on `!_valid` once apply_fixes validity
                        // checking is trusted. Confirm before enabling.
                        if false {
                            println!(
                                "Fixes for {rule:?} not applied, as it would result in an \
                                 unparsable file. Please report this as a bug with a minimal \
                                 query which demonstrates this warning.",
                            );
                        }

                        let loop_check_tuple =
                            (new_tree.raw().to_smolstr(), new_tree.get_source_fixes());

                        // Only accept the new tree if this exact state has
                        // not been produced before (cycle protection).
                        if previous_versions.insert(loop_check_tuple) {
                            tree = new_tree;
                            changed = true;
                            continue;
                        }
                    }
                }

                // A fixing pass that changed nothing has converged.
                if fix && !changed {
                    break;
                }
            }
        }

        (tree, ignore_mask, initial_linting_errors)
    }
389
390 pub fn render_string(
392 &self,
393 sql: &str,
394 filename: String,
395 config: &FluffConfig,
396 ) -> Result<RenderedFile, SQLFluffUserError> {
397 let sql = Self::normalise_newlines(sql);
398
399 if let Some(error) = config.verify_dialect_specified() {
400 return Err(error);
401 }
402
403 let templater_violations = vec![];
404 match self
405 .templater
406 .process(sql.as_ref(), filename.as_str(), config, &self.formatter)
407 {
408 Ok(templated_file) => Ok(RenderedFile {
409 templated_file,
410 templater_violations,
411 filename,
412 source_str: sql.to_string(),
413 }),
414 Err(err) => Err(SQLFluffUserError::new(format!(
415 "Failed to template file {} with error {:?}",
416 filename, err
417 ))),
418 }
419 }
420
    /// Lex and parse an already-rendered file into a [`ParsedString`].
    pub fn parse_rendered(&self, tables: &Tables, rendered: RenderedFile) -> ParsedString {
        let violations = rendered.templater_violations.clone();
        // Surfacing templater violations is not implemented yet; fail loudly
        // rather than dropping them silently.
        if !violations.is_empty() {
            unimplemented!()
        }

        let mut violations = Vec::new();
        // Only lex when templating produced content to lex.
        let tokens = if rendered.templated_file.is_templated() {
            let (t, lvs) = Self::lex_templated_file(
                tables,
                rendered.templated_file.clone(),
                &self.config.dialect,
            );
            if !lvs.is_empty() {
                // Lexing violations are likewise not surfaced yet.
                unimplemented!("violations.extend(lvs);")
            }
            t
        } else {
            None
        };

        // Parse the token stream (if any); parse errors become violations.
        let parsed: Option<ErasedSegment>;
        if let Some(token_list) = tokens {
            let (p, pvs) = Self::parse_tokens(
                tables,
                &token_list,
                &self.config,
                Some(rendered.filename.to_string()),
                self.include_parse_errors,
            );
            parsed = p;
            violations.extend(pvs.into_iter().map_into());
        } else {
            parsed = None;
        };

        ParsedString {
            tree: parsed,
            violations,
            templated_file: rendered.templated_file,
            filename: rendered.filename,
            source_str: rendered.source_str,
        }
    }
466
467 fn parse_tokens(
468 tables: &Tables,
469 tokens: &[ErasedSegment],
470 config: &FluffConfig,
471 filename: Option<String>,
472 include_parse_errors: bool,
473 ) -> (Option<ErasedSegment>, Vec<SQLParseError>) {
474 let parser: Parser = config.into();
475 let mut violations: Vec<SQLParseError> = Vec::new();
476
477 let parsed = match parser.parse(tables, tokens, filename) {
478 Ok(parsed) => parsed,
479 Err(error) => {
480 violations.push(error);
481 None
482 }
483 };
484
485 if include_parse_errors {
486 if let Some(parsed) = &parsed {
487 let unparsables = parsed.recursive_crawl(
488 &SyntaxSet::single(SyntaxKind::Unparsable),
489 true,
490 &SyntaxSet::EMPTY,
491 true,
492 );
493
494 violations.extend(unparsables.into_iter().map(|segment| SQLParseError {
495 description: "Unparsable section".into(),
496 segment: segment.into(),
497 }));
498 }
499 };
500
501 (parsed, violations)
502 }
503
    /// Lex a templated file into a token stream.
    ///
    /// Returns `None` for the tokens when the file lexed to nothing, plus
    /// any lexing violations produced along the way.
    pub fn lex_templated_file(
        tables: &Tables,
        templated_file: TemplatedFile,
        dialect: &Dialect,
    ) -> (Option<Vec<ErasedSegment>>, Vec<SQLLexError>) {
        let mut violations: Vec<SQLLexError> = vec![];
        let lexer = dialect.lexer();
        let result = lexer.lex(tables, StringOrTemplate::Template(templated_file));
        match result {
            Err(_err) => {
                // Converting lexer-level errors into violations is not
                // implemented yet.
                unimplemented!("violations.push(_err)");
            }
            Ok((tokens, lex_vs)) => {
                violations.extend(lex_vs);

                // An empty token stream means there is nothing to parse.
                if tokens.is_empty() {
                    return (None, violations);
                }

                (tokens.into(), violations)
            }
        }
    }
532
533 fn normalise_newlines(string: &str) -> Cow<str> {
535 lazy_regex::regex!("\r\n|\r").replace_all(string, "\n")
536 }
537
    /// Expand a path into the sorted, de-duplicated list of SQL files at or
    /// beneath it, filtered by the configured SQL file extensions.
    ///
    /// NOTE(review): `.sqlfluffignore` files are read into the `ignores` map
    /// below, but that map is never consulted when building the result —
    /// ignore support appears incomplete. Confirm whether this is intended.
    fn paths_from_path(
        &self,
        path: PathBuf,
        ignore_file_name: Option<String>,
        ignore_non_existent_files: Option<bool>,
        ignore_files: Option<bool>,
        working_path: Option<String>,
    ) -> Vec<String> {
        let ignore_file_name = ignore_file_name.unwrap_or_else(|| String::from(".sqlfluffignore"));
        let ignore_non_existent_files = ignore_non_existent_files.unwrap_or(false);
        let ignore_files = ignore_files.unwrap_or(true);
        let _working_path =
            working_path.unwrap_or_else(|| std::env::current_dir().unwrap().display().to_string());

        // A missing path either vanishes silently or aborts, per the flag.
        let Ok(metadata) = std::fs::metadata(&path) else {
            if ignore_non_existent_files {
                return Vec::new();
            } else {
                panic!(
                    "Specified path does not exist. Check it/they exist(s): {:?}",
                    path
                );
            }
        };

        let is_exact_file = metadata.is_file();

        // Build (dirpath, _, filenames) triples: one entry for an exact
        // file, or one per entry of a recursive directory walk.
        let mut path_walk = if is_exact_file {
            let path = Path::new(&path);
            let dirpath = path.parent().unwrap().to_str().unwrap().to_string();
            let files = vec![path.file_name().unwrap().to_str().unwrap().to_string()];
            vec![(dirpath, None, files)]
        } else {
            WalkDir::new(&path)
                .into_iter()
                .filter_map(Result::ok) .map(|entry| {
                    let dirpath = entry.path().parent().unwrap().to_str().unwrap().to_string();
                    let files = vec![entry.file_name().to_str().unwrap().to_string()];
                    (dirpath, None, files)
                })
                .collect_vec()
        };

        // Placeholder: no out-of-walk ignore files are discovered yet, so
        // the mapping below is currently always empty.
        let ignore_file_paths: Vec<String> = Vec::new();

        let path_walk_ignore_file: Vec<(String, Option<()>, Vec<String>)> = ignore_file_paths
            .iter()
            .map(|ignore_file_path| {
                let ignore_file_path = Path::new(ignore_file_path);

                let dir_name = ignore_file_path
                    .parent()
                    .unwrap()
                    .to_str()
                    .unwrap()
                    .to_string();

                let file_name = vec![
                    ignore_file_path
                        .file_name()
                        .unwrap()
                        .to_str()
                        .unwrap()
                        .to_string(),
                ];

                (dir_name, None, file_name)
            })
            .collect();

        path_walk.extend(path_walk_ignore_file);

        let mut buffer = Vec::new();
        let mut ignores = AHashMap::new();
        let sql_file_exts = self.config.sql_file_exts();

        for (dirpath, _, filenames) in path_walk {
            for fname in filenames {
                let fpath = Path::new(&dirpath).join(&fname);

                // Ignore files are parsed line-by-line into `ignores`, keyed
                // by directory — but see NOTE(review) above: never applied.
                if ignore_files && fname == ignore_file_name {
                    let file = File::open(&fpath).unwrap();
                    let lines = BufReader::new(file).lines();
                    let spec = lines.map_while(Result::ok); ignores.insert(dirpath.clone(), spec.collect::<Vec<String>>());

                    continue;
                }

                // Keep files whose lowercased name ends with a configured
                // extension. NOTE(review): `ext` is not lowercased here, so
                // an upper-case configured extension would never match —
                // confirm extensions are stored lowercased in the config.
                for ext in sql_file_exts {
                    if fname.to_lowercase().ends_with(ext) {
                        buffer.push(fpath.clone());
                    }
                }
            }
        }

        // De-duplicate via normalised paths, then sort for determinism.
        let mut filtered_buffer = AHashSet::new();

        for fpath in buffer {
            let npath = helpers::normalize(&fpath).to_str().unwrap().to_string();
            filtered_buffer.insert(npath);
        }

        let mut files = filtered_buffer.into_iter().collect_vec();
        files.sort();
        files
    }
672
    /// Read-only access to the linter's configuration.
    pub fn config(&self) -> &FluffConfig {
        &self.config
    }

    /// Mutable access to the configuration.
    ///
    /// Resets the cached rule set, since rule selection depends on config.
    pub fn config_mut(&mut self) -> &mut FluffConfig {
        self.rules = OnceLock::new();
        &mut self.config
    }

    /// The active rules, resolved lazily from the config and cached.
    pub fn rules(&self) -> &[ErasedRule] {
        self.rules.get_or_init(|| self.get_rulepack().rules)
    }

    /// Shared access to the configured formatter, if any.
    pub fn formatter(&self) -> Option<&Arc<dyn Formatter>> {
        self.formatter.as_ref()
    }

    /// Mutable access to the configured formatter, if any.
    pub fn formatter_mut(&mut self) -> Option<&mut Arc<dyn Formatter>> {
        self.formatter.as_mut()
    }
693}
694
#[cfg(test)]
mod tests {
    use sqruff_lib_core::parser::segments::base::Tables;

    use crate::core::config::FluffConfig;
    use crate::core::linter::core::Linter;

    // Replace path separators with dots so expectations are OS-independent.
    fn normalise_paths(paths: Vec<String>) -> Vec<String> {
        paths
            .into_iter()
            .map(|path| path.replace(['/', '\\'], "."))
            .collect()
    }

    // A directory path expands to exactly the .sql files beneath it, sorted.
    #[test]
    fn test_linter_path_from_paths_dir() {
        let lntr = Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        ); let paths = lntr.paths_from_path("test/fixtures/lexer".into(), None, None, None, None);
        let expected = vec![
            "test.fixtures.lexer.basic.sql",
            "test.fixtures.lexer.block_comment.sql",
            "test.fixtures.lexer.inline_comment.sql",
        ];
        assert_eq!(normalise_paths(paths), expected);
    }

    // Default extensions match .sql case-insensitively and exclude .txt.
    #[test]
    fn test_linter_path_from_paths_default() {
        let lntr = Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        ); let paths = normalise_paths(lntr.paths_from_path(
            "test/fixtures/linter".into(),
            None,
            None,
            None,
            None,
        ));
        assert!(paths.contains(&"test.fixtures.linter.passing.sql".to_string()));
        assert!(paths.contains(&"test.fixtures.linter.passing_cap_extension.SQL".to_string()));
        assert!(!paths.contains(&"test.fixtures.linter.discovery_file.txt".to_string()));
    }

    // Overriding sql_file_exts flips which files are discovered.
    #[test]
    fn test_linter_path_from_paths_exts() {
        let config =
            FluffConfig::new(<_>::default(), None, None).with_sql_file_exts(vec![".txt".into()]);
        let lntr = Linter::new(config, None, None, false); let paths = lntr.paths_from_path("test/fixtures/linter".into(), None, None, None, None);

        let normalized_paths = normalise_paths(paths);

        assert!(!normalized_paths.contains(&"test.fixtures.linter.passing.sql".into()));
        assert!(
            !normalized_paths.contains(&"test.fixtures.linter.passing_cap_extension.SQL".into())
        );
        assert!(normalized_paths.contains(&"test.fixtures.linter.discovery_file.txt".into()));
    }

    // An exact file path is returned as-is (no directory walk).
    #[test]
    fn test_linter_path_from_paths_file() {
        let lntr = Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        ); let paths = lntr.paths_from_path(
            "test/fixtures/linter/indentation_errors.sql".into(),
            None,
            None,
            None,
            None,
        );

        assert_eq!(
            normalise_paths(paths),
            &["test.fixtures.linter.indentation_errors.sql"]
        );
    }

    // Empty input parses cleanly with no violations.
    #[test]
    fn test_linter_empty_file() {
        let linter = Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        );
        let tables = Tables::default();
        let parsed = linter.parse_string(&tables, "", None).unwrap();

        assert!(parsed.violations.is_empty());
    }

    // Smoke test for the parse API on a multi-statement query; ignored
    // until Lexer::lex_templated_file is implemented.
    #[test]
    #[ignore = "The implementation of Lexer::lex_templated_file is required"]
    fn test_advanced_api_methods() {
        let sql = "
        WITH cte AS (
            SELECT * FROM tab_a
        )
        SELECT
            cte.col_a,
            tab_b.col_b
        FROM cte
        INNER JOIN tab_b;
        "
        .to_string();

        let linter = Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        );
        let tables = Tables::default();
        let _parsed = linter.parse_string(&tables, &sql, None).unwrap();
    }

    // CRLF and lone CR are both normalised to LF.
    #[test]
    fn test_normalise_newlines() {
        let in_str = "SELECT\r\n foo\n FROM \r \n\r bar;";
        let out_str = "SELECT\n foo\n FROM \n \n\n bar;";

        assert_eq!(Linter::normalise_newlines(in_str), out_str);
    }
}