sqruff_lib/rules/references/
rf02.rs1use ahash::AHashMap;
2use itertools::Itertools;
3use regex::Regex;
4use smol_str::SmolStr;
5use sqruff_lib_core::dialects::common::{AliasInfo, ColumnAliasInfo};
6use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
7use sqruff_lib_core::parser::segments::object_reference::ObjectReferenceSegment;
8
9use crate::core::config::Value;
10use crate::core::rules::base::{CloneRule, ErasedRule, LintResult, Rule, RuleGroups};
11use crate::core::rules::context::RuleContext;
12use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
13use crate::rules::aliasing::al04::RuleAL04;
14
15#[derive(Clone, Debug)]
16pub struct RuleRF02 {
17 base: RuleAL04<(Vec<String>, Vec<Regex>)>,
18}
19
20impl Default for RuleRF02 {
21 fn default() -> Self {
22 Self {
23 base: RuleAL04 {
24 lint_references_and_aliases: Self::lint_references_and_aliases,
25 context: (Vec::new(), Vec::new()),
26 },
27 }
28 }
29}
30
31impl Rule for RuleRF02 {
32 fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
33 let ignore_words = config["ignore_words"]
34 .map(|it| {
35 it.as_array()
36 .unwrap()
37 .iter()
38 .map(|it| it.as_string().unwrap().to_lowercase())
39 .collect()
40 })
41 .unwrap_or_default();
42
43 let ignore_words_regex = config["ignore_words_regex"]
44 .map(|it| {
45 it.as_array()
46 .unwrap()
47 .iter()
48 .map(|it| Regex::new(it.as_string().unwrap()).unwrap())
49 .collect()
50 })
51 .unwrap_or_default();
52
53 Ok(Self {
54 base: RuleAL04 {
55 lint_references_and_aliases: Self::lint_references_and_aliases,
56 context: (ignore_words, ignore_words_regex),
57 },
58 }
59 .erased())
60 }
61
62 fn name(&self) -> &'static str {
63 "references.qualification"
64 }
65
66 fn description(&self) -> &'static str {
67 "References should be qualified if select has more than one referenced table/view."
68 }
69
70 fn long_description(&self) -> &'static str {
71 r"
72**Anti-pattern**
73
74In this example, the reference `vee` has not been declared, and the variables `a` and `b` are potentially ambiguous.
75
76```sql
77SELECT a, b
78FROM foo
79LEFT JOIN vee ON vee.a = foo.a
80```
81
82**Best practice**
83
84Add the references.
85
86```sql
87SELECT foo.a, vee.b
88FROM foo
89LEFT JOIN vee ON vee.a = foo.a
90```
91"
92 }
93
94 fn groups(&self) -> &'static [RuleGroups] {
95 &[RuleGroups::All, RuleGroups::References]
96 }
97
98 fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
99 self.base.eval(context)
100 }
101
102 fn crawl_behaviour(&self) -> Crawler {
103 SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::SelectStatement]) }).into()
104 }
105}
106
107impl RuleRF02 {
108 fn lint_references_and_aliases(
109 table_aliases: Vec<AliasInfo>,
110 standalone_aliases: Vec<SmolStr>,
111 references: Vec<ObjectReferenceSegment>,
112 col_aliases: Vec<ColumnAliasInfo>,
113 using_cols: Vec<SmolStr>,
114 context: &(Vec<String>, Vec<Regex>),
115 ) -> Vec<LintResult> {
116 if table_aliases.len() <= 1 {
117 return Vec::new();
118 }
119
120 let mut violation_buff = Vec::new();
121 for r in references {
122 if context.0.contains(&r.0.raw().to_lowercase()) {
123 continue;
124 }
125
126 if context
127 .1
128 .iter()
129 .any(|regex| regex.is_match(r.0.raw().as_ref()))
130 {
131 continue;
132 }
133
134 let this_ref_type = r.qualification();
135 let col_alias_names = col_aliases
136 .iter()
137 .filter_map(|c| {
138 if !c.column_reference_segments.contains(&r.0) {
139 Some(c.alias_identifier_name.as_str())
140 } else {
141 None
142 }
143 })
144 .collect_vec();
145
146 if this_ref_type == "unqualified"
147 && !col_alias_names.contains(&r.0.raw().as_ref())
148 && !using_cols.contains(r.0.raw())
149 && !standalone_aliases.contains(r.0.raw())
150 {
151 violation_buff.push(LintResult::new(
152 r.0.clone().into(),
153 Vec::new(),
154 format!(
155 "Unqualified reference {} found in select with more than one referenced \
156 table/view.",
157 r.0.raw()
158 )
159 .into(),
160 None,
161 ));
162 }
163 }
164
165 violation_buff
166 }
167}