sqruff_lib/rules/references/
rf01.rs1use std::cell::RefCell;
2
3use ahash::AHashMap;
4use itertools::Itertools;
5use smol_str::SmolStr;
6use sqruff_lib_core::dialects::base::Dialect;
7use sqruff_lib_core::dialects::common::AliasInfo;
8use sqruff_lib_core::dialects::init::DialectKind;
9use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
10use sqruff_lib_core::parser::segments::object_reference::{
11 ObjectReferenceLevel, ObjectReferencePart, ObjectReferenceSegment,
12};
13use sqruff_lib_core::utils::analysis::query::{Query, Selectable};
14
15use crate::core::config::Value;
16use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
17use crate::core::rules::context::RuleContext;
18use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
19use crate::core::rules::reference::object_ref_matches_table;
20
21#[derive(Debug, Default, Clone)]
22struct RF01Query {
23 aliases: Vec<AliasInfo>,
24 standalone_aliases: Vec<SmolStr>,
25}
26
/// Lint rule `references.from`: references must point at objects that are
/// present in the `FROM` clause.
#[derive(Clone, Debug, Default)]
pub struct RuleRF01 {
    /// Value reported by the rule's `force_enable` accessor; populated from the
    /// `force_enable` configuration key.
    force_enable: bool,
}
31
32impl RuleRF01 {
33 #[allow(clippy::only_used_in_recursion)]
34 fn resolve_reference(
35 &self,
36 r: &ObjectReferenceSegment,
37 tbl_refs: Vec<(ObjectReferencePart, Vec<SmolStr>)>,
38 dml_target_table: &[SmolStr],
39 query: Query<RF01Query>,
40 ) -> Option<LintResult> {
41 let possible_references: Vec<_> = tbl_refs
42 .clone()
43 .into_iter()
44 .map(|tbl_ref| tbl_ref.1)
45 .collect();
46
47 let mut targets = vec![];
48
49 for alias in &RefCell::borrow(&query.inner).payload.aliases {
50 if alias.aliased {
51 targets.push(vec![alias.ref_str.clone()]);
52 }
53
54 if let Some(object_reference) = &alias.object_reference {
55 let references = object_reference
56 .reference()
57 .iter_raw_references()
58 .into_iter()
59 .map(|it| it.part.into())
60 .collect_vec();
61
62 targets.push(references);
63 }
64 }
65
66 for standalone_alias in &RefCell::borrow(&query.inner).payload.standalone_aliases {
67 targets.push(vec![standalone_alias.clone()]);
68 }
69
70 if !object_ref_matches_table(&possible_references, &targets) {
71 if let Some(parent) = RefCell::borrow(&query.inner).parent.clone() {
72 return self.resolve_reference(r, tbl_refs.clone(), dml_target_table, parent);
73 } else if dml_target_table.is_empty()
74 || !object_ref_matches_table(&possible_references, &[dml_target_table.to_vec()])
75 {
76 return LintResult::new(
77 tbl_refs[0].0.segments[0].clone().into(),
78 Vec::new(),
79 format!(
80 "Reference '{}' refers to table/view not found in the FROM clause or \
81 found in ancestor statement.",
82 r.0.raw()
83 )
84 .into(),
85 None,
86 )
87 .into();
88 }
89 }
90 None
91 }
92
93 fn get_table_refs(
94 &self,
95 r: &ObjectReferenceSegment,
96 dialect: &Dialect,
97 ) -> Vec<(ObjectReferencePart, Vec<SmolStr>)> {
98 let mut tbl_refs = Vec::new();
99
100 for values in r.extract_possible_multipart_references(&[
101 ObjectReferenceLevel::Schema,
102 ObjectReferenceLevel::Table,
103 ]) {
104 tbl_refs.push((
105 values[1].clone(),
106 vec![values[0].part.clone().into(), values[1].part.clone().into()],
107 ));
108 }
109
110 if tbl_refs.is_empty() || dialect.name == DialectKind::Bigquery {
111 tbl_refs.extend(
112 r.extract_possible_references(ObjectReferenceLevel::Table, dialect.name)
113 .into_iter()
114 .map(|it| (it.clone(), vec![it.part.into()])),
115 );
116 }
117
118 tbl_refs
119 }
120
121 fn analyze_table_references(
122 &self,
123 query: Query<RF01Query>,
124 dml_target_table: &[SmolStr],
125 violations: &mut Vec<LintResult>,
126 ) {
127 let selectables = std::mem::take(&mut RefCell::borrow_mut(&query.inner).selectables);
128
129 for selectable in &selectables {
130 if let Some(select_info) = selectable.select_info() {
131 RefCell::borrow_mut(&query.inner)
132 .payload
133 .aliases
134 .extend(select_info.table_aliases);
135 RefCell::borrow_mut(&query.inner)
136 .payload
137 .standalone_aliases
138 .extend(select_info.standalone_aliases);
139
140 for r in select_info.reference_buffer {
141 if !self.should_ignore_reference(&r, selectable) {
142 let violation = self.resolve_reference(
143 &r,
144 self.get_table_refs(&r, RefCell::borrow(&query.inner).dialect),
145 dml_target_table,
146 query.clone(),
147 );
148 violations.extend(violation);
149 }
150 }
151 }
152 }
153
154 RefCell::borrow_mut(&query.inner).selectables = selectables;
155
156 for child in query.children() {
157 self.analyze_table_references(child, dml_target_table, violations);
158 }
159 }
160
161 fn should_ignore_reference(
162 &self,
163 reference: &ObjectReferenceSegment,
164 selectable: &Selectable,
165 ) -> bool {
166 let ref_path = selectable.selectable.path_to(&reference.0);
167
168 if !ref_path.is_empty() {
169 ref_path
170 .iter()
171 .any(|ps| ps.segment.is_type(SyntaxKind::IntoTableClause))
172 } else {
173 false
174 }
175 }
176}
177
178impl Rule for RuleRF01 {
179 fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
180 Ok(RuleRF01 {
181 force_enable: config["force_enable"].as_bool().unwrap(),
182 }
183 .erased())
184 }
185
186 fn name(&self) -> &'static str {
187 "references.from"
188 }
189
190 fn description(&self) -> &'static str {
191 "References cannot reference objects not present in 'FROM' clause."
192 }
193
194 fn long_description(&self) -> &'static str {
195 r#"
196**Anti-pattern**
197
198In this example, the reference `vee` has not been declared.
199
200```sql
201SELECT
202 vee.a
203FROM foo
204```
205
206**Best practice**
207
208Remove the reference.
209
210```sql
211SELECT
212 a
213FROM foo
214```
215"#
216 }
217
218 fn groups(&self) -> &'static [RuleGroups] {
219 &[RuleGroups::All, RuleGroups::Core, RuleGroups::References]
220 }
221
222 fn force_enable(&self) -> bool {
223 self.force_enable
224 }
225
226 fn dialect_skip(&self) -> &'static [DialectKind] {
227 &[
229 DialectKind::Redshift,
230 DialectKind::Bigquery,
231 DialectKind::Sparksql,
232 ]
233 }
234
235 fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
236 let query = Query::from_segment(&context.segment, context.dialect, None);
237 let mut violations = Vec::new();
238 let tmp;
239
240 let dml_target_table = if !context.segment.is_type(SyntaxKind::SelectStatement) {
241 let refs = context.segment.recursive_crawl(
242 const { &SyntaxSet::new(&[SyntaxKind::TableReference]) },
243 true,
244 &SyntaxSet::EMPTY,
245 true,
246 );
247 if let Some(reference) = refs.first() {
248 let reference = reference.reference();
249
250 tmp = reference
251 .iter_raw_references()
252 .into_iter()
253 .map(|it| it.part.into())
254 .collect_vec();
255 &tmp
256 } else {
257 [].as_slice()
258 }
259 } else {
260 &[]
261 };
262
263 self.analyze_table_references(query, dml_target_table, &mut violations);
264
265 violations
266 }
267
268 fn crawl_behaviour(&self) -> Crawler {
269 SegmentSeekerCrawler::new(
270 const {
271 SyntaxSet::new(&[
272 SyntaxKind::DeleteStatement,
273 SyntaxKind::MergeStatement,
274 SyntaxKind::SelectStatement,
275 SyntaxKind::UpdateStatement,
276 ])
277 },
278 )
279 .disallow_recurse()
280 .into()
281 }
282}