sqruff_lib/rules/references/
rf01.rs

1use std::cell::RefCell;
2
3use ahash::AHashMap;
4use itertools::Itertools;
5use smol_str::SmolStr;
6use sqruff_lib_core::dialects::base::Dialect;
7use sqruff_lib_core::dialects::common::AliasInfo;
8use sqruff_lib_core::dialects::init::DialectKind;
9use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
10use sqruff_lib_core::parser::segments::object_reference::{
11    ObjectReferenceLevel, ObjectReferencePart, ObjectReferenceSegment,
12};
13use sqruff_lib_core::utils::analysis::query::{Query, Selectable};
14
15use crate::core::config::Value;
16use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
17use crate::core::rules::context::RuleContext;
18use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
19use crate::core::rules::reference::object_ref_matches_table;
20
21#[derive(Debug, Default, Clone)]
22struct RF01Query {
23    aliases: Vec<AliasInfo>,
24    standalone_aliases: Vec<SmolStr>,
25}
26
/// Lint rule RF01 (`references.from`): reports references whose table part
/// cannot be matched against the tables/aliases available to the query.
#[derive(Clone, Debug, Default)]
pub struct RuleRF01 {
    /// Value reported by `Rule::force_enable`; populated from the
    /// `force_enable` configuration key in `Rule::load_from_config`.
    force_enable: bool,
}
31
32impl RuleRF01 {
33    #[allow(clippy::only_used_in_recursion)]
34    fn resolve_reference(
35        &self,
36        r: &ObjectReferenceSegment,
37        tbl_refs: Vec<(ObjectReferencePart, Vec<SmolStr>)>,
38        dml_target_table: &[SmolStr],
39        query: Query<RF01Query>,
40    ) -> Option<LintResult> {
41        let possible_references: Vec<_> = tbl_refs
42            .clone()
43            .into_iter()
44            .map(|tbl_ref| tbl_ref.1)
45            .collect();
46
47        let mut targets = vec![];
48
49        for alias in &RefCell::borrow(&query.inner).payload.aliases {
50            if alias.aliased {
51                targets.push(vec![alias.ref_str.clone()]);
52            }
53
54            if let Some(object_reference) = &alias.object_reference {
55                let references = object_reference
56                    .reference()
57                    .iter_raw_references()
58                    .into_iter()
59                    .map(|it| it.part.into())
60                    .collect_vec();
61
62                targets.push(references);
63            }
64        }
65
66        for standalone_alias in &RefCell::borrow(&query.inner).payload.standalone_aliases {
67            targets.push(vec![standalone_alias.clone()]);
68        }
69
70        if !object_ref_matches_table(&possible_references, &targets) {
71            if let Some(parent) = RefCell::borrow(&query.inner).parent.clone() {
72                return self.resolve_reference(r, tbl_refs.clone(), dml_target_table, parent);
73            } else if dml_target_table.is_empty()
74                || !object_ref_matches_table(&possible_references, &[dml_target_table.to_vec()])
75            {
76                return LintResult::new(
77                    tbl_refs[0].0.segments[0].clone().into(),
78                    Vec::new(),
79                    format!(
80                        "Reference '{}' refers to table/view not found in the FROM clause or \
81                         found in ancestor statement.",
82                        r.0.raw()
83                    )
84                    .into(),
85                    None,
86                )
87                .into();
88            }
89        }
90        None
91    }
92
93    fn get_table_refs(
94        &self,
95        r: &ObjectReferenceSegment,
96        dialect: &Dialect,
97    ) -> Vec<(ObjectReferencePart, Vec<SmolStr>)> {
98        let mut tbl_refs = Vec::new();
99
100        for values in r.extract_possible_multipart_references(&[
101            ObjectReferenceLevel::Schema,
102            ObjectReferenceLevel::Table,
103        ]) {
104            tbl_refs.push((
105                values[1].clone(),
106                vec![values[0].part.clone().into(), values[1].part.clone().into()],
107            ));
108        }
109
110        if tbl_refs.is_empty() || dialect.name == DialectKind::Bigquery {
111            tbl_refs.extend(
112                r.extract_possible_references(ObjectReferenceLevel::Table, dialect.name)
113                    .into_iter()
114                    .map(|it| (it.clone(), vec![it.part.into()])),
115            );
116        }
117
118        tbl_refs
119    }
120
121    fn analyze_table_references(
122        &self,
123        query: Query<RF01Query>,
124        dml_target_table: &[SmolStr],
125        violations: &mut Vec<LintResult>,
126    ) {
127        let selectables = std::mem::take(&mut RefCell::borrow_mut(&query.inner).selectables);
128
129        for selectable in &selectables {
130            if let Some(select_info) = selectable.select_info() {
131                RefCell::borrow_mut(&query.inner)
132                    .payload
133                    .aliases
134                    .extend(select_info.table_aliases);
135                RefCell::borrow_mut(&query.inner)
136                    .payload
137                    .standalone_aliases
138                    .extend(select_info.standalone_aliases);
139
140                for r in select_info.reference_buffer {
141                    if !self.should_ignore_reference(&r, selectable) {
142                        let violation = self.resolve_reference(
143                            &r,
144                            self.get_table_refs(&r, RefCell::borrow(&query.inner).dialect),
145                            dml_target_table,
146                            query.clone(),
147                        );
148                        violations.extend(violation);
149                    }
150                }
151            }
152        }
153
154        RefCell::borrow_mut(&query.inner).selectables = selectables;
155
156        for child in query.children() {
157            self.analyze_table_references(child, dml_target_table, violations);
158        }
159    }
160
161    fn should_ignore_reference(
162        &self,
163        reference: &ObjectReferenceSegment,
164        selectable: &Selectable,
165    ) -> bool {
166        let ref_path = selectable.selectable.path_to(&reference.0);
167
168        if !ref_path.is_empty() {
169            ref_path
170                .iter()
171                .any(|ps| ps.segment.is_type(SyntaxKind::IntoTableClause))
172        } else {
173            false
174        }
175    }
176}
177
178impl Rule for RuleRF01 {
179    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
180        Ok(RuleRF01 {
181            force_enable: config["force_enable"].as_bool().unwrap(),
182        }
183        .erased())
184    }
185
186    fn name(&self) -> &'static str {
187        "references.from"
188    }
189
190    fn description(&self) -> &'static str {
191        "References cannot reference objects not present in 'FROM' clause."
192    }
193
194    fn long_description(&self) -> &'static str {
195        r#"
196**Anti-pattern**
197
198In this example, the reference `vee` has not been declared.
199
200```sql
201SELECT
202    vee.a
203FROM foo
204```
205
206**Best practice**
207
208Remove the reference.
209
210```sql
211SELECT
212    a
213FROM foo
214```
215"#
216    }
217
218    fn groups(&self) -> &'static [RuleGroups] {
219        &[RuleGroups::All, RuleGroups::Core, RuleGroups::References]
220    }
221
222    fn force_enable(&self) -> bool {
223        self.force_enable
224    }
225
226    fn dialect_skip(&self) -> &'static [DialectKind] {
227        // TODO Add others when finished, whole list["databricks", "hive", "soql"]
228        &[
229            DialectKind::Redshift,
230            DialectKind::Bigquery,
231            DialectKind::Sparksql,
232        ]
233    }
234
235    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
236        let query = Query::from_segment(&context.segment, context.dialect, None);
237        let mut violations = Vec::new();
238        let tmp;
239
240        let dml_target_table = if !context.segment.is_type(SyntaxKind::SelectStatement) {
241            let refs = context.segment.recursive_crawl(
242                const { &SyntaxSet::new(&[SyntaxKind::TableReference]) },
243                true,
244                &SyntaxSet::EMPTY,
245                true,
246            );
247            if let Some(reference) = refs.first() {
248                let reference = reference.reference();
249
250                tmp = reference
251                    .iter_raw_references()
252                    .into_iter()
253                    .map(|it| it.part.into())
254                    .collect_vec();
255                &tmp
256            } else {
257                [].as_slice()
258            }
259        } else {
260            &[]
261        };
262
263        self.analyze_table_references(query, dml_target_table, &mut violations);
264
265        violations
266    }
267
268    fn crawl_behaviour(&self) -> Crawler {
269        SegmentSeekerCrawler::new(
270            const {
271                SyntaxSet::new(&[
272                    SyntaxKind::DeleteStatement,
273                    SyntaxKind::MergeStatement,
274                    SyntaxKind::SelectStatement,
275                    SyntaxKind::UpdateStatement,
276                ])
277            },
278        )
279        .disallow_recurse()
280        .into()
281    }
282}