sqruff_lib/rules/references/
rf02.rs

1use ahash::AHashMap;
2use itertools::Itertools;
3use regex::Regex;
4use smol_str::SmolStr;
5use sqruff_lib_core::dialects::common::{AliasInfo, ColumnAliasInfo};
6use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
7use sqruff_lib_core::parser::segments::object_reference::ObjectReferenceSegment;
8
9use crate::core::config::Value;
10use crate::core::rules::base::{CloneRule, ErasedRule, LintResult, Rule, RuleGroups};
11use crate::core::rules::context::RuleContext;
12use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
13use crate::rules::aliasing::al04::RuleAL04;
14
15#[derive(Clone, Debug)]
16pub struct RuleRF02 {
17    base: RuleAL04<(Vec<String>, Vec<Regex>)>,
18}
19
20impl Default for RuleRF02 {
21    fn default() -> Self {
22        Self {
23            base: RuleAL04 {
24                lint_references_and_aliases: Self::lint_references_and_aliases,
25                context: (Vec::new(), Vec::new()),
26            },
27        }
28    }
29}
30
31impl Rule for RuleRF02 {
32    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
33        let ignore_words = config["ignore_words"]
34            .map(|it| {
35                it.as_array()
36                    .unwrap()
37                    .iter()
38                    .map(|it| it.as_string().unwrap().to_lowercase())
39                    .collect()
40            })
41            .unwrap_or_default();
42
43        let ignore_words_regex = config["ignore_words_regex"]
44            .map(|it| {
45                it.as_array()
46                    .unwrap()
47                    .iter()
48                    .map(|it| Regex::new(it.as_string().unwrap()).unwrap())
49                    .collect()
50            })
51            .unwrap_or_default();
52
53        Ok(Self {
54            base: RuleAL04 {
55                lint_references_and_aliases: Self::lint_references_and_aliases,
56                context: (ignore_words, ignore_words_regex),
57            },
58        }
59        .erased())
60    }
61
62    fn name(&self) -> &'static str {
63        "references.qualification"
64    }
65
66    fn description(&self) -> &'static str {
67        "References should be qualified if select has more than one referenced table/view."
68    }
69
70    fn long_description(&self) -> &'static str {
71        r"
72**Anti-pattern**
73
74In this example, the reference `vee` has not been declared, and the variables `a` and `b` are potentially ambiguous.
75
76```sql
77SELECT a, b
78FROM foo
79LEFT JOIN vee ON vee.a = foo.a
80```
81
82**Best practice**
83
84Add the references.
85
86```sql
87SELECT foo.a, vee.b
88FROM foo
89LEFT JOIN vee ON vee.a = foo.a
90```
91"
92    }
93
94    fn groups(&self) -> &'static [RuleGroups] {
95        &[RuleGroups::All, RuleGroups::References]
96    }
97
98    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
99        self.base.eval(context)
100    }
101
102    fn crawl_behaviour(&self) -> Crawler {
103        SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::SelectStatement]) }).into()
104    }
105}
106
107impl RuleRF02 {
108    fn lint_references_and_aliases(
109        table_aliases: Vec<AliasInfo>,
110        standalone_aliases: Vec<SmolStr>,
111        references: Vec<ObjectReferenceSegment>,
112        col_aliases: Vec<ColumnAliasInfo>,
113        using_cols: Vec<SmolStr>,
114        context: &(Vec<String>, Vec<Regex>),
115    ) -> Vec<LintResult> {
116        if table_aliases.len() <= 1 {
117            return Vec::new();
118        }
119
120        let mut violation_buff = Vec::new();
121        for r in references {
122            if context.0.contains(&r.0.raw().to_lowercase()) {
123                continue;
124            }
125
126            if context
127                .1
128                .iter()
129                .any(|regex| regex.is_match(r.0.raw().as_ref()))
130            {
131                continue;
132            }
133
134            let this_ref_type = r.qualification();
135            let col_alias_names = col_aliases
136                .iter()
137                .filter_map(|c| {
138                    if !c.column_reference_segments.contains(&r.0) {
139                        Some(c.alias_identifier_name.as_str())
140                    } else {
141                        None
142                    }
143                })
144                .collect_vec();
145
146            if this_ref_type == "unqualified"
147                && !col_alias_names.contains(&r.0.raw().as_ref())
148                && !using_cols.contains(r.0.raw())
149                && !standalone_aliases.contains(r.0.raw())
150            {
151                violation_buff.push(LintResult::new(
152                    r.0.clone().into(),
153                    Vec::new(),
154                    format!(
155                        "Unqualified reference {} found in select with more than one referenced \
156                         table/view.",
157                        r.0.raw()
158                    )
159                    .into(),
160                    None,
161                ));
162            }
163        }
164
165        violation_buff
166    }
167}