sqruff_lib/rules/ambiguous/
am04.rs

1use ahash::AHashMap;
2use sqruff_lib_core::dialects::common::AliasInfo;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::parser::segments::base::ErasedSegment;
5use sqruff_lib_core::utils::analysis::query::{Query, Selectable, Source};
6
7use crate::core::config::Value;
8use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
9use crate::core::rules::context::RuleContext;
10use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
11
12#[derive(Clone, Debug, Default)]
13pub struct RuleAM04;
14
15const START_TYPES: [SyntaxKind; 3] = [
16    SyntaxKind::SelectStatement,
17    SyntaxKind::SetExpression,
18    SyntaxKind::WithCompoundStatement,
19];
20
21impl Rule for RuleAM04 {
22    fn load_from_config(&self, _config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
23        Ok(RuleAM04.erased())
24    }
25
26    fn name(&self) -> &'static str {
27        "ambiguous.column_count"
28    }
29
30    fn description(&self) -> &'static str {
31        "Outermost query should produce known number of columns."
32    }
33
34    fn long_description(&self) -> &'static str {
35        r#"
36**Anti-pattern**
37
38Querying all columns using `*` produces a query result where the number or ordering of columns changes if the upstream table's schema changes. This should generally be avoided because it can cause slow performance, cause important schema changes to go undetected, or break production code. For example:
39
40* If a query does `SELECT t.*` and is expected to return columns `a`, `b`, and `c`, the actual columns returned will be wrong/different if columns are added to or deleted from the input table.
41* `UNION` and `DIFFERENCE` clauses require the inputs have the same number of columns (and compatible types).
42* `JOIN` queries may break due to new column name conflicts, e.g. the query references a column `c` which initially existed in only one input table but a column of the same name is added to another table.
43* `CREATE TABLE (<<column schema>>) AS SELECT *`
44
45```sql
46WITH cte AS (
47    SELECT * FROM foo
48)
49
50SELECT * FROM cte
51UNION
52SELECT a, b FROM t
53```
54
55**Best practice**
56
57Somewhere along the "path" to the source data, specify columns explicitly.
58
59```sql
60WITH cte AS (
61    SELECT * FROM foo
62)
63
64SELECT a, b FROM cte
65UNION
66SELECT a, b FROM t
67```"#
68    }
69
70    fn groups(&self) -> &'static [RuleGroups] {
71        &[RuleGroups::All, RuleGroups::Ambiguous]
72    }
73
74    fn eval(&self, rule_cx: &RuleContext) -> Vec<LintResult> {
75        let query = Query::from_segment(&rule_cx.segment, rule_cx.dialect, None);
76
77        let result = self.analyze_result_columns(query);
78        match result {
79            Ok(_) => {
80                vec![]
81            }
82            Err(anchor) => {
83                vec![LintResult::new(Some(anchor), vec![], None, None)]
84            }
85        }
86    }
87
88    fn crawl_behaviour(&self) -> Crawler {
89        SegmentSeekerCrawler::new(const { SyntaxSet::new(&START_TYPES) })
90            .disallow_recurse()
91            .into()
92    }
93}
94
95impl RuleAM04 {
96    /// returns an anchor to the rule
97    fn analyze_result_columns(&self, query: Query<()>) -> Result<(), ErasedSegment> {
98        if query.inner.borrow().selectables.is_empty() {
99            return Ok(());
100        }
101
102        let selectables = query.inner.borrow().selectables.clone();
103        for selectable in selectables {
104            for wildcard in selectable.wildcard_info() {
105                if !wildcard.tables.is_empty() {
106                    for wildcard_table in wildcard.tables {
107                        if let Some(alias_info) = selectable.find_alias(&wildcard_table) {
108                            self.handle_alias(&selectable, alias_info, &query)?;
109                        } else {
110                            let Some(cte) = query.lookup_cte(&wildcard_table, true) else {
111                                return Err(selectable.selectable);
112                            };
113
114                            self.analyze_result_columns(cte)?;
115                        }
116                    }
117                } else {
118                    let selectable = query.inner.borrow().selectables[0].selectable.clone();
119                    for source in query.crawl_sources(selectable.clone(), false, true) {
120                        if let Source::Query(query) = source {
121                            self.analyze_result_columns(query)?;
122                            return Ok(());
123                        }
124                    }
125
126                    return Err(selectable);
127                }
128            }
129        }
130
131        Ok(())
132    }
133
134    fn handle_alias(
135        &self,
136        selectable: &Selectable,
137        alias_info: AliasInfo,
138        query: &Query<'_, ()>,
139    ) -> Result<(), ErasedSegment> {
140        let select_info_target = query
141            .crawl_sources(alias_info.from_expression_element, false, true)
142            .into_iter()
143            .next()
144            .unwrap();
145        match select_info_target {
146            Source::TableReference(_) => Err(selectable.selectable.clone()),
147            Source::Query(query) => self.analyze_result_columns(query),
148        }
149    }
150}