sqruff_lib/rules/ambiguous/
am04.rs1use ahash::AHashMap;
2use sqruff_lib_core::dialects::common::AliasInfo;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::parser::segments::base::ErasedSegment;
5use sqruff_lib_core::utils::analysis::query::{Query, Selectable, Source};
6
7use crate::core::config::Value;
8use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
9use crate::core::rules::context::RuleContext;
10use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
11
12#[derive(Clone, Debug, Default)]
13pub struct RuleAM04;
14
15const START_TYPES: [SyntaxKind; 3] = [
16 SyntaxKind::SelectStatement,
17 SyntaxKind::SetExpression,
18 SyntaxKind::WithCompoundStatement,
19];
20
21impl Rule for RuleAM04 {
22 fn load_from_config(&self, _config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
23 Ok(RuleAM04.erased())
24 }
25
26 fn name(&self) -> &'static str {
27 "ambiguous.column_count"
28 }
29
30 fn description(&self) -> &'static str {
31 "Outermost query should produce known number of columns."
32 }
33
34 fn long_description(&self) -> &'static str {
35 r#"
36**Anti-pattern**
37
38Querying all columns using `*` produces a query result where the number or ordering of columns changes if the upstream table's schema changes. This should generally be avoided because it can cause slow performance, cause important schema changes to go undetected, or break production code. For example:
39
40* If a query does `SELECT t.*` and is expected to return columns `a`, `b`, and `c`, the actual columns returned will be wrong/different if columns are added to or deleted from the input table.
41* `UNION` and `DIFFERENCE` clauses require the inputs have the same number of columns (and compatible types).
42* `JOIN` queries may break due to new column name conflicts, e.g. the query references a column `c` which initially existed in only one input table but a column of the same name is added to another table.
43* `CREATE TABLE (<<column schema>>) AS SELECT *`
44
45```sql
46WITH cte AS (
47 SELECT * FROM foo
48)
49
50SELECT * FROM cte
51UNION
52SELECT a, b FROM t
53```
54
55**Best practice**
56
57Somewhere along the "path" to the source data, specify columns explicitly.
58
59```sql
60WITH cte AS (
61 SELECT * FROM foo
62)
63
64SELECT a, b FROM cte
65UNION
66SELECT a, b FROM t
67```"#
68 }
69
70 fn groups(&self) -> &'static [RuleGroups] {
71 &[RuleGroups::All, RuleGroups::Ambiguous]
72 }
73
74 fn eval(&self, rule_cx: &RuleContext) -> Vec<LintResult> {
75 let query = Query::from_segment(&rule_cx.segment, rule_cx.dialect, None);
76
77 let result = self.analyze_result_columns(query);
78 match result {
79 Ok(_) => {
80 vec![]
81 }
82 Err(anchor) => {
83 vec![LintResult::new(Some(anchor), vec![], None, None)]
84 }
85 }
86 }
87
88 fn crawl_behaviour(&self) -> Crawler {
89 SegmentSeekerCrawler::new(const { SyntaxSet::new(&START_TYPES) })
90 .disallow_recurse()
91 .into()
92 }
93}
94
95impl RuleAM04 {
96 fn analyze_result_columns(&self, query: Query<()>) -> Result<(), ErasedSegment> {
98 if query.inner.borrow().selectables.is_empty() {
99 return Ok(());
100 }
101
102 let selectables = query.inner.borrow().selectables.clone();
103 for selectable in selectables {
104 for wildcard in selectable.wildcard_info() {
105 if !wildcard.tables.is_empty() {
106 for wildcard_table in wildcard.tables {
107 if let Some(alias_info) = selectable.find_alias(&wildcard_table) {
108 self.handle_alias(&selectable, alias_info, &query)?;
109 } else {
110 let Some(cte) = query.lookup_cte(&wildcard_table, true) else {
111 return Err(selectable.selectable);
112 };
113
114 self.analyze_result_columns(cte)?;
115 }
116 }
117 } else {
118 let selectable = query.inner.borrow().selectables[0].selectable.clone();
119 for source in query.crawl_sources(selectable.clone(), false, true) {
120 if let Source::Query(query) = source {
121 self.analyze_result_columns(query)?;
122 return Ok(());
123 }
124 }
125
126 return Err(selectable);
127 }
128 }
129 }
130
131 Ok(())
132 }
133
134 fn handle_alias(
135 &self,
136 selectable: &Selectable,
137 alias_info: AliasInfo,
138 query: &Query<'_, ()>,
139 ) -> Result<(), ErasedSegment> {
140 let select_info_target = query
141 .crawl_sources(alias_info.from_expression_element, false, true)
142 .into_iter()
143 .next()
144 .unwrap();
145 match select_info_target {
146 Source::TableReference(_) => Err(selectable.selectable.clone()),
147 Source::Query(query) => self.analyze_result_columns(query),
148 }
149 }
150}