// sqruff_lib/rules/references/rf05.rs
use ahash::{AHashMap, AHashSet};
2use regex::Regex;
3use sqruff_lib_core::dialects::init::DialectKind;
4use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
5
6use crate::core::config::Value;
7use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
8use crate::core::rules::context::RuleContext;
9use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
10use crate::utils::identifers::identifiers_policy_applicable;
11
12#[derive(Clone, Default, Debug)]
13pub struct RuleRF05 {
14 quoted_identifiers_policy: String,
15 unquoted_identifiers_policy: String,
16 allow_space_in_identifier: bool,
17 additional_allowed_characters: String,
18 ignore_words: Vec<String>,
19 ignore_words_regex: Vec<Regex>,
20}
21
22impl Rule for RuleRF05 {
23 fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
24 Ok(RuleRF05 {
25 unquoted_identifiers_policy: config["unquoted_identifiers_policy"]
26 .as_string()
27 .unwrap()
28 .to_owned(),
29 quoted_identifiers_policy: config["quoted_identifiers_policy"]
30 .as_string()
31 .unwrap()
32 .to_owned(),
33 ignore_words: config["ignore_words"]
34 .map(|it| {
35 it.as_array()
36 .unwrap()
37 .iter()
38 .map(|it| it.as_string().unwrap().to_lowercase())
39 .collect()
40 })
41 .unwrap_or_default(),
42 ignore_words_regex: config["ignore_words_regex"]
43 .map(|it| {
44 it.as_array()
45 .unwrap()
46 .iter()
47 .map(|it| Regex::new(it.as_string().unwrap()).unwrap())
48 .collect()
49 })
50 .unwrap_or_default(),
51 allow_space_in_identifier: config["allow_space_in_identifier"].as_bool().unwrap(),
52 additional_allowed_characters: config["additional_allowed_characters"]
53 .map(|it| it.as_string().unwrap().to_owned())
54 .unwrap_or_default(),
55 }
56 .erased())
57 }
58
59 fn name(&self) -> &'static str {
60 "references.special_chars"
61 }
62
63 fn description(&self) -> &'static str {
64 "Do not use special characters in identifiers."
65 }
66
67 fn long_description(&self) -> &'static str {
68 r"
69**Anti-pattern**
70
71Using special characters within identifiers when creating or aliasing objects.
72
73```sql
74CREATE TABLE DBO.ColumnNames
75(
76 [Internal Space] INT,
77 [Greater>Than] INT,
78 [Less<Than] INT,
79 Number# INT
80)
81```
82
83**Best practice**
84
85Identifiers should include only alphanumerics and underscores.
86
87```sql
88CREATE TABLE DBO.ColumnNames
89(
90 [Internal_Space] INT,
91 [GreaterThan] INT,
92 [LessThan] INT,
93 NumberVal INT
94)
95```
96"
97 }
98
99 fn groups(&self) -> &'static [RuleGroups] {
100 &[RuleGroups::All, RuleGroups::References]
101 }
102
103 fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
104 if self
105 .ignore_words
106 .contains(&context.segment.raw().to_lowercase())
107 || self
108 .ignore_words_regex
109 .iter()
110 .any(|it| it.is_match(context.segment.raw().as_ref()))
111 {
112 return Vec::new();
113 }
114
115 let mut policy = self.unquoted_identifiers_policy.as_str();
116 let mut identifier = context.segment.raw().to_string();
117
118 if context.segment.is_type(SyntaxKind::QuotedIdentifier) {
119 policy = self.quoted_identifiers_policy.as_str();
120 identifier = identifier[1..identifier.len() - 1].to_string();
121
122 if self.ignore_words.contains(&identifier.to_lowercase())
123 || self
124 .ignore_words_regex
125 .iter()
126 .any(|it| it.is_match(&identifier))
127 {
128 return Vec::new();
129 }
130
131 if context.dialect.name == DialectKind::Bigquery
132 && context
133 .parent_stack
134 .last()
135 .is_some_and(|it| it.is_type(SyntaxKind::TableReference))
136 {
137 if identifier.ends_with('*') {
138 identifier.pop();
139 }
140 identifier = identifier.replace(".", "");
141 }
142
143 if context.dialect.name == DialectKind::Sparksql && !context.parent_stack.is_empty() {
145 if context
146 .parent_stack
147 .last()
148 .unwrap()
149 .is_type(SyntaxKind::FileReference)
150 {
151 return Vec::new();
152 }
153
154 if context
155 .parent_stack
156 .last()
157 .unwrap()
158 .is_type(SyntaxKind::PropertyNameIdentifier)
159 {
160 identifier = identifier.replace(".", "");
161 }
162 }
163
164 if self.allow_space_in_identifier {
165 identifier = identifier.replace(" ", "");
166 }
167 }
168
169 identifier = identifier.replace("_", "");
170
171 if context.dialect.name == DialectKind::Redshift
172 && identifier.starts_with('#')
173 && context
174 .parent_stack
175 .last()
176 .is_some_and(|it| it.get_type() == SyntaxKind::TableReference)
177 {
178 identifier = identifier[1..].to_string();
179 }
180
181 let additional_allowed_characters =
182 self.get_additional_allowed_characters(context.dialect.name);
183 if !additional_allowed_characters.is_empty() {
184 identifier.retain(|it| !additional_allowed_characters.contains(&it));
185 }
186
187 if identifiers_policy_applicable(policy, &context.parent_stack)
188 && !identifier.chars().all(|c| c.is_ascii_alphanumeric())
189 {
190 return vec![LintResult::new(
191 context.segment.clone().into(),
192 Vec::new(),
193 None,
194 None,
195 )];
196 }
197
198 Vec::new()
199 }
200
201 fn crawl_behaviour(&self) -> Crawler {
202 SegmentSeekerCrawler::new(
203 const { SyntaxSet::new(&[SyntaxKind::QuotedIdentifier, SyntaxKind::NakedIdentifier]) },
204 )
205 .into()
206 }
207}
208
209impl RuleRF05 {
210 fn get_additional_allowed_characters(&self, dialect_name: DialectKind) -> AHashSet<char> {
211 let mut result = AHashSet::new();
212 result.extend(self.additional_allowed_characters.chars());
213 if dialect_name == DialectKind::Bigquery {
214 result.insert('-');
215 }
216 result
217 }
218}