sqruff_lib/rules/references/
rf05.rs

1use ahash::{AHashMap, AHashSet};
2use regex::Regex;
3use sqruff_lib_core::dialects::init::DialectKind;
4use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
5
6use crate::core::config::Value;
7use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
8use crate::core::rules::context::RuleContext;
9use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
10use crate::utils::identifers::identifiers_policy_applicable;
11
12#[derive(Clone, Default, Debug)]
13pub struct RuleRF05 {
14    quoted_identifiers_policy: String,
15    unquoted_identifiers_policy: String,
16    allow_space_in_identifier: bool,
17    additional_allowed_characters: String,
18    ignore_words: Vec<String>,
19    ignore_words_regex: Vec<Regex>,
20}
21
22impl Rule for RuleRF05 {
23    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
24        Ok(RuleRF05 {
25            unquoted_identifiers_policy: config["unquoted_identifiers_policy"]
26                .as_string()
27                .unwrap()
28                .to_owned(),
29            quoted_identifiers_policy: config["quoted_identifiers_policy"]
30                .as_string()
31                .unwrap()
32                .to_owned(),
33            ignore_words: config["ignore_words"]
34                .map(|it| {
35                    it.as_array()
36                        .unwrap()
37                        .iter()
38                        .map(|it| it.as_string().unwrap().to_lowercase())
39                        .collect()
40                })
41                .unwrap_or_default(),
42            ignore_words_regex: config["ignore_words_regex"]
43                .map(|it| {
44                    it.as_array()
45                        .unwrap()
46                        .iter()
47                        .map(|it| Regex::new(it.as_string().unwrap()).unwrap())
48                        .collect()
49                })
50                .unwrap_or_default(),
51            allow_space_in_identifier: config["allow_space_in_identifier"].as_bool().unwrap(),
52            additional_allowed_characters: config["additional_allowed_characters"]
53                .map(|it| it.as_string().unwrap().to_owned())
54                .unwrap_or_default(),
55        }
56        .erased())
57    }
58
59    fn name(&self) -> &'static str {
60        "references.special_chars"
61    }
62
63    fn description(&self) -> &'static str {
64        "Do not use special characters in identifiers."
65    }
66
67    fn long_description(&self) -> &'static str {
68        r"
69**Anti-pattern**
70
71Using special characters within identifiers when creating or aliasing objects.
72
73```sql
74CREATE TABLE DBO.ColumnNames
75(
76    [Internal Space] INT,
77    [Greater>Than] INT,
78    [Less<Than] INT,
79    Number# INT
80)
81```
82
83**Best practice**
84
85Identifiers should include only alphanumerics and underscores.
86
87```sql
88CREATE TABLE DBO.ColumnNames
89(
90    [Internal_Space] INT,
91    [GreaterThan] INT,
92    [LessThan] INT,
93    NumberVal INT
94)
95```
96"
97    }
98
99    fn groups(&self) -> &'static [RuleGroups] {
100        &[RuleGroups::All, RuleGroups::References]
101    }
102
103    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
104        if self
105            .ignore_words
106            .contains(&context.segment.raw().to_lowercase())
107            || self
108                .ignore_words_regex
109                .iter()
110                .any(|it| it.is_match(context.segment.raw().as_ref()))
111        {
112            return Vec::new();
113        }
114
115        let mut policy = self.unquoted_identifiers_policy.as_str();
116        let mut identifier = context.segment.raw().to_string();
117
118        if context.segment.is_type(SyntaxKind::QuotedIdentifier) {
119            policy = self.quoted_identifiers_policy.as_str();
120            identifier = identifier[1..identifier.len() - 1].to_string();
121
122            if self.ignore_words.contains(&identifier.to_lowercase())
123                || self
124                    .ignore_words_regex
125                    .iter()
126                    .any(|it| it.is_match(&identifier))
127            {
128                return Vec::new();
129            }
130
131            if context.dialect.name == DialectKind::Bigquery
132                && context
133                    .parent_stack
134                    .last()
135                    .is_some_and(|it| it.is_type(SyntaxKind::TableReference))
136            {
137                if identifier.ends_with('*') {
138                    identifier.pop();
139                }
140                identifier = identifier.replace(".", "");
141            }
142
143            // TODO: add databricks
144            if context.dialect.name == DialectKind::Sparksql && !context.parent_stack.is_empty() {
145                if context
146                    .parent_stack
147                    .last()
148                    .unwrap()
149                    .is_type(SyntaxKind::FileReference)
150                {
151                    return Vec::new();
152                }
153
154                if context
155                    .parent_stack
156                    .last()
157                    .unwrap()
158                    .is_type(SyntaxKind::PropertyNameIdentifier)
159                {
160                    identifier = identifier.replace(".", "");
161                }
162            }
163
164            if self.allow_space_in_identifier {
165                identifier = identifier.replace(" ", "");
166            }
167        }
168
169        identifier = identifier.replace("_", "");
170
171        if context.dialect.name == DialectKind::Redshift
172            && identifier.starts_with('#')
173            && context
174                .parent_stack
175                .last()
176                .is_some_and(|it| it.get_type() == SyntaxKind::TableReference)
177        {
178            identifier = identifier[1..].to_string();
179        }
180
181        let additional_allowed_characters =
182            self.get_additional_allowed_characters(context.dialect.name);
183        if !additional_allowed_characters.is_empty() {
184            identifier.retain(|it| !additional_allowed_characters.contains(&it));
185        }
186
187        if identifiers_policy_applicable(policy, &context.parent_stack)
188            && !identifier.chars().all(|c| c.is_ascii_alphanumeric())
189        {
190            return vec![LintResult::new(
191                context.segment.clone().into(),
192                Vec::new(),
193                None,
194                None,
195            )];
196        }
197
198        Vec::new()
199    }
200
201    fn crawl_behaviour(&self) -> Crawler {
202        SegmentSeekerCrawler::new(
203            const { SyntaxSet::new(&[SyntaxKind::QuotedIdentifier, SyntaxKind::NakedIdentifier]) },
204        )
205        .into()
206    }
207}
208
209impl RuleRF05 {
210    fn get_additional_allowed_characters(&self, dialect_name: DialectKind) -> AHashSet<char> {
211        let mut result = AHashSet::new();
212        result.extend(self.additional_allowed_characters.chars());
213        if dialect_name == DialectKind::Bigquery {
214            result.insert('-');
215        }
216        result
217    }
218}