sqruff_lib/rules/convention/
cv10.rs

1use std::iter::repeat;
2
3use ahash::AHashMap;
4use regex::Regex;
5use sqruff_lib_core::dialects::init::DialectKind;
6use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
7use sqruff_lib_core::lint_fix::LintFix;
8use sqruff_lib_core::parser::segments::base::SegmentBuilder;
9use strum_macros::{AsRefStr, EnumString};
10
11use crate::core::config::Value;
12use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
13use crate::core::rules::context::RuleContext;
14use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
15
16#[derive(Debug, Copy, Clone, AsRefStr, EnumString, PartialEq, Default)]
17#[strum(serialize_all = "snake_case")]
18enum PreferredQuotedLiteralStyle {
19    #[default]
20    Consistent,
21    SingleQuotes,
22    DoubleQuotes,
23}
24
25impl PreferredQuotedLiteralStyle {
26    fn info(&self) -> QuoteInfo {
27        match self {
28            PreferredQuotedLiteralStyle::Consistent => unimplemented!(),
29            PreferredQuotedLiteralStyle::SingleQuotes => QuoteInfo {
30                preferred_quote_char: '\'',
31                alternate_quote_char: '"',
32            },
33            PreferredQuotedLiteralStyle::DoubleQuotes => QuoteInfo {
34                preferred_quote_char: '"',
35                alternate_quote_char: '\'',
36            },
37        }
38    }
39}
40
/// Pair of quote characters associated with a concrete quote style.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct QuoteInfo {
    /// Quote character the style wants literals to be wrapped in.
    preferred_quote_char: char,
    /// The other quote character, i.e. the one to convert away from.
    alternate_quote_char: char,
}
45
46#[derive(Clone, Debug, Default)]
47pub struct RuleCV10 {
48    preferred_quoted_literal_style: PreferredQuotedLiteralStyle,
49    force_enable: bool,
50}
51
52impl Rule for RuleCV10 {
53    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
54        Ok(RuleCV10 {
55            preferred_quoted_literal_style: config["preferred_quoted_literal_style"]
56                .as_string()
57                .unwrap()
58                .to_owned()
59                .parse()
60                .unwrap(),
61            force_enable: config["force_enable"].as_bool().unwrap(),
62        }
63        .erased())
64    }
65
66    fn name(&self) -> &'static str {
67        "convention.quoted_literals"
68    }
69
70    fn description(&self) -> &'static str {
71        "Consistent usage of preferred quotes for quoted literals."
72    }
73
74    fn long_description(&self) -> &'static str {
75        r#"
76**Anti-pattern**
77
78```sql
79select
80    "abc",
81    'abc',
82    "\"",
83    "abc" = 'abc'
84from foo
85```
86
87**Best practice**
88
89Ensure all quoted literals use preferred quotes, unless escaping can be reduced by using alternate quotes.
90
91```sql
92select
93    "abc",
94    "abc",
95    '"',
96    "abc" = "abc"
97from foo
98```P        
99"#
100    }
101
102    fn groups(&self) -> &'static [RuleGroups] {
103        &[RuleGroups::All, RuleGroups::Convention]
104    }
105
106    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
107        // TODO: "databricks", "hive", "mysql"
108        if !(self.force_enable
109            || matches!(
110                context.dialect.name,
111                DialectKind::Bigquery | DialectKind::Sparksql
112            ))
113        {
114            return Vec::new();
115        }
116
117        let preferred_quoted_literal_style =
118            if self.preferred_quoted_literal_style == PreferredQuotedLiteralStyle::Consistent {
119                let preferred_quoted_literal_style = context
120                    .try_get::<PreferredQuotedLiteralStyle>()
121                    .unwrap_or_else(|| {
122                        if context.segment.raw().ends_with('"') {
123                            PreferredQuotedLiteralStyle::DoubleQuotes
124                        } else {
125                            PreferredQuotedLiteralStyle::SingleQuotes
126                        }
127                    });
128
129                context.set(preferred_quoted_literal_style);
130                preferred_quoted_literal_style
131            } else {
132                self.preferred_quoted_literal_style
133            };
134
135        let info = preferred_quoted_literal_style.info();
136        let fixed_string = normalize_preferred_quoted_literal_style(
137            context.segment.raw().as_ref(),
138            info.preferred_quote_char,
139            info.alternate_quote_char,
140        );
141
142        if fixed_string != context.segment.raw().as_str() {
143            return vec![LintResult::new(
144                context.segment.clone().into(),
145                vec![LintFix::replace(
146                    context.segment.clone(),
147                    vec![
148                        SegmentBuilder::token(
149                            context.tables.next_id(),
150                            &fixed_string,
151                            SyntaxKind::QuotedLiteral,
152                        )
153                        .finish(),
154                    ],
155                    None,
156                )],
157                Some("".into()),
158                None,
159            )];
160        }
161
162        Vec::new()
163    }
164
165    fn is_fix_compatible(&self) -> bool {
166        true
167    }
168
169    fn crawl_behaviour(&self) -> Crawler {
170        SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::QuotedLiteral]) }).into()
171    }
172}
173
174// FIXME: avoid memory allocations
175fn normalize_preferred_quoted_literal_style(
176    s: &str,
177    preferred_quote_char: char,
178    alternate_quote_char: char,
179) -> String {
180    let mut s = s.to_string();
181    let trimmed = s.trim_start_matches(['r', 'b', 'R', 'B']);
182
183    let (orig_quote, new_quote) = if trimmed
184        .chars()
185        .take(3)
186        .eq(repeat(preferred_quote_char).take(3))
187    {
188        return s.to_string();
189    } else if trimmed.starts_with(preferred_quote_char) {
190        (
191            preferred_quote_char.to_string(),
192            alternate_quote_char.to_string(),
193        )
194    } else if trimmed
195        .chars()
196        .take(3)
197        .eq(repeat(alternate_quote_char).take(3))
198    {
199        (
200            repeat(alternate_quote_char).take(3).collect(),
201            repeat(preferred_quote_char).take(3).collect(),
202        )
203    } else if trimmed.starts_with(alternate_quote_char) {
204        (
205            alternate_quote_char.to_string(),
206            preferred_quote_char.to_string(),
207        )
208    } else {
209        return s.to_string();
210    };
211
212    let first_quote_pos = s.find(&orig_quote).unwrap_or_default();
213    let prefix = s[..first_quote_pos].to_string();
214    let unescaped_new_quote = Regex::new(&format!(r"(([^\\]|^)(\\\\)*){new_quote}")).unwrap();
215    let escaped_new_quote = Regex::new(&format!(r"([^\\]|^)\\((?:\\\\)*){new_quote}")).unwrap();
216    let escaped_orig_quote = Regex::new(&format!(r"([^\\]|^)\\((?:\\\\)*){orig_quote}")).unwrap();
217
218    let body_start = first_quote_pos + orig_quote.len();
219    let body_end = s.len() - orig_quote.len();
220
221    let mut body = s[body_start..body_end].to_string();
222    let mut new_body = if prefix.to_lowercase().contains("r") {
223        if unescaped_new_quote.find(&body).is_some() {
224            return s.to_string();
225        }
226        body.clone()
227    } else {
228        let mut new_body =
229            regex_sub_with_overlap(&escaped_new_quote, &format!(r"$1$2{new_quote}"), &body);
230        if new_body != body {
231            body = new_body.clone();
232            s = format!("{prefix}{orig_quote}{body}{orig_quote}");
233        }
234        new_body = regex_sub_with_overlap(
235            &escaped_orig_quote,
236            &format!(r"$1$2{orig_quote}"),
237            &new_body,
238        );
239        new_body = regex_sub_with_overlap(
240            &unescaped_new_quote,
241            &format!(r"$1\\{new_quote}"),
242            &new_body,
243        );
244
245        new_body
246    };
247
248    if new_quote.chars().eq(repeat(preferred_quote_char).take(3))
249        && new_body.ends_with(preferred_quote_char)
250    {
251        let truncated_body = &new_body[..new_body.len() - 1];
252        new_body = format!("{}\\{}", truncated_body, preferred_quote_char);
253    }
254
255    let orig_escape_count = body.matches("\\").count();
256    let new_escape_count = new_body.matches("\\").count();
257    if new_escape_count > orig_escape_count {
258        return s.to_string();
259    }
260
261    if new_escape_count == orig_escape_count && orig_quote.starts_with(preferred_quote_char) {
262        s.to_string()
263    } else {
264        format!("{prefix}{new_quote}{new_body}{new_quote}")
265    }
266}
267
268fn regex_sub_with_overlap(regex: &Regex, replacement: &str, original: &str) -> String {
269    let first_pass = regex.replace_all(original, replacement);
270    let second_pass = regex.replace_all(&first_pass, replacement);
271    second_pass.to_string()
272}