sqruff_lib/templaters/
placeholder.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
4use fancy_regex::Regex;
5use sqruff_lib_core::errors::SQLFluffUserError;
6use sqruff_lib_core::templaters::base::{RawFileSlice, TemplatedFile, TemplatedFileSlice};
7
8use crate::cli::formatters::Formatter;
9use crate::core::config::FluffConfig;
10use crate::templaters::Templater;
11
12#[derive(Default)]
13pub struct PlaceholderTemplater;
14
15pub fn get_known_styles() -> HashMap<&'static str, Regex> {
16    let mut m = HashMap::new();
17
18    // e.g. WHERE bla = :name
19    m.insert(
20        "colon",
21        Regex::new(r"(?<![:\w\\]):(?P<param_name>\w+)(?!:)").unwrap(),
22    );
23
24    // e.g. WHERE bla = table:name - use with caution as more prone to false
25    // positives
26    m.insert(
27        "colon_nospaces",
28        Regex::new(r"(?<!:):(?P<param_name>\w+)").unwrap(),
29    );
30
31    // e.g. WHERE bla = :2
32    m.insert(
33        "numeric_colon",
34        Regex::new(r"(?<![:\w\\]):(?P<param_name>\d+)").unwrap(),
35    );
36
37    // e.g. WHERE bla = %(name)s
38    m.insert(
39        "pyformat",
40        Regex::new(r"(?<![:\w\\])%\((?P<param_name>[\w_]+)\)s").unwrap(),
41    );
42
43    // e.g. WHERE bla = $name or WHERE bla = ${name}
44    m.insert(
45        "dollar",
46        Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\w_]+)}?").unwrap(),
47    );
48
49    // e.g. USE ${flyway:database}.schema_name;
50    m.insert(
51        "flyway_var",
52        Regex::new(r#"\${(?P<param_name>\w+[:\w_]+)}"#).unwrap(),
53    );
54
55    // e.g. WHERE bla = ?
56    m.insert("question_mark", Regex::new(r"(?<![:\w\\])\?").unwrap());
57
58    // e.g. WHERE bla = $3 or WHERE bla = ${3}
59    m.insert(
60        "numeric_dollar",
61        Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\d]+)}?").unwrap(),
62    );
63
64    // e.g. WHERE bla = %s
65    m.insert("percent", Regex::new(r"(?<![:\w\\])%s").unwrap());
66
67    // e.g. WHERE bla = &s or WHERE bla = &{s} or USE DATABASE {ENV}_MARKETING
68    m.insert(
69        "ampersand",
70        Regex::new(r"(?<!&)&{?(?P<param_name>[\w]+)}?").unwrap(),
71    );
72
73    m
74}
75
76const NO_PARAM_OR_STYLE: &str =
77    "No param_regex nor param_style was provided to the placeholder templater.";
78
79impl PlaceholderTemplater {
80    fn derive_style(&self, config: &FluffConfig) -> Result<Regex, SQLFluffUserError> {
81        let config = config
82            .get("placeholder", "templater")
83            .as_map()
84            .ok_or(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string()))?;
85        match (config.get("param_regex"), config.get("param_style")) {
86            (Some(_), Some(_)) => Err(SQLFluffUserError::new(
87                "Both param_regex and param_style were provided to the placeholder templater."
88                    .to_string(),
89            )),
90            (None, None) => Err(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string())),
91            (Some(param_regex), None) => {
92                let param_regex = param_regex.as_string().ok_or(SQLFluffUserError::new(
93                    "Invalid param_regex for templater 'placeholder'".to_string(),
94                ))?;
95                let regex = Regex::new(param_regex).map_err(|e| {
96                    SQLFluffUserError::new(format!("Invalid regex for param_regex: {}", e))
97                })?;
98                Ok(regex)
99            }
100            (None, Some(param_style)) => {
101                let param_style = param_style.as_string().ok_or(SQLFluffUserError::new(
102                    "Invalid param_style for templater 'placeholder'".to_string(),
103                ))?;
104                let known_styles = get_known_styles();
105                let regex = known_styles.get(param_style).ok_or_else(|| {
106                    SQLFluffUserError::new(format!(
107                        "Unknown param_style '{}' for templater 'placeholder'",
108                        param_style
109                    ))
110                })?;
111                Ok(regex.clone())
112            }
113        }
114    }
115}
116
117impl Templater for PlaceholderTemplater {
118    fn name(&self) -> &'static str {
119        "placeholder"
120    }
121
122    fn description(&self) -> &'static str {
123        r#"Libraries such as SQLAlchemy or Psycopg use different parameter placeholder styles to mark where a parameter has to be inserted in the query.
124
125For example a query in SQLAlchemy can look like this:
126
127```sql
128SELECT * FROM table WHERE id = :myid
129```
130
131At runtime :myid will be replace by a value provided by the application and escaped as needed, but this is not standard SQL and cannot be parsed as is.
132
133In order to parse these queries is then necessary to replace these placeholders with sample values, and this is done with the placeholder templater.
134
135Placeholder templating can be enabled in the config using:
136
137```ini
138[sqruff]
139templater = placeholder
140```
141
142A few common styles are supported:
143
144```sql
145 -- colon
146 WHERE bla = :my_name
147
148 -- colon_nospaces
149 -- (use with caution as more prone to false positives)
150 WHERE bla = table:my_name
151
152 -- colon_optional_quotes
153 SELECT :"column" FROM :table WHERE bla = :'my_name'
154
155 -- numeric_colon
156 WHERE bla = :2
157
158 -- pyformat
159 WHERE bla = %(my_name)s
160
161 -- dollar
162 WHERE bla = $my_name or WHERE bla = ${my_name}
163
164 -- question_mark
165 WHERE bla = ?
166
167 -- numeric_dollar
168 WHERE bla = $3 or WHERE bla = ${3}
169
170 -- percent
171 WHERE bla = %s
172
173 -- ampersand
174 WHERE bla = &s or WHERE bla = &{s} or USE DATABASE MARK_{ENV}
175```
176
177The can be configured by setting `param_style` in the config file. For example:
178
179```ini
180[sqruff:templater:placeholder]
181param_style = colon
182my_name = 'john'
183```
184
185then you can set sample values for each parameter, like my_name above. Notice that the value needs to be escaped as it will be replaced as a string during parsing. When the sample values aren’t provided, the templater will use parameter names themselves by default.
186
187When parameters are positional, like question_mark, then their name is simply the order in which they appear, starting with 1.
188
189```ini
190[sqruff:templater:placeholder]
191param_style = question_mark
1921 = 'john'
193```
194
195In case you nbeed a parameter style different from the ones provided, you can set `param_regex` in the config file. For example:
196
197```ini
198[sqruff:templater:placeholder]
199param_regex = __(?P<param_name>[\w_]+)__
200my_name = 'john'
201```
202
203N.B. quotes around param_regex in the config are interpreted literally by the templater. e.g. param_regex=’__(?P<param_name>[w_]+)__’ matches ‘__some_param__’ not __some_param__
204
205the named parameter param_name will be used as the key to replace, if missing, the parameter is assumed to be positional and numbers are used instead.
206
207Also consider making a pull request to the project to have your style added, it may be useful to other people and simplify your configuration."#
208    }
209
210    fn process(
211        &self,
212        in_str: &str,
213        f_name: &str,
214        config: &FluffConfig,
215        _: &Option<Arc<dyn Formatter>>,
216    ) -> Result<TemplatedFile, SQLFluffUserError> {
217        let mut template_slices = vec![];
218        let mut raw_slices = vec![];
219        let mut last_pos_raw = 0usize;
220        let mut last_pos_templated = 0;
221        let mut out_str = "".to_string();
222
223        // when the param has no name, use a 1-based index
224        let mut param_counter = 1;
225        let regex = self.derive_style(config)?;
226
227        let template_config = config.get("placeholder", "templater").as_map();
228
229        for cap in regex.captures_iter(in_str) {
230            let cap = cap.unwrap();
231            let span = cap.get(0).unwrap().range();
232
233            let param_name = if let Some(name) = cap.name("param_name") {
234                name.as_str().to_string()
235            } else {
236                let name = param_counter.to_string();
237                param_counter += 1;
238                name
239            };
240
241            let last_literal_length = span.start - last_pos_raw;
242            let replacement = template_config
243                .and_then(|config| config.get(&param_name))
244                .map_or(Ok(param_name.clone()), |v| {
245                    match (v.as_string(), v.as_int(), v.as_bool()) {
246                        (Some(s), None, None) => Ok(s.to_string()),
247                        (None, Some(i), None) => Ok(i.to_string()),
248                        (None, None, Some(b)) => Ok(if b {
249                            "true".to_string()
250                        } else {
251                            "false".to_string()
252                        }),
253                        _ => Err(SQLFluffUserError::new(format!(
254                            "Invalid value for parameter replacement: {}",
255                            param_name
256                        ))),
257                    }
258                })?;
259
260            // Add the literal to the slices
261            template_slices.push(TemplatedFileSlice {
262                slice_type: "literal".to_string(),
263                source_slice: last_pos_raw..span.start,
264                templated_slice: last_pos_templated..last_pos_templated + last_literal_length,
265            });
266
267            raw_slices.push(RawFileSlice::new(
268                in_str[last_pos_raw..span.start].to_string(),
269                "literal".to_string(),
270                last_pos_raw,
271                None,
272                None,
273            ));
274
275            out_str.push_str(&in_str[last_pos_raw..span.start]);
276
277            // Add the current replaced element
278            let start_template_pos = last_pos_templated + last_literal_length;
279            template_slices.push(TemplatedFileSlice {
280                slice_type: "templated".to_string(),
281                source_slice: span.clone(),
282                templated_slice: start_template_pos..start_template_pos + replacement.len(),
283            });
284
285            let raw_file_slice = RawFileSlice::new(
286                in_str[span.clone()].to_string(),
287                "templated".to_string(),
288                span.start,
289                None,
290                None,
291            );
292            raw_slices.push(raw_file_slice);
293
294            out_str.push_str(&replacement);
295
296            // Update the indexes
297            last_pos_raw = span.end;
298            last_pos_templated = start_template_pos + replacement.len();
299        }
300
301        // Add the last literal, if any
302        if in_str.len() > last_pos_raw {
303            template_slices.push(TemplatedFileSlice {
304                slice_type: "literal".to_string(),
305                source_slice: last_pos_raw..in_str.len(),
306                templated_slice: last_pos_templated
307                    ..last_pos_templated + (in_str.len() - last_pos_raw),
308            });
309
310            let raw_file_slice = RawFileSlice::new(
311                in_str[last_pos_raw..].to_string(),
312                "literal".to_string(),
313                last_pos_raw,
314                None,
315                None,
316            );
317            raw_slices.push(raw_file_slice);
318
319            out_str.push_str(&in_str[last_pos_raw..]);
320        }
321
322        let templated_file = TemplatedFile::new(
323            in_str.to_string(),
324            f_name.to_string(),
325            Some(out_str),
326            Some(template_slices),
327            Some(raw_slices),
328        )
329        .unwrap();
330
331        Ok(templated_file)
332    }
333}
334
335#[cfg(test)]
336mod tests {
337    use std::mem::take;
338
339    use super::*;
340    use crate::core::linter::core::Linter;
341
342    #[test]
343    /// Test the templaters when nothing has to be replaced.
344    fn test_templater_no_replacement() {
345        let templater = PlaceholderTemplater {};
346        let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
347        let config = FluffConfig::from_source(
348            "
349[sqruff:templater:placeholder]
350param_style = colon",
351            None,
352        );
353        let out_str = templater
354            .process(in_str, "test.sql", &config, &None)
355            .unwrap();
356        let out = out_str.templated();
357        assert_eq!(in_str, out)
358    }
359
360    #[test]
361    fn test_all_the_known_styles() {
362        // in, param_style, expected_out, values
363        let cases: [(&str, &str, &str, Vec<(&str, &str)>); 16] = [
364            (
365                "SELECT * FROM f, o, o WHERE a < 10\n\n",
366                "colon",
367                "SELECT * FROM f, o, o WHERE a < 10\n\n",
368                vec![],
369            ),
370            (
371                r#"
372SELECT user_mail, city_id
373FROM users_data
374WHERE userid = :user_id AND date > :start_date
375"#,
376                "colon",
377                r#"
378SELECT user_mail, city_id
379FROM users_data
380WHERE userid = 42 AND date > '2020-01-01'
381"#,
382                vec![
383                    ("user_id", "42"),
384                    ("start_date", "'2020-01-01'"),
385                    ("city_ids", "(1, 2, 3)"),
386                ],
387            ),
388            (
389                r#"
390SELECT user_mail, city_id
391FROM users_data
392WHERE userid = :user_id AND date > :start_date"#,
393                "colon",
394                r#"
395SELECT user_mail, city_id
396FROM users_data
397WHERE userid = 42 AND date > '2020-01-01'"#,
398                vec![
399                    ("user_id", "42"),
400                    ("start_date", "'2020-01-01'"),
401                    ("city_ids", "(1, 2, 3)"),
402                ],
403            ),
404            (
405                r#"
406SELECT user_mail, city_id
407FROM users_data
408WHERE (city_id) IN :city_ids
409AND date > '2020-10-01'
410            "#,
411                "colon",
412                r#"
413SELECT user_mail, city_id
414FROM users_data
415WHERE (city_id) IN (1, 2, 3)
416AND date > '2020-10-01'
417            "#,
418                vec![
419                    ("user_id", "42"),
420                    ("start_date", "'2020-01-01'"),
421                    ("city_ids", "(1, 2, 3)"),
422                ],
423            ),
424            (
425                r#"
426SELECT user_mail, city_id
427FROM users_data:table_suffix
428"#,
429                "colon_nospaces",
430                r#"
431SELECT user_mail, city_id
432FROM users_data42
433"#,
434                vec![("table_suffix", "42")],
435            ),
436            (
437                // Postgres uses double-colons for type casts, see
438                // https://www.postgresql.org/docs/current/sql-expressions.html#SQL-SYNTAX-TYPE-CASTS
439                // This test ensures we don't confuse them with colon placeholders.
440                r#"
441SELECT user_mail, city_id, joined::date
442FROM users_date:table_suffix
443"#,
444                "colon_nospaces",
445                r#"
446SELECT user_mail, city_id, joined::date
447FROM users_date42
448"#,
449                vec![("table_suffix", "42")],
450            ),
451            (
452                r#"
453SELECT user_mail, city_id
454FROM users_data
455WHERE (city_id) IN ?
456AND date > ?
457            "#,
458                "question_mark",
459                r#"
460SELECT user_mail, city_id
461FROM users_data
462WHERE (city_id) IN (1, 2, 3, 45)
463AND date > '2020-10-01'
464            "#,
465                vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
466            ),
467            (
468                r#"
469SELECT user_mail, city_id
470FROM users_data
471WHERE (city_id) IN :1
472AND date > :45
473            "#,
474                "numeric_colon",
475                r#"
476SELECT user_mail, city_id
477FROM users_data
478WHERE (city_id) IN (1, 2, 3, 45)
479AND date > '2020-10-01'
480            "#,
481                vec![("1", "(1, 2, 3, 45)"), ("45", "'2020-10-01'")],
482            ),
483            (
484                r#"
485SELECT user_mail, city_id
486FROM users_data
487WHERE (city_id) IN %(city_id)s
488AND date > %(date)s
489AND someflag = %(someflag)s
490LIMIT %(limit)s
491            "#,
492                "pyformat",
493                r#"
494SELECT user_mail, city_id
495FROM users_data
496WHERE (city_id) IN (1, 2, 3, 45)
497AND date > '2020-10-01'
498AND someflag = false
499LIMIT 15
500            "#,
501                vec![
502                    ("city_id", "(1, 2, 3, 45)"),
503                    ("date", "'2020-10-01'"),
504                    ("limit", "15"),
505                    ("someflag", "false"),
506                ],
507            ),
508            (
509                r#"
510SELECT user_mail, city_id
511FROM users_data
512WHERE (city_id) IN $city_id
513AND date > $date
514OR date = ${date}
515            "#,
516                "dollar",
517                r#"
518SELECT user_mail, city_id
519FROM users_data
520WHERE (city_id) IN (1, 2, 3, 45)
521AND date > '2020-10-01'
522OR date = '2020-10-01'
523            "#,
524                vec![("city_id", "(1, 2, 3, 45)"), ("date", "'2020-10-01'")],
525            ),
526            (
527                r#"
528SELECT user_mail, city_id
529FROM users_data
530WHERE (city_id) IN $12
531AND date > $90
532            "#,
533                "numeric_dollar",
534                r#"
535SELECT user_mail, city_id
536FROM users_data
537WHERE (city_id) IN (1, 2, 3, 45)
538AND date > '2020-10-01'
539            "#,
540                vec![("12", "(1, 2, 3, 45)"), ("90", "'2020-10-01'")],
541            ),
542            (
543                r#"
544SELECT user_mail, city_id
545FROM users_data
546WHERE (city_id) IN %s
547AND date > %s
548            "#,
549                "percent",
550                r#"
551SELECT user_mail, city_id
552FROM users_data
553WHERE (city_id) IN (1, 2, 3, 45)
554AND date > '2020-10-01'
555            "#,
556                vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
557            ),
558            (
559                r#"
560USE DATABASE &{env}_MARKETING;
561USE SCHEMA &&EMEA;
562SELECT user_mail, city_id
563FROM users_data
564WHERE userid = &user_id AND date > &{start_date}
565            "#,
566                "ampersand",
567                r#"
568USE DATABASE PRD_MARKETING;
569USE SCHEMA &&EMEA;
570SELECT user_mail, city_id
571FROM users_data
572WHERE userid = 42 AND date > '2021-10-01'
573            "#,
574                vec![
575                    ("env", "PRD"),
576                    ("user_id", "42"),
577                    ("start_date", "'2021-10-01'"),
578                ],
579            ),
580            (
581                "USE ${flywaydatabase}.test_schema;",
582                "flyway_var",
583                "USE test_db.test_schema;",
584                vec![("flywaydatabase", "test_db")],
585            ),
586            (
587                "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
588                "flyway_var",
589                "SELECT metadata$filename, $1 FROM @stg_data_export_staging;",
590                vec![("env_name", "staging")],
591            ),
592            (
593                "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
594                "flyway_var",
595                "SELECT metadata$filename, $1 FROM @stg_data_export_env_name;",
596                vec![],
597            ),
598        ];
599
600        for (in_str, param_style, expected_out, values) in cases {
601            let config = FluffConfig::from_source(
602                format!(
603                    r#"
604[sqruff:templater:placeholder]
605param_style = {}
606{}
607"#,
608                    param_style,
609                    values
610                        .iter()
611                        .map(|(k, v)| format!("{} = {}", k, v))
612                        .collect::<Vec<String>>()
613                        .join("\n")
614                )
615                .as_str(),
616                None,
617            );
618            let templater = PlaceholderTemplater {};
619            let out_str = templater
620                .process(in_str, "test.sql", &config, &None)
621                .unwrap();
622            let out = out_str.templated();
623            assert_eq!(expected_out, out)
624        }
625    }
626
627    #[test]
628    /// Test the error raised when config is incomplete, as in no param_regex
629    /// nor param_style.
630    fn test_templater_setup_none() {
631        let config = FluffConfig::from_source("", None);
632        let templater = PlaceholderTemplater {};
633        let in_str = "SELECT 2+2";
634        let out_str = templater.process(in_str, "test.sql", &config, &None);
635
636        assert!(out_str.is_err());
637        assert_eq!(
638            out_str.err().unwrap().value,
639            "No param_regex nor param_style was provided to the placeholder templater."
640        );
641    }
642
643    #[test]
644    /// Test the error raised when both param_regex and param_style are
645    /// provided.
646    fn test_templater_setup_both_provided() {
647        let config = FluffConfig::from_source(
648            r#"
649[sqruff:templater:placeholder]
650param_regex = __(?P<param_name>[\w_]+)__
651param_style = colon
652            "#,
653            None,
654        );
655        let templater = PlaceholderTemplater {};
656        let in_str = "SELECT 2+2";
657        let out_str = templater.process(in_str, "test.sql", &config, &None);
658
659        assert!(out_str.is_err());
660        assert_eq!(
661            out_str.err().unwrap().value,
662            "Both param_regex and param_style were provided to the placeholder templater."
663        );
664    }
665
666    #[test]
667    /// Test custom regex templating.
668    fn test_templater_custom_regex() {
669        let config = FluffConfig::from_source(
670            r#"
671[sqruff:templater:placeholder]
672param_regex = __(?P<param_name>[\w_]+)__
673my_name = john
674"#,
675            None,
676        );
677        let templater = PlaceholderTemplater {};
678        let in_str = "SELECT bla FROM blob WHERE id = __my_name__";
679        let out_str = templater.process(in_str, "test", &config, &None).unwrap();
680        let out = out_str.templated();
681        assert_eq!("SELECT bla FROM blob WHERE id = john", out)
682    }
683
684    #[test]
685    /// Test the exception raised when parameter styles is unknown.
686    fn test_templater_styles_not_existing() {
687        let config = FluffConfig::from_source(
688            r#"
689[sqruff:templater:placeholder]
690param_style = unknown
691            "#,
692            None,
693        );
694        let templater = PlaceholderTemplater {};
695        let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
696        let out_str = templater.process(in_str, "test.sql", &config, &None);
697
698        assert!(out_str.is_err());
699        assert_eq!(
700            out_str.err().unwrap().value,
701            "Unknown param_style 'unknown' for templater 'placeholder'"
702        );
703    }
704
705    #[test]
706    /// Test the linter fully with this templater.
707    fn test_templater_placeholder() {
708        let config = FluffConfig::from_source(
709            r#"
710[sqruff]
711dialect = ansi
712templater = placeholder
713rules = all
714
715[sqruff:templater:placeholder]
716param_style = percent
717"#,
718            None,
719        );
720        let sql = "SELECT a,b FROM users WHERE a = %s";
721
722        let mut linter = Linter::new(config, None, None, false);
723        let mut result = linter.lint_string_wrapped(sql, None, true);
724        let result = take(&mut result.paths[0].files[0]).fix_string();
725
726        assert_eq!(result, "SELECT\n    a,\n    b\nFROM users WHERE a = %s\n");
727    }
728}