1use std::collections::HashMap;
2use std::sync::Arc;
3
4use fancy_regex::Regex;
5use sqruff_lib_core::errors::SQLFluffUserError;
6use sqruff_lib_core::templaters::base::{RawFileSlice, TemplatedFile, TemplatedFileSlice};
7
8use crate::cli::formatters::Formatter;
9use crate::core::config::FluffConfig;
10use crate::templaters::Templater;
11
12#[derive(Default)]
13pub struct PlaceholderTemplater;
14
15pub fn get_known_styles() -> HashMap<&'static str, Regex> {
16 let mut m = HashMap::new();
17
18 m.insert(
20 "colon",
21 Regex::new(r"(?<![:\w\\]):(?P<param_name>\w+)(?!:)").unwrap(),
22 );
23
24 m.insert(
27 "colon_nospaces",
28 Regex::new(r"(?<!:):(?P<param_name>\w+)").unwrap(),
29 );
30
31 m.insert(
33 "numeric_colon",
34 Regex::new(r"(?<![:\w\\]):(?P<param_name>\d+)").unwrap(),
35 );
36
37 m.insert(
39 "pyformat",
40 Regex::new(r"(?<![:\w\\])%\((?P<param_name>[\w_]+)\)s").unwrap(),
41 );
42
43 m.insert(
45 "dollar",
46 Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\w_]+)}?").unwrap(),
47 );
48
49 m.insert(
51 "flyway_var",
52 Regex::new(r#"\${(?P<param_name>\w+[:\w_]+)}"#).unwrap(),
53 );
54
55 m.insert("question_mark", Regex::new(r"(?<![:\w\\])\?").unwrap());
57
58 m.insert(
60 "numeric_dollar",
61 Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\d]+)}?").unwrap(),
62 );
63
64 m.insert("percent", Regex::new(r"(?<![:\w\\])%s").unwrap());
66
67 m.insert(
69 "ampersand",
70 Regex::new(r"(?<!&)&{?(?P<param_name>[\w]+)}?").unwrap(),
71 );
72
73 m
74}
75
76const NO_PARAM_OR_STYLE: &str =
77 "No param_regex nor param_style was provided to the placeholder templater.";
78
79impl PlaceholderTemplater {
80 fn derive_style(&self, config: &FluffConfig) -> Result<Regex, SQLFluffUserError> {
81 let config = config
82 .get("placeholder", "templater")
83 .as_map()
84 .ok_or(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string()))?;
85 match (config.get("param_regex"), config.get("param_style")) {
86 (Some(_), Some(_)) => Err(SQLFluffUserError::new(
87 "Both param_regex and param_style were provided to the placeholder templater."
88 .to_string(),
89 )),
90 (None, None) => Err(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string())),
91 (Some(param_regex), None) => {
92 let param_regex = param_regex.as_string().ok_or(SQLFluffUserError::new(
93 "Invalid param_regex for templater 'placeholder'".to_string(),
94 ))?;
95 let regex = Regex::new(param_regex).map_err(|e| {
96 SQLFluffUserError::new(format!("Invalid regex for param_regex: {}", e))
97 })?;
98 Ok(regex)
99 }
100 (None, Some(param_style)) => {
101 let param_style = param_style.as_string().ok_or(SQLFluffUserError::new(
102 "Invalid param_style for templater 'placeholder'".to_string(),
103 ))?;
104 let known_styles = get_known_styles();
105 let regex = known_styles.get(param_style).ok_or_else(|| {
106 SQLFluffUserError::new(format!(
107 "Unknown param_style '{}' for templater 'placeholder'",
108 param_style
109 ))
110 })?;
111 Ok(regex.clone())
112 }
113 }
114 }
115}
116
117impl Templater for PlaceholderTemplater {
118 fn name(&self) -> &'static str {
119 "placeholder"
120 }
121
122 fn description(&self) -> &'static str {
123 r#"Libraries such as SQLAlchemy or Psycopg use different parameter placeholder styles to mark where a parameter has to be inserted in the query.
124
125For example a query in SQLAlchemy can look like this:
126
127```sql
128SELECT * FROM table WHERE id = :myid
129```
130
131At runtime :myid will be replace by a value provided by the application and escaped as needed, but this is not standard SQL and cannot be parsed as is.
132
133In order to parse these queries is then necessary to replace these placeholders with sample values, and this is done with the placeholder templater.
134
135Placeholder templating can be enabled in the config using:
136
137```ini
138[sqruff]
139templater = placeholder
140```
141
142A few common styles are supported:
143
144```sql
145 -- colon
146 WHERE bla = :my_name
147
148 -- colon_nospaces
149 -- (use with caution as more prone to false positives)
150 WHERE bla = table:my_name
151
152 -- colon_optional_quotes
153 SELECT :"column" FROM :table WHERE bla = :'my_name'
154
155 -- numeric_colon
156 WHERE bla = :2
157
158 -- pyformat
159 WHERE bla = %(my_name)s
160
161 -- dollar
162 WHERE bla = $my_name or WHERE bla = ${my_name}
163
164 -- question_mark
165 WHERE bla = ?
166
167 -- numeric_dollar
168 WHERE bla = $3 or WHERE bla = ${3}
169
170 -- percent
171 WHERE bla = %s
172
173 -- ampersand
174 WHERE bla = &s or WHERE bla = &{s} or USE DATABASE MARK_{ENV}
175```
176
177The can be configured by setting `param_style` in the config file. For example:
178
179```ini
180[sqruff:templater:placeholder]
181param_style = colon
182my_name = 'john'
183```
184
185then you can set sample values for each parameter, like my_name above. Notice that the value needs to be escaped as it will be replaced as a string during parsing. When the sample values aren’t provided, the templater will use parameter names themselves by default.
186
187When parameters are positional, like question_mark, then their name is simply the order in which they appear, starting with 1.
188
189```ini
190[sqruff:templater:placeholder]
191param_style = question_mark
1921 = 'john'
193```
194
195In case you nbeed a parameter style different from the ones provided, you can set `param_regex` in the config file. For example:
196
197```ini
198[sqruff:templater:placeholder]
199param_regex = __(?P<param_name>[\w_]+)__
200my_name = 'john'
201```
202
203N.B. quotes around param_regex in the config are interpreted literally by the templater. e.g. param_regex=’__(?P<param_name>[w_]+)__’ matches ‘__some_param__’ not __some_param__
204
205the named parameter param_name will be used as the key to replace, if missing, the parameter is assumed to be positional and numbers are used instead.
206
207Also consider making a pull request to the project to have your style added, it may be useful to other people and simplify your configuration."#
208 }
209
210 fn process(
211 &self,
212 in_str: &str,
213 f_name: &str,
214 config: &FluffConfig,
215 _: &Option<Arc<dyn Formatter>>,
216 ) -> Result<TemplatedFile, SQLFluffUserError> {
217 let mut template_slices = vec![];
218 let mut raw_slices = vec![];
219 let mut last_pos_raw = 0usize;
220 let mut last_pos_templated = 0;
221 let mut out_str = "".to_string();
222
223 let mut param_counter = 1;
225 let regex = self.derive_style(config)?;
226
227 let template_config = config.get("placeholder", "templater").as_map();
228
229 for cap in regex.captures_iter(in_str) {
230 let cap = cap.unwrap();
231 let span = cap.get(0).unwrap().range();
232
233 let param_name = if let Some(name) = cap.name("param_name") {
234 name.as_str().to_string()
235 } else {
236 let name = param_counter.to_string();
237 param_counter += 1;
238 name
239 };
240
241 let last_literal_length = span.start - last_pos_raw;
242 let replacement = template_config
243 .and_then(|config| config.get(¶m_name))
244 .map_or(Ok(param_name.clone()), |v| {
245 match (v.as_string(), v.as_int(), v.as_bool()) {
246 (Some(s), None, None) => Ok(s.to_string()),
247 (None, Some(i), None) => Ok(i.to_string()),
248 (None, None, Some(b)) => Ok(if b {
249 "true".to_string()
250 } else {
251 "false".to_string()
252 }),
253 _ => Err(SQLFluffUserError::new(format!(
254 "Invalid value for parameter replacement: {}",
255 param_name
256 ))),
257 }
258 })?;
259
260 template_slices.push(TemplatedFileSlice {
262 slice_type: "literal".to_string(),
263 source_slice: last_pos_raw..span.start,
264 templated_slice: last_pos_templated..last_pos_templated + last_literal_length,
265 });
266
267 raw_slices.push(RawFileSlice::new(
268 in_str[last_pos_raw..span.start].to_string(),
269 "literal".to_string(),
270 last_pos_raw,
271 None,
272 None,
273 ));
274
275 out_str.push_str(&in_str[last_pos_raw..span.start]);
276
277 let start_template_pos = last_pos_templated + last_literal_length;
279 template_slices.push(TemplatedFileSlice {
280 slice_type: "templated".to_string(),
281 source_slice: span.clone(),
282 templated_slice: start_template_pos..start_template_pos + replacement.len(),
283 });
284
285 let raw_file_slice = RawFileSlice::new(
286 in_str[span.clone()].to_string(),
287 "templated".to_string(),
288 span.start,
289 None,
290 None,
291 );
292 raw_slices.push(raw_file_slice);
293
294 out_str.push_str(&replacement);
295
296 last_pos_raw = span.end;
298 last_pos_templated = start_template_pos + replacement.len();
299 }
300
301 if in_str.len() > last_pos_raw {
303 template_slices.push(TemplatedFileSlice {
304 slice_type: "literal".to_string(),
305 source_slice: last_pos_raw..in_str.len(),
306 templated_slice: last_pos_templated
307 ..last_pos_templated + (in_str.len() - last_pos_raw),
308 });
309
310 let raw_file_slice = RawFileSlice::new(
311 in_str[last_pos_raw..].to_string(),
312 "literal".to_string(),
313 last_pos_raw,
314 None,
315 None,
316 );
317 raw_slices.push(raw_file_slice);
318
319 out_str.push_str(&in_str[last_pos_raw..]);
320 }
321
322 let templated_file = TemplatedFile::new(
323 in_str.to_string(),
324 f_name.to_string(),
325 Some(out_str),
326 Some(template_slices),
327 Some(raw_slices),
328 )
329 .unwrap();
330
331 Ok(templated_file)
332 }
333}
334
335#[cfg(test)]
336mod tests {
337 use std::mem::take;
338
339 use super::*;
340 use crate::core::linter::core::Linter;
341
342 #[test]
343 fn test_templater_no_replacement() {
345 let templater = PlaceholderTemplater {};
346 let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
347 let config = FluffConfig::from_source(
348 "
349[sqruff:templater:placeholder]
350param_style = colon",
351 None,
352 );
353 let out_str = templater
354 .process(in_str, "test.sql", &config, &None)
355 .unwrap();
356 let out = out_str.templated();
357 assert_eq!(in_str, out)
358 }
359
360 #[test]
361 fn test_all_the_known_styles() {
362 let cases: [(&str, &str, &str, Vec<(&str, &str)>); 16] = [
364 (
365 "SELECT * FROM f, o, o WHERE a < 10\n\n",
366 "colon",
367 "SELECT * FROM f, o, o WHERE a < 10\n\n",
368 vec![],
369 ),
370 (
371 r#"
372SELECT user_mail, city_id
373FROM users_data
374WHERE userid = :user_id AND date > :start_date
375"#,
376 "colon",
377 r#"
378SELECT user_mail, city_id
379FROM users_data
380WHERE userid = 42 AND date > '2020-01-01'
381"#,
382 vec![
383 ("user_id", "42"),
384 ("start_date", "'2020-01-01'"),
385 ("city_ids", "(1, 2, 3)"),
386 ],
387 ),
388 (
389 r#"
390SELECT user_mail, city_id
391FROM users_data
392WHERE userid = :user_id AND date > :start_date"#,
393 "colon",
394 r#"
395SELECT user_mail, city_id
396FROM users_data
397WHERE userid = 42 AND date > '2020-01-01'"#,
398 vec![
399 ("user_id", "42"),
400 ("start_date", "'2020-01-01'"),
401 ("city_ids", "(1, 2, 3)"),
402 ],
403 ),
404 (
405 r#"
406SELECT user_mail, city_id
407FROM users_data
408WHERE (city_id) IN :city_ids
409AND date > '2020-10-01'
410 "#,
411 "colon",
412 r#"
413SELECT user_mail, city_id
414FROM users_data
415WHERE (city_id) IN (1, 2, 3)
416AND date > '2020-10-01'
417 "#,
418 vec![
419 ("user_id", "42"),
420 ("start_date", "'2020-01-01'"),
421 ("city_ids", "(1, 2, 3)"),
422 ],
423 ),
424 (
425 r#"
426SELECT user_mail, city_id
427FROM users_data:table_suffix
428"#,
429 "colon_nospaces",
430 r#"
431SELECT user_mail, city_id
432FROM users_data42
433"#,
434 vec![("table_suffix", "42")],
435 ),
436 (
437 r#"
441SELECT user_mail, city_id, joined::date
442FROM users_date:table_suffix
443"#,
444 "colon_nospaces",
445 r#"
446SELECT user_mail, city_id, joined::date
447FROM users_date42
448"#,
449 vec![("table_suffix", "42")],
450 ),
451 (
452 r#"
453SELECT user_mail, city_id
454FROM users_data
455WHERE (city_id) IN ?
456AND date > ?
457 "#,
458 "question_mark",
459 r#"
460SELECT user_mail, city_id
461FROM users_data
462WHERE (city_id) IN (1, 2, 3, 45)
463AND date > '2020-10-01'
464 "#,
465 vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
466 ),
467 (
468 r#"
469SELECT user_mail, city_id
470FROM users_data
471WHERE (city_id) IN :1
472AND date > :45
473 "#,
474 "numeric_colon",
475 r#"
476SELECT user_mail, city_id
477FROM users_data
478WHERE (city_id) IN (1, 2, 3, 45)
479AND date > '2020-10-01'
480 "#,
481 vec![("1", "(1, 2, 3, 45)"), ("45", "'2020-10-01'")],
482 ),
483 (
484 r#"
485SELECT user_mail, city_id
486FROM users_data
487WHERE (city_id) IN %(city_id)s
488AND date > %(date)s
489AND someflag = %(someflag)s
490LIMIT %(limit)s
491 "#,
492 "pyformat",
493 r#"
494SELECT user_mail, city_id
495FROM users_data
496WHERE (city_id) IN (1, 2, 3, 45)
497AND date > '2020-10-01'
498AND someflag = false
499LIMIT 15
500 "#,
501 vec![
502 ("city_id", "(1, 2, 3, 45)"),
503 ("date", "'2020-10-01'"),
504 ("limit", "15"),
505 ("someflag", "false"),
506 ],
507 ),
508 (
509 r#"
510SELECT user_mail, city_id
511FROM users_data
512WHERE (city_id) IN $city_id
513AND date > $date
514OR date = ${date}
515 "#,
516 "dollar",
517 r#"
518SELECT user_mail, city_id
519FROM users_data
520WHERE (city_id) IN (1, 2, 3, 45)
521AND date > '2020-10-01'
522OR date = '2020-10-01'
523 "#,
524 vec![("city_id", "(1, 2, 3, 45)"), ("date", "'2020-10-01'")],
525 ),
526 (
527 r#"
528SELECT user_mail, city_id
529FROM users_data
530WHERE (city_id) IN $12
531AND date > $90
532 "#,
533 "numeric_dollar",
534 r#"
535SELECT user_mail, city_id
536FROM users_data
537WHERE (city_id) IN (1, 2, 3, 45)
538AND date > '2020-10-01'
539 "#,
540 vec![("12", "(1, 2, 3, 45)"), ("90", "'2020-10-01'")],
541 ),
542 (
543 r#"
544SELECT user_mail, city_id
545FROM users_data
546WHERE (city_id) IN %s
547AND date > %s
548 "#,
549 "percent",
550 r#"
551SELECT user_mail, city_id
552FROM users_data
553WHERE (city_id) IN (1, 2, 3, 45)
554AND date > '2020-10-01'
555 "#,
556 vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
557 ),
558 (
559 r#"
560USE DATABASE &{env}_MARKETING;
561USE SCHEMA &&EMEA;
562SELECT user_mail, city_id
563FROM users_data
564WHERE userid = &user_id AND date > &{start_date}
565 "#,
566 "ampersand",
567 r#"
568USE DATABASE PRD_MARKETING;
569USE SCHEMA &&EMEA;
570SELECT user_mail, city_id
571FROM users_data
572WHERE userid = 42 AND date > '2021-10-01'
573 "#,
574 vec![
575 ("env", "PRD"),
576 ("user_id", "42"),
577 ("start_date", "'2021-10-01'"),
578 ],
579 ),
580 (
581 "USE ${flywaydatabase}.test_schema;",
582 "flyway_var",
583 "USE test_db.test_schema;",
584 vec![("flywaydatabase", "test_db")],
585 ),
586 (
587 "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
588 "flyway_var",
589 "SELECT metadata$filename, $1 FROM @stg_data_export_staging;",
590 vec![("env_name", "staging")],
591 ),
592 (
593 "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
594 "flyway_var",
595 "SELECT metadata$filename, $1 FROM @stg_data_export_env_name;",
596 vec![],
597 ),
598 ];
599
600 for (in_str, param_style, expected_out, values) in cases {
601 let config = FluffConfig::from_source(
602 format!(
603 r#"
604[sqruff:templater:placeholder]
605param_style = {}
606{}
607"#,
608 param_style,
609 values
610 .iter()
611 .map(|(k, v)| format!("{} = {}", k, v))
612 .collect::<Vec<String>>()
613 .join("\n")
614 )
615 .as_str(),
616 None,
617 );
618 let templater = PlaceholderTemplater {};
619 let out_str = templater
620 .process(in_str, "test.sql", &config, &None)
621 .unwrap();
622 let out = out_str.templated();
623 assert_eq!(expected_out, out)
624 }
625 }
626
627 #[test]
628 fn test_templater_setup_none() {
631 let config = FluffConfig::from_source("", None);
632 let templater = PlaceholderTemplater {};
633 let in_str = "SELECT 2+2";
634 let out_str = templater.process(in_str, "test.sql", &config, &None);
635
636 assert!(out_str.is_err());
637 assert_eq!(
638 out_str.err().unwrap().value,
639 "No param_regex nor param_style was provided to the placeholder templater."
640 );
641 }
642
643 #[test]
644 fn test_templater_setup_both_provided() {
647 let config = FluffConfig::from_source(
648 r#"
649[sqruff:templater:placeholder]
650param_regex = __(?P<param_name>[\w_]+)__
651param_style = colon
652 "#,
653 None,
654 );
655 let templater = PlaceholderTemplater {};
656 let in_str = "SELECT 2+2";
657 let out_str = templater.process(in_str, "test.sql", &config, &None);
658
659 assert!(out_str.is_err());
660 assert_eq!(
661 out_str.err().unwrap().value,
662 "Both param_regex and param_style were provided to the placeholder templater."
663 );
664 }
665
666 #[test]
667 fn test_templater_custom_regex() {
669 let config = FluffConfig::from_source(
670 r#"
671[sqruff:templater:placeholder]
672param_regex = __(?P<param_name>[\w_]+)__
673my_name = john
674"#,
675 None,
676 );
677 let templater = PlaceholderTemplater {};
678 let in_str = "SELECT bla FROM blob WHERE id = __my_name__";
679 let out_str = templater.process(in_str, "test", &config, &None).unwrap();
680 let out = out_str.templated();
681 assert_eq!("SELECT bla FROM blob WHERE id = john", out)
682 }
683
684 #[test]
685 fn test_templater_styles_not_existing() {
687 let config = FluffConfig::from_source(
688 r#"
689[sqruff:templater:placeholder]
690param_style = unknown
691 "#,
692 None,
693 );
694 let templater = PlaceholderTemplater {};
695 let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
696 let out_str = templater.process(in_str, "test.sql", &config, &None);
697
698 assert!(out_str.is_err());
699 assert_eq!(
700 out_str.err().unwrap().value,
701 "Unknown param_style 'unknown' for templater 'placeholder'"
702 );
703 }
704
705 #[test]
706 fn test_templater_placeholder() {
708 let config = FluffConfig::from_source(
709 r#"
710[sqruff]
711dialect = ansi
712templater = placeholder
713rules = all
714
715[sqruff:templater:placeholder]
716param_style = percent
717"#,
718 None,
719 );
720 let sql = "SELECT a,b FROM users WHERE a = %s";
721
722 let mut linter = Linter::new(config, None, None, false);
723 let mut result = linter.lint_string_wrapped(sql, None, true);
724 let result = take(&mut result.paths[0].files[0]).fix_string();
725
726 assert_eq!(result, "SELECT\n a,\n b\nFROM users WHERE a = %s\n");
727 }
728}