sqruff_lib/core/linter/linted_file.rs
1use std::ops::Range;
2
3use crate::core::rules::noqa::IgnoreMask;
4use itertools::Itertools;
5use rustc_hash::FxHashSet;
6use sqruff_lib_core::errors::{SQLBaseError, SqlError};
7use sqruff_lib_core::parser::segments::fix::FixPatch;
8use sqruff_lib_core::templaters::base::{RawFileSlice, TemplatedFile};
9
10#[derive(Debug, Default)]
11pub struct LintedFile {
12 pub path: String,
13 pub patches: Vec<FixPatch>,
14 pub templated_file: TemplatedFile,
15 pub violations: Vec<SQLBaseError>,
16 pub ignore_mask: Option<IgnoreMask>,
17}
18
19impl LintedFile {
20 pub fn get_violations(&self, is_fixable: Option<bool>) -> Vec<SQLBaseError> {
21 if let Some(is_fixable) = is_fixable {
22 self.violations
23 .iter()
24 .filter(|v| v.fixable() == is_fixable)
25 .cloned()
26 .collect_vec()
27 } else {
28 self.violations.clone().into_iter().map_into().collect_vec()
29 }
30 }
31
32 /// Use patches and raw file to fix the source file.
33 ///
34 /// This assumes that patches and slices have already
35 /// been coordinated. If they haven't then this will
36 /// fail because we rely on patches having a corresponding
37 /// slice of exactly the right file in the list of file
38 /// slices.
39 pub fn build_up_fixed_source_string(
40 source_file_slices: &[Range<usize>],
41 source_patches: &[FixPatch],
42 raw_source_string: &str,
43 ) -> String {
44 // Iterate through the patches, building up the new string.
45 let mut str_buff = String::new();
46 for source_slice in source_file_slices.iter() {
47 // Is it one in the patch buffer:
48 let mut is_patched = false;
49 for patch in source_patches.iter() {
50 if patch.source_slice == *source_slice {
51 str_buff.push_str(&patch.fixed_raw);
52 is_patched = true;
53 break;
54 }
55 }
56 if !is_patched {
57 // Use the raw string
58 str_buff.push_str(&raw_source_string[source_slice.start..source_slice.end]);
59 }
60 }
61 str_buff
62 }
63
64 pub fn fix_string(self) -> String {
65 // Generate patches from the fixed tree. In the process we sort
66 // and deduplicate them so that the resultant list is in the
67 // right order for the source file without any duplicates.
68 let filtered_source_patches =
69 Self::generate_source_patches(self.patches, &self.templated_file);
70
71 // Any Template tags in the source file are off limits, unless we're explicitly
72 // fixing the source file.
73 let source_only_slices = self.templated_file.source_only_slices();
74
75 // We now slice up the file using the patches and any source only slices.
76 // This gives us regions to apply changes to.
77 let slice_buff = Self::slice_source_file_using_patches(
78 filtered_source_patches.clone(),
79 source_only_slices,
80 &self.templated_file.source_str,
81 );
82
83 Self::build_up_fixed_source_string(
84 &slice_buff,
85 &filtered_source_patches,
86 &self.templated_file.source_str,
87 )
88 }
89
90 fn generate_source_patches(
91 patches: Vec<FixPatch>,
92 _templated_file: &TemplatedFile,
93 ) -> Vec<FixPatch> {
94 let mut filtered_source_patches = Vec::new();
95 let mut dedupe_buffer = FxHashSet::default();
96
97 for patch in patches {
98 if dedupe_buffer.insert(patch.dedupe_tuple()) {
99 filtered_source_patches.push(patch);
100 }
101 }
102
103 filtered_source_patches.sort_by_key(|x| x.source_slice.start);
104 filtered_source_patches
105 }
106
107 /// Use patches to safely slice up the file before fixing.
108 ///
109 /// This uses source only slices to avoid overwriting sections
110 /// of templated code in the source file (when we don't want to).
111 ///
112 /// We assume that the source patches have already been
113 /// sorted and deduplicated. Sorting is important. If the slices
114 /// aren't sorted then this function will miss chunks.
115 /// If there are overlaps or duplicates then this function
116 /// may produce strange results.
117 fn slice_source_file_using_patches(
118 source_patches: Vec<FixPatch>,
119 mut source_only_slices: Vec<RawFileSlice>,
120 raw_source_string: &str,
121 ) -> Vec<Range<usize>> {
122 // We now slice up the file using the patches and any source only slices.
123 // This gives us regions to apply changes to.
124 let mut slice_buff: Vec<Range<usize>> = Vec::new();
125 let mut source_idx = 0;
126
127 for patch in &source_patches {
128 // Are there templated slices at or before the start of this patch?
129 // TODO: We'll need to explicit handling for template fixes here, because
130 // they ARE source only slices. If we can get handling to work properly
131 // here then this is the last hurdle and it will flow through
132 // smoothly from here.
133 while source_only_slices
134 .first()
135 .is_some_and(|s| s.source_idx < patch.source_slice.start)
136 {
137 let next_so_slice = source_only_slices.remove(0).source_slice();
138 // Add a pre-slice before the next templated slices if needed.
139 if next_so_slice.end > source_idx {
140 slice_buff.push(source_idx..next_so_slice.start);
141 }
142 // Add the templated slice.
143 slice_buff.push(next_so_slice.clone());
144 source_idx = next_so_slice.end;
145 }
146
147 // Does this patch cover the next source-only slice directly?
148 if source_only_slices
149 .first()
150 .is_some_and(|s| patch.source_slice == s.source_slice())
151 {
152 // Log information here if needed
153 // Removing next source only slice from the stack because it
154 // covers the same area of source file as the current patch.
155 source_only_slices.remove(0);
156 }
157
158 // Is there a gap between current position and this patch?
159 if patch.source_slice.start > source_idx {
160 // Add a slice up to this patch.
161 slice_buff.push(source_idx..patch.source_slice.start);
162 }
163
164 // Is this patch covering an area we've already covered?
165 if patch.source_slice.start < source_idx {
166 // NOTE: This shouldn't happen. With more detailed templating
167 // this shouldn't happen - but in the off-chance that this does
168 // happen - then this code path remains.
169 // Log information here if needed
170 // Skipping overlapping patch at Index.
171 continue;
172 }
173
174 // Add this patch.
175 slice_buff.push(patch.source_slice.clone());
176 source_idx = patch.source_slice.end;
177 }
178 // Add a tail slice.
179 if source_idx < raw_source_string.len() {
180 slice_buff.push(source_idx..raw_source_string.len());
181 }
182
183 slice_buff
184 }
185}
186
#[cfg(test)]
mod test {
    use sqruff_lib_core::templaters::base::TemplatedFileSlice;

    use super::*;

    /// Builds a [`FixPatch`] from literals, forwarding the arguments to
    /// `FixPatch::new` in the same order.
    ///
    /// The second argument is the replacement text and the third is the
    /// source slice the patch applies to. The remaining parameter names are
    /// presumed from how the cases below use them.
    fn patch(
        templated_slice: Range<usize>,
        fixed_raw: &'static str,
        source_slice: Range<usize>,
        templated_str: &'static str,
        source_str: &'static str,
    ) -> FixPatch {
        FixPatch::new(
            templated_slice,
            fixed_raw.into(),
            source_slice,
            templated_str.into(),
            source_str.into(),
        )
    }

    /// Builds a [`RawFileSlice`] with the two trailing optional arguments
    /// left as `None`.
    fn raw_slice(raw: &'static str, slice_type: &'static str, source_idx: usize) -> RawFileSlice {
        RawFileSlice::new(raw.into(), slice_type.into(), source_idx, None, None)
    }

    /// Checks `build_up_fixed_source_string`, the last step of `fix_string`.
    #[test]
    fn test_linted_file_build_up_fixed_source_string() {
        // Each case: (file slices, patches, raw source, expected fixed source).
        let cases = [
            // Trivial example
            (vec![0..1], vec![], "a", "a"),
            // Simple replacement
            (
                vec![0..1, 1..2, 2..3],
                vec![patch(1..2, "d", 1..2, "b", "b")],
                "abc",
                "adc",
            ),
            // Simple insertion
            (
                vec![0..1, 1..1, 1..2],
                vec![patch(1..1, "b", 1..1, "", "")],
                "ac",
                "abc",
            ),
            // Simple deletion
            (
                vec![0..1, 1..2, 2..3],
                vec![patch(1..2, "", 1..2, "b", "b")],
                "abc",
                "ac",
            ),
            // Illustrative templated example (although practically at this
            // step, the routine shouldn't care if it's templated).
            (
                vec![0..2, 2..7, 7..9],
                vec![patch(2..3, "{{ b }}", 2..7, "b", "{{b}}")],
                "a {{b}} c",
                "a {{ b }} c",
            ),
        ];

        for (file_slices, patches, raw_source, expected) in cases {
            let fixed = LintedFile::build_up_fixed_source_string(&file_slices, &patches, raw_source);

            assert_eq!(fixed, expected)
        }
    }

    /// Checks `slice_source_file_using_patches`, the slicing step of
    /// `fix_string`.
    #[test]
    fn test_slice_source_file_using_patches() {
        // Each case: (patches, source-only slices, raw source, expected slices).
        let cases = [
            // Trivial example.
            // No edits in a single character file. Slice should be one
            // character long.
            (vec![], vec![], "a", vec![0..1]),
            // Simple replacement.
            // We've yielded a patch to change a single character. This means
            // we should get only slices for that character, and for the
            // unchanged file around it.
            (
                vec![patch(1..2, "d", 1..2, "b", "b")],
                vec![],
                "abc",
                vec![0..1, 1..2, 2..3],
            ),
            // Templated no fixes.
            // A templated file, but with no fixes, so no subdivision of the
            // file is required, and we should just get a single slice.
            (vec![], vec![], "a {{ b }} c", vec![0..11]),
            // Templated example with a source-only slice.
            // A templated file, but with no fixes, so no subdivision of the
            // file is required and we should just get a single slice. While
            // there is handling for "source only" slices like template
            // comments, in this case no additional slicing is required
            // because no edits have been made.
            (
                vec![],
                vec![raw_slice("{# b #}", "comment", 2)],
                "a {# b #} c",
                vec![0..11],
            ),
            // Templated fix example with a source-only slice.
            // We're making an edit adjacent to a source only slice. Edits
            // _before_ source only slices currently don't trigger additional
            // slicing. This is fine.
            (
                vec![patch(0..1, "a ", 0..1, "a", "a")],
                vec![raw_slice("{# b #}", "comment", 1)],
                "a{# b #}c",
                vec![0..1, 1..9],
            ),
            // Templated fix example with a source-only slice.
            // We've made an edit directly _after_ a source only slice
            // which should trigger the logic to ensure that the source
            // only slice isn't included in the source mapping of the
            // edit.
            (
                vec![patch(1..2, " c", 8..9, "c", "c")],
                vec![raw_slice("{# b #}", "comment", 1)],
                "a{# b #}cc",
                vec![0..1, 1..8, 8..9, 9..10],
            ),
            // Templated example with a source-only slice.
            // Here we're making the fix to the templated slice. This
            // checks that we don't duplicate or fumble the slice
            // generation when we're explicitly trying to edit the source.
            (
                vec![patch(2..2, "{# fixed #}", 2..9, "", "")],
                vec![raw_slice("{# b #}", "comment", 2)],
                "a {# b #} c",
                vec![0..2, 2..9, 9..11],
            ),
            // Illustrate potential templating bug (case from JJ01).
            // In this case we have fixes for all our templated sections
            // and they are all close to each other and so may be either
            // skipped or duplicated if the logic is not precise.
            (
                vec![
                    patch(14..14, "{%+ if true -%}", 14..27, "", "{%+if true-%}"),
                    patch(14..14, "{{ ref('foo') }}", 28..42, "", "{{ref('foo')}}"),
                    patch(17..17, "{%- endif %}", 43..53, "", "{%-endif%}"),
                ],
                vec![
                    raw_slice("{%+if true-%}", "block_start", 14),
                    raw_slice("{%-endif%}", "block_end", 43),
                ],
                "SELECT 1 from {%+if true-%} {{ref('foo')}} {%-endif%}",
                vec![0..14, 14..27, 27..28, 28..42, 42..43, 43..53],
            ),
        ];

        for (patches, source_only_slices, raw_source, expected) in cases {
            let slices = LintedFile::slice_source_file_using_patches(
                patches,
                source_only_slices,
                raw_source,
            );
            assert_eq!(slices, expected);
        }
    }

    /// Fixture: an untemplated file whose source is just `abc`.
    /// Not referenced by any test in this module yet.
    #[allow(dead_code)]
    fn templated_file_1() -> TemplatedFile {
        "abc".into()
    }

    /// Fixture: a templated file with a comment, a templated expression and
    /// a literal tail, rendering to `abc`.
    /// Not referenced by any test in this module yet.
    #[allow(dead_code)]
    fn templated_file_2() -> TemplatedFile {
        let sliced_file = vec![
            TemplatedFileSlice::new("comment", 0..10, 0..0),
            TemplatedFileSlice::new("templated", 10..19, 0..1),
            TemplatedFileSlice::new("literal", 19..21, 1..3),
        ];
        let raw_sliced = vec![
            raw_slice("{# blah #}", "comment", 0),
            raw_slice("{{ foo }}", "templated", 10),
            raw_slice("bc", "literal", 19),
        ];

        TemplatedFile::new(
            "{# blah #}{{ foo }}bc".into(),
            "<testing>".into(),
            Some("abc".into()),
            Some(sliced_file),
            Some(raw_sliced),
        )
        .unwrap()
    }
}