sqruff_lib/rules/convention/
cv06.rs

1use ahash::{AHashMap, AHashSet};
2use itertools::Itertools;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::edit_type::EditType;
5use sqruff_lib_core::lint_fix::LintFix;
6use sqruff_lib_core::parser::segments::base::{ErasedSegment, SegmentBuilder, Tables};
7use sqruff_lib_core::utils::functional::segments::Segments;
8
9use crate::core::config::Value;
10use crate::core::rules::base::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
11use crate::core::rules::context::RuleContext;
12use crate::core::rules::crawlers::{Crawler, RootOnlyCrawler};
13
/// Rule CV06 (`convention.terminator`): statements must end with a semi-colon.
#[derive(Clone, Debug, Default)]
pub struct RuleCV06 {
    /// When set, the terminator of a statement that spans several lines is
    /// placed on a new line of its own instead of directly after the statement.
    multiline_newline: bool,
    /// When set, the end of the file is checked for a terminating semi-colon
    /// as well.
    require_final_semicolon: bool,
}
19
20impl Rule for RuleCV06 {
21    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
22        let multiline_newline = config["multiline_newline"].as_bool().unwrap();
23        let require_final_semicolon = config["require_final_semicolon"].as_bool().unwrap();
24        Ok(Self {
25            multiline_newline,
26            require_final_semicolon,
27        }
28        .erased())
29    }
30
31    fn name(&self) -> &'static str {
32        "convention.terminator"
33    }
34
35    fn description(&self) -> &'static str {
36        "Statements must end with a semi-colon."
37    }
38
39    fn long_description(&self) -> &'static str {
40        r"
41**Anti-pattern**
42
43A statement is not immediately terminated with a semi-colon. The `•` represents space.
44
45```sql
46SELECT
47    a
48FROM foo
49
50;
51
52SELECT
53    b
54FROM bar••;
55```
56
57**Best practice**
58
59Immediately terminate the statement with a semi-colon.
60
61```sql
62SELECT
63    a
64FROM foo;
65```"
66    }
67
68    fn groups(&self) -> &'static [RuleGroups] {
69        &[RuleGroups::All, RuleGroups::Convention]
70    }
71
72    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
73        debug_assert!(context.segment.is_type(SyntaxKind::File));
74
75        let mut results = vec![];
76        for (idx, segment) in context.segment.segments().iter().enumerate() {
77            let mut res = None;
78            if segment.is_type(SyntaxKind::StatementTerminator) {
79                // First we can simply handle the case of existing semi-colon alignment.
80                // If it's a terminator then we know it's raw.
81
82                res =
83                    self.handle_semicolon(context.tables, segment.clone(), context.segment.clone());
84            } else if self.require_final_semicolon && idx == context.segment.segments().len() - 1 {
85                // Otherwise, handle the end of the file separately.
86                res = self.ensure_final_semicolon(context.tables, context.segment.clone());
87            }
88            if let Some(res) = res {
89                results.push(res);
90            }
91        }
92        results
93    }
94
95    fn is_fix_compatible(&self) -> bool {
96        true
97    }
98
99    fn crawl_behaviour(&self) -> Crawler {
100        RootOnlyCrawler.into()
101    }
102}
103
104impl RuleCV06 {
105    // Adjust anchor_segment to not move trailing inline comment.
106    //
107    // We don't want to move inline comments that are on the same line
108    // as the preceding code segment as they could contain noqa instructions.
109    fn handle_trailing_inline_comments(
110        parent_segment: ErasedSegment,
111        anchor_segment: ErasedSegment,
112    ) -> ErasedSegment {
113        // See if we have a trailing inline comment on the same line as the preceding
114        // segment.
115        for comment_segment in parent_segment
116            .recursive_crawl(
117                const {
118                    &SyntaxSet::new(&[
119                        SyntaxKind::Comment,
120                        SyntaxKind::InlineComment,
121                        SyntaxKind::BlockComment,
122                    ])
123                },
124                true,
125                &SyntaxSet::EMPTY,
126                false,
127            )
128            .iter()
129        {
130            assert!(comment_segment.get_position_marker().is_some());
131            assert!(anchor_segment.get_position_marker().is_some());
132            if comment_segment
133                .get_position_marker()
134                .unwrap()
135                .working_line_no
136                == anchor_segment
137                    .get_position_marker()
138                    .unwrap()
139                    .working_line_no
140                && !comment_segment.is_type(SyntaxKind::BlockComment)
141            {
142                return comment_segment.clone();
143            }
144        }
145        anchor_segment
146    }
147
148    fn is_one_line_statement(parent_segment: ErasedSegment, segment: ErasedSegment) -> bool {
149        let statement_segment = parent_segment
150            .path_to(&segment)
151            .iter()
152            .filter(|&it| it.segment.is_type(SyntaxKind::Statement))
153            .map(|it| it.segment.clone())
154            .next();
155
156        match statement_segment {
157            None => false,
158            Some(statement_segment) => statement_segment
159                .recursive_crawl(
160                    const { &SyntaxSet::new(&[SyntaxKind::Newline]) },
161                    true,
162                    &SyntaxSet::EMPTY,
163                    true,
164                )
165                .is_empty(),
166        }
167    }
168
169    fn handle_semicolon(
170        &self,
171        tables: &Tables,
172        target_segment: ErasedSegment,
173        parent_segment: ErasedSegment,
174    ) -> Option<LintResult> {
175        let info = Self::get_segment_move_context(target_segment.clone(), parent_segment.clone());
176        let semicolon_newline = if !info.is_one_line {
177            self.multiline_newline
178        } else {
179            false
180        };
181
182        if !semicolon_newline {
183            self.handle_semicolon_same_line(tables, target_segment, parent_segment, info)
184        } else {
185            self.handle_semicolon_newline(tables, target_segment, parent_segment, info)
186        }
187    }
188
189    fn handle_semicolon_same_line(
190        &self,
191        tables: &Tables,
192        target_segment: ErasedSegment,
193        parent_segment: ErasedSegment,
194        info: SegmentMoveContext,
195    ) -> Option<LintResult> {
196        if info.before_segment.is_empty() {
197            return None;
198        }
199
200        // If preceding segments are found then delete the old
201        // semicolon and its preceding whitespace and then insert
202        // the semicolon in the correct location.
203        let fixes = self.create_semicolon_and_delete_whitespace(
204            target_segment,
205            parent_segment,
206            info.anchor_segment.clone(),
207            info.whitespace_deletions,
208            vec![
209                SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
210                    .finish(),
211            ],
212        );
213
214        Some(LintResult::new(
215            Some(info.anchor_segment),
216            fixes,
217            None,
218            None,
219        ))
220    }
221
222    /// Adjust segments to not move preceding inline comments.
223    ///
224    /// We don't want to move inline comments that are on the same line
225    /// as the preceding code segment as they could contain noqa instructions.
226    fn handle_preceding_inline_comments(
227        before_segment: Segments,
228        anchor_segment: ErasedSegment,
229    ) -> (Segments, ErasedSegment) {
230        // See if we have a preceding inline comment on the same line as the preceding
231        // segment.
232
233        let same_line_comment = before_segment.iter().find(|s| {
234            s.is_comment()
235                && !s.is_type(SyntaxKind::BlockComment)
236                && s.get_position_marker().is_some()
237                && s.get_position_marker().unwrap().working_loc().0
238                    == anchor_segment
239                        .get_raw_segments()
240                        .last()
241                        .unwrap()
242                        .get_position_marker()
243                        .unwrap()
244                        .working_loc()
245                        .0
246        });
247
248        // If so then make that our new anchor segment and adjust
249        // before_segment accordingly.
250        if let Some(same_line_comment) = same_line_comment {
251            let anchor_segment = same_line_comment.clone();
252            let before_segment = before_segment
253                .iter()
254                .take_while(|s| *s != same_line_comment)
255                .cloned()
256                .collect();
257            let before_segment = Segments::from_vec(before_segment, None);
258            (before_segment, anchor_segment)
259        } else {
260            (before_segment, anchor_segment)
261        }
262    }
263
264    fn handle_semicolon_newline(
265        &self,
266        tables: &Tables,
267        target_segment: ErasedSegment,
268        parent_segment: ErasedSegment,
269        info: SegmentMoveContext,
270    ) -> Option<LintResult> {
271        // Adjust before_segment and anchor_segment for preceding inline
272        // comments. Inline comments can contain noqa logic so we need to add the
273        // newline after the inline comment.
274        let (before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
275            info.before_segment.clone(),
276            info.anchor_segment.clone(),
277        );
278
279        if before_segment.len() == 1
280            && before_segment.all(Some(|segment: &ErasedSegment| {
281                segment.is_type(SyntaxKind::Newline)
282            }))
283        {
284            return None;
285        }
286
287        // If preceding segment is not a single newline then delete the old
288        // semicolon/preceding whitespace and then insert the
289        // semicolon in the correct location.
290        let anchor_segment =
291            Self::handle_trailing_inline_comments(parent_segment.clone(), anchor_segment.clone());
292        let fixes = if anchor_segment == target_segment {
293            vec![LintFix::replace(
294                anchor_segment.clone(),
295                vec![
296                    SegmentBuilder::whitespace(tables.next_id(), "\n"),
297                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
298                        .finish(),
299                ],
300                None,
301            )]
302        } else {
303            self.create_semicolon_and_delete_whitespace(
304                target_segment,
305                parent_segment,
306                anchor_segment.clone(),
307                info.whitespace_deletions.clone(),
308                vec![
309                    SegmentBuilder::newline(tables.next_id(), "\n"),
310                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
311                        .finish(),
312                ],
313            )
314        };
315
316        Some(LintResult::new(Some(anchor_segment), fixes, None, None))
317    }
318
319    fn create_semicolon_and_delete_whitespace(
320        &self,
321        target_segment: ErasedSegment,
322        parent_segment: ErasedSegment,
323        anchor_segment: ErasedSegment,
324        mut whitespace_deletions: Segments,
325        create_segments: Vec<ErasedSegment>,
326    ) -> Vec<LintFix> {
327        let anchor_segment = choose_anchor_segment(
328            &parent_segment,
329            EditType::CreateAfter,
330            &anchor_segment,
331            true,
332        );
333
334        let mut lintfix_fn: fn(
335            ErasedSegment,
336            Vec<ErasedSegment>,
337            Option<Vec<ErasedSegment>>,
338        ) -> LintFix = LintFix::create_after;
339        if AHashSet::from_iter(whitespace_deletions.base.clone()).contains(&anchor_segment) {
340            lintfix_fn = LintFix::replace;
341            whitespace_deletions = whitespace_deletions.select(
342                Some(|it: &ErasedSegment| it.id() != anchor_segment.id()),
343                None,
344                None,
345                None,
346            );
347        }
348
349        let mut fixes = vec![
350            lintfix_fn(anchor_segment, create_segments, None),
351            LintFix::delete(target_segment),
352        ];
353        fixes.extend(whitespace_deletions.into_iter().map(LintFix::delete));
354        fixes
355    }
356
357    fn ensure_final_semicolon(
358        &self,
359        tables: &Tables,
360        parent_segment: ErasedSegment,
361    ) -> Option<LintResult> {
362        // Iterate backwards over complete stack to find
363        // if the final semicolon is already present.
364        let mut anchor_segment = parent_segment.segments().last().cloned();
365        let trigger_segment = parent_segment.segments().last().cloned();
366        let mut semi_colon_exist_flag = false;
367        let mut is_one_line = false;
368        let mut before_segment = vec![];
369
370        let mut found_code = false;
371        for segment in parent_segment.segments().iter().rev() {
372            anchor_segment = Some(segment.clone());
373            if segment.is_type(SyntaxKind::StatementTerminator) {
374                semi_colon_exist_flag = true;
375            } else if segment.is_code() {
376                is_one_line = Self::is_one_line_statement(parent_segment.clone(), segment.clone());
377                found_code = true;
378                break;
379            } else if !segment.is_meta() {
380                before_segment.push(segment.clone());
381            }
382        }
383
384        if !found_code {
385            return None;
386        }
387
388        let semicolon_newline = if is_one_line {
389            false
390        } else {
391            self.multiline_newline
392        };
393        if !semi_colon_exist_flag {
394            // Create the final semicolon if it does not yet exist.
395
396            // Semicolon on same line.
397            return if !semicolon_newline {
398                let fixes = vec![LintFix::create_after(
399                    anchor_segment.unwrap().clone(),
400                    vec![
401                        SegmentBuilder::token(
402                            tables.next_id(),
403                            ";",
404                            SyntaxKind::StatementTerminator,
405                        )
406                        .finish(),
407                    ],
408                    None,
409                )];
410                Some(LintResult::new(
411                    Some(trigger_segment.unwrap().clone()),
412                    fixes,
413                    None,
414                    None,
415                ))
416            } else {
417                // Semi-colon on new line.
418                // Adjust before_segment and anchor_segment for inline
419                // comments.
420                let (_before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
421                    Segments::from_vec(before_segment, None),
422                    anchor_segment.unwrap().clone(),
423                );
424                let fixes = vec![LintFix::create_after(
425                    anchor_segment.clone(),
426                    vec![
427                        SegmentBuilder::newline(tables.next_id(), "\n"),
428                        SegmentBuilder::token(
429                            tables.next_id(),
430                            ";",
431                            SyntaxKind::StatementTerminator,
432                        )
433                        .finish(),
434                    ],
435                    None,
436                )];
437
438                Some(LintResult::new(
439                    Some(trigger_segment.unwrap().clone()),
440                    fixes,
441                    None,
442                    None,
443                ))
444            };
445        }
446        None
447    }
448
449    fn get_segment_move_context(
450        target_segment: ErasedSegment,
451        parent_segment: ErasedSegment,
452    ) -> SegmentMoveContext {
453        // Locate the segment to be moved (i.e. context.segment) and search back
454        // over the raw stack to find the end of the preceding statement.
455
456        let reversed_raw_stack =
457            Segments::from_vec(parent_segment.get_raw_segments(), None).reversed();
458
459        let before_code = reversed_raw_stack.select::<fn(&ErasedSegment) -> bool>(
460            None,
461            Some(|s| !s.is_code()),
462            Some(&target_segment),
463            None,
464        );
465        let before_segment = before_code.select(
466            Some(|segment: &ErasedSegment| !segment.is_meta()),
467            None,
468            None,
469            None,
470        );
471
472        // We're selecting from the raw stack, so we know that before_code is made of
473        // raw elements.
474        let anchor_segment = if !before_code.is_empty() {
475            before_code.last().unwrap().clone()
476        } else {
477            target_segment.clone()
478        };
479
480        let first_code = reversed_raw_stack
481            .select(
482                Some(|s: &ErasedSegment| s.is_code()),
483                None,
484                Some(&target_segment),
485                None,
486            )
487            .first()
488            .cloned();
489
490        let is_one_line = first_code
491            .is_some_and(|segment| Self::is_one_line_statement(parent_segment, segment.clone()));
492
493        // We can tidy up any whitespace between the segment and the preceding
494        // code/comment segment. Don't mess with the comment spacing/placement.
495        let whitespace_deletions = before_segment.select::<fn(&ErasedSegment) -> bool>(
496            None,
497            Some(|segment| segment.is_whitespace()),
498            None,
499            None,
500        );
501        SegmentMoveContext {
502            anchor_segment,
503            is_one_line,
504            before_segment,
505            whitespace_deletions,
506        }
507    }
508}
509
510struct SegmentMoveContext {
511    anchor_segment: ErasedSegment,
512    is_one_line: bool,
513    before_segment: Segments,
514    whitespace_deletions: Segments,
515}
516
517pub fn choose_anchor_segment(
518    root_segment: &ErasedSegment,
519    edit_type: EditType,
520    segment: &ErasedSegment,
521    filter_meta: bool,
522) -> ErasedSegment {
523    if !matches!(edit_type, EditType::CreateBefore | EditType::CreateAfter) {
524        return segment.clone();
525    }
526
527    let mut anchor = segment.clone();
528    let mut child = segment.clone();
529
530    let mut path = root_segment
531        .path_to(segment)
532        .into_iter()
533        .map(|it| it.segment)
534        .collect_vec();
535    path.reverse();
536
537    for seg in path {
538        if seg.can_start_end_non_code() {
539            break;
540        }
541
542        let mut children_lists = Vec::new();
543        if filter_meta {
544            children_lists.push(
545                seg.segments()
546                    .iter()
547                    .filter(|child| !child.is_meta())
548                    .cloned()
549                    .collect_vec(),
550            );
551        }
552        children_lists.push(seg.segments().to_vec());
553        for children in children_lists {
554            match edit_type {
555                EditType::CreateBefore if children[0].id() == child.id() => {
556                    unreachable!()
557                }
558                EditType::CreateAfter if children.last().unwrap().id() == child.id() => {
559                    anchor = seg.clone();
560                    child = seg;
561                    break;
562                }
563                _ => {}
564            }
565        }
566    }
567
568    anchor
569}