cairo_lang_formatter/formatter_impl.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
use std::cmp::Ordering;
use std::fmt;
use cairo_lang_filesystem::span::TextWidth;
use cairo_lang_syntax as syntax;
use cairo_lang_syntax::attribute::consts::FMT_SKIP_ATTR;
use cairo_lang_syntax::node::db::SyntaxGroup;
use cairo_lang_syntax::node::{SyntaxNode, Terminal, TypedSyntaxNode, ast};
use itertools::Itertools;
use syntax::node::helpers::QueryAttrs;
use syntax::node::kind::SyntaxKind;
use crate::FormatterConfig;
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)]
/// Defines the break point behaviour.
/// Defined in get_break_line_point_properties.
pub enum BreakLinePointIndentation {
/// Represents a break line point group which should be indented when broken. For example,
/// binary expr:
///
/// let x = 1
/// + 2
/// + 3
/// + 4
Indented,
/// Represents a break line point group which should be indented when broken, except for the
/// last one in the group. For example, the break points before and after a StructArgList
/// indent the list itself, but the closing braces should not be indented:
///
/// let x = Struct {
/// first_arg: first_arg, second_arg: second_arg,
/// };
IndentedWithTail,
/// Represents a break line point which should not be indented when broken. For example, the
/// break points after TerminalComma of StructArgList.
/// Notice that the break line points StructArgList wrapping it incur indentation.
///
/// let x = Struct {
/// first_arg: first_arg,
/// second_arg: second_arg,
/// third_arg: third_arg,
/// fourth_arg: fourth_arg,
/// fifth_arg: fifth_arg,
/// };
NotIndented,
}
#[derive(Clone, Debug, PartialEq, Eq)]
/// Properties defining the behaviour of a break line point.
pub struct BreakLinePointProperties {
/// Indicates that the break line point was added instead of an empty line in the code, which
/// means it must be preserved in the output. Notice that the number of consecutive empty line
/// break points is limited and not all empty lines in the code creates an empty line break
/// points.
pub is_empty_line_breakpoint: bool,
/// Breaking precedence, lower values will break first.
pub precedence: usize,
/// Dictates the breaking indentation behaviour.
pub break_indentation: BreakLinePointIndentation,
/// Indicates whether a breakpoint is optional. An optional breakpoint may be broken only if
/// the line is too long. A non-optional breakpoint is always broken.
pub is_optional: bool,
/// Indicates to put a space instead of the break line point if it were not broken.
pub space_if_not_broken: bool,
/// Indicates that in a group of such breakpoints, only one should be broken, specifically the
/// last one which fits in the line length.
pub is_single_breakpoint: bool,
}
impl Ord for BreakLinePointProperties {
fn cmp(&self, other: &Self) -> Ordering {
match (self.is_empty_line_breakpoint, other.is_empty_line_breakpoint) {
(true, true) | (false, false) => self.precedence.cmp(&other.precedence),
(true, false) => Ordering::Greater,
(false, true) => Ordering::Less,
}
}
}
impl PartialOrd for BreakLinePointProperties {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl BreakLinePointProperties {
pub fn new(
precedence: usize,
break_indentation: BreakLinePointIndentation,
is_optional: bool,
space_if_not_broken: bool,
) -> Self {
Self {
precedence,
break_indentation,
is_optional,
space_if_not_broken,
is_empty_line_breakpoint: false,
is_single_breakpoint: false,
}
}
pub fn new_empty_line() -> Self {
Self {
precedence: 0,
break_indentation: BreakLinePointIndentation::NotIndented,
is_optional: false,
space_if_not_broken: false,
is_empty_line_breakpoint: true,
is_single_breakpoint: false,
}
}
pub fn set_single_breakpoint(&mut self) {
self.is_single_breakpoint = true;
}
}
/// The possible parts of line trees.
#[derive(Clone, Debug)]
enum LineComponent {
/// A simple string to be printed.
Token(String),
/// Any break line point inside a protected zone is ignored unless
/// it has no sibling break line points.
/// For example, in the expression let "x = 1 * (2 + 3);" everything inside the parentheses
/// is collected into a protected zone and is broken only if the line is too
/// long after the first break line point (the one before the '*' operator) is broken.
/// More generally, any break point inside a protected zone is ignored unless there
/// are no breakpoints which are direct children of the parent LineBuilder.
/// The precedence dictates the order of breaking the protected zones, among sibling protected
/// zones. For example, the body of a function should be broken into separate lines before
/// the function signature.
ProtectedZone { builder: LineBuilder, precedence: usize },
/// Represent a space in the code.
Space,
/// Represent a leading indent.
Indent(usize),
/// An optional break line point, that will be used if the line is too long.
BreakLinePoint(BreakLinePointProperties),
/// A component representing a comment in the code. Leading (not trailing) comments are
/// disregarded when computing line width as it belongs to another line.
Comment { content: String, is_trailing: bool },
}
impl LineComponent {
pub fn width(&self) -> usize {
match self {
Self::Token(s) => s.len(),
Self::ProtectedZone { builder, .. } => builder.width(),
Self::Space => 1,
Self::Indent(n) => *n,
Self::BreakLinePoint(properties) => usize::from(properties.space_if_not_broken),
Self::Comment { content, is_trailing } => {
if *is_trailing {
content.len()
} else {
// Comments before a line are not accounted for the line width.
0
}
}
}
}
/// Returns if the component is a trivia component, i.e. does not contain any code.
fn is_trivia(&self) -> bool {
matches!(
self,
Self::Comment { .. } | Self::Space | Self::Indent(_) | Self::BreakLinePoint(_)
)
}
}
impl fmt::Display for LineComponent {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Token(s) => write!(f, "{s}"),
Self::ProtectedZone { builder, .. } => write!(f, "{builder}"),
Self::Space => write!(f, " "),
Self::Indent(n) => write!(f, "{}", " ".repeat(*n)),
Self::BreakLinePoint(properties) => {
write!(f, "{}", if properties.space_if_not_broken { " " } else { "" })
}
Self::Comment { content, .. } => write!(f, "{content}"),
}
}
}
/// Represents a line in the code, separated by optional break line points.
/// Used to break the line if too long.
#[derive(Clone, Debug)]
struct LineBuilder {
children: Vec<LineComponent>,
/// Indicates whether this builder is open, which means any new child should
/// be (recursively) appended to it. Otherwise, new children will be appended as its sibling.
is_open: bool,
/// Added break line points are temporarily collected into this vector. The vector is flushed
/// into the children vector if any other LineComponent is pushed. This prevents break line
/// points being added to the end of a line.
pending_break_line_points: Vec<LineComponent>,
}
impl fmt::Display for LineBuilder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_only_indents() {
write!(f, "")
} else {
write!(f, "{}", self.children.iter().map(|child| child.to_string()).join(""))
}
}
}
impl LineBuilder {
/// Creates a new intermediate line.
pub fn default() -> Self {
Self { children: vec![], is_open: true, pending_break_line_points: vec![] }
}
/// Creates a new line builder initialized with an indent component.
pub fn new(indent_size: usize) -> Self {
if indent_size > 0 {
Self {
children: vec![LineComponent::Indent(indent_size)],
is_open: true,
pending_break_line_points: vec![],
}
} else {
Self::default()
}
}
/// Adds a sub-builder as the next child.
/// All subsequent children will be added to this sub builder until set as closed.
fn open_sub_builder(&mut self, precedence: usize) {
let active_builder = self.get_active_builder_mut();
active_builder.flush_pending_break_line_points();
active_builder.push_child(LineComponent::ProtectedZone {
builder: LineBuilder::default(),
precedence,
});
}
/// Sets the last child, which is assumed to be a LineBuilder, as close.
/// New children will be siblings of this subtree.
fn close_sub_builder(&mut self) {
let active_builder = self.get_active_builder_mut();
active_builder.flush_pending_break_line_points();
active_builder.is_open = false;
}
/// Adds a line component as the next child. If the last child is an open LineBuilder the child
/// is recursively appended to the sub builder.
fn push_child(&mut self, component: LineComponent) {
match &component {
LineComponent::BreakLinePoint(properties) if !properties.is_empty_line_breakpoint => {
if !self.is_only_indents() {
self.get_active_builder_mut().pending_break_line_points.push(component);
}
}
_ => {
let active_builder = self.get_active_builder_mut();
active_builder.flush_pending_break_line_points();
active_builder.children.push(component);
}
}
}
/// Appends a string to the line.
pub fn push_str(&mut self, s: &str) {
self.push_child(LineComponent::Token(s.to_string()));
}
/// Appends a space to the line.
pub fn push_space(&mut self) {
self.push_child(LineComponent::Space);
}
/// Appends a user-inserted empty line to the line.
pub fn push_empty_line_break_line_point(&mut self) {
self.push_child(LineComponent::BreakLinePoint(BreakLinePointProperties::new_empty_line()));
}
/// Appends an optional break line point.
pub fn push_break_line_point(&mut self, properties: BreakLinePointProperties) {
self.push_child(LineComponent::BreakLinePoint(properties));
}
/// Appends a comment to the line.
pub fn push_comment(&mut self, content: &str, is_trailing: bool) {
// Aggregate consecutive comment lines into one.
let active_builder = self.get_active_builder_mut();
if let Some(LineComponent::Comment { content: prev_content, is_trailing }) =
active_builder.children.last_mut()
{
if !*is_trailing {
*prev_content += "\n";
*prev_content += content;
return;
}
}
self.push_child(LineComponent::Comment { content: content.to_string(), is_trailing });
self.push_break_line_point(BreakLinePointProperties::new(
// Should be greater than any other precedence.
usize::MAX,
BreakLinePointIndentation::NotIndented,
false,
false,
));
}
/// Appends all the pending break line points to the builder. Should be called whenever a
/// component of another type (i.e. not a break line point) is appended.
fn flush_pending_break_line_points(&mut self) {
self.children.append(&mut self.pending_break_line_points);
}
/// The width, in number of chars, of the whole LineTree.
fn width(&self) -> usize {
self.width_between(0, self.children.len())
}
/// The width, in number of chars, between the `start`th (inclusive)
/// and `end`th (exclusive) children.
fn width_between(&self, start: usize, end: usize) -> usize {
// TODO(Gil): Optimize to O(1).
self.children[start..end].iter().fold(0, |sum, node| sum + node.width())
}
/// Returns the next break line point properties from within all the break line points
/// which are a direct child of this tree, or None if there are no such break line points.
fn get_next_break_properties(&self) -> Option<BreakLinePointProperties> {
self.children
.iter()
.filter_map(|child| {
if let LineComponent::BreakLinePoint(properties) = child {
Some(properties.clone())
} else {
None
}
})
.min()
}
/// Returns a vector of the positions of all the break line point children which have the
/// specified precedence.
fn get_break_point_indices_by_precedence(&self, precedence: usize) -> Vec<usize> {
self.children
.iter()
.enumerate()
.filter_map(|(i, child)| match child {
LineComponent::BreakLinePoint(properties)
if properties.precedence == precedence =>
{
Some(i)
}
_ => None,
})
.collect()
}
/// Recursively calls break_line_tree until no break_line_point or protected zone exists in the
/// tree. Returns a vec of strings, each one represents a line.
fn break_line_tree(&self, max_line_width: usize, tab_size: usize) -> Vec<String> {
// TODO(gil): improve the complexity of this function. Right now the line builder is
// entirely cloned for each protected zone, which results in a worst case complexity of
// O(n*m) where n is the line length and m is the number of protected zones. The actual
// complexity is lower since the line is broken into smaller pieces and each one is handled
// separately.
let mut sub_builders = self.break_line_tree_single_level(max_line_width, tab_size);
// If the line was not broken into several lines (i.e. only one sub_builder), open the
// highest precedence protected zone and try to break again.
while sub_builders.len() == 1 {
// Break the line according to the break_line_points.
// TODO(Gil): remove the "contains_break_line_points" and return from the breaking
// function whether it broke any break points.
if sub_builders[0].contains_break_line_points() {
sub_builders =
sub_builders[0].break_line_tree_single_level(max_line_width, tab_size)
} else if sub_builders[0].contains_protected_zone() {
// No break_line_points, open the highest precedence protected zone.
sub_builders = vec![sub_builders[0].open_protected_zone()];
} else {
// All break line points were already broken or removed.
// TODO(Gil): Propagate error to user if line is still too long.
return vec![self.to_string()];
}
}
// Keep breaking recursively the new lines.
sub_builders
.iter()
.flat_map(|tree| tree.break_line_tree(max_line_width, tab_size))
.collect()
}
/// Breaks the LineTree once into a vector of LineTrees according to the highest precedence
/// (lowest precedence number) break line point found in the LineTree.
fn break_line_tree_single_level(
&self,
max_line_width: usize,
tab_size: usize,
) -> Vec<LineBuilder> {
let Some(break_line_point_properties) = self.get_next_break_properties() else {
return vec![self.clone()];
};
let mut breaking_positions =
self.get_break_point_indices_by_precedence(break_line_point_properties.precedence);
if break_line_point_properties.is_single_breakpoint {
// If the break line point is a single breakpoint, search for the last one which fits
// in the line, and remove all the others.
let mut last_break_point_index = breaking_positions.len() - 1;
let mut first_break_point_index = 0;
while first_break_point_index < last_break_point_index {
let middle_break_point_index =
(first_break_point_index + last_break_point_index + 1) / 2;
let middle_break_point = breaking_positions[middle_break_point_index];
let middle_break_point_width = self.width_between(0, middle_break_point);
if middle_break_point_width <= max_line_width {
first_break_point_index = middle_break_point_index;
} else {
last_break_point_index = middle_break_point_index - 1;
}
}
breaking_positions = vec![breaking_positions[first_break_point_index]];
}
if self.width() <= max_line_width && break_line_point_properties.is_optional {
return vec![self.remove_all_optional_break_line_points()];
}
let base_indent = self.get_leading_indent();
let mut trees: Vec<LineBuilder> = vec![];
let n_children = self.children.len();
// Dummy break line point, simplifies the loop.
breaking_positions.push(n_children);
let n_break_points = breaking_positions.len();
let mut current_line_start = 0;
// Iterate over the break line points and collect each part between them into one new
// LineBuilder.
for (i, current_line_end) in breaking_positions.iter().enumerate() {
let added_indent = match break_line_point_properties.break_indentation {
BreakLinePointIndentation::Indented if i != 0 => tab_size,
BreakLinePointIndentation::IndentedWithTail
if i != 0 && i != n_break_points - 1 =>
{
tab_size
}
_ => 0,
};
// If a comment follows the last IndentedWithTail break point, it should also be
// indented.
let mut comment_only_added_indent = if let BreakLinePointIndentation::IndentedWithTail =
break_line_point_properties.break_indentation
{
if i == n_break_points - 1 { tab_size } else { 0 }
} else {
0
};
let cur_indent = base_indent + added_indent;
trees.push(LineBuilder::new(cur_indent));
for j in current_line_start..*current_line_end {
match &self.children[j] {
LineComponent::Indent(_) => {}
LineComponent::Space => {
// Ignore spaces at the start of a line
if !trees.last().unwrap().is_only_indents() {
trees.last_mut().unwrap().push_space();
}
}
LineComponent::Comment { content, is_trailing } if !is_trailing => {
trees.last_mut().unwrap().push_str(&" ".repeat(comment_only_added_indent));
let formatted_comment = format_leading_comment(
content,
cur_indent + comment_only_added_indent,
max_line_width,
);
trees.last_mut().unwrap().push_child(LineComponent::Comment {
content: formatted_comment,
is_trailing: *is_trailing,
});
}
_ => trees.last_mut().unwrap().push_child(self.children[j].clone()),
}
// Indent the comment only if it directly follows the break point.
if !self.children[j].is_trivia() {
comment_only_added_indent = 0;
}
}
current_line_start = *current_line_end + 1;
}
trees
}
/// Returns a reference to the currently active builder.
fn get_active_builder_mut(&mut self) -> &mut LineBuilder {
// Split into two match statements since self is mutably borrowed in the second match,
// and thus a mutable ref to self can't be returned in it.
match self.children.last() {
Some(LineComponent::ProtectedZone { builder: sub_builder, .. })
if sub_builder.is_open => {}
_ => {
return self;
}
}
match self.children.last_mut() {
Some(LineComponent::ProtectedZone { builder: sub_builder, .. })
if sub_builder.is_open =>
{
sub_builder.get_active_builder_mut()
}
_ => {
unreachable!("This case is covered in the first match.")
}
}
}
/// Creates a string of the code represented in the builder. The string may represent
/// several lines (separated by '\n'), where each line length is
/// less than max_line_width (if possible).
pub fn build(&self, max_line_width: usize, tab_size: usize) -> String {
self.break_line_tree(max_line_width, tab_size).iter().join("\n") + "\n"
}
/// Returns the highest protected zone precedence (minimum number) from within all the protected
/// zones which are direct children of this builder, or None if there are no protected zones
/// direct-children.
fn get_highest_protected_zone_precedence(&self) -> Option<usize> {
self.children
.iter()
.filter_map(|child| {
if let LineComponent::ProtectedZone { precedence, .. } = child {
Some(*precedence)
} else {
None
}
})
.min()
}
/// Creates a new LineBuilder where the first subchild which is a protected zone, is now
/// unprotected.
fn open_protected_zone(&self) -> LineBuilder {
let mut unprotected_builder = LineBuilder::default();
let mut first_protected_zone_found = false;
let highest_precedence = self
.get_highest_protected_zone_precedence()
.expect("Tried to unprotect a line builder with no protected zones.");
for child in self.children.iter() {
match child {
LineComponent::ProtectedZone { builder: sub_tree, precedence }
if *precedence == highest_precedence && !first_protected_zone_found =>
{
first_protected_zone_found = true;
for sub_child in sub_tree.children.iter() {
unprotected_builder.push_child(sub_child.clone());
}
}
_ => unprotected_builder.push_child(child.clone()),
}
}
unprotected_builder
}
/// Returns whether or not the line contains a protected zone.
fn contains_protected_zone(&self) -> bool {
self.children.iter().any(|child| matches!(child, LineComponent::ProtectedZone { .. }))
}
/// Returns whether the line contains only indents.
fn is_only_indents(&self) -> bool {
!self.children.iter().any(|child| !matches!(child, LineComponent::Indent { .. }))
}
fn get_leading_indent(&self) -> usize {
let mut leading_indent = 0;
let mut children_iter = self.children.iter();
while let Some(LineComponent::Indent(indent_size)) = children_iter.next() {
leading_indent += indent_size
}
leading_indent
}
/// Returns whether the line contains a break point.
fn contains_break_line_points(&self) -> bool {
self.children.iter().any(|child| matches!(child, LineComponent::BreakLinePoint(_)))
}
// Removes all the break line points.
fn remove_all_optional_break_line_points(&self) -> LineBuilder {
LineBuilder {
children: self
.children
.iter()
.map(|child| match child {
LineComponent::BreakLinePoint(node_properties)
if node_properties.is_optional =>
{
LineComponent::Token(child.to_string())
}
_ => child.clone(),
})
.collect_vec(),
is_open: true,
pending_break_line_points: vec![],
}
}
}
/// Represents a comment line in the code.
#[derive(Clone, PartialEq, Eq)]
struct CommentLine {
/// The number of slashes in the comment prefix.
n_slashes: usize,
/// The number of exclamation marks in the comment prefix.
n_exclamations: usize,
/// The number of leading spaces in the comment prefix.
n_leading_spaces: usize,
/// The content of the comment.
content: String,
}
impl CommentLine {
/// Creates a new comment prefix.
pub fn from_string(mut comment_line: String) -> Self {
comment_line = comment_line.trim().to_string();
let n_slashes = comment_line.chars().take_while(|c| *c == '/').count();
comment_line = comment_line.chars().skip(n_slashes).collect();
let n_exclamations = comment_line.chars().take_while(|c| *c == '!').count();
comment_line = comment_line.chars().skip(n_exclamations).collect();
let n_leading_spaces = comment_line.chars().take_while(|c| *c == ' ').count();
let content = comment_line.chars().skip(n_leading_spaces).collect();
Self { n_slashes, n_exclamations, n_leading_spaces, content }
}
/// Returns true if the comment prefix is the same as the other comment prefix.
pub fn is_same_prefix(&self, other: &Self) -> bool {
self.n_slashes == other.n_slashes
&& self.n_exclamations == other.n_exclamations
&& self.n_leading_spaces == other.n_leading_spaces
}
/// Returns true if the comment ends with an alphanumeric character, or a comma, indicating that
/// the next line is probably a continuation of the comment, and thus in case of a line break it
/// should prepend the content of the next line.
pub fn is_open_line(&self) -> bool {
self.content.ends_with(|c: char| c.is_alphanumeric() || c == ',')
}
}
impl fmt::Display for CommentLine {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}{}{}{}",
"/".repeat(self.n_slashes),
"!".repeat(self.n_exclamations),
" ".repeat(self.n_leading_spaces),
self.content.trim()
)
}
}
/// Formats a comment to fit in the line width. There are no merges of lines, as this is not clear
/// when to merge two lines the user choose to write on separate lines, so all original line breaks
/// are preserved.
fn format_leading_comment(content: &str, cur_indent: usize, max_line_width: usize) -> String {
let mut formatted_comment = String::new();
let mut prev_comment_line = CommentLine::from_string("".to_string());
let append_line = |formatted_comment: &mut String, comment_line: &CommentLine| {
formatted_comment.push_str(&" ".repeat(cur_indent));
formatted_comment.push_str(&comment_line.to_string());
formatted_comment.push('\n');
};
let mut last_line_broken = false;
for line in content.lines() {
let orig_comment_line = CommentLine::from_string(line.to_string());
let max_comment_width = max_line_width
- cur_indent
- orig_comment_line.n_slashes
- orig_comment_line.n_exclamations
- orig_comment_line.n_leading_spaces;
// The current line is initialized with the previous line only if it was broken (to avoid
// merging user separated lines).
let mut current_line = if last_line_broken
&& prev_comment_line.is_open_line()
&& prev_comment_line.is_same_prefix(&orig_comment_line)
{
prev_comment_line.content += " ";
prev_comment_line
} else {
append_line(&mut formatted_comment, &prev_comment_line);
CommentLine { content: "".to_string(), ..orig_comment_line }
};
last_line_broken = false;
for word in orig_comment_line.content.split(' ') {
if current_line.content.is_empty()
|| current_line.content.len() + word.len() <= max_comment_width
{
current_line.content.push_str(word);
current_line.content.push(' ');
} else {
append_line(&mut formatted_comment, ¤t_line);
last_line_broken = true;
current_line = CommentLine { content: word.to_string(), ..current_line };
current_line.content.push(' ');
}
}
prev_comment_line = CommentLine {
n_slashes: orig_comment_line.n_slashes,
n_exclamations: orig_comment_line.n_exclamations,
n_leading_spaces: orig_comment_line.n_leading_spaces,
content: current_line.content.trim().to_string(),
};
}
append_line(&mut formatted_comment, &prev_comment_line);
// Remove the leading spaces of the first line, as they are added by the LineBuilder for the
// first line.
formatted_comment.trim().to_string()
}
/// A struct holding all the data of the pending line to be emitted.
struct PendingLineState {
/// Intermediate representation of the text to be emitted.
line_buffer: LineBuilder,
/// Should the next space between tokens be ignored.
prevent_next_space: bool,
}
impl PendingLineState {
pub fn new() -> Self {
Self { line_buffer: LineBuilder::default(), prevent_next_space: true }
}
}
/// Represents the break line points before and after a syntax node.
pub enum BreakLinePointsPositions {
Leading(BreakLinePointProperties),
Trailing(BreakLinePointProperties),
Both { leading: BreakLinePointProperties, trailing: BreakLinePointProperties },
List { properties: BreakLinePointProperties, breaking_frequency: usize },
None,
}
impl BreakLinePointsPositions {
pub fn new_symmetric(break_line_point_properties: BreakLinePointProperties) -> Self {
Self::Both {
leading: break_line_point_properties.clone(),
trailing: break_line_point_properties,
}
}
pub fn leading(&self) -> Option<BreakLinePointProperties> {
match self {
Self::Leading(properties) | Self::Both { leading: properties, .. } => {
Some(properties.clone())
}
_ => None,
}
}
pub fn trailing(&self) -> Option<BreakLinePointProperties> {
match self {
Self::Trailing(properties) | Self::Both { trailing: properties, .. } => {
Some(properties.clone())
}
_ => None,
}
}
}
// TODO(spapini): Introduce the correct types here, to reflect the "applicable" nodes types.
pub trait SyntaxNodeFormat {
/// Returns true if a token should never have a space before it.
fn force_no_space_before(&self, db: &dyn SyntaxGroup) -> bool;
/// Returns true if a token should never have a space after it.
fn force_no_space_after(&self, db: &dyn SyntaxGroup) -> bool;
/// Returns true if the line is allowed to break after the node.
/// Only applicable for terminal nodes.
fn allow_newline_after(&self, db: &dyn SyntaxGroup) -> bool;
/// Returns the number of allowed empty lines between two consecutive children of this node.
fn allowed_empty_between(&self, db: &dyn SyntaxGroup) -> usize;
/// Returns the break point properties before and after a specific node if a break point should
/// exist, otherwise returns None.
fn get_wrapping_break_line_point_properties(
&self,
db: &dyn SyntaxGroup,
) -> BreakLinePointsPositions;
/// Returns the breaking position between the children of a syntax node.
fn get_internal_break_line_point_properties(
&self,
db: &dyn SyntaxGroup,
) -> BreakLinePointsPositions;
/// If self is a protected zone, returns its precedence (highest precedence == lowest number).
/// Otherwise, returns None.
fn get_protected_zone_precedence(&self, db: &dyn SyntaxGroup) -> Option<usize>;
fn should_skip_terminal(&self, db: &dyn SyntaxGroup) -> bool;
/// Returns the sorting kind of the syntax node. This method will be used to sections in the
/// syntax tree.
fn as_sort_kind(&self, db: &dyn SyntaxGroup) -> SortKind;
}
pub struct FormatterImpl<'a> {
db: &'a dyn SyntaxGroup,
config: FormatterConfig,
/// A buffer for the current line.
line_state: PendingLineState,
/// The number of empty lines allowed after the current node.
empty_lines_allowance: usize,
/// Indicates whether the current line only consists of whitespace tokens (since the last
/// newline).
is_current_line_whitespaces: bool,
}
impl<'a> FormatterImpl<'a> {
pub fn new(db: &'a dyn SyntaxGroup, config: FormatterConfig) -> Self {
Self {
db,
config,
line_state: PendingLineState::new(),
empty_lines_allowance: 0,
is_current_line_whitespaces: true,
}
}
/// Gets a root of a syntax tree and returns the formatted string of the code it represents.
pub fn get_formatted_string(&mut self, syntax_node: &SyntaxNode) -> String {
self.format_node(syntax_node);
self.line_state.line_buffer.build(self.config.max_line_length, self.config.tab_size)
}
/// Appends a formatted string, representing the syntax_node, to the result.
/// Should be called with a root syntax node to format a file.
fn format_node(&mut self, syntax_node: &SyntaxNode) {
if syntax_node.text(self.db).is_some() {
panic!("Token reached before terminal.");
}
let protected_zone_precedence = syntax_node.get_protected_zone_precedence(self.db);
let node_break_points = syntax_node.get_wrapping_break_line_point_properties(self.db);
self.append_break_line_point(node_break_points.leading());
if let Some(precedence) = protected_zone_precedence {
self.line_state.line_buffer.open_sub_builder(precedence);
}
if syntax_node.force_no_space_before(self.db) {
self.line_state.prevent_next_space = true;
}
if self.should_ignore_node_format(syntax_node) {
self.line_state.line_buffer.push_str(syntax_node.get_text(self.db).trim());
} else if syntax_node.kind(self.db).is_terminal() {
self.format_terminal(syntax_node);
} else {
self.format_internal(syntax_node);
}
if syntax_node.force_no_space_after(self.db) {
self.line_state.prevent_next_space = true;
}
if protected_zone_precedence.is_some() {
self.line_state.line_buffer.close_sub_builder();
}
self.append_break_line_point(node_break_points.trailing());
}
/// Formats an internal node and appends the formatted string to the result.
fn format_internal(&mut self, syntax_node: &SyntaxNode) {
let allowed_empty_between = syntax_node.allowed_empty_between(self.db);
let internal_break_line_points_positions =
syntax_node.get_internal_break_line_point_properties(self.db);
// TODO(ilya): consider not copying here.
let mut children = self.db.get_children(syntax_node.clone()).to_vec();
let n_children = children.len();
if self.config.sort_module_level_items {
let mut start_idx = 0;
while start_idx < children.len() {
let kind = children[start_idx].as_sort_kind(self.db);
let mut end_idx = start_idx + 1;
// Find the end of the current section.
while end_idx < children.len() {
if kind != children[end_idx].as_sort_kind(self.db) {
break;
}
end_idx += 1;
}
// Sort within this section if it's `Module` or `UseItem`.
match kind {
SortKind::Module => {
children[start_idx..end_idx].sort_by_key(|node| {
ast::ItemModule::from_syntax_node(self.db, node.clone())
.name(self.db)
.text(self.db)
});
}
SortKind::UseItem => {
children[start_idx..end_idx].sort_by_key(|node| {
ast::ItemUse::from_syntax_node(self.db, node.clone())
.use_path(self.db)
.as_syntax_node()
.get_text_without_trivia(self.db)
});
}
SortKind::Immovable => {}
}
// Move past the sorted section.
start_idx = end_idx;
}
}
for (i, child) in children.iter().enumerate() {
if child.width(self.db) == TextWidth::default() {
continue;
}
self.format_node(child);
if let BreakLinePointsPositions::List { properties, breaking_frequency } =
&internal_break_line_points_positions
{
if i % breaking_frequency == breaking_frequency - 1 && i < n_children - 1 {
self.append_break_line_point(Some(properties.clone()));
}
}
self.empty_lines_allowance = allowed_empty_between;
}
}
/// Formats a terminal node and appends the formatted string to the result.
fn format_terminal(&mut self, syntax_node: &SyntaxNode) {
// TODO(spapini): Introduce a Terminal and a Token enum in ast.rs to make this cleaner.
let children = self.db.get_children(syntax_node.clone());
let mut children_iter = children.iter().cloned();
let leading_trivia = ast::Trivia::from_syntax_node(self.db, children_iter.next().unwrap());
let token = children_iter.next().unwrap();
let trailing_trivia = ast::Trivia::from_syntax_node(self.db, children_iter.next().unwrap());
// The first newlines is the leading trivia correspond exactly to empty lines.
self.format_trivia(leading_trivia, true);
if !syntax_node.should_skip_terminal(self.db) {
self.format_token(&token);
}
self.format_trivia(trailing_trivia, false);
}
/// Appends a trivia node (if needed) to the result.
fn format_trivia(&mut self, trivia: syntax::node::ast::Trivia, is_leading: bool) {
for trivium in trivia.elements(self.db) {
match trivium {
ast::Trivium::SingleLineComment(_)
| ast::Trivium::SingleLineDocComment(_)
| ast::Trivium::SingleLineInnerComment(_) => {
if !is_leading {
self.line_state.line_buffer.push_space();
}
self.line_state.line_buffer.push_comment(
&trivium.as_syntax_node().text(self.db).unwrap(),
!is_leading,
);
self.is_current_line_whitespaces = false;
self.empty_lines_allowance = 1;
}
ast::Trivium::Whitespace(_) => {}
ast::Trivium::Newline(_) => {
if self.empty_lines_allowance > 0 && self.is_current_line_whitespaces {
self.empty_lines_allowance -= 1;
self.line_state.line_buffer.push_empty_line_break_line_point();
}
self.is_current_line_whitespaces = true;
}
ast::Trivium::Skipped(_) => {
self.format_token(&trivium.as_syntax_node());
}
ast::Trivium::SkippedNode(node) => {
self.format_node(&node.as_syntax_node());
}
}
}
}
/// Formats a token node and appends it to the result.
/// Assumes the given SyntaxNode is a token.
fn format_token(&mut self, syntax_node: &SyntaxNode) {
let text = syntax_node.text(self.db).unwrap();
if !syntax_node.force_no_space_before(self.db) && !self.line_state.prevent_next_space {
self.line_state.line_buffer.push_space();
}
self.line_state.prevent_next_space = syntax_node.force_no_space_after(self.db);
if syntax_node.kind(self.db) != SyntaxKind::TokenWhitespace {
self.is_current_line_whitespaces = false;
}
let node_break_points = syntax_node.get_wrapping_break_line_point_properties(self.db);
self.append_break_line_point(node_break_points.leading());
self.line_state.line_buffer.push_str(&text);
self.append_break_line_point(node_break_points.trailing());
}
fn append_break_line_point(&mut self, properties: Option<BreakLinePointProperties>) {
if let Some(properties) = properties {
self.line_state.line_buffer.push_break_line_point(properties);
self.line_state.prevent_next_space = true;
}
}
/// Gets a syntax node and returns if the node has an cairofmt::skip attribute.
pub fn should_ignore_node_format(&self, syntax_node: &SyntaxNode) -> bool {
syntax_node.has_attr(self.db, FMT_SKIP_ATTR)
}
}
/// Represents the kind of sections in the syntax tree that can be sorted.
/// Classify consecutive nodes into sections that are eligible for sorting.
#[derive(PartialEq, Eq)]
pub enum SortKind {
/// Module items without body, e.g. `mod a;`.
Module,
/// Use items, e.g. `use a::b;` or `use c::{d, e as f};`.
UseItem,
/// Items that cannot be moved - would be skipped and not included in any sorted segment.
Immovable,
}