1use std::fmt::{self, Display, Formatter};
2
3use polars_core::datatypes::AnyValue;
4use polars_core::schema::Schema;
5use polars_io::RowIndex;
6use recursive::recursive;
7
8use self::ir::dot::ScanSourcesDisplay;
9use crate::prelude::*;
10
/// Renders an IR plan as indented, human-readable text through its `Display` implementation.
pub struct IRDisplay<'a> {
    /// When `true`, formatting first writes a `STREAMING:` header line and indents the plan
    /// two extra spaces below it.
    is_streaming: bool,
    /// The plan being displayed; provides the root IR node together with the plan and
    /// expression arenas used while formatting.
    lp: IRPlanRef<'a>,
}
15
/// Renders a single expression, identified by a node in an expression arena, as text.
#[derive(Clone, Copy)]
pub struct ExprIRDisplay<'a> {
    /// Root node of the expression inside `expr_arena`.
    pub(crate) node: Node,
    /// Output name of the expression; an `OutputName::Alias` is printed as a trailing
    /// `.alias("...")`, every other variant prints nothing.
    pub(crate) output_name: &'a OutputName,
    /// Arena in which `node` (and every child node) is looked up.
    pub(crate) expr_arena: &'a Arena<AExpr>,
}
22
/// Renders a slice of expressions as a bracketed, comma-separated list (`[a, b, c]`).
pub(crate) struct ExprIRSliceDisplay<'a, T: AsExpr> {
    /// The expressions to print, in order.
    pub(crate) exprs: &'a [T],
    /// Arena in which the nodes of `exprs` are looked up.
    pub(crate) expr_arena: &'a Arena<AExpr>,
}
28
/// Common view over values that can be displayed as an expression: anything that can name a
/// root node and an output name.
pub(crate) trait AsExpr {
    /// Root node of the expression.
    fn node(&self) -> Node;
    /// Output name attached to the expression.
    fn output_name(&self) -> &OutputName;
}
33
34impl AsExpr for Node {
35 fn node(&self) -> Node {
36 *self
37 }
38 fn output_name(&self) -> &OutputName {
39 &OutputName::None
40 }
41}
42
43impl AsExpr for ExprIR {
44 fn node(&self) -> Node {
45 self.node()
46 }
47 fn output_name(&self) -> &OutputName {
48 self.output_name_inner()
49 }
50}
51
52#[allow(clippy::too_many_arguments)]
53fn write_scan(
54 f: &mut Formatter,
55 name: &str,
56 sources: &ScanSources,
57 indent: usize,
58 n_columns: i64,
59 total_columns: usize,
60 predicate: &Option<ExprIRDisplay<'_>>,
61 slice: Option<(i64, usize)>,
62 row_index: Option<&RowIndex>,
63) -> fmt::Result {
64 write!(
65 f,
66 "{:indent$}{name} SCAN {}",
67 "",
68 ScanSourcesDisplay(sources)
69 )?;
70
71 let total_columns = total_columns - usize::from(row_index.is_some());
72 if n_columns > 0 {
73 write!(
74 f,
75 "\n{:indent$}PROJECT {n_columns}/{total_columns} COLUMNS",
76 "",
77 )?;
78 } else {
79 write!(f, "\n{:indent$}PROJECT */{total_columns} COLUMNS", "")?;
80 }
81 if let Some(predicate) = predicate {
82 write!(f, "\n{:indent$}SELECTION: {predicate}", "")?;
83 }
84 if let Some(slice) = slice {
85 write!(f, "\n{:indent$}SLICE: {slice:?}", "")?;
86 }
87 if let Some(row_index) = row_index {
88 write!(f, "\n{:indent$}ROW_INDEX: {}", "", row_index.name)?;
89 if row_index.offset != 0 {
90 write!(f, " (offset: {})", row_index.offset)?;
91 }
92 }
93 Ok(())
94}
95
96impl<'a> IRDisplay<'a> {
97 pub fn new(lp: IRPlanRef<'a>) -> Self {
98 if let Some(streaming_lp) = lp.extract_streaming_plan() {
99 return Self::new_streaming(streaming_lp);
100 }
101
102 Self {
103 is_streaming: false,
104 lp,
105 }
106 }
107
108 fn new_streaming(lp: IRPlanRef<'a>) -> Self {
109 Self {
110 is_streaming: true,
111 lp,
112 }
113 }
114
115 fn root(&self) -> &IR {
116 self.lp.root()
117 }
118
119 fn with_root(&self, root: Node) -> Self {
120 Self {
121 is_streaming: false,
122 lp: self.lp.with_root(root),
123 }
124 }
125
126 fn display_expr(&self, root: &'a ExprIR) -> ExprIRDisplay<'a> {
127 ExprIRDisplay {
128 node: root.node(),
129 output_name: root.output_name_inner(),
130 expr_arena: self.lp.expr_arena,
131 }
132 }
133
134 fn display_expr_slice(&self, exprs: &'a [ExprIR]) -> ExprIRSliceDisplay<'a, ExprIR> {
135 ExprIRSliceDisplay {
136 exprs,
137 expr_arena: self.lp.expr_arena,
138 }
139 }
140
141 #[recursive]
142 fn _format(&self, f: &mut Formatter, indent: usize) -> fmt::Result {
143 let indent = if self.is_streaming {
144 writeln!(f, "{:indent$}STREAMING:", "")?;
145 indent + 2
146 } else {
147 if indent != 0 {
148 writeln!(f)?;
149 }
150
151 indent
152 };
153
154 let sub_indent = indent + 2;
155 use IR::*;
156
157 match self.root() {
158 #[cfg(feature = "python")]
159 PythonScan { options } => {
160 let total_columns = options.schema.len();
161 let n_columns = options
162 .with_columns
163 .as_ref()
164 .map(|s| s.len() as i64)
165 .unwrap_or(-1);
166
167 let predicate = match &options.predicate {
168 PythonPredicate::Polars(e) => Some(self.display_expr(e)),
169 PythonPredicate::PyArrow(_) => None,
170 PythonPredicate::None => None,
171 };
172
173 write_scan(
174 f,
175 "PYTHON",
176 &ScanSources::default(),
177 indent,
178 n_columns,
179 total_columns,
180 &predicate,
181 options.n_rows.map(|x| (0, x)),
182 None,
183 )
184 },
185 Union { inputs, options } => {
186 let name = if let Some(slice) = options.slice {
187 format!("SLICED UNION: {slice:?}")
188 } else {
189 "UNION".to_string()
190 };
191
192 let sub_sub_indent = sub_indent + 2;
197 write!(f, "{:indent$}{name}", "")?;
198 for (i, plan) in inputs.iter().enumerate() {
199 write!(f, "\n{:sub_indent$}PLAN {i}:", "")?;
200 self.with_root(*plan)._format(f, sub_sub_indent)?;
201 }
202 write!(f, "\n{:indent$}END {name}", "")
203 },
204 HConcat { inputs, .. } => {
205 let sub_sub_indent = sub_indent + 2;
206 write!(f, "{:indent$}HCONCAT", "")?;
207 for (i, plan) in inputs.iter().enumerate() {
208 write!(f, "\n{:sub_indent$}PLAN {i}:", "")?;
209 self.with_root(*plan)._format(f, sub_sub_indent)?;
210 }
211 write!(f, "\n{:indent$}END HCONCAT", "")
212 },
213 Cache {
214 input,
215 id,
216 cache_hits,
217 } => {
218 write!(
219 f,
220 "{:indent$}CACHE[id: {:x}, cache_hits: {}]",
221 "", *id, *cache_hits
222 )?;
223 self.with_root(*input)._format(f, sub_indent)
224 },
225 Scan {
226 sources,
227 file_info,
228 predicate,
229 scan_type,
230 file_options,
231 ..
232 } => {
233 let n_columns = file_options
234 .with_columns
235 .as_ref()
236 .map(|columns| columns.len() as i64)
237 .unwrap_or(-1);
238
239 let predicate = predicate.as_ref().map(|p| self.display_expr(p));
240
241 write_scan(
242 f,
243 scan_type.into(),
244 sources,
245 indent,
246 n_columns,
247 file_info.schema.len(),
248 &predicate,
249 file_options.slice,
250 file_options.row_index.as_ref(),
251 )
252 },
253 Filter { predicate, input } => {
254 let predicate = self.display_expr(predicate);
255 write!(f, "{:indent$}FILTER {predicate} FROM", "")?;
257 self.with_root(*input)._format(f, sub_indent)
258 },
259 DataFrameScan {
260 schema,
261 output_schema,
262 ..
263 } => {
264 let total_columns = schema.len();
265 let n_columns = if let Some(columns) = output_schema {
266 columns.len().to_string()
267 } else {
268 "*".to_string()
269 };
270 write!(
271 f,
272 "{:indent$}DF {:?}; PROJECT {}/{} COLUMNS",
273 "",
274 schema.iter_names().take(4).collect::<Vec<_>>(),
275 n_columns,
276 total_columns,
277 )
278 },
279 Select { expr, input, .. } => {
280 let exprs = self.display_expr_slice(expr);
282
283 write!(f, "{:indent$} SELECT {exprs} FROM", "")?;
284 self.with_root(*input)._format(f, sub_indent)
285 },
286 Sort {
287 input, by_column, ..
288 } => {
289 let by_column = self.display_expr_slice(by_column);
290 write!(f, "{:indent$}SORT BY {by_column}", "")?;
291 self.with_root(*input)._format(f, sub_indent)
292 },
293 GroupBy {
294 input,
295 keys,
296 aggs,
297 apply,
298 ..
299 } => {
300 let keys = self.display_expr_slice(keys);
301
302 write!(f, "{:indent$}AGGREGATE", "")?;
303 if apply.is_some() {
304 write!(f, "\n{:indent$}\tMAP_GROUPS BY {keys} FROM", "")?;
305 } else {
306 let aggs = self.display_expr_slice(aggs);
307 write!(f, "\n{:indent$}\t{aggs} BY {keys} FROM", "")?;
308 }
309 self.with_root(*input)._format(f, sub_indent)
310 },
311 Join {
312 input_left,
313 input_right,
314 left_on,
315 right_on,
316 options,
317 ..
318 } => {
319 let left_on = self.display_expr_slice(left_on);
320 let right_on = self.display_expr_slice(right_on);
321
322 if let Some(JoinTypeOptionsIR::Cross { predicate }) = &options.options {
324 let predicate = self.display_expr(predicate);
325 let name = "NESTED LOOP";
326 write!(f, "{:indent$}{name} JOIN ON {predicate}:", "")?;
327 write!(f, "\n{:indent$}LEFT PLAN:", "")?;
328 self.with_root(*input_left)._format(f, sub_indent)?;
329 write!(f, "\n{:indent$}RIGHT PLAN:", "")?;
330 self.with_root(*input_right)._format(f, sub_indent)?;
331 write!(f, "\n{:indent$}END {name} JOIN", "")
332 } else {
333 let how = &options.args.how;
334 write!(f, "{:indent$}{how} JOIN:", "")?;
335 write!(f, "\n{:indent$}LEFT PLAN ON: {left_on}", "")?;
336 self.with_root(*input_left)._format(f, sub_indent)?;
337 write!(f, "\n{:indent$}RIGHT PLAN ON: {right_on}", "")?;
338 self.with_root(*input_right)._format(f, sub_indent)?;
339 write!(f, "\n{:indent$}END {how} JOIN", "")
340 }
341 },
342 HStack { input, exprs, .. } => {
343 let exprs = self.display_expr_slice(exprs);
345
346 write!(f, "{:indent$} WITH_COLUMNS:", "",)?;
347 write!(f, "\n{:indent$} {exprs} ", "")?;
348 self.with_root(*input)._format(f, sub_indent)
349 },
350 Distinct { input, options } => {
351 write!(
352 f,
353 "{:indent$}UNIQUE[maintain_order: {:?}, keep_strategy: {:?}] BY {:?}",
354 "", options.maintain_order, options.keep_strategy, options.subset
355 )?;
356 self.with_root(*input)._format(f, sub_indent)
357 },
358 Slice { input, offset, len } => {
359 write!(f, "{:indent$}SLICE[offset: {offset}, len: {len}]", "")?;
360 self.with_root(*input)._format(f, sub_indent)
361 },
362 MapFunction {
363 input, function, ..
364 } => {
365 if let Some(streaming_lp) = function.to_streaming_lp() {
366 IRDisplay::new_streaming(streaming_lp)._format(f, indent)
367 } else {
368 write!(f, "{:indent$}{function}", "")?;
369 self.with_root(*input)._format(f, sub_indent)
370 }
371 },
372 ExtContext { input, .. } => {
373 write!(f, "{:indent$}EXTERNAL_CONTEXT", "")?;
374 self.with_root(*input)._format(f, sub_indent)
375 },
376 Sink { input, payload, .. } => {
377 let name = match payload {
378 SinkType::Memory => "SINK (memory)",
379 SinkType::File { .. } => "SINK (file)",
380 };
381 write!(f, "{:indent$}{name}", "")?;
382 self.with_root(*input)._format(f, sub_indent)
383 },
384 SimpleProjection { input, columns } => {
385 let num_columns = columns.as_ref().len();
386 let total_columns = self.lp.lp_arena.get(*input).schema(self.lp.lp_arena).len();
387
388 let columns = ColumnsDisplay(columns.as_ref());
389 write!(
390 f,
391 "{:indent$}simple π {num_columns}/{total_columns} [{columns}]",
392 ""
393 )?;
394
395 self.with_root(*input)._format(f, sub_indent)
396 },
397 Invalid => write!(f, "{:indent$}INVALID", ""),
398 }
399 }
400}
401
402impl<'a> ExprIRDisplay<'a> {
403 fn with_slice<T: AsExpr>(&self, exprs: &'a [T]) -> ExprIRSliceDisplay<'a, T> {
404 ExprIRSliceDisplay {
405 exprs,
406 expr_arena: self.expr_arena,
407 }
408 }
409
410 fn with_root<T: AsExpr>(&self, root: &'a T) -> Self {
411 Self {
412 node: root.node(),
413 output_name: root.output_name(),
414 expr_arena: self.expr_arena,
415 }
416 }
417}
418
419impl Display for IRDisplay<'_> {
420 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
421 self._format(f, 0)
422 }
423}
424
425impl fmt::Debug for IRDisplay<'_> {
426 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
427 Display::fmt(&self, f)
428 }
429}
430
431impl<T: AsExpr> Display for ExprIRSliceDisplay<'_, T> {
432 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
433 use std::fmt::Write;
436
437 let mut iter = self.exprs.iter();
438
439 f.write_char('[')?;
440 if let Some(fst) = iter.next() {
441 let fst = ExprIRDisplay {
442 node: fst.node(),
443 output_name: fst.output_name(),
444 expr_arena: self.expr_arena,
445 };
446 write!(f, "{fst}")?;
447 }
448
449 for expr in iter {
450 let expr = ExprIRDisplay {
451 node: expr.node(),
452 output_name: expr.output_name(),
453 expr_arena: self.expr_arena,
454 };
455 write!(f, ", {expr}")?;
456 }
457
458 f.write_char(']')?;
459
460 Ok(())
461 }
462}
463
464impl<T: AsExpr> fmt::Debug for ExprIRSliceDisplay<'_, T> {
465 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
466 Display::fmt(self, f)
467 }
468}
469
470impl Display for ExprIRDisplay<'_> {
471 #[recursive]
472 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
473 let root = self.expr_arena.get(self.node);
474
475 use AExpr::*;
476 match root {
477 Window {
478 function,
479 partition_by,
480 order_by,
481 options,
482 } => {
483 let function = self.with_root(function);
484 let partition_by = self.with_slice(partition_by);
485 match options {
486 #[cfg(feature = "dynamic_group_by")]
487 WindowType::Rolling(options) => {
488 write!(
489 f,
490 "{function}.rolling(by='{}', offset={}, period={})",
491 options.index_column, options.offset, options.period
492 )
493 },
494 _ => {
495 if let Some((order_by, _)) = order_by {
496 let order_by = self.with_root(order_by);
497 write!(f, "{function}.over(partition_by: {partition_by}, order_by: {order_by})")
498 } else {
499 write!(f, "{function}.over({partition_by})")
500 }
501 },
502 }
503 },
504 Len => write!(f, "len()"),
505 Explode(expr) => {
506 let expr = self.with_root(expr);
507 write!(f, "{expr}.explode()")
508 },
509 Alias(expr, name) => {
510 let expr = self.with_root(expr);
511 write!(f, "{expr}.alias(\"{name}\")")
512 },
513 Column(name) => write!(f, "col(\"{name}\")"),
514 Literal(v) => {
515 match v {
516 LiteralValue::String(v) => {
517 write!(f, "String({v})")
519 },
520 _ => {
521 write!(f, "{v:?}")
522 },
523 }
524 },
525 BinaryExpr { left, op, right } => {
526 let left = self.with_root(left);
527 let right = self.with_root(right);
528 write!(f, "[({left}) {op:?} ({right})]")
529 },
530 Sort { expr, options } => {
531 let expr = self.with_root(expr);
532 if options.descending {
533 write!(f, "{expr}.sort(desc)")
534 } else {
535 write!(f, "{expr}.sort(asc)")
536 }
537 },
538 SortBy {
539 expr,
540 by,
541 sort_options,
542 } => {
543 let expr = self.with_root(expr);
544 let by = self.with_slice(by);
545 write!(f, "{expr}.sort_by(by={by}, sort_option={sort_options:?})",)
546 },
547 Filter { input, by } => {
548 let input = self.with_root(input);
549 let by = self.with_root(by);
550
551 write!(f, "{input}.filter({by})")
552 },
553 Gather {
554 expr,
555 idx,
556 returns_scalar,
557 } => {
558 let expr = self.with_root(expr);
559 let idx = self.with_root(idx);
560 expr.fmt(f)?;
561
562 if *returns_scalar {
563 write!(f, ".get({idx})")
564 } else {
565 write!(f, ".gather({idx})")
566 }
567 },
568 Agg(agg) => {
569 use IRAggExpr::*;
570 match agg {
571 Min {
572 input,
573 propagate_nans,
574 } => {
575 self.with_root(input).fmt(f)?;
576 if *propagate_nans {
577 write!(f, ".nan_min()")
578 } else {
579 write!(f, ".min()")
580 }
581 },
582 Max {
583 input,
584 propagate_nans,
585 } => {
586 self.with_root(input).fmt(f)?;
587 if *propagate_nans {
588 write!(f, ".nan_max()")
589 } else {
590 write!(f, ".max()")
591 }
592 },
593 Median(expr) => write!(f, "{}.median()", self.with_root(expr)),
594 Mean(expr) => write!(f, "{}.mean()", self.with_root(expr)),
595 First(expr) => write!(f, "{}.first()", self.with_root(expr)),
596 Last(expr) => write!(f, "{}.last()", self.with_root(expr)),
597 Implode(expr) => write!(f, "{}.list()", self.with_root(expr)),
598 NUnique(expr) => write!(f, "{}.n_unique()", self.with_root(expr)),
599 Sum(expr) => write!(f, "{}.sum()", self.with_root(expr)),
600 AggGroups(expr) => write!(f, "{}.groups()", self.with_root(expr)),
601 Count(expr, _) => write!(f, "{}.count()", self.with_root(expr)),
602 Var(expr, _) => write!(f, "{}.var()", self.with_root(expr)),
603 Std(expr, _) => write!(f, "{}.std()", self.with_root(expr)),
604 Quantile { expr, .. } => write!(f, "{}.quantile()", self.with_root(expr)),
605 }
606 },
607 Cast {
608 expr,
609 dtype,
610 options,
611 } => {
612 self.with_root(expr).fmt(f)?;
613 if options.strict() {
614 write!(f, ".strict_cast({dtype:?})")
615 } else {
616 write!(f, ".cast({dtype:?})")
617 }
618 },
619 Ternary {
620 predicate,
621 truthy,
622 falsy,
623 } => {
624 let predicate = self.with_root(predicate);
625 let truthy = self.with_root(truthy);
626 let falsy = self.with_root(falsy);
627 write!(f, "when({predicate}).then({truthy}).otherwise({falsy})",)
628 },
629 Function {
630 input, function, ..
631 } => {
632 let fst = self.with_root(&input[0]);
633 fst.fmt(f)?;
634 if input.len() >= 2 {
635 write!(f, ".{function}({})", self.with_slice(&input[1..]))
636 } else {
637 write!(f, ".{function}()")
638 }
639 },
640 AnonymousFunction { input, options, .. } => {
641 let fst = self.with_root(&input[0]);
642 fst.fmt(f)?;
643 if input.len() >= 2 {
644 write!(f, ".{}({})", options.fmt_str, self.with_slice(&input[1..]))
645 } else {
646 write!(f, ".{}()", options.fmt_str)
647 }
648 },
649 Slice {
650 input,
651 offset,
652 length,
653 } => {
654 let input = self.with_root(input);
655 let offset = self.with_root(offset);
656 let length = self.with_root(length);
657
658 write!(f, "{input}.slice(offset={offset}, length={length})")
659 },
660 }?;
661
662 match self.output_name {
663 OutputName::None => {},
664 OutputName::LiteralLhs(_) => {},
665 OutputName::ColumnLhs(_) => {},
666 #[cfg(feature = "dtype-struct")]
667 OutputName::Field(_) => {},
668 OutputName::Alias(name) => write!(f, r#".alias("{name}")"#)?,
669 }
670
671 Ok(())
672 }
673}
674
675impl fmt::Debug for ExprIRDisplay<'_> {
676 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
677 Display::fmt(self, f)
678 }
679}
680
/// Renders the column names of a schema as a quoted, comma-separated list that is cut short
/// with an `... N other column(s)` suffix once it grows too long.
pub(crate) struct ColumnsDisplay<'a>(pub(crate) &'a Schema);
682
683impl fmt::Display for ColumnsDisplay<'_> {
684 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
685 let len = self.0.len();
686 let mut iter_names = self.0.iter_names().enumerate();
687
688 const MAX_LEN: usize = 32;
689 const ADD_PER_ITEM: usize = 4;
690
691 let mut current_len = 0;
692
693 if let Some((_, fst)) = iter_names.next() {
694 write!(f, "\"{fst}\"")?;
695
696 current_len += fst.len() + ADD_PER_ITEM;
697 }
698
699 for (i, col) in iter_names {
700 current_len += col.len() + ADD_PER_ITEM;
701
702 if current_len > MAX_LEN {
703 write!(f, ", ... {} other ", len - i)?;
704 if len - i == 1 {
705 f.write_str("column")?;
706 } else {
707 f.write_str("columns")?;
708 }
709
710 break;
711 }
712
713 write!(f, ", \"{col}\"")?;
714 }
715
716 Ok(())
717 }
718}
719
720impl fmt::Debug for Operator {
721 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
722 Display::fmt(self, f)
723 }
724}
725
726impl fmt::Debug for LiteralValue {
727 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
728 use LiteralValue::*;
729
730 match self {
731 Binary(_) => write!(f, "[binary value]"),
732 Range { low, high, .. } => write!(f, "range({low}, {high})"),
733 Series(s) => {
734 let name = s.name();
735 if name.is_empty() {
736 write!(f, "Series")
737 } else {
738 write!(f, "Series[{name}]")
739 }
740 },
741 Float(v) => {
742 let av = AnyValue::Float64(*v);
743 write!(f, "dyn float: {}", av)
744 },
745 Int(v) => write!(f, "dyn int: {}", v),
746 _ => {
747 let av = self.to_any_value().unwrap();
748 write!(f, "{av}")
749 },
750 }
751 }
752}