polars_plan/plans/ir/
format.rs

1use std::fmt::{self, Display, Formatter};
2
3use polars_core::datatypes::AnyValue;
4use polars_core::schema::Schema;
5use polars_io::RowIndex;
6use recursive::recursive;
7
8use self::ir::dot::ScanSourcesDisplay;
9use crate::prelude::*;
10
11pub struct IRDisplay<'a> {
12    is_streaming: bool,
13    lp: IRPlanRef<'a>,
14}
15
16#[derive(Clone, Copy)]
17pub struct ExprIRDisplay<'a> {
18    pub(crate) node: Node,
19    pub(crate) output_name: &'a OutputName,
20    pub(crate) expr_arena: &'a Arena<AExpr>,
21}
22
23/// Utility structure to display several [`ExprIR`]'s in a nice way
24pub(crate) struct ExprIRSliceDisplay<'a, T: AsExpr> {
25    pub(crate) exprs: &'a [T],
26    pub(crate) expr_arena: &'a Arena<AExpr>,
27}
28
29pub(crate) trait AsExpr {
30    fn node(&self) -> Node;
31    fn output_name(&self) -> &OutputName;
32}
33
34impl AsExpr for Node {
35    fn node(&self) -> Node {
36        *self
37    }
38    fn output_name(&self) -> &OutputName {
39        &OutputName::None
40    }
41}
42
43impl AsExpr for ExprIR {
44    fn node(&self) -> Node {
45        self.node()
46    }
47    fn output_name(&self) -> &OutputName {
48        self.output_name_inner()
49    }
50}
51
52#[allow(clippy::too_many_arguments)]
53fn write_scan(
54    f: &mut Formatter,
55    name: &str,
56    sources: &ScanSources,
57    indent: usize,
58    n_columns: i64,
59    total_columns: usize,
60    predicate: &Option<ExprIRDisplay<'_>>,
61    slice: Option<(i64, usize)>,
62    row_index: Option<&RowIndex>,
63) -> fmt::Result {
64    write!(
65        f,
66        "{:indent$}{name} SCAN {}",
67        "",
68        ScanSourcesDisplay(sources)
69    )?;
70
71    let total_columns = total_columns - usize::from(row_index.is_some());
72    if n_columns > 0 {
73        write!(
74            f,
75            "\n{:indent$}PROJECT {n_columns}/{total_columns} COLUMNS",
76            "",
77        )?;
78    } else {
79        write!(f, "\n{:indent$}PROJECT */{total_columns} COLUMNS", "")?;
80    }
81    if let Some(predicate) = predicate {
82        write!(f, "\n{:indent$}SELECTION: {predicate}", "")?;
83    }
84    if let Some(slice) = slice {
85        write!(f, "\n{:indent$}SLICE: {slice:?}", "")?;
86    }
87    if let Some(row_index) = row_index {
88        write!(f, "\n{:indent$}ROW_INDEX: {}", "", row_index.name)?;
89        if row_index.offset != 0 {
90            write!(f, " (offset: {})", row_index.offset)?;
91        }
92    }
93    Ok(())
94}
95
96impl<'a> IRDisplay<'a> {
97    pub fn new(lp: IRPlanRef<'a>) -> Self {
98        if let Some(streaming_lp) = lp.extract_streaming_plan() {
99            return Self::new_streaming(streaming_lp);
100        }
101
102        Self {
103            is_streaming: false,
104            lp,
105        }
106    }
107
108    fn new_streaming(lp: IRPlanRef<'a>) -> Self {
109        Self {
110            is_streaming: true,
111            lp,
112        }
113    }
114
115    fn root(&self) -> &IR {
116        self.lp.root()
117    }
118
119    fn with_root(&self, root: Node) -> Self {
120        Self {
121            is_streaming: false,
122            lp: self.lp.with_root(root),
123        }
124    }
125
126    fn display_expr(&self, root: &'a ExprIR) -> ExprIRDisplay<'a> {
127        ExprIRDisplay {
128            node: root.node(),
129            output_name: root.output_name_inner(),
130            expr_arena: self.lp.expr_arena,
131        }
132    }
133
134    fn display_expr_slice(&self, exprs: &'a [ExprIR]) -> ExprIRSliceDisplay<'a, ExprIR> {
135        ExprIRSliceDisplay {
136            exprs,
137            expr_arena: self.lp.expr_arena,
138        }
139    }
140
141    #[recursive]
142    fn _format(&self, f: &mut Formatter, indent: usize) -> fmt::Result {
143        let indent = if self.is_streaming {
144            writeln!(f, "{:indent$}STREAMING:", "")?;
145            indent + 2
146        } else {
147            if indent != 0 {
148                writeln!(f)?;
149            }
150
151            indent
152        };
153
154        let sub_indent = indent + 2;
155        use IR::*;
156
157        match self.root() {
158            #[cfg(feature = "python")]
159            PythonScan { options } => {
160                let total_columns = options.schema.len();
161                let n_columns = options
162                    .with_columns
163                    .as_ref()
164                    .map(|s| s.len() as i64)
165                    .unwrap_or(-1);
166
167                let predicate = match &options.predicate {
168                    PythonPredicate::Polars(e) => Some(self.display_expr(e)),
169                    PythonPredicate::PyArrow(_) => None,
170                    PythonPredicate::None => None,
171                };
172
173                write_scan(
174                    f,
175                    "PYTHON",
176                    &ScanSources::default(),
177                    indent,
178                    n_columns,
179                    total_columns,
180                    &predicate,
181                    options.n_rows.map(|x| (0, x)),
182                    None,
183                )
184            },
185            Union { inputs, options } => {
186                let name = if let Some(slice) = options.slice {
187                    format!("SLICED UNION: {slice:?}")
188                } else {
189                    "UNION".to_string()
190                };
191
192                // 3 levels of indentation
193                // - 0 => UNION ... END UNION
194                // - 1 => PLAN 0, PLAN 1, ... PLAN N
195                // - 2 => actual formatting of plans
196                let sub_sub_indent = sub_indent + 2;
197                write!(f, "{:indent$}{name}", "")?;
198                for (i, plan) in inputs.iter().enumerate() {
199                    write!(f, "\n{:sub_indent$}PLAN {i}:", "")?;
200                    self.with_root(*plan)._format(f, sub_sub_indent)?;
201                }
202                write!(f, "\n{:indent$}END {name}", "")
203            },
204            HConcat { inputs, .. } => {
205                let sub_sub_indent = sub_indent + 2;
206                write!(f, "{:indent$}HCONCAT", "")?;
207                for (i, plan) in inputs.iter().enumerate() {
208                    write!(f, "\n{:sub_indent$}PLAN {i}:", "")?;
209                    self.with_root(*plan)._format(f, sub_sub_indent)?;
210                }
211                write!(f, "\n{:indent$}END HCONCAT", "")
212            },
213            Cache {
214                input,
215                id,
216                cache_hits,
217            } => {
218                write!(
219                    f,
220                    "{:indent$}CACHE[id: {:x}, cache_hits: {}]",
221                    "", *id, *cache_hits
222                )?;
223                self.with_root(*input)._format(f, sub_indent)
224            },
225            Scan {
226                sources,
227                file_info,
228                predicate,
229                scan_type,
230                file_options,
231                ..
232            } => {
233                let n_columns = file_options
234                    .with_columns
235                    .as_ref()
236                    .map(|columns| columns.len() as i64)
237                    .unwrap_or(-1);
238
239                let predicate = predicate.as_ref().map(|p| self.display_expr(p));
240
241                write_scan(
242                    f,
243                    scan_type.into(),
244                    sources,
245                    indent,
246                    n_columns,
247                    file_info.schema.len(),
248                    &predicate,
249                    file_options.slice,
250                    file_options.row_index.as_ref(),
251                )
252            },
253            Filter { predicate, input } => {
254                let predicate = self.display_expr(predicate);
255                // this one is writeln because we don't increase indent (which inserts a line)
256                write!(f, "{:indent$}FILTER {predicate} FROM", "")?;
257                self.with_root(*input)._format(f, sub_indent)
258            },
259            DataFrameScan {
260                schema,
261                output_schema,
262                ..
263            } => {
264                let total_columns = schema.len();
265                let n_columns = if let Some(columns) = output_schema {
266                    columns.len().to_string()
267                } else {
268                    "*".to_string()
269                };
270                write!(
271                    f,
272                    "{:indent$}DF {:?}; PROJECT {}/{} COLUMNS",
273                    "",
274                    schema.iter_names().take(4).collect::<Vec<_>>(),
275                    n_columns,
276                    total_columns,
277                )
278            },
279            Select { expr, input, .. } => {
280                // @NOTE: Maybe there should be a clear delimiter here?
281                let exprs = self.display_expr_slice(expr);
282
283                write!(f, "{:indent$} SELECT {exprs} FROM", "")?;
284                self.with_root(*input)._format(f, sub_indent)
285            },
286            Sort {
287                input, by_column, ..
288            } => {
289                let by_column = self.display_expr_slice(by_column);
290                write!(f, "{:indent$}SORT BY {by_column}", "")?;
291                self.with_root(*input)._format(f, sub_indent)
292            },
293            GroupBy {
294                input,
295                keys,
296                aggs,
297                apply,
298                ..
299            } => {
300                let keys = self.display_expr_slice(keys);
301
302                write!(f, "{:indent$}AGGREGATE", "")?;
303                if apply.is_some() {
304                    write!(f, "\n{:indent$}\tMAP_GROUPS BY {keys} FROM", "")?;
305                } else {
306                    let aggs = self.display_expr_slice(aggs);
307                    write!(f, "\n{:indent$}\t{aggs} BY {keys} FROM", "")?;
308                }
309                self.with_root(*input)._format(f, sub_indent)
310            },
311            Join {
312                input_left,
313                input_right,
314                left_on,
315                right_on,
316                options,
317                ..
318            } => {
319                let left_on = self.display_expr_slice(left_on);
320                let right_on = self.display_expr_slice(right_on);
321
322                // Fused cross + filter (show as nested loop join)
323                if let Some(JoinTypeOptionsIR::Cross { predicate }) = &options.options {
324                    let predicate = self.display_expr(predicate);
325                    let name = "NESTED LOOP";
326                    write!(f, "{:indent$}{name} JOIN ON {predicate}:", "")?;
327                    write!(f, "\n{:indent$}LEFT PLAN:", "")?;
328                    self.with_root(*input_left)._format(f, sub_indent)?;
329                    write!(f, "\n{:indent$}RIGHT PLAN:", "")?;
330                    self.with_root(*input_right)._format(f, sub_indent)?;
331                    write!(f, "\n{:indent$}END {name} JOIN", "")
332                } else {
333                    let how = &options.args.how;
334                    write!(f, "{:indent$}{how} JOIN:", "")?;
335                    write!(f, "\n{:indent$}LEFT PLAN ON: {left_on}", "")?;
336                    self.with_root(*input_left)._format(f, sub_indent)?;
337                    write!(f, "\n{:indent$}RIGHT PLAN ON: {right_on}", "")?;
338                    self.with_root(*input_right)._format(f, sub_indent)?;
339                    write!(f, "\n{:indent$}END {how} JOIN", "")
340                }
341            },
342            HStack { input, exprs, .. } => {
343                // @NOTE: Maybe there should be a clear delimiter here?
344                let exprs = self.display_expr_slice(exprs);
345
346                write!(f, "{:indent$} WITH_COLUMNS:", "",)?;
347                write!(f, "\n{:indent$} {exprs} ", "")?;
348                self.with_root(*input)._format(f, sub_indent)
349            },
350            Distinct { input, options } => {
351                write!(
352                    f,
353                    "{:indent$}UNIQUE[maintain_order: {:?}, keep_strategy: {:?}] BY {:?}",
354                    "", options.maintain_order, options.keep_strategy, options.subset
355                )?;
356                self.with_root(*input)._format(f, sub_indent)
357            },
358            Slice { input, offset, len } => {
359                write!(f, "{:indent$}SLICE[offset: {offset}, len: {len}]", "")?;
360                self.with_root(*input)._format(f, sub_indent)
361            },
362            MapFunction {
363                input, function, ..
364            } => {
365                if let Some(streaming_lp) = function.to_streaming_lp() {
366                    IRDisplay::new_streaming(streaming_lp)._format(f, indent)
367                } else {
368                    write!(f, "{:indent$}{function}", "")?;
369                    self.with_root(*input)._format(f, sub_indent)
370                }
371            },
372            ExtContext { input, .. } => {
373                write!(f, "{:indent$}EXTERNAL_CONTEXT", "")?;
374                self.with_root(*input)._format(f, sub_indent)
375            },
376            Sink { input, payload, .. } => {
377                let name = match payload {
378                    SinkType::Memory => "SINK (memory)",
379                    SinkType::File { .. } => "SINK (file)",
380                };
381                write!(f, "{:indent$}{name}", "")?;
382                self.with_root(*input)._format(f, sub_indent)
383            },
384            SimpleProjection { input, columns } => {
385                let num_columns = columns.as_ref().len();
386                let total_columns = self.lp.lp_arena.get(*input).schema(self.lp.lp_arena).len();
387
388                let columns = ColumnsDisplay(columns.as_ref());
389                write!(
390                    f,
391                    "{:indent$}simple π {num_columns}/{total_columns} [{columns}]",
392                    ""
393                )?;
394
395                self.with_root(*input)._format(f, sub_indent)
396            },
397            Invalid => write!(f, "{:indent$}INVALID", ""),
398        }
399    }
400}
401
402impl<'a> ExprIRDisplay<'a> {
403    fn with_slice<T: AsExpr>(&self, exprs: &'a [T]) -> ExprIRSliceDisplay<'a, T> {
404        ExprIRSliceDisplay {
405            exprs,
406            expr_arena: self.expr_arena,
407        }
408    }
409
410    fn with_root<T: AsExpr>(&self, root: &'a T) -> Self {
411        Self {
412            node: root.node(),
413            output_name: root.output_name(),
414            expr_arena: self.expr_arena,
415        }
416    }
417}
418
419impl Display for IRDisplay<'_> {
420    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
421        self._format(f, 0)
422    }
423}
424
425impl fmt::Debug for IRDisplay<'_> {
426    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
427        Display::fmt(&self, f)
428    }
429}
430
431impl<T: AsExpr> Display for ExprIRSliceDisplay<'_, T> {
432    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
433        // Display items in slice delimited by a comma
434
435        use std::fmt::Write;
436
437        let mut iter = self.exprs.iter();
438
439        f.write_char('[')?;
440        if let Some(fst) = iter.next() {
441            let fst = ExprIRDisplay {
442                node: fst.node(),
443                output_name: fst.output_name(),
444                expr_arena: self.expr_arena,
445            };
446            write!(f, "{fst}")?;
447        }
448
449        for expr in iter {
450            let expr = ExprIRDisplay {
451                node: expr.node(),
452                output_name: expr.output_name(),
453                expr_arena: self.expr_arena,
454            };
455            write!(f, ", {expr}")?;
456        }
457
458        f.write_char(']')?;
459
460        Ok(())
461    }
462}
463
464impl<T: AsExpr> fmt::Debug for ExprIRSliceDisplay<'_, T> {
465    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
466        Display::fmt(self, f)
467    }
468}
469
470impl Display for ExprIRDisplay<'_> {
471    #[recursive]
472    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
473        let root = self.expr_arena.get(self.node);
474
475        use AExpr::*;
476        match root {
477            Window {
478                function,
479                partition_by,
480                order_by,
481                options,
482            } => {
483                let function = self.with_root(function);
484                let partition_by = self.with_slice(partition_by);
485                match options {
486                    #[cfg(feature = "dynamic_group_by")]
487                    WindowType::Rolling(options) => {
488                        write!(
489                            f,
490                            "{function}.rolling(by='{}', offset={}, period={})",
491                            options.index_column, options.offset, options.period
492                        )
493                    },
494                    _ => {
495                        if let Some((order_by, _)) = order_by {
496                            let order_by = self.with_root(order_by);
497                            write!(f, "{function}.over(partition_by: {partition_by}, order_by: {order_by})")
498                        } else {
499                            write!(f, "{function}.over({partition_by})")
500                        }
501                    },
502                }
503            },
504            Len => write!(f, "len()"),
505            Explode(expr) => {
506                let expr = self.with_root(expr);
507                write!(f, "{expr}.explode()")
508            },
509            Alias(expr, name) => {
510                let expr = self.with_root(expr);
511                write!(f, "{expr}.alias(\"{name}\")")
512            },
513            Column(name) => write!(f, "col(\"{name}\")"),
514            Literal(v) => {
515                match v {
516                    LiteralValue::String(v) => {
517                        // dot breaks with debug fmt due to \"
518                        write!(f, "String({v})")
519                    },
520                    _ => {
521                        write!(f, "{v:?}")
522                    },
523                }
524            },
525            BinaryExpr { left, op, right } => {
526                let left = self.with_root(left);
527                let right = self.with_root(right);
528                write!(f, "[({left}) {op:?} ({right})]")
529            },
530            Sort { expr, options } => {
531                let expr = self.with_root(expr);
532                if options.descending {
533                    write!(f, "{expr}.sort(desc)")
534                } else {
535                    write!(f, "{expr}.sort(asc)")
536                }
537            },
538            SortBy {
539                expr,
540                by,
541                sort_options,
542            } => {
543                let expr = self.with_root(expr);
544                let by = self.with_slice(by);
545                write!(f, "{expr}.sort_by(by={by}, sort_option={sort_options:?})",)
546            },
547            Filter { input, by } => {
548                let input = self.with_root(input);
549                let by = self.with_root(by);
550
551                write!(f, "{input}.filter({by})")
552            },
553            Gather {
554                expr,
555                idx,
556                returns_scalar,
557            } => {
558                let expr = self.with_root(expr);
559                let idx = self.with_root(idx);
560                expr.fmt(f)?;
561
562                if *returns_scalar {
563                    write!(f, ".get({idx})")
564                } else {
565                    write!(f, ".gather({idx})")
566                }
567            },
568            Agg(agg) => {
569                use IRAggExpr::*;
570                match agg {
571                    Min {
572                        input,
573                        propagate_nans,
574                    } => {
575                        self.with_root(input).fmt(f)?;
576                        if *propagate_nans {
577                            write!(f, ".nan_min()")
578                        } else {
579                            write!(f, ".min()")
580                        }
581                    },
582                    Max {
583                        input,
584                        propagate_nans,
585                    } => {
586                        self.with_root(input).fmt(f)?;
587                        if *propagate_nans {
588                            write!(f, ".nan_max()")
589                        } else {
590                            write!(f, ".max()")
591                        }
592                    },
593                    Median(expr) => write!(f, "{}.median()", self.with_root(expr)),
594                    Mean(expr) => write!(f, "{}.mean()", self.with_root(expr)),
595                    First(expr) => write!(f, "{}.first()", self.with_root(expr)),
596                    Last(expr) => write!(f, "{}.last()", self.with_root(expr)),
597                    Implode(expr) => write!(f, "{}.list()", self.with_root(expr)),
598                    NUnique(expr) => write!(f, "{}.n_unique()", self.with_root(expr)),
599                    Sum(expr) => write!(f, "{}.sum()", self.with_root(expr)),
600                    AggGroups(expr) => write!(f, "{}.groups()", self.with_root(expr)),
601                    Count(expr, _) => write!(f, "{}.count()", self.with_root(expr)),
602                    Var(expr, _) => write!(f, "{}.var()", self.with_root(expr)),
603                    Std(expr, _) => write!(f, "{}.std()", self.with_root(expr)),
604                    Quantile { expr, .. } => write!(f, "{}.quantile()", self.with_root(expr)),
605                }
606            },
607            Cast {
608                expr,
609                dtype,
610                options,
611            } => {
612                self.with_root(expr).fmt(f)?;
613                if options.strict() {
614                    write!(f, ".strict_cast({dtype:?})")
615                } else {
616                    write!(f, ".cast({dtype:?})")
617                }
618            },
619            Ternary {
620                predicate,
621                truthy,
622                falsy,
623            } => {
624                let predicate = self.with_root(predicate);
625                let truthy = self.with_root(truthy);
626                let falsy = self.with_root(falsy);
627                write!(f, "when({predicate}).then({truthy}).otherwise({falsy})",)
628            },
629            Function {
630                input, function, ..
631            } => {
632                let fst = self.with_root(&input[0]);
633                fst.fmt(f)?;
634                if input.len() >= 2 {
635                    write!(f, ".{function}({})", self.with_slice(&input[1..]))
636                } else {
637                    write!(f, ".{function}()")
638                }
639            },
640            AnonymousFunction { input, options, .. } => {
641                let fst = self.with_root(&input[0]);
642                fst.fmt(f)?;
643                if input.len() >= 2 {
644                    write!(f, ".{}({})", options.fmt_str, self.with_slice(&input[1..]))
645                } else {
646                    write!(f, ".{}()", options.fmt_str)
647                }
648            },
649            Slice {
650                input,
651                offset,
652                length,
653            } => {
654                let input = self.with_root(input);
655                let offset = self.with_root(offset);
656                let length = self.with_root(length);
657
658                write!(f, "{input}.slice(offset={offset}, length={length})")
659            },
660        }?;
661
662        match self.output_name {
663            OutputName::None => {},
664            OutputName::LiteralLhs(_) => {},
665            OutputName::ColumnLhs(_) => {},
666            #[cfg(feature = "dtype-struct")]
667            OutputName::Field(_) => {},
668            OutputName::Alias(name) => write!(f, r#".alias("{name}")"#)?,
669        }
670
671        Ok(())
672    }
673}
674
675impl fmt::Debug for ExprIRDisplay<'_> {
676    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
677        Display::fmt(self, f)
678    }
679}
680
681pub(crate) struct ColumnsDisplay<'a>(pub(crate) &'a Schema);
682
683impl fmt::Display for ColumnsDisplay<'_> {
684    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
685        let len = self.0.len();
686        let mut iter_names = self.0.iter_names().enumerate();
687
688        const MAX_LEN: usize = 32;
689        const ADD_PER_ITEM: usize = 4;
690
691        let mut current_len = 0;
692
693        if let Some((_, fst)) = iter_names.next() {
694            write!(f, "\"{fst}\"")?;
695
696            current_len += fst.len() + ADD_PER_ITEM;
697        }
698
699        for (i, col) in iter_names {
700            current_len += col.len() + ADD_PER_ITEM;
701
702            if current_len > MAX_LEN {
703                write!(f, ", ... {} other ", len - i)?;
704                if len - i == 1 {
705                    f.write_str("column")?;
706                } else {
707                    f.write_str("columns")?;
708                }
709
710                break;
711            }
712
713            write!(f, ", \"{col}\"")?;
714        }
715
716        Ok(())
717    }
718}
719
720impl fmt::Debug for Operator {
721    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
722        Display::fmt(self, f)
723    }
724}
725
726impl fmt::Debug for LiteralValue {
727    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
728        use LiteralValue::*;
729
730        match self {
731            Binary(_) => write!(f, "[binary value]"),
732            Range { low, high, .. } => write!(f, "range({low}, {high})"),
733            Series(s) => {
734                let name = s.name();
735                if name.is_empty() {
736                    write!(f, "Series")
737                } else {
738                    write!(f, "Series[{name}]")
739                }
740            },
741            Float(v) => {
742                let av = AnyValue::Float64(*v);
743                write!(f, "dyn float: {}", av)
744            },
745            Int(v) => write!(f, "dyn int: {}", v),
746            _ => {
747                let av = self.to_any_value().unwrap();
748                write!(f, "{av}")
749            },
750        }
751    }
752}