cranelift_isle/
serialize.rs

//! Put the "sea of nodes" representation of a `RuleSet` into a sequential order.
//!
//! We're trying to satisfy two key constraints on generated code:
//!
//! First, we must produce the same result as if we tested the left-hand side
//! of every rule in descending priority order and picked the first match.
//! But that would mean a lot of duplicated work since many rules have similar
//! patterns. We want to evaluate in an order that gets the same answer but
//! does as little work as possible.
//!
//! Second, some ISLE patterns can only be implemented in Rust using a `match`
//! expression (or various choices of syntactic sugar). Others can only
//! be implemented as expressions, which can't be evaluated while matching
//! patterns in Rust. So we need to alternate between pattern matching and
//! expression evaluation.
//!
//! To meet both requirements, we repeatedly partition the set of rules for a
//! term and build a tree of Rust control-flow constructs corresponding to each
//! partition. The root of such a tree is a [Block], and [serialize] constructs
//! it.
use std::cmp::Reverse;

use crate::disjointsets::DisjointSets;
use crate::lexer::Pos;
use crate::trie_again::{Binding, BindingId, Constraint, Rule, RuleSet};

/// Builds the tree of [Block]s that evaluates `rules`, returning its root.
pub fn serialize(rules: &RuleSet) -> Block {
    // Building the tree means repeatedly splitting the rules into smaller
    // and smaller subsets. Rather than moving the rules themselves around,
    // we permute a list of indexes into `rules.rules`; because we are free
    // to re-order that list, every subset can be kept contiguous within it.
    // The algorithm in this module never duplicates or discards a rule, so
    // the subsets always add up to exactly the full list, and this single
    // up-front allocation can be shared throughout the whole tree.
    //
    // As a side effect, once the algorithm finishes, this list records the
    // order in which rule bodies will be emitted in the generated Rust.
    // Nothing reads it afterwards, since the same information is available
    // from the built tree, but it can help to picture the intermediate steps
    // as recursively sorting the rules. It may not be an order a comparison
    // sort could produce, and the asymptotic complexity is probably worse
    // than O(n log n), but it is still sorting of some kind.
    let mut order: Vec<usize> = (0..rules.rules.len()).collect();
    let root = Decomposition::new(rules);
    root.sort(&mut order)
}

/// A sequence of steps to evaluate in order. Any step may return early, so
/// steps ordered later can assume the negation of the conditions evaluated in
/// earlier steps.
///
/// The derived `Default` value is a block with no steps. The block returned
/// by [serialize] is the root of the tree; nested blocks appear inside the
/// [ControlFlow] of each step.
#[derive(Default)]
pub struct Block {
    /// Steps to evaluate, in the order they must run.
    pub steps: Vec<EvalStep>,
}

/// A step to evaluate involves possibly let-binding some expressions, then
/// executing some control flow construct.
pub struct EvalStep {
    /// Before evaluating this case, emit let-bindings in this order. This is
    /// empty when the step needs no let-bindings of its own.
    pub bind_order: Vec<BindingId>,
    /// The control-flow construct to execute at this point, once the
    /// let-bindings in `bind_order` are in place.
    pub check: ControlFlow,
}

/// What kind of control-flow structure do we need to emit here?
///
/// Every variant except `Return` owns at least one nested [Block], which is
/// how the tree of blocks is formed.
pub enum ControlFlow {
    /// Test a binding site against one or more mutually-exclusive patterns and
    /// branch to the appropriate block if a pattern matches.
    Match {
        /// Which binding site are we examining at this point?
        source: BindingId,
        /// What patterns do we care about? There is one arm per distinct
        /// constraint on `source` among the rules handled by this step.
        arms: Vec<MatchArm>,
    },
    /// Test whether two binding sites have values which are equal when
    /// evaluated on the current input.
    Equal {
        /// One binding site.
        a: BindingId,
        /// The other binding site. To ensure we always generate the same code
        /// given the same set of ISLE rules, `b` should be strictly greater
        /// than `a`.
        b: BindingId,
        /// If the test succeeds, evaluate this block.
        body: Block,
    },
    /// Evaluate a block once with each value of the given binding site.
    Loop {
        /// A binding site of type [Binding::Iterator]. Its source binding site
        /// must be a multi-extractor or multi-constructor call.
        result: BindingId,
        /// What to evaluate with each binding.
        body: Block,
    },
    /// Return a result from the right-hand side of a rule. If we're building a
    /// multi-constructor then this doesn't actually return, but adds to a list
    /// of results instead. Otherwise this return stops evaluation before any
    /// later steps.
    Return {
        /// Where was the rule defined that had this right-hand side?
        pos: Pos,
        /// What is the result expression which should be returned if this
        /// rule matched?
        result: BindingId,
    },
}

/// One concrete pattern and the block to evaluate if the pattern matches.
/// Arms are collected in the `arms` list of a [ControlFlow::Match].
pub struct MatchArm {
    /// The pattern to match.
    pub constraint: Constraint,
    /// If this pattern matches, it brings these bindings into scope. If a
    /// binding is unused in this block, then the corresponding position in the
    /// pattern's bindings may be `None`.
    pub bindings: Vec<Option<BindingId>>,
    /// Steps to evaluate if the pattern matched.
    pub body: Block,
}

/// Shrinks the "selected" prefix of `order` so that checking it first cannot
/// change which rule wins.
///
/// `order[..partition_point]` is the selected group and the remainder is the
/// deferred group. The generated code checks selected rules before deferred
/// ones, yet the outcome must be _as if_ every rule had been checked in
/// strict descending priority order. Any selected rule that could be
/// shadowed by a higher-priority deferred rule is therefore demoted to the
/// deferred side.
///
/// Looking at one selected rule and one deferred rule, the selected rule may
/// stay put when either:
/// 1. the deferred rule has lower priority than the selected rule; or
/// 2. the two rules don't overlap, so they can never match the same input.
///
/// In both situations a match of the selected rule means the deferred rule
/// was not the one we wanted, and a failed match simply falls through to the
/// deferred rule later. A selected rule keeps its place only if one of these
/// holds against every deferred rule.
///
/// The overlap checking pass that runs before codegen guarantees that rules
/// of equal priority do not overlap, so case 1 is widened to "lower _or
/// equal_ priority". That much overlap knowledge is essential: there are
/// terms where codegen is impossible with the unmodified case 1 alone.
///
/// Beyond the equal-priority case, case 2 does not seem to matter in
/// practice: on the current backends a full overlap check here does not
/// change the generated code at all, so it is not performed.
///
/// Rules only ever move from selected to deferred, so the returned
/// partition point is never greater than `partition_point`.
fn respect_priority(rules: &RuleSet, order: &mut [usize], partition_point: usize) -> usize {
    let prio_of = |idx: usize| rules.rules[idx].prio;
    let (selected, deferred) = order.split_at_mut(partition_point);

    match deferred.iter().map(|&idx| prio_of(idx)).max() {
        // Nothing is deferred, so no selected rule can be shadowed.
        None => partition_point,
        // Keep only the selected rules that are at least as important as the
        // most important deferred rule.
        Some(ceiling) => partition_in_place(selected, |&idx| prio_of(idx) >= ceiling),
    }
}

/// A query which can be tested against a [Rule] to see if that rule requires
/// the given kind of control flow around the given binding sites. These
/// choices correspond to the identically-named variants of [ControlFlow].
/// Unlike [ControlFlow], the variants here carry only binding sites and no
/// nested blocks.
///
/// The order of these variants is significant, because it's used as a tie-
/// breaker in the heuristic that picks which control flow to generate next.
/// (The derived `Ord` follows declaration order, so do not reorder them.)
///
/// - Loops should always be chosen last. If a rule needs to run once for each
///   value from an iterator, but only if some other condition is true, we
///   should check the other condition first.
///
/// - Sorting concrete [HasControlFlow::Match] constraints first has the effect
///   of clustering such constraints together, which is not important but means
///   codegen could theoretically merge the cluster of matches into a single
///   Rust `match` statement.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum HasControlFlow {
    /// Find rules which have a concrete pattern constraint on the given
    /// binding site.
    Match(BindingId),

    /// Find rules which require both given binding sites to be in the same
    /// equivalence class.
    Equal(BindingId, BindingId),

    /// Find rules which must loop over the multiple values of the given
    /// binding site.
    Loop(BindingId),
}

/// The outcome of partitioning a slice of rule indexes around a query, as
/// produced by [HasControlFlow::partition].
struct PartitionResults {
    /// Whether at least one rule satisfied the query, before rule priorities
    /// were taken into account.
    any_matched: bool,
    /// How many rules at the front of the slice both satisfy the query and
    /// passed the [respect_priority] check. This can be zero even when
    /// `any_matched` is true.
    valid: usize,
}

impl HasControlFlow {
    /// Move the rules that satisfy this query to the front of `order`, then
    /// trim that prefix down to the rules which may safely be evaluated ahead
    /// of every non-matching rule, using the same priority reasoning as
    /// [respect_priority].
    ///
    /// The result reports how many rules survived the priority check
    /// (`valid`) and whether any rule satisfied the query at all
    /// (`any_matched`). Neither partition is left in any particular order;
    /// that keeps this function linear-time, and is harmless because both
    /// partitions are sorted recursively later on.
    fn partition(self, rules: &RuleSet, order: &mut [usize]) -> PartitionResults {
        // Decide which test applies once, rather than once per rule.
        let matching = match self {
            Self::Match(site) => partition_in_place(order, |&idx| {
                rules.rules[idx].get_constraint(site).is_some()
            }),
            Self::Equal(lhs, rhs) => partition_in_place(order, |&idx| {
                rules.rules[idx].equals.in_same_set(lhs, rhs)
            }),
            Self::Loop(site) => {
                partition_in_place(order, |&idx| rules.rules[idx].iterators.contains(&site))
            }
        };

        let any_matched = matching > 0;
        let valid = respect_priority(rules, order, matching);
        PartitionResults { any_matched, valid }
    }
}

/// As we proceed through sorting a term's rules, the term's binding sites move
/// through this sequence of states. This state machine helps us avoid doing
/// the same thing with a binding site more than once in any subtree.
///
/// The variants are declared in the order the states are reached, and the
/// derived `Ord` is relied upon for comparisons such as
/// `state < BindingState::Emitted`, so the declaration order must be kept.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
enum BindingState {
    /// Initially, all binding sites are unavailable for evaluation except for
    /// top-level arguments, constants, and similar.
    #[default]
    Unavailable,
    /// As more binding sites become available, it becomes possible to evaluate
    /// bindings which depend on those sites.
    Available,
    /// Once we've decided a binding is needed in order to make progress in
    /// matching, we emit a let-binding for it. We shouldn't evaluate it a
    /// second time, if possible.
    Emitted,
    /// We can only match a constraint against a binding site if we can emit it
    /// first. Afterward, we should not try to match a constraint against that
    /// site again in the same subtree.
    Matched,
}

/// A sort key used to order control-flow candidates in `best_control_flow`.
///
/// The derived `Ord` compares fields in declaration order, so `count` is the
/// primary key and `state` only breaks ties. Larger scores are better.
#[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
struct Score {
    /// We prefer to match as many rules at once as possible.
    count: usize,
    /// Break ties by preferring bindings we've already emitted.
    state: BindingState,
}

impl Score {
    /// Recompute this score. Returns whether this is a valid candidate; if
    /// not, the score may not have been updated and the candidate should
    /// be removed from further consideration. The `partition` callback is
    /// evaluated lazily.
    fn update(
        &mut self,
        state: BindingState,
        partition: impl FnOnce() -> PartitionResults,
    ) -> bool {
        // Candidates which have already been matched in this partition must
        // not be matched again. There's never anything to be gained from
        // matching a binding site when you're in an evaluation path where you
        // already know exactly what pattern that binding site matches. And
        // without this check, we could go into an infinite loop: all rules in
        // the current partition match the same pattern for this binding site,
        // so matching on it doesn't reduce the number of rules to check and it
        // doesn't make more binding sites available.
        //
        // Note that equality constraints never make a binding site `Matched`
        // and are de-duplicated using more complicated equivalence-class
        // checks instead.
        if state == BindingState::Matched {
            return false;
        }
        self.state = state;

        // The score is not based solely on how many rules have this
        // constraint, but on how many such rules can go into the same block
        // without violating rule priority. This number can grow as higher-
        // priority rules are removed from the partition, so we can't drop
        // candidates just because this is zero. If some rule has this
        // constraint, it will become viable in some later partition.
        let partition = partition();
        self.count = partition.valid;

        // Only consider constraints that are present in some rule in the
        // current partition. Note that as we partition the rule set into
        // smaller groups, the number of rules which have a particular kind of
        // constraint can never grow, so a candidate removed here doesn't need
        // to be examined again in this partition.
        partition.any_matched
    }
}

/// A rule filter ([HasControlFlow]), plus temporary storage for the sort
/// key used in `best_control_flow` to order these candidates. Keeping the
/// temporary storage here lets us avoid repeated heap allocations.
///
/// The derived `Ord` compares `score` first and `kind` second, and the best
/// candidate is the maximum, so field order matters here.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
struct Candidate {
    /// How attractive this candidate is for the current partition of rules.
    score: Score,
    /// Last resort tie-breaker: defer to HasControlFlow order, but prefer
    /// control-flow that sorts earlier. `Reverse` is what turns "sorts
    /// earlier" into "compares greater".
    kind: Reverse<HasControlFlow>,
}

impl Candidate {
    /// Wrap `kind` in a candidate whose score is only a placeholder. Call
    /// [Score::update] on the score before relying on it.
    fn new(kind: HasControlFlow) -> Self {
        let score = Score::default();
        let kind = Reverse(kind);
        Self { score, kind }
    }
}

/// A single binding site to check for participation in equality constraints,
/// plus temporary storage for the score used in `best_control_flow` to order
/// these candidates. Keeping the temporary storage here lets us avoid repeated
/// heap allocations.
///
/// The derived `Ord` compares `score` first and `source` second, so field
/// order matters here.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
struct EqualCandidate {
    /// Best-case score for any equality constraint involving `source`.
    score: Score,
    /// Last resort tie-breaker: prefer earlier binding sites. `Reverse`
    /// makes a smaller [BindingId] compare greater.
    source: Reverse<BindingId>,
}

impl EqualCandidate {
    /// Wrap `source` in a candidate whose score is only a placeholder. Call
    /// [Score::update] on the score before relying on it.
    fn new(source: BindingId) -> Self {
        let score = Score::default();
        let source = Reverse(source);
        Self { score, source }
    }
}

/// State for a [Decomposition] that needs to be cloned when entering a nested
/// scope, so that changes in that scope don't affect this one.
///
/// The derived `Default` leaves every collection empty; the `ready` table
/// still has to be sized to the number of binding sites before use.
#[derive(Clone, Default)]
struct ScopedState {
    /// The state of all binding sites at this point in the tree, indexed by
    /// [BindingId]. Bindings which become available in nested scopes don't
    /// magically become available in outer scopes too.
    ready: Vec<BindingState>,
    /// The current set of candidates for control flow to add at this point in
    /// the tree. We can't rely on any match results that might be computed in
    /// a nested scope, so if we still care about a candidate in the fallback
    /// case then we need to emit the correct control flow for it again.
    candidates: Vec<Candidate>,
    /// The current set of binding sites which participate in equality
    /// constraints at this point in the tree. We can't rely on any match
    /// results that might be computed in a nested scope, so if we still care
    /// about a candidate in the fallback case then we need to emit the correct
    /// control flow for it again.
    equal_candidates: Vec<EqualCandidate>,
    /// Equivalence classes that we've established on the current path from
    /// the root.
    equal: DisjointSets<BindingId>,
}

/// Builder for one [Block] in the tree.
///
/// A builder is consumed by its `sort` method, which returns the finished
/// [Block]. Nested blocks get their own builder, created from the parent.
struct Decomposition<'a> {
    /// The complete RuleSet, shared across the whole tree.
    rules: &'a RuleSet,
    /// Decomposition state that is scoped to the current subtree.
    scope: ScopedState,
    /// Accumulator for bindings that should be emitted before the next
    /// control-flow construct. It is drained each time a step is pushed onto
    /// `block`.
    bind_order: Vec<BindingId>,
    /// Accumulator for the final Block that we'll return as this subtree.
    block: Block,
}

impl<'a> Decomposition<'a> {
    /// Create a builder for the root [Block].
    ///
    /// Every binding site starts out in the default state, after which one
    /// `add_bindings` pass promotes the sites that are ready from the start.
    fn new(rules: &'a RuleSet) -> Decomposition<'a> {
        let mut root = Decomposition {
            rules,
            scope: ScopedState::default(),
            bind_order: Default::default(),
            block: Default::default(),
        };
        // One state slot per binding site, indexed by binding id.
        let binding_count = rules.bindings.len();
        root.scope.ready.resize(binding_count, Default::default());
        root.add_bindings();
        root
    }

    /// Create a builder for a nested [Block].
    ///
    /// The child shares the rule set and starts from a clone of this
    /// builder's scoped state, so nothing the child changes is visible here.
    /// Its `bind_order` and `block` accumulators start out empty.
    ///
    /// Only shared access to `self` is needed, and the lifetime carried by
    /// the returned builder is spelled out rather than hidden in the path.
    fn new_block(&self) -> Decomposition<'_> {
        Decomposition {
            rules: self.rules,
            scope: self.scope.clone(),
            bind_order: Default::default(),
            block: Default::default(),
        }
    }

    /// Bring every binding site's state up to date with the states of the
    /// sites it depends on, in time linear in the number of bindings.
    ///
    /// A single in-order pass is enough: `trie_again` only hash-conses a
    /// binding after all of its dependencies have been hash-consed, so a
    /// binding's dependencies are always visited before the binding itself.
    fn add_bindings(&mut self) {
        for (idx, binding) in self.rules.bindings.iter().enumerate() {
            // We only add these bindings when matching a corresponding
            // type of control flow, in `make_control_flow`.
            match binding {
                Binding::Iterator { .. }
                | Binding::MatchVariant { .. }
                | Binding::MatchSome { .. } => continue,
                _ => {}
            }

            // TODO: proactively put some bindings in `Emitted` state
            // That makes them visible to the best-binding heuristic, which
            // prefers to match on already-emitted bindings first. This helps
            // to sort cheap computations before expensive ones.

            let id: BindingId = idx.try_into().unwrap();

            // Anything already available (or further along) needs no work.
            if self.scope.ready[id.index()] >= BindingState::Available {
                continue;
            }

            // A binding becomes available once all of its sources are.
            let sources_ready = binding
                .sources()
                .iter()
                .all(|&source| self.scope.ready[source.index()] >= BindingState::Available);
            if sources_ready {
                self.set_ready(id, BindingState::Available);
            }
        }
    }

    /// Determines the final evaluation order for the given subset of rules, and
    /// builds a [Block] representing that order.
    fn sort(mut self, mut order: &mut [usize]) -> Block {
        while let Some(best) = self.best_control_flow(order) {
            // Peel off all rules that have this particular control flow, and
            // save the rest for the next iteration of the loop.
            let partition_point = best.partition(self.rules, order).valid;
            debug_assert!(partition_point > 0);
            let (this, rest) = order.split_at_mut(partition_point);
            order = rest;

            // Recursively build the control-flow tree for these rules.
            let check = self.make_control_flow(best, this);
            // Note that `make_control_flow` may have added more let-bindings.
            let bind_order = std::mem::take(&mut self.bind_order);
            self.block.steps.push(EvalStep { bind_order, check });
        }

        // At this point, `best_control_flow` says the remaining rules don't
        // have any control flow left to emit. That could be because there are
        // no unhandled rules left, or because every candidate for control flow
        // for the remaining rules has already been matched by some ancestor in
        // the tree.
        debug_assert_eq!(self.scope.candidates.len(), 0);
        // TODO: assert something about self.equal_candidates?

        // If we're building a multi-constructor, then there could be multiple
        // rules with the same left-hand side. We'll evaluate them all, but
        // to keep the output consistent, first sort by descending priority
        // and break ties with the order the rules were declared. In non-multi
        // constructors, there should be at most one rule remaining here.
        order.sort_unstable_by_key(|&idx| (Reverse(self.rules.rules[idx].prio), idx));
        for &idx in order.iter() {
            let &Rule {
                pos,
                result,
                ref impure,
                ..
            } = &self.rules.rules[idx];

            // Ensure that any impure constructors are called, even if their
            // results aren't used.
            for &impure in impure.iter() {
                self.use_expr(impure);
            }
            self.use_expr(result);

            let check = ControlFlow::Return { pos, result };
            let bind_order = std::mem::take(&mut self.bind_order);
            self.block.steps.push(EvalStep { bind_order, check });
        }

        self.block
    }

    /// Queue a let-binding for `name` and, first, for everything it depends
    /// on. Sites that are already let-bound are left alone. Certain trivial
    /// expressions that are safe and cheap to evaluate repeatedly are marked
    /// as emitted without a let-binding, which reduces clutter in the
    /// generated code.
    fn use_expr(&mut self, name: BindingId) {
        if self.scope.ready[name.index()] >= BindingState::Emitted {
            return;
        }
        self.set_ready(name, BindingState::Emitted);

        // Dependencies are queued first so they are bound before `name`.
        let binding = &self.rules.bindings[name.index()];
        for &source in binding.sources() {
            self.use_expr(source);
        }

        let needs_let = match binding {
            Binding::ConstInt { .. }
            | Binding::ConstPrim { .. }
            | Binding::Argument { .. }
            | Binding::MatchTuple { .. } => false,

            // A variant without fields is cheap to rebuild, but a variant
            // with fields may contain more complex expressions that should
            // not be duplicated.
            Binding::MakeVariant { fields, .. } => !fields.is_empty(),

            // Let-binding is always safe, so it is the default.
            _ => true,
        };
        if needs_let {
            self.bind_order.push(name);
        }
    }

    /// Build one control-flow construct and its subtree for the specified rules.
    /// The rules in `order` must all have the kind of control-flow named in `best`.
    ///
    /// Let-bindings needed before the construct are queued on
    /// `self.bind_order` (through `use_expr`); the caller is expected to
    /// attach them to the step it builds from the returned [ControlFlow].
    /// Each nested [Block] is built by a child builder from `new_block`, so
    /// state changes made for a subtree stay within that subtree.
    fn make_control_flow(&mut self, best: HasControlFlow, order: &mut [usize]) -> ControlFlow {
        match best {
            HasControlFlow::Match(source) => {
                self.use_expr(source);
                self.add_bindings();
                let mut arms = Vec::new();

                // The `unwrap` relies on the precondition that every rule in
                // `order` has a constraint on `source`.
                let get_constraint =
                    |idx: usize| self.rules.rules[idx].get_constraint(source).unwrap();

                // Ensure that identical constraints are grouped together, then
                // loop over each group.
                order.sort_unstable_by_key(|&idx| get_constraint(idx));
                for g in group_by_mut(order, |&a, &b| get_constraint(a) == get_constraint(b)) {
                    // Applying a constraint moves the discriminant from
                    // Emitted to Matched, but only within the constraint's
                    // match arm; later fallthrough cases may need to match
                    // this discriminant again. Since `source` is in the
                    // `Emitted` state in the parent due to the above call
                    // to `use_expr`, calling `add_bindings` again after this
                    // wouldn't change anything.
                    let mut child = self.new_block();
                    child.set_ready(source, BindingState::Matched);

                    // Get the constraint for this group, and all of the
                    // binding sites that it introduces. Groups are never
                    // empty, so `g[0]` is always present.
                    let constraint = get_constraint(g[0]);
                    let bindings = Vec::from_iter(
                        constraint
                            .bindings_for(source)
                            .into_iter()
                            .map(|b| child.rules.find_binding(&b)),
                    );

                    let mut changed = false;
                    for &binding in bindings.iter() {
                        if let Some(binding) = binding {
                            // Matching a pattern makes its bindings
                            // available, and also emits code to bind
                            // them.
                            child.set_ready(binding, BindingState::Emitted);
                            changed = true;
                        }
                    }

                    // As an optimization, only propagate availability
                    // if we changed any binding's readiness.
                    if changed {
                        child.add_bindings();
                    }

                    // Recursively construct a Block for this group of rules.
                    let body = child.sort(g);
                    arms.push(MatchArm {
                        constraint,
                        bindings,
                        body,
                    });
                }

                ControlFlow::Match { source, arms }
            }

            HasControlFlow::Equal(a, b) => {
                // Both sides of the equality test must be evaluated before
                // the condition can be tested. Go ahead and let-bind them
                // so they're available without re-evaluation in fall-through
                // cases.
                self.use_expr(a);
                self.use_expr(b);
                self.add_bindings();

                let mut child = self.new_block();
                // Never mark binding sites used in equality constraints as
                // "matched", because either might need to be used again in
                // a later equality check. Instead record that they're in the
                // same equivalence class on this path.
                child.scope.equal.merge(a, b);
                let body = child.sort(order);
                ControlFlow::Equal { a, b, body }
            }

            HasControlFlow::Loop(source) => {
                // Consuming a multi-term involves two binding sites:
                // calling the multi-term to get an iterator (the `source`),
                // and looping over the iterator to get a binding for each
                // `result`.
                let result = self
                    .rules
                    .find_binding(&Binding::Iterator { source })
                    .unwrap();

                // We must not let-bind the iterator until we're ready to
                // consume it, because it can only be consumed once. This also
                // means that the let-binding for `source` is not actually
                // reusable after this point, so even though we need to emit
                // its let-binding here, we pretend we haven't.
                let base_state = self.scope.ready[source.index()];
                debug_assert_eq!(base_state, BindingState::Available);
                self.use_expr(source);
                // Written directly rather than through `set_ready`, because
                // this deliberately moves the state backward.
                self.scope.ready[source.index()] = base_state;
                self.add_bindings();

                // Inside the loop body the iterator counts as matched and
                // each yielded value is already bound.
                let mut child = self.new_block();
                child.set_ready(source, BindingState::Matched);
                child.set_ready(result, BindingState::Emitted);
                child.add_bindings();
                let body = child.sort(order);
                ControlFlow::Loop { result, body }
            }
        }
    }

    /// Move the binding site `source` into `state`. The new state is usually
    /// greater than the current one and must never be lower.
    ///
    /// The first time a site leaves `Unavailable`, it is also registered as
    /// a possible source of control flow.
    fn set_ready(&mut self, source: BindingId, state: BindingState) {
        let slot = source.index();
        let previous = self.scope.ready[slot];
        debug_assert!(previous <= state);

        if previous == BindingState::Unavailable {
            // No binding site has every kind of constraint, and many have
            // none at all. `best_control_flow` has to look at every
            // candidate anyway, so it is left to work out which of these (if
            // any) apply. A false candidate is checked only once on any
            // partition and is removed from the list immediately.
            let candidates = &mut self.scope.candidates;
            candidates.push(Candidate::new(HasControlFlow::Match(source)));
            candidates.push(Candidate::new(HasControlFlow::Loop(source)));
            self.scope
                .equal_candidates
                .push(EqualCandidate::new(source));
        }

        self.scope.ready[slot] = state;
    }

    /// For the specified set of rules, heuristically choose which control-flow
    /// will minimize redundant work when the generated code is running.
    ///
    /// Returns `None` when no candidate can currently select at least one
    /// rule, including when `order` is empty. Scoring the candidates
    /// partitions `order` in place, so the rules may come back in a different
    /// order; the set of rules is unchanged. Candidates found not to apply
    /// to any rule in `order` are dropped from `self.scope`.
    fn best_control_flow(&mut self, order: &mut [usize]) -> Option<HasControlFlow> {
        // If there are no rules left, none of the candidates will match
        // anything in the `retain_mut` call below, so short-circuit it.
        if order.is_empty() {
            // This is only read in a debug-assert but it's fast so just do it
            self.scope.candidates.clear();
            return None;
        }

        // Remove false candidates, and recompute the candidate score for the
        // current set of rules in `order`.
        self.scope.candidates.retain_mut(|candidate| {
            let kind = candidate.kind.0;
            let source = match kind {
                HasControlFlow::Match(source) => source,
                HasControlFlow::Loop(source) => source,
                // Equality candidates live in `equal_candidates` instead.
                HasControlFlow::Equal(..) => unreachable!(),
            };
            let state = self.scope.ready[source.index()];
            candidate
                .score
                .update(state, || kind.partition(self.rules, order))
        });

        // Find the best normal candidate.
        let mut best = self.scope.candidates.iter().max().cloned();

        // Equality constraints are more complicated. We need to identify
        // some pair of binding sites which are constrained to be equal in at
        // least one rule in the current partition. We do this in two steps.
        // First, find each single binding site which participates in any
        // equality constraint in some rule. We compute the best-case `Score`
        // we could get, if there were another binding site where all the rules
        // constraining this binding site require it to be equal to that one.
        self.scope.equal_candidates.retain_mut(|candidate| {
            let source = candidate.source.0;
            let state = self.scope.ready[source.index()];
            candidate.score.update(state, || {
                let matching = partition_in_place(order, |&idx| {
                    self.rules.rules[idx].equals.find(source).is_some()
                });
                PartitionResults {
                    any_matched: matching > 0,
                    valid: respect_priority(self.rules, order, matching),
                }
            })
        });

        // Now that we know which single binding sites participate in any
        // equality constraints, we need to find the best pair of binding
        // sites. Rules that require binding sites `x` and `y` to be equal are
        // a subset of the intersection of rules constraining `x` and those
        // constraining `y`. So the upper bound on the number of matching rules
        // is whichever candidate is smaller.
        //
        // Do an O(n log n) sort to put the best single binding sites first.
        // Then the O(n^2) all-pairs loop can do branch-and-bound style
        // pruning, breaking out of a loop as soon as the remaining candidates
        // must all produce worse results than our current best candidate.
        //
        // Note that `x` and `y` are reversed, to sort in descending order.
        self.scope
            .equal_candidates
            .sort_unstable_by(|x, y| y.cmp(x));

        let mut equals = self.scope.equal_candidates.iter();
        while let Some(x) = equals.next() {
            // `None` compares less than any `Some`, so while there is no
            // `best` yet nothing is pruned.
            if Some(&x.score) < best.as_ref().map(|best| &best.score) {
                break;
            }
            let x_id = x.source.0;
            // Only pair `x` with the candidates after it, so each unordered
            // pair is considered once.
            for y in equals.as_slice().iter() {
                if Some(&y.score) < best.as_ref().map(|best| &best.score) {
                    break;
                }
                let y_id = y.source.0;
                // If x and y are already in the same path-scoped equivalence
                // class, then skip this pair because we already emitted this
                // check or a combination of equivalent checks on this path.
                if !self.scope.equal.in_same_set(x_id, y_id) {
                    // Sort arguments for consistency.
                    let kind = if x_id < y_id {
                        HasControlFlow::Equal(x_id, y_id)
                    } else {
                        HasControlFlow::Equal(y_id, x_id)
                    };
                    let pair = Candidate {
                        kind: Reverse(kind),
                        score: Score {
                            count: kind.partition(self.rules, order).valid,
                            // Only treat this as already-emitted if
                            // both bindings are.
                            state: x.score.state.min(y.score.state),
                        },
                    };
                    if best.as_ref() < Some(&pair) {
                        best = Some(pair);
                    }
                }
            }
        }

        // A winner that selects no rules cannot make progress, so report
        // that there is nothing to do.
        best.filter(|candidate| candidate.score.count > 0)
            .map(|candidate| candidate.kind.0)
    }
}

/// Reorders `xs` in place so that every element accepted by `pred` precedes
/// every element it rejects, and returns how many elements were accepted
/// (that is, the index where the rejected elements begin).
///
/// The relative order of elements within each partition is not preserved.
///
/// Every element is passed to `pred` exactly once, so the running time is
/// linear in the length of the slice. A swap is only performed when a rejected
/// element is found ahead of an accepted one, so a predicate that accepts
/// everything or nothing leaves the slice untouched; that makes it cheap to
/// call this with predicates that are expected to match nothing.
fn partition_in_place<T>(xs: &mut [T], mut pred: impl FnMut(&T) -> bool) -> usize {
    // Invariant: everything in `xs[..front]` is accepted, everything in
    // `xs[back..]` is rejected, and `xs[front..back]` is still unexamined.
    let mut front = 0;
    let mut back = xs.len();
    while front < back {
        if !pred(&xs[front]) {
            // `xs[front]` belongs at the end. Walk inward from the back
            // looking for an accepted element to trade places with; every
            // element passed over on the way is rejected and therefore
            // already where it should be.
            match xs[front + 1..back].iter().rposition(|item| pred(item)) {
                Some(offset) => {
                    back = front + 1 + offset;
                    xs.swap(front, back);
                }
                // Nothing unexamined was accepted, so the slice is fully
                // partitioned already.
                None => break,
            }
        }
        // Whether it was accepted directly or swapped in, `xs[front]` is now
        // an accepted element.
        front += 1;
    }
    front
}

/// Splits `xs` into consecutive, non-overlapping mutable sub-slices, yielding
/// them lazily from front to back.
///
/// A group is extended for as long as `pred` returns `true` for each pair of
/// adjacent elements (earlier element first); the first adjacent pair for
/// which it returns `false` marks the boundary between one group and the
/// next. Every yielded group is non-empty, and an empty input yields nothing.
fn group_by_mut<T: Eq>(
    mut xs: &mut [T],
    mut pred: impl FnMut(&T, &T) -> bool,
) -> impl Iterator<Item = &mut [T]> {
    std::iter::from_fn(move || {
        if xs.is_empty() {
            return None;
        }

        // Grow the group one element at a time until the slice runs out or
        // `pred` rejects the next adjacent pair.
        let mut len = 1;
        while len < xs.len() && pred(&xs[len - 1], &xs[len]) {
            len += 1;
        }

        // Move the remaining slice out of `xs` so that both halves keep the
        // full original lifetime rather than a reborrow of the closure state.
        let (group, rest) = std::mem::take(&mut xs).split_at_mut(len);
        xs = rest;
        Some(group)
    })
}

/// Checks that `group_by_mut` yields each maximal run of equal neighbors, in
/// order, and then stops.
#[test]
fn test_group_mut() {
    let mut values = [1, 1, 1, 3, 3, 2, 2, 2];
    let runs: Vec<Vec<i32>> = group_by_mut(&mut values, |lhs, rhs| lhs == rhs)
        .map(|run| run.to_vec())
        .collect();
    assert_eq!(runs, [vec![1, 1, 1], vec![3, 3], vec![2, 2, 2]]);
}
cranelift_isle/serialize.rs

cranelift_isle/
serialize.rs