1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
//! Overlap detection for rules in ISLE.

use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};

use crate::error::{Error, Result, Source, Span};
use crate::lexer::Pos;
use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId};

/// Check for overlap.
pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
    let mut errors = check_overlaps(termenv).report(tyenv, termenv);
    errors.sort_by_key(|err| match err {
        Error::OverlapError { rules, .. } => rules.first().unwrap().1.from,
        _ => Pos::default(),
    });
    match errors.len() {
        0 => Ok(()),
        1 => Err(errors.pop().unwrap()),
        _ => Err(Error::Errors(errors)),
    }
}

/// A graph of rules that overlap in the ISLE source. The edges are undirected.
#[derive(Default)]
struct Errors {
    /// Edges between rules indicating overlap.
    nodes: HashMap<RuleId, HashSet<RuleId>>,
}

impl Errors {
    /// Condense the overlap information down into individual errors. We iteratively remove the
    /// nodes from the graph with the highest degree, reporting errors for them and their direct
    /// connections. The goal with reporting errors this way is to prefer reporting rules that
    /// overlap with many others first, and then report other more targeted overlaps later.
    fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec<Error> {
        let mut errors = Vec::new();

        let get_info = |id: RuleId| {
            let rule = &termenv.rules[id.0];
            let file = rule.pos.file;
            let src = Source::new(
                tyenv.filenames[file].clone(),
                tyenv.file_texts[file].clone(),
            );
            let span = Span::new_single(rule.pos);
            (src, span)
        };

        while let Some((&id, _)) = self
            .nodes
            .iter()
            .max_by_key(|(id, edges)| (edges.len(), *id))
        {
            let node = self.nodes.remove(&id).unwrap();
            for other in node.iter() {
                if let Entry::Occupied(mut entry) = self.nodes.entry(*other) {
                    let back_edges = entry.get_mut();
                    back_edges.remove(&id);
                    if back_edges.is_empty() {
                        entry.remove();
                    }
                }
            }

            // build the real error
            let mut rules = vec![get_info(id)];

            rules.extend(node.into_iter().map(get_info));

            errors.push(Error::OverlapError {
                msg: String::from("rules are overlapping"),
                rules,
            });
        }

        errors
    }

    /// Add a bidirectional edge between two rules in the graph.
    fn add_edge(&mut self, a: RuleId, b: RuleId) {
        // edges are undirected
        self.nodes.entry(a).or_default().insert(b);
        self.nodes.entry(b).or_default().insert(a);
    }
}

/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of
/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the
/// presence of wildcard patterns.
fn check_overlaps(env: &TermEnv) -> Errors {
    struct RulePatterns<'a> {
        rule: &'a Rule,
        pats: Box<[Pattern]>,
    }
    let mut by_term = HashMap::new();
    for rule in env.rules.iter() {
        if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
            let is_multi_ctor = match &env.terms[tid.index()].kind {
                &TermKind::Decl { multi, .. } => multi,
                _ => false,
            };
            if is_multi_ctor {
                // Rules for multi-constructors are not checked for
                // overlap: the ctor returns *every* match, not just
                // the first or highest-priority one, so overlap does
                // not actually affect the results.
                continue;
            }

            // Group rules by term and priority. Only rules within the same group are checked to
            // see if they overlap each other. If you want to change the scope of overlap checking,
            // change this key.
            let key = (tid, rule.prio);

            let mut binds = Vec::new();
            let rule = RulePatterns {
                rule,
                pats: vars
                    .iter()
                    .map(|pat| Pattern::from_sema(env, &mut binds, pat))
                    .collect(),
            };
            by_term.entry(key).or_insert_with(Vec::new).push(rule);
        }
    }

    let mut errs = Errors::default();
    for (_, rows) in by_term {
        let mut cursor = rows.into_iter();
        while let Some(left) = cursor.next() {
            for right in cursor.as_slice() {
                if check_overlap_pair(&left.pats, &right.pats) {
                    errs.add_edge(left.rule.id, right.rule.id);
                }
            }
        }
    }
    errs
}

/// Check if two rules overlap in the inputs they accept.
fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool {
    debug_assert_eq!(a.len(), b.len());
    let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect();

    while let Some((a, b)) = worklist.pop() {
        // Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or
        // hash-maps might be faster in practice, but:
        // - The alternatives are not asymptotically faster, because in theory all the subpatterns
        //   might have the same extractor or enum variant, and in that case any approach has to
        //   check all of the cross-product combinations anyway.
        // - It's easier to reason about this doubly-nested loop than about merging sorted lists or
        //   picking the right hash keys.
        // - These lists are always so small that performance doesn't matter.
        for a in a.as_and_subpatterns() {
            for b in b.as_and_subpatterns() {
                let overlap = match (a, b) {
                    (Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b,
                    (Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b,

                    // if it's the same variant or same extractor, check all pairs of subterms
                    (
                        Pattern::Variant {
                            id: a,
                            pats: a_pats,
                        },
                        Pattern::Variant {
                            id: b,
                            pats: b_pats,
                        },
                    )
                    | (
                        Pattern::Extractor {
                            id: a,
                            pats: a_pats,
                        },
                        Pattern::Extractor {
                            id: b,
                            pats: b_pats,
                        },
                    ) if a == b => {
                        debug_assert_eq!(a_pats.len(), b_pats.len());
                        worklist.extend(a_pats.iter().zip(b_pats.iter()));
                        true
                    }

                    // different variants of the same enum definitely do not overlap
                    (Pattern::Variant { .. }, Pattern::Variant { .. }) => false,

                    // an extractor which does not exactly match the other pattern might overlap
                    (Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true,

                    // a wildcard definitely overlaps
                    (Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true,

                    // these patterns can only be paired with patterns of the same type, or
                    // wildcards or extractors, and all those cases are covered above
                    (Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => {
                        unreachable!()
                    }

                    // and-patterns don't reach here due to as_and_subpatterns
                    (Pattern::And { .. }, _) => unreachable!(),
                };

                if !overlap {
                    return false;
                }
            }
        }
    }
    true
}

/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier.
#[derive(Debug, Clone)]
enum Pattern {
    /// Integer literal patterns.
    Int {
        value: i128,
    },

    /// Constant literal patterns, such as `$F32`.
    Const {
        name: Sym,
    },

    /// Enum variant constructors.
    Variant {
        id: TermId,
        pats: Box<[Pattern]>,
    },

    /// Conjunctions of patterns.
    And {
        pats: Box<[Pattern]>,
    },

    /// Extractor uses (both fallible and infallible).
    Extractor {
        id: TermId,
        pats: Box<[Pattern]>,
    },

    Wildcard,
}

impl Pattern {
    /// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two
    /// representations are as follows:
    /// 1. Variable bindings are removed and turned into wildcards
    /// 2. Equality constraints are removed and turned into inlined versions of the patterns they
    ///    would have introduced equalities with
    /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or
    ///    [`Pattern::Extractor`] cases depending on their term kind.
    fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self {
        match pat {
            sema::Pattern::BindPattern(_, id, pat) => {
                let pat = Self::from_sema(env, binds, pat);
                binds.push((*id, pat.clone()));
                pat
            }

            sema::Pattern::Var(_, id) => {
                for (vid, pat) in binds.iter().rev() {
                    if vid == id {
                        // We inline equality constraints for two reasons: we specialize on the
                        // spine of related patterns only, so more specific information about
                        // individual values isn't necessarily helpful; we consider overlap
                        // checking to be an over-approximation of overlapping rules, so handling
                        // equalities ends up being best-effort. As an approximation, we use
                        // whatever pattern happened to be at the binding of the variable for all
                        // of the cases where it's used for equality. For example, in the following
                        // rule:
                        //
                        // > (rule (example x @ (Enum.Variant y) x) ...)
                        //
                        // we will only specialize up to `(Enum.Variant _)`, so any more specific
                        // runtime values of `y` won't end up helping to identify overlap. As a
                        // result, we rewrite the patterns in the rule to look more like the
                        // following, as it greatly simplifies overlap checking.
                        //
                        // > (rule (example (Enum.Variant _) (Enum.Variant _)) ...)
                        //
                        // Cases that this scheme won't handle look like the following:
                        //
                        // > (rule (example2 2 3) ...)
                        // > (rule (example2 x x) ...)
                        //
                        // As in this case we'll not make use of the information that `2` and `3`
                        // aren't equal to know that the rules don't overlap. One approach that we
                        // could take here is delaying substitution to the point where a variable
                        // binding has been specialized, turning the rules into the following once
                        // specialization had occurred for `2`:
                        //
                        // > (rule (example2 2 3) ...)
                        // > (rule (example2 2 2) ...)
                        return pat.clone();
                    }
                }

                binds.push((*id, Pattern::Wildcard));
                Pattern::Wildcard
            }

            sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value },
            sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name },

            &sema::Pattern::Term(_, id, ref pats) => {
                let pats = pats
                    .iter()
                    .map(|pat| Pattern::from_sema(env, binds, pat))
                    .collect();

                match &env.terms[id.0].kind {
                    TermKind::EnumVariant { .. } => Pattern::Variant { id, pats },
                    TermKind::Decl { .. } => Pattern::Extractor { id, pats },
                }
            }

            sema::Pattern::Wildcard(_) => Pattern::Wildcard,

            sema::Pattern::And(_, pats) => {
                let pats = pats
                    .iter()
                    .map(|pat| Pattern::from_sema(env, binds, pat))
                    .collect();
                Pattern::And { pats }
            }
        }
    }

    /// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an
    /// and-pattern which has this as its only subpattern, and return self as a single-element
    /// slice.
    fn as_and_subpatterns(&self) -> &[Pattern] {
        if let Pattern::And { pats } = self {
            pats
        } else {
            std::slice::from_ref(self)
        }
    }
}