// sway_ir/optimize/mem2reg.rs
use indexmap::IndexMap;
/// Promote local memory to SSA registers.
/// This pass is essentially SSA construction. A good, readable reference is
/// Appel's "Modern Compiler Implementation in C":
/// https://www.cs.princeton.edu/~appel/modern/c/
/// We use block arguments instead of explicit PHI nodes; conceptually,
/// the two are equivalent.
use rustc_hash::FxHashMap;
use std::collections::HashSet;
use sway_utils::mapped_stack::MappedStack;

use crate::{
    AnalysisResults, Block, BranchToWithArgs, Context, DomFronts, DomTree, Function, InstOp,
    Instruction, IrError, LocalVar, Pass, PassMutability, PostOrder, ScopedPass, Type, Value,
    ValueDatum, DOMINATORS_NAME, DOM_FRONTS_NAME, POSTORDER_NAME,
};

/// Name under which the mem2reg pass is registered.
pub const MEM2REG_NAME: &str = "mem2reg";

/// Builds the [`Pass`] descriptor for mem2reg.
///
/// The pass is a function-level transform run by `promote_to_registers`, and it
/// declares the post-order, dominator-tree and dominance-frontier analyses as
/// its dependencies.
pub fn create_mem2reg_pass() -> Pass {
    let required_analyses = vec![POSTORDER_NAME, DOMINATORS_NAME, DOM_FRONTS_NAME];
    let transform = PassMutability::Transform(promote_to_registers);

    Pass {
        name: MEM2REG_NAME,
        descr: "Promotion of local memory to SSA registers",
        deps: required_analyses,
        runner: ScopedPass::FunctionPass(transform),
    }
}

// If `val` is a `get_local` instruction whose local variable has a name in
// `function`, return that name together with the local variable itself.
// Any other kind of value (or a local without a name) yields `None`.
fn get_validate_local_var(
    context: &Context,
    function: &Function,
    val: &Value,
) -> Option<(String, LocalVar)> {
    if let ValueDatum::Instruction(Instruction {
        op: InstOp::GetLocal(local_var),
        ..
    }) = context.values[val.0].value
    {
        // Bail out with `None` when the function doesn't know this local.
        let local_name = function.lookup_local_name(context, &local_var)?;
        Some((local_name.clone(), local_var))
    } else {
        None
    }
}

// Returns the names of those locals that can be promoted to SSA registers.
//
// A local qualifies when:
//   1. its inner type is unit, bool, or an unsigned integer of at most 64 bits, and
//   2. its address (the result of a `get_local`) never escapes, i.e. it is only
//      ever used as the pointer operand of a load or as the destination of a store.
//
// A local is disqualified when its `get_local` pointer is
//   - an operand of any instruction other than a load or a store, or
//   - the *value* being stored by a store (the address is written to memory,
//     so the local may later be accessed through that copy of the pointer).
fn filter_usable_locals(context: &mut Context, function: &Function) -> HashSet<String> {
    // The size of an SSA register is target specific.  Here we're going to just stick with atomic
    // types which can fit in 64-bits.
    let mut locals: HashSet<String> = function
        .locals_iter(context)
        .filter_map(|(name, var)| {
            let ty = var.get_inner_type(context);
            (ty.is_unit(context)
                || ty.is_bool(context)
                || (ty.is_uint(context)
                    && ty.get_uint_width(context).map_or(false, |width| width <= 64)))
            .then_some(name.clone())
        })
        .collect();

    for (_, inst) in function.instruction_iter(context) {
        match context.values[inst.0].value {
            ValueDatum::Instruction(Instruction {
                op: InstOp::Load(_),
                ..
            }) => {
                // Loading through a local's pointer is understood, so no problem.
            }
            ValueDatum::Instruction(Instruction {
                op: InstOp::Store { stored_val, .. },
                ..
            }) => {
                // Storing *to* a local is understood, but storing the *address* of a
                // local somewhere lets it escape: that local must stay in memory.
                if let Some((local, ..)) = get_validate_local_var(context, function, &stored_val)
                {
                    locals.remove(&local);
                }
            }
            _ => {
                // Make sure that no local escapes into instructions we don't understand.
                let operands = inst.get_instruction(context).unwrap().op.get_operands();
                for opd in operands {
                    if let Some((local, ..)) = get_validate_local_var(context, function, &opd) {
                        locals.remove(&local);
                    }
                }
            }
        }
    }
    locals
}

// Compute, for every block in `po`, the subset of `locals` that is live on entry.
//
// This is a backward data-flow analysis iterated to a fixed point: a load
// through a local's pointer makes the local live, a store to it kills it, and
// the live-out of a block is the union of the live-ins of its successors.
//
// TODO: Use rustc_index::bit_set::ChunkedBitSet by mapping local names to indices.
//       This will allow more efficient set operations.
pub fn compute_livein(
    context: &mut Context,
    function: &Function,
    po: &PostOrder,
    locals: &HashSet<String>,
) -> FxHashMap<Block, HashSet<String>> {
    // Every block starts out with an empty live-in set.
    let mut live_in: FxHashMap<Block, HashSet<String>> = po
        .po_to_block
        .iter()
        .map(|blk| (*blk, HashSet::<String>::default()))
        .collect();

    loop {
        let mut any_update = false;

        for blk in &po.po_to_block {
            // Live-out of this block: union of the successors' live-in sets.
            let mut live = HashSet::<String>::default();
            for BranchToWithArgs { block: succ, .. } in blk.successors(context) {
                live.extend(live_in[&succ].iter().cloned());
            }

            // Walk the instructions backwards, applying gen (load) / kill (store).
            for inst in blk.instruction_iter(context).rev() {
                let (pointer, is_load) = match context.values[inst.0].value {
                    ValueDatum::Instruction(Instruction {
                        op: InstOp::Load(ptr),
                        ..
                    }) => (ptr, true),
                    ValueDatum::Instruction(Instruction {
                        op: InstOp::Store { dst_val_ptr, .. },
                        ..
                    }) => (dst_val_ptr, false),
                    _ => continue,
                };

                if let Some((name, ..)) = get_validate_local_var(context, function, &pointer) {
                    if locals.contains(&name) {
                        if is_load {
                            live.insert(name);
                        } else {
                            live.remove(&name);
                        }
                    }
                }
            }

            // What is live at the top of the block is its live-in; record any change.
            if live_in[blk] != live {
                live_in.get_mut(blk).unwrap().extend(live);
                any_update = true;
            }
        }

        if !any_update {
            break;
        }
    }

    live_in
}

/// Promote local values that are accessed via load/store to SSA registers.
///
/// Only the locals returned by `filter_usable_locals` are promoted: small
/// scalar locals (unit, bool, unsigned integers of at most 64 bits) whose
/// `get_local` pointer is not passed to instructions this pass does not
/// understand.
///
/// The transform runs in three steps:
/// 1. Insert PHIs (as new block arguments) at the dominance frontiers of every
///    block that defines a promotable local, but only where that local is live-in.
/// 2. Walk the dominator tree from the entry block, recording for each load the
///    value that replaces it, and passing the current value of each local to the
///    newly added block arguments of successor blocks.
/// 3. Apply the recorded replacements and delete the promoted loads and stores.
///
/// # Returns
/// `Ok(false)` if there are no promotable locals. Otherwise `Ok(true)`, which is
/// returned unconditionally once the rewrite has run.
///
/// # Panics
/// Panics if a promotable local is read (or must be passed to a PHI) at a point
/// where no store reaches it and the local has no initializer.
pub fn promote_to_registers(
    context: &mut Context,
    analyses: &AnalysisResults,
    function: Function,
) -> Result<bool, IrError> {
    let safe_locals = filter_usable_locals(context, &function);
    if safe_locals.is_empty() {
        return Ok(false);
    }
    // The analyses declared as dependencies of this pass.
    let po: &PostOrder = analyses.get_analysis_result(function);
    let dom_tree: &DomTree = analyses.get_analysis_result(function);
    let dom_fronts: &DomFronts = analyses.get_analysis_result(function);
    let liveins = compute_livein(context, &function, po, &safe_locals);

    // The (local, block) pairs for which we have inserted a PHI in this transform.
    let mut new_phi_tracker = HashSet::<(String, Block)>::new();
    // Pending definitions: (local name, the local's inner type, block holding a definition).
    let mut worklist = Vec::<(String, Type, Block)>::new();
    // A map from newly inserted block args to the name of the local that it's a PHI for.
    let mut phi_to_local = FxHashMap::<Value, String>::default();
    // Insert PHIs for each definition (store) at its dominance frontiers.
    // Start by adding the existing definitions (stores) to a worklist,
    // in program order (reverse post order). This is for faster convergence (or maybe not).
    for (block, inst) in po
        .po_to_block
        .iter()
        .rev()
        .flat_map(|b| b.instruction_iter(context).map(|i| (*b, i)))
    {
        if let ValueDatum::Instruction(Instruction {
            op: InstOp::Store { dst_val_ptr, .. },
            ..
        }) = context.values[inst.0].value
        {
            match get_validate_local_var(context, &function, &dst_val_ptr) {
                Some((local, var)) if safe_locals.contains(&local) => {
                    worklist.push((local, var.get_inner_type(context), block));
                }
                _ => (),
            }
        }
    }
    // Transitively add PHIs, till nothing more to do.
    while let Some((local, ty, known_def)) = worklist.pop() {
        for df in dom_fronts[&known_def].iter() {
            // Skip blocks that already have a PHI for this local, and blocks where
            // the local isn't live-in (a PHI there would never be read).
            if !new_phi_tracker.contains(&(local.clone(), *df)) && liveins[df].contains(&local) {
                // Insert PHI for this local at block df.
                let index = df.new_arg(context, ty);
                phi_to_local.insert(df.get_arg(context, index).unwrap(), local.clone());
                new_phi_tracker.insert((local.clone(), *df));
                // Add df to the worklist: the new PHI is itself a definition of the local.
                worklist.push((local.clone(), ty, *df));
            }
        }
    }

    // We're just left with rewriting the loads and stores into SSA.
    // For efficiency, we first collect the rewrites
    // and then apply them all together in the next step.
    //
    // `record_rewrites` processes `node` and then recurses into its children in
    // the dominator tree. `name_stack` holds, per local, the stack of values
    // that currently define it; `rewrites` collects load -> replacement value;
    // `deletes` collects the loads and stores to remove afterwards.
    #[allow(clippy::too_many_arguments)]
    fn record_rewrites(
        context: &mut Context,
        function: &Function,
        dom_tree: &DomTree,
        node: Block,
        safe_locals: &HashSet<String>,
        phi_to_local: &FxHashMap<Value, String>,
        name_stack: &mut MappedStack<String, Value>,
        rewrites: &mut FxHashMap<Value, Value>,
        deletes: &mut Vec<(Block, Value)>,
    ) {
        // Whatever new definitions we find in this block, they must be popped
        // when we're done. So let's keep track of that locally as a count.
        let mut num_local_pushes = IndexMap::<String, u32>::new();

        // Start with relevant block args, they are new definitions.
        for arg in node.arg_iter(context) {
            if let Some(local) = phi_to_local.get(arg) {
                name_stack.push(local.clone(), *arg);
                num_local_pushes
                    .entry(local.clone())
                    .and_modify(|count| *count += 1)
                    .or_insert(1);
            }
        }

        for inst in node.instruction_iter(context) {
            match context.values[inst.0].value {
                ValueDatum::Instruction(Instruction {
                    op: InstOp::Load(ptr),
                    ..
                }) => {
                    let local_var = get_validate_local_var(context, function, &ptr);
                    match local_var {
                        Some((local, var)) if safe_locals.contains(&local) => {
                            // We should replace all uses of inst with the value on top
                            // of name_stack for this local.
                            let new_val = match name_stack.get(&local) {
                                Some(val) => *val,
                                None => {
                                    // Nothing on the stack, let's attempt to get the initializer
                                    Value::new_constant(
                                        context,
                                        var.get_initializer(context)
                                            .expect("We're dealing with an uninitialized value")
                                            .clone(),
                                    )
                                }
                            };
                            rewrites.insert(inst, new_val);
                            deletes.push((node, inst));
                        }
                        _ => (),
                    }
                }
                ValueDatum::Instruction(Instruction {
                    op:
                        InstOp::Store {
                            dst_val_ptr,
                            stored_val,
                        },
                    ..
                }) => {
                    let local_var = get_validate_local_var(context, function, &dst_val_ptr);
                    match local_var {
                        Some((local, _)) if safe_locals.contains(&local) => {
                            // Henceforth, everything that's dominated by this inst must use stored_val
                            // instead of loading from dst_val_ptr.
                            name_stack.push(local.clone(), stored_val);
                            num_local_pushes
                                .entry(local)
                                .and_modify(|count| *count += 1)
                                .or_insert(1);
                            deletes.push((node, inst));
                        }
                        _ => (),
                    }
                }
                _ => (),
            }
        }

        // Update arguments to successor blocks (i.e., PHI args).
        for BranchToWithArgs { block: succ, .. } in node.successors(context) {
            // Copy the args out first, since the loop body needs `context` mutably.
            let args: Vec<_> = succ.arg_iter(context).copied().collect();
            // For every arg of succ, if it's in phi_to_local,
            // we pass, as arg, the top value of local
            for arg in args {
                if let Some(local) = phi_to_local.get(&arg) {
                    let ptr = function.get_local_var(context, local).unwrap();
                    let new_val = match name_stack.get(local) {
                        Some(val) => *val,
                        None => {
                            // Nothing on the stack, let's attempt to get the initializer
                            Value::new_constant(
                                context,
                                ptr.get_initializer(context)
                                    .expect("We're dealing with an uninitialized value")
                                    .clone(),
                            )
                        }
                    };
                    // Append the value to the parameters this block passes to succ.
                    let params = node.get_succ_params_mut(context, &succ).unwrap();
                    params.push(new_val);
                }
            }
        }

        // Process dominator children.
        for child in dom_tree.children(node) {
            record_rewrites(
                context,
                function,
                dom_tree,
                child,
                safe_locals,
                phi_to_local,
                name_stack,
                rewrites,
                deletes,
            );
        }

        // Pop from the names stack everything this block pushed, so that siblings
        // in the dominator tree don't see this block's definitions.
        for (local, pushes) in num_local_pushes.iter() {
            for _ in 0..*pushes {
                name_stack.pop(local);
            }
        }
    }

    let mut name_stack = MappedStack::<String, Value>::default();
    let mut value_replacement = FxHashMap::<Value, Value>::default();
    let mut delete_insts = Vec::<(Block, Value)>::new();
    // Walk the whole dominator tree, starting at the entry block.
    record_rewrites(
        context,
        &function,
        dom_tree,
        function.get_entry_block(context),
        &safe_locals,
        &phi_to_local,
        &mut name_stack,
        &mut value_replacement,
        &mut delete_insts,
    );

    // Apply the rewrites.
    function.replace_values(context, &value_replacement, None);
    // Delete the loads and stores.
    for (block, inst) in delete_insts {
        block.remove_instruction(context, inst);
    }

    Ok(true)
}