cranelift_codegen/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
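//!
//! As a rough illustration (the mnemonics and block names below are
//! hypothetical, not tied to any particular backend), a VCode conditional
//! branch with two explicit targets can be emitted in one of two shapes
//! depending on the final layout:
//!
//! ```text
//! ;; VCode: block0 ends with: br_cond taken=block2, not_taken=block1
//!
//! ;; If block1 immediately follows block0 in the final layout, the
//! ;; not-taken edge becomes a fallthrough and only one branch is emitted:
//!     b.cond  label(block2)
//!     ;; fallthrough into block1
//!
//! ;; If neither target follows block0, a cond/uncond pair is emitted:
//!     b.cond  label(block2)
//!     b       label(block1)
//! ```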
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir::pcc::*;
use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
use crate::machinst::*;
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::CodegenError;
use crate::{LabelValueLoc, ValueLocRange};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
    OperandConstraint, OperandKind, PRegSet, RegClass,
};
use rustc_hash::FxHashMap;

use core::mem::take;
use cranelift_entity::{entity_impl, Keys};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Extension trait for `InsnIndex` to allow conversion to a
/// `BackwardsInsnIndex`.
trait ToBackwardsInsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
}

impl ToBackwardsInsnIndex for InsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
        BackwardsInsnIndex::new(num_insts - self.index() - 1)
    }
}

/// An index referring to an instruction in the VCode when it is backwards,
/// during VCode construction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "enable-serde",
    derive(::serde::Serialize, ::serde::Deserialize)
)]
pub struct BackwardsInsnIndex(InsnIndex);

impl BackwardsInsnIndex {
    pub fn new(i: usize) -> Self {
        BackwardsInsnIndex(InsnIndex::new(i))
    }
}

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering. This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`. `emit` in turn does not
/// modify the vcode, but produces an `EmitResult`, which contains the
/// machine code itself, and the associated disassembly and/or
/// metadata as requested.
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// A map from backwards instruction index to the user stack map for that
    /// instruction.
    ///
    /// This is a sparse side table that only has entries for instructions that
    /// are safepoints, and only for a subset of those that have an associated
    /// user stack map.
    user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Ranges,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Ranges,

    /// Block successors: index range in the `block_succs` list.
    block_succ_range: Ranges,

    /// Block successor lists, concatenated into one vec. The
    /// `block_succ_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// successors.
    block_succs: Vec<regalloc2::Block>,

    /// Block predecessors: index range in the `block_preds` list.
    block_pred_range: Ranges,

    /// Block predecessor lists, concatenated into one vec. The
    /// `block_pred_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// predecessors.
    block_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Ranges,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor. We flatten those three dimensions
    /// into this 1D vec, then store index ranges in two levels of
    /// indirection.
    ///
    /// Indexed by the indices in `branch_block_arg_range`.
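    ///
    /// As a small illustrative sketch (the block and vreg numbers are made
    /// up), for a block `b1` with two successors taking `[v1, v2]` and `[v3]`
    /// respectively, the three levels look like:
    ///
    /// ```text
    /// branch_block_args:           [v1, v2, v3, ...]
    /// branch_block_arg_range:      [(0, 2), (2, 3), ...]  // one entry per (block, successor)
    /// branch_block_arg_succ_range: [.., (k, k + 2), ..]   // b1's successors are entries k..k+2
    /// ```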
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Ranges,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Ranges,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Constants.
    pub(crate) constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    pub(crate) sigs: SigSet,

    /// Facts on VRegs, for proof-carrying code verification.
    facts: Vec<Option<Fact>>,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBufferFinalized<Stencil>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `debug_value_labels` is non-empty.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `debug_value_labels` is non-empty.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// Final length of function body.
    pub func_body_len: CodeOffset,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Offsets of sized stackslots.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,

    /// Offsets of dynamic stackslots.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,

    /// Stack frame size.
    pub frame_size: u32,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
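///
/// A hedged sketch of the expected call sequence during a backward build
/// (the method names are the ones defined below; the surrounding driver
/// code is simplified and hypothetical, not the actual lowering loop):
///
/// ```text
/// let mut builder = VCodeBuilder::new(sigs, abi, emit_info, block_order,
///                                     constants, VCodeBuildDirection::Backward);
/// for block in lowering_order.iter().rev() {
///     // Emit this block's instructions in reverse program order...
///     builder.push(inst, srcloc);
///     // ...record successors and block params, then close the block.
///     builder.end_bb();
/// }
/// builder.set_entry(entry_block);
/// let vcode = builder.build(vreg_allocator);
/// ```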
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
    /// this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
    ) -> Self {
        let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);

        VCodeBuilder {
            vcode,
            direction,
            debug_info: FxHashMap::default(),
        }
    }

    pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
        self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access to the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) {
        let end_idx = self.vcode.insts.len();
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push_end(end_idx);
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode.block_succ_range.push_end(succ_end);
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode.block_params_range.push_end(block_params_end);
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode
            .branch_block_arg_succ_range
            .push_end(branch_block_arg_succ_end);
    }

    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode.branch_block_arg_range.push_end(end);
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
        assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(loc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Add a debug value label to a register.
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // We'll fix up labels in reverse(). Because we're generating
        // code bottom-to-top, the liverange of the label goes *from*
        // the last index at which it was defined (or 0, which is the end
        // of the eventual function) *to* just this instruction, and
        // no further.
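        //
        // A worked example with hypothetical indices: if this label was last
        // mentioned at (backwards) instruction 7 and we are now at
        // (backwards) instruction 12, we record the range (7, 12); after
        // `reverse_and_finalize` translates the endpoints, this becomes a
        // forward-order range that ends exactly at this instruction.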
        let inst = InsnIndex::new(self.vcode.insts.len());
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, inst, reg.into()));
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

    fn compute_preds_from_succs(&mut self) {
        // Do a linear-time counting sort: first determine how many
        // times each block appears as a successor.
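        //
        // Worked example on a hypothetical 3-block CFG: if block_succs is
        // [b1, b2, b1] (for edges b0 -> b1, b0 -> b2, b2 -> b1), then after
        // the first loop `starts` is [0, 2, 1]; the prefix-sum pass below
        // turns it into starting offsets [0, 0, 2] and records
        // block_pred_range [(0, 0), (0, 2), (2, 3)]; the final fill pass
        // produces block_preds = [b0, b2, b0].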
        let mut starts = vec![0u32; self.vcode.num_blocks()];
        for succ in &self.vcode.block_succs {
            starts[succ.index()] += 1;
        }

        // Determine for each block the starting index where that
        // block's predecessors should go. This is equivalent to the
        // ranges we need to store in block_pred_range.
        self.vcode.block_pred_range.reserve(starts.len());
        let mut end = 0;
        for count in starts.iter_mut() {
            let start = end;
            end += *count;
            *count = start;
            self.vcode.block_pred_range.push_end(end as usize);
        }
        let end = end as usize;
        debug_assert_eq!(end, self.vcode.block_succs.len());

        // Walk over the successors again, this time grouped by
        // predecessor, and push the predecessor at the current
        // starting position of each of its successors. We build
        // each group of predecessors in whatever order Ranges::iter
        // returns them; regalloc2 doesn't care.
        self.vcode.block_preds.resize(end, BlockIndex::invalid());
        for (pred, range) in self.vcode.block_succ_range.iter() {
            let pred = BlockIndex::new(pred);
            for succ in &self.vcode.block_succs[range] {
                let pos = &mut starts[succ.index()];
                self.vcode.block_preds[*pos as usize] = pred;
                *pos += 1;
            }
        }
        debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
    }

    /// Called once, when a build in Backward order is complete, to
    /// perform the overall reversal (into final forward order) and
    /// finalize metadata accordingly.
    fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
        let n_insts = self.vcode.insts.len();
        if n_insts == 0 {
            return;
        }

        // Reverse the per-block and per-inst sequences.
        self.vcode.block_ranges.reverse_index();
        self.vcode.block_ranges.reverse_target(n_insts);
        // block_params_range is indexed by block (and blocks were
        // traversed in reverse) so we reverse it; but block-param
        // sequences in the concatenated vec can remain in reverse
        // order (it is effectively an arena of arbitrarily-placed
        // referenced sequences).
        self.vcode.block_params_range.reverse_index();
        // Likewise, we reverse block_succ_range, but the block_succ
        // concatenated array can remain as-is.
        self.vcode.block_succ_range.reverse_index();
        self.vcode.insts.reverse();
        self.vcode.srclocs.reverse();
        // Likewise, branch_block_arg_succ_range is indexed by block
        // so must be reversed.
        self.vcode.branch_block_arg_succ_range.reverse_index();

        // To translate an instruction index *endpoint* in reversed
        // order to forward order, compute `n_insts - i`.
        //
        // Why not `n_insts - 1 - i`? That would be correct to
        // translate an individual instruction index (for ten insts 0
        // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
        // 0). But for the usual inclusive-start, exclusive-end range
        // idiom, inclusive starts become exclusive ends and
        // vice-versa, so e.g. an (inclusive) start of 0 becomes an
        // (exclusive) end of 10.
        let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());

        // Generate debug-value labels based on per-label maps.
        for (label, tuples) in &self.debug_info {
            for &(start, end, vreg) in tuples {
                let vreg = vregs.resolve_vreg_alias(vreg);
                let fwd_start = translate(end);
                let fwd_end = translate(start);
                self.vcode
                    .debug_value_labels
                    .push((vreg, fwd_start, fwd_end, label.as_u32()));
            }
        }

        // Now sort debug value labels by VReg, as required
        // by regalloc2.
        self.vcode
            .debug_value_labels
            .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
    }

    fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
        let allocatable = PRegSet::from(self.vcode.machine_env());
        for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
            // Push operands from the instruction onto the operand list.
            //
            // We rename through the vreg alias table as we collect
            // the operands. This is better than a separate post-pass
            // over operands, because it has more cache locality:
            // operands only need to pass through L1 once. This is
            // also better than renaming instructions'
            // operands/registers while lowering, because here we only
            // need to do the `match` over the instruction to visit
            // its register fields (which is slow, branchy code) once.

            let mut op_collector =
                OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
                    vregs.resolve_vreg_alias(vreg)
                });
            insn.get_operands(&mut op_collector);
            let (ops, clobbers) = op_collector.finish();
            self.vcode.operand_ranges.push_end(ops);

            if clobbers != PRegSet::default() {
                self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
            }

            if let Some((dst, src)) = insn.is_move() {
                // We should never see non-virtual registers present in move
                // instructions.
                assert!(
                    src.is_virtual(),
                    "the real register {src:?} was used as the source of a move instruction"
                );
                assert!(
                    dst.to_reg().is_virtual(),
                    "the real register {:?} was used as the destination of a move instruction",
                    dst.to_reg()
                );
            }
        }

        // Translate blockparam args via the vreg aliases table as well.
        for arg in &mut self.vcode.branch_block_args {
            let new_arg = vregs.resolve_vreg_alias(*arg);
            trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
            *arg = new_arg;
        }
    }

    /// Build the final VCode.
    pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
        self.vcode.vreg_types = take(&mut vregs.vreg_types);
        self.vcode.facts = take(&mut vregs.facts);

        if self.direction == VCodeBuildDirection::Backward {
            self.reverse_and_finalize(&vregs);
        }
        self.collect_operands(&vregs);

        self.compute_preds_from_succs();
        self.vcode.debug_value_labels.sort_unstable();

        // At this point, nothing in the vcode should mention any
        // VReg which has been aliased. All the appropriate rewriting
        // should have happened above. Just to be sure, let's
        // double-check each field which has vregs.
        // Note: can't easily check vcode.insts, resolved in collect_operands.
        // Operands are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
        // Currently block params are never aliased to another vreg.
        vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
        // Branch block args are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
        // Debug value labels are resolved in reverse_and_finalize.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
        );
        // Facts are resolved eagerly during set_vreg_alias.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode
                .facts
                .iter()
                .zip(&vregs.vreg_types)
                .enumerate()
                .filter(|(_, (fact, _))| fact.is_some())
                .map(|(vreg, (_, &ty))| {
                    let (regclasses, _) = I::rc_for_type(ty).unwrap();
                    VReg::new(vreg, regclasses[0])
                }),
        );

        self.vcode
    }

    /// Add a user stack map for the associated instruction.
    pub fn add_user_stack_map(
        &mut self,
        inst: BackwardsInsnIndex,
        entries: &[ir::UserStackMapEntry],
    ) {
        let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
        let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
        debug_assert!(old_entry.is_none());
    }
}

const NO_INST_OFFSET: CodeOffset = u32::MAX;

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
    ) -> Self {
        let n_blocks = block_order.lowered_order().len();
        VCode {
            sigs,
            vreg_types: vec![],
            insts: Vec::with_capacity(10 * n_blocks),
            user_stack_maps: FxHashMap::default(),
            operands: Vec::with_capacity(30 * n_blocks),
            operand_ranges: Ranges::with_capacity(10 * n_blocks),
            clobbers: FxHashMap::default(),
            srclocs: Vec::with_capacity(10 * n_blocks),
            entry: BlockIndex::new(0),
            block_ranges: Ranges::with_capacity(n_blocks),
            block_succ_range: Ranges::with_capacity(n_blocks),
            block_succs: Vec::with_capacity(n_blocks),
            block_pred_range: Ranges::default(),
            block_preds: Vec::new(),
            block_params_range: Ranges::with_capacity(n_blocks),
            block_params: Vec::with_capacity(5 * n_blocks),
            branch_block_args: Vec::with_capacity(10 * n_blocks),
            branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
            branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
            block_order,
            abi,
            emit_info,
            constants,
            debug_value_labels: vec![],
            facts: vec![],
        }
    }

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    pub fn machine_env(&self) -> &MachineEnv {
        self.abi.machine_env(&self.sigs)
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// (self.num_blocks() - 1)`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// The number of lowered instructions.
    pub fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
        let mut clobbered = PRegSet::default();

        // All moves are included in clobbers.
        for (_, Edit::Move { to, .. }) in &regalloc.edits {
            if let Some(preg) = to.as_reg() {
                clobbered.add(preg);
            }
        }

        for (i, range) in self.operand_ranges.iter() {
            // Skip this instruction if not "included in clobbers" as
            // per the MachInst. (Some backends use this to implement
            // ABI specifics; e.g., excluding calls of the same ABI as
            // the current function from clobbers, because by
            // definition everything clobbered by the call can be
            // clobbered by this function without saving as well.)
            if !self.insts[i].is_included_in_clobbers() {
                continue;
            }

            let operands = &self.operands[range.clone()];
            let allocs = &regalloc.allocs[range];
            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
                if operand.kind() == OperandKind::Def {
                    if let Some(preg) = alloc.as_reg() {
                        clobbered.add(preg);
                    }
                }
            }

            // Also add explicitly-clobbered registers.
            if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
                clobbered.union_from(inst_clobbered);
            }
        }

        clobbered
            .into_iter()
            .map(|preg| Writable::from_reg(RealReg::from(preg)))
            .collect()
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up
    /// code and external reloc/trap/etc. records ready for use. Takes
    /// the regalloc results as well.
    ///
    /// Returns the machine code itself, and optionally metadata
    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
    /// is consumed by the emission process.
    pub fn emit(
        mut self,
        regalloc: &regalloc2::Output,
        want_disasm: bool,
        flags: &settings::Flags,
        ctrl_plane: &mut ControlPlane,
    ) -> EmitResult
    where
        I: VCodeInst,
    {
        // To write into disasm string.
        use core::fmt::Write;

        let _tt = timing::vcode_emit();
        let mut buffer = MachBuffer::new();
        let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

        // The first M MachLabels are reserved for block indices.
        buffer.reserve_labels_for_blocks(self.num_blocks());

        // Register all allocated constants with the `MachBuffer` to ensure that
        // any references to the constants during instructions can be handled
        // correctly.
        buffer.register_constants(&self.constants);

        // Construct the final order we emit code in: cold blocks at the end.
        let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
        let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            if self.block_order.is_cold(block) {
                cold_blocks.push(block);
            } else {
                final_order.push(block);
            }
        }
        final_order.extend(cold_blocks.clone());

        // Compute/save info we need for the prologue: clobbers and
        // number of spillslots.
        //
        // We clone `abi` here because we will mutate it as we
        // generate the prologue and set other info, but we can't
        // mutate `VCode`. The info it usually carries prior to
        // setting clobbers is fairly minimal so this should be
        // relatively cheap.
        let clobbers = self.compute_clobbers(regalloc);
        self.abi
            .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);

        // Emit blocks.
        let mut cur_srcloc = None;
        let mut last_offset = None;
        let mut inst_offsets = vec![];
        let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));

        let mut disasm = String::new();

        if !self.debug_value_labels.is_empty() {
            inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
        }

        // Count edits per block ahead of time; this is needed for
        // lookahead island emission. (We could derive it per-block
        // with binary search in the edit list, but it's more
        // efficient to do it in one pass here.)
        let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
        let mut edit_idx = 0;
        for block in 0..self.num_blocks() {
            let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
            let start_edit_idx = edit_idx;
            while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
                edit_idx += 1;
            }
            let end_edit_idx = edit_idx;
            ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
        }

        let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
        let mut bb_padding = match flags.bb_padding_log2_minus_one() {
            0 => Vec::new(),
            n => vec![0; 1 << (n - 1)],
        };
        let mut total_bb_padding = 0;

        for (block_order_idx, &block) in final_order.iter().enumerate() {
            trace!("emitting block {:?}", block);

            // Call the new block hook for state
            state.on_new_block();

            // Emit NOPs to align the block.
            let new_offset = I::align_basic_block(buffer.cur_offset());
            while new_offset > buffer.cur_offset() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
                nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
            }
            assert_eq!(buffer.cur_offset(), new_offset);

            let do_emit = |inst: &I,
                           disasm: &mut String,
                           buffer: &mut MachBuffer<I>,
                           state: &mut I::State| {
                if want_disasm && !inst.is_args() {
                    let mut s = state.clone();
                    writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();
                }
                inst.emit(buffer, &self.emit_info, state);
            };

            // Is this the first block? Emit the prologue directly if so.
            if block == self.entry {
                trace!(" -> entry block");
                buffer.start_srcloc(Default::default());
                for inst in &self.abi.gen_prologue() {
                    do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                }
                buffer.end_srcloc();
            }

            // Now emit the regular block body.

            buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());

            if want_disasm {
                writeln!(&mut disasm, "block{}:", block.index()).unwrap();
            }

            if flags.machine_code_cfg_info() {
                // Track BB starts. If we have backed up due to MachBuffer
                // branch opts, note that the removed blocks were removed.
                let cur_offset = buffer.cur_offset();
                if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
                    for i in (0..bb_starts.len()).rev() {
                        if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
                            break;
                        }
                        bb_starts[i] = None;
                    }
                }
                bb_starts.push(Some(cur_offset));
                last_offset = Some(cur_offset);
            }

            if let Some(block_start) = I::gen_block_start(
                self.block_order.is_indirect_branch_target(block),
                is_forward_edge_cfi_enabled,
            ) {
                do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
            }

            for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
                match inst_or_edit {
                    InstOrEdit::Inst(iix) => {
                        if !self.debug_value_labels.is_empty() {
                            // If we need to produce debug info,
                            // record the offset of each instruction
                            // so that we can translate value-label
                            // ranges to machine-code offsets.

                            // Cold blocks violate monotonicity
                            // assumptions elsewhere (that
                            // instructions in inst-index order are in
                            // order in machine code), so we omit
                            // their offsets here. Value-label range
                            // generation below will skip empty ranges
                            // and ranges with to-offsets of zero.
                            if !self.block_order.is_cold(block) {
                                inst_offsets[iix.index()] = buffer.cur_offset();
                            }
                        }

                        // Update the srcloc at this point in the buffer.
                        let srcloc = self.srclocs[iix.index()];
                        if cur_srcloc != Some(srcloc) {
                            if cur_srcloc.is_some() {
                                buffer.end_srcloc();
                            }
                            buffer.start_srcloc(srcloc);
                            cur_srcloc = Some(srcloc);
                        }

                        // If this is a safepoint, compute a stack map
                        // and pass it to the emit state.
                        let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
                            let (user_stack_map, user_stack_map_disasm) = {
                                // The `user_stack_maps` is keyed by reverse
                                // instruction index, so we must flip the
                                // index. We can't put this into a helper method
                                // due to borrowck issues because parts of
                                // `self` are borrowed mutably elsewhere in this
                                // function.
                                let index = iix.to_backwards_insn_index(self.num_insts());
                                let user_stack_map = self.user_stack_maps.remove(&index);
                                let user_stack_map_disasm =
                                    user_stack_map.as_ref().map(|m| format!(" ; {m:?}"));
                                (user_stack_map, user_stack_map_disasm)
                            };

                            state.pre_safepoint(user_stack_map);

                            user_stack_map_disasm
                        } else {
                            None
                        };

                        // If the instruction we are about to emit is
                        // a return, place an epilogue at this point
                        // (and don't emit the return; the actual
                        // epilogue will contain it).
                        if self.insts[iix.index()].is_term() == MachTerminator::Ret {
                            for inst in self.abi.gen_epilogue() {
                                do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                            }
                        } else {
                            // Update the operands for this inst using the
                            // allocations from the regalloc result.
                            let mut allocs = regalloc.inst_allocs(iix).iter();
                            self.insts[iix.index()].get_operands(
                                &mut |reg: &mut Reg, constraint, _kind, _pos| {
                                    let alloc = allocs
                                        .next()
                                        .expect("enough allocations for all operands")
                                        .as_reg()
                                        .expect("only register allocations, not stack allocations")
                                        .into();

                                    if let OperandConstraint::FixedReg(rreg) = constraint {
                                        debug_assert_eq!(Reg::from(rreg), alloc);
                                    }
                                    *reg = alloc;
                                },
                            );
                            debug_assert!(allocs.next().is_none());

                            // Emit the instruction!
                            do_emit(
                                &self.insts[iix.index()],
                                &mut disasm,
                                &mut buffer,
                                &mut state,
                            );
                            if let Some(stack_map_disasm) = stack_map_disasm {
                                disasm.push_str(&stack_map_disasm);
                                disasm.push('\n');
                            }
                        }
                    }

                    InstOrEdit::Edit(Edit::Move { from, to }) => {
                        // Create a move/spill/reload instruction and
                        // immediately emit it.
                        match (from.as_reg(), to.as_reg()) {
                            (Some(from), Some(to)) => {
                                // Reg-to-reg move.
                                let from_rreg = Reg::from(from);
                                let to_rreg = Writable::from_reg(Reg::from(to));
                                debug_assert_eq!(from.class(), to.class());
                                let ty = I::canonical_type_for_rc(from.class());
                                let mv = I::gen_move(to_rreg, from_rreg, ty);
                                do_emit(&mv, &mut disasm, &mut buffer, &mut state);
                            }
                            (Some(from), None) => {
                                // Spill from register to spillslot.
                                let to = to.as_stack().unwrap();
                                let from_rreg = RealReg::from(from);
                                let spill = self.abi.gen_spill(to, from_rreg);
                                do_emit(&spill, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, Some(to)) => {
                                // Load from spillslot to register.
                                let from = from.as_stack().unwrap();
                                let to_rreg = Writable::from_reg(RealReg::from(to));
                                let reload = self.abi.gen_reload(to_rreg, from);
                                do_emit(&reload, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, None) => {
                                panic!("regalloc2 should have eliminated stack-to-stack moves!");
                            }
                        }
                    }
                }
            }

            if cur_srcloc.is_some() {
                buffer.end_srcloc();
                cur_srcloc = None;
            }

            // Do we need an island? Get the worst-case size of the next BB, add
            // it to the optional padding behind the block, and pass this to the
            // `MachBuffer` to determine if an island is necessary.
            let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
                let next_block = final_order[block_order_idx + 1];
                let next_block_range = self.block_ranges.get(next_block.index());
                let next_block_size = next_block_range.len() as u32;
                let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
                I::worst_case_size() * (next_block_size + next_block_ra_insertions)
            } else {
                0
            };
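            // For a rough sense of scale (the numbers here are hypothetical):
            // if the next block has 12 instructions and 3 regalloc edits, and
            // `I::worst_case_size()` is 8 bytes, the worst case is
            // 8 * (12 + 3) = 120 bytes; an island is emitted now if any
            // pending fixup could go out of range within that distance (plus
            // the optional padding computed just below).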
            let padding = if bb_padding.is_empty() {
                0
            } else {
                bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
            };
            if buffer.island_needed(padding + worst_case_next_bb) {
                buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
            }

            // Insert padding, if configured, to stress the `MachBuffer`'s
            // relocation and island calculations.
            //
            // Padding can get quite large during fuzzing though so place a
            // total cap on it where when a per-function threshold is exceeded
            // the padding is turned back down to zero. This avoids a small-ish
            // test case generating a GB+ memory footprint in Cranelift for
            // example.
            if !bb_padding.is_empty() {
                buffer.put_data(&bb_padding);
                buffer.align_to(I::LabelUse::ALIGN);
                total_bb_padding += bb_padding.len();
                if total_bb_padding > (150 << 20) {
                    bb_padding = Vec::new();
                }
            }
        }

        debug_assert!(
            self.user_stack_maps.is_empty(),
            "any stack maps should have been consumed by instruction emission, still have: {:#?}",
            self.user_stack_maps,
        );

        // Do any optimizations on branches at tail of buffer, as if we had
        // bound one last label.
        buffer.optimize_branches(ctrl_plane);

        // emission state is not needed anymore, move control plane back out
        *ctrl_plane = state.take_ctrl_plane();

        let func_body_len = buffer.cur_offset();

        // Create `bb_edges` and final (filtered) `bb_starts`.
        let mut bb_edges = vec![];
        let mut bb_offsets = vec![];
        if flags.machine_code_cfg_info() {
            for block in 0..self.num_blocks() {
                if bb_starts[block].is_none() {
                    // Block was deleted by MachBuffer; skip.
                    continue;
                }
                let from = bb_starts[block].unwrap();

                bb_offsets.push(from);
                // Resolve each `succ` label and add edges.
                let succs = self.block_succs(BlockIndex::new(block));
                for &succ in succs.iter() {
                    let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
                    bb_edges.push((from, to));
                }
            }
        }

        self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
        let value_labels_ranges =
            self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
        let frame_size = self.abi.frame_size();

        EmitResult {
            buffer: buffer.finish(&self.constants, ctrl_plane),
            bb_offsets,
            bb_edges,
            func_body_len,
            disasm: if want_disasm { Some(disasm) } else { None },
            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
            value_labels_ranges,
            frame_size,
        }
    }

    fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
        if self.debug_value_labels.is_empty() {
            return;
        }

        // During emission, branch removal can make offsets of instructions incorrect.
        // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
        // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
        // When the jumps get removed we are left with (in "inst_offsets"):
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [38]  [42]  [34]
        // Which violates the monotonicity invariant. This method sets offsets of these
        // removed instructions such as to make them appear zero-sized:
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [34]  [34]  [34]
        //
        let mut next_offset = func_body_len;
        for inst_index in (0..(inst_offsets.len() - 1)).rev() {
            let inst_offset = inst_offsets[inst_index];

            // Not all instructions get their offsets recorded.
            if inst_offset == NO_INST_OFFSET {
                continue;
            }

            if inst_offset > next_offset {
                trace!(
                    "Fixing code offset of the removed Inst {}: {} -> {}",
                    inst_index,
                    inst_offset,
                    next_offset
                );
                inst_offsets[inst_index] = next_offset;
                continue;
            }

            next_offset = inst_offset;
        }
    }

    fn compute_value_labels_ranges(
        &self,
        regalloc: &regalloc2::Output,
        inst_offsets: &[CodeOffset],
        func_body_len: u32,
    ) -> ValueLabelsRanges {
        if self.debug_value_labels.is_empty() {
            return ValueLabelsRanges::default();
        }

        let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
        for &(label, from, to, alloc) in &regalloc.debug_locations {
            let ranges = value_labels_ranges
                .entry(ValueLabel::from_u32(label))
                .or_insert_with(|| vec![]);
            let from_offset = inst_offsets[from.inst().index()];
            let to_offset = if to.inst().index() == inst_offsets.len() {
                func_body_len
            } else {
                inst_offsets[to.inst().index()]
            };

            // Empty ranges or unavailable offsets can happen
            // due to cold blocks and branch removal (see above).
            if from_offset == NO_INST_OFFSET
                || to_offset == NO_INST_OFFSET
                || from_offset == to_offset
            {
                continue;
            }

            let loc = if let Some(preg) = alloc.as_reg() {
                LabelValueLoc::Reg(Reg::from(preg))
            } else {
                let slot = alloc.as_stack().unwrap();
                let slot_offset = self.abi.get_spillslot_offset(slot);
                let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
                let caller_sp_to_cfa_offset =
                    crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
                // NOTE: this is a negative offset because it's relative to the caller's SP
                let cfa_to_sp_offset =
                    -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
                LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
            };
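            // Worked example with hypothetical numbers: if the slot area
            // starts 32 bytes below the caller's SP and the CFA sits 16
            // bytes above the caller's SP, then cfa_to_sp_offset is
            // -(32 + 16) = -48; a slot at offset 8 within the slot area is
            // therefore described as CFA - 40.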

            // ValueLocRanges are recorded by *instruction-end
            // offset*. `from_offset` is the *start* of the
            // instruction; that is the same as the end of another
            // instruction, so we only want to begin coverage once
            // we are past the previous instruction's end.
            let start = from_offset + 1;

            // Likewise, `end` is exclusive, but we want to
            // *include* the end of the last
            // instruction. `to_offset` is the start of the
            // `to`-instruction, which is the exclusive end, i.e.,
            // the first instruction not covered. That
            // instruction's start is the same as the end of the
            // last instruction that is included, so we go one
            // byte further to be sure to include it.
            let end = to_offset + 1;

            // Coalesce adjacent ranges that are for the same location
            // to minimize output size here and for the consumers.
            if let Some(last_loc_range) = ranges.last_mut() {
                if last_loc_range.loc == loc && last_loc_range.end == start {
                    trace!(
                        "Extending debug range for VL{} in {:?} to {}",
                        label,
                        loc,
                        end
                    );
                    last_loc_range.end = end;
                    continue;
                }
            }

            trace!(
                "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
                label,
                loc,
                from.inst().index(),
                to.inst().index(),
                start,
                end
            );

            ranges.push(ValueLocRange { loc, start, end });
        }

        value_labels_ranges
    }

    /// Get the IR block for a BlockIndex, if one exists.
    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
        self.block_order.lowered_order()[block.index()].orig_block()
    }

    /// Get the type of a VReg.
    pub fn vreg_type(&self, vreg: VReg) -> Type {
        self.vreg_types[vreg.vreg()]
    }

    /// Get the fact, if any, for a given VReg.
    pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
        self.facts[vreg.vreg()].as_ref()
    }

    /// Set the fact for a given VReg.
    pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
        trace!("set fact on {}: {:?}", vreg, fact);
        self.facts[vreg.vreg()] = Some(fact);
    }

    /// Does a given instruction define any facts?
    pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
        self.inst_operands(inst)
            .iter()
            .filter(|o| o.kind() == OperandKind::Def)
            .map(|o| o.vreg())
            .any(|vreg| self.facts[vreg.vreg()].is_some())
    }

    /// Get the user stack map associated with the given forward instruction index.
    pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
        let index = inst.to_backwards_insn_index(self.num_insts());
        self.user_stack_maps.get(&index)
    }
}

impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
    type Output = I;
    fn index(&self, idx: InsnIndex) -> &Self::Output {
        &self.insts[idx.index()]
    }
}

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    fn entry_block(&self) -> BlockIndex {
        self.entry
    }

    fn block_insns(&self, block: BlockIndex) -> InstRange {
        let range = self.block_ranges.get(block.index());
        InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
    }

    fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_succ_range.get(block.index());
        &self.block_succs[range]
    }

    fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_pred_range.get(block.index());
        &self.block_preds[range]
    }

    fn block_params(&self, block: BlockIndex) -> &[VReg] {
        // As a special case we don't return block params for the entry block, as all the arguments
        // will be defined by the `Inst::Args` instruction.
        if block == self.entry {
            return &[];
        }

        let range = self.block_params_range.get(block.index());
        &self.block_params[range]
    }

    fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
        let succ_range = self.branch_block_arg_succ_range.get(block.index());
        debug_assert!(succ_idx < succ_range.len());
        let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
        &self.branch_block_args[branch_block_args]
    }

    fn is_ret(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            // We treat blocks terminated by an unconditional trap like a return for regalloc.
            MachTerminator::None => self.insts[insn.index()].is_trap(),
            MachTerminator::Ret | MachTerminator::RetCall => true,
            MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
        }
    }

    fn is_branch(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
            _ => false,
        }
    }

    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
        let range = self.operand_ranges.get(insn.index());
        &self.operands[range]
    }

    fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
        self.clobbers.get(&insn).cloned().unwrap_or_default()
    }

    fn num_vregs(&self) -> usize {
        self.vreg_types.len()
    }

    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
        &self.debug_value_labels
    }

    fn spillslot_size(&self, regclass: RegClass) -> usize {
        self.abi.get_spillslot_size(regclass) as usize
    }

    fn allow_multiple_vreg_defs(&self) -> bool {
        // At least the s390x backend requires this, because the
        // `Loop` pseudo-instruction aggregates all Operands so pinned
        // vregs (RealRegs) may occur more than once.
        true
    }
}

impl<I: VCodeInst> Debug for VRegAllocator<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VRegAllocator {{")?;

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for (vreg, fact) in self.facts.iter().enumerate() {
            if let Some(fact) = fact {
                writeln!(f, " v{vreg} ! {fact}")?;
            }
        }

        writeln!(f, "}}")
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, " Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(
                f,
                "Block {}({:?}):",
                block.index(),
                self.block_params(block)
            )?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, " (original IR block: {bb})")?;
            }
            for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
                writeln!(
                    f,
                    " (successor: Block {}({:?}))",
                    succ.index(),
                    self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
                )?;
            }
            for inst in self.block_ranges.get(block.index()) {
                writeln!(
                    f,
                    " Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&mut state)
                )?;
                if !self.operands.is_empty() {
                    for operand in self.inst_operands(InsnIndex::new(inst)) {
                        if operand.kind() == OperandKind::Def {
                            if let Some(fact) = &self.facts[operand.vreg().vreg()] {
                                writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;
                            }
                        }
                    }
                }
                if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
                    writeln!(f, " {user_stack_map:?}")?;
                }
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }

    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }

    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }

    /// Rewrite any mention of `from` into `to`.
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
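        //
        // Illustrative chain (the vreg numbers are made up): if v10 -> v11
        // is recorded and later v11 -> v12, then resolving v10 follows
        // v10 -> v11 -> v12 and returns v12. A request to alias v12 back to
        // v10 would first resolve v10 to v12 and then hit the cycle assert
        // in `set_vreg_alias`.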
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
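///
/// A small usage sketch of the deduplication behavior (illustrative only;
/// the byte values are arbitrary and the `ConstantData` conversion from a
/// `Vec<u8>` is assumed):
///
/// ```text
/// let mut constants = VCodeConstants::with_capacity(2);
/// let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// assert_eq!(a, b); // U64 constants are deduplicated.
/// let _c = constants.insert(VCodeConstantData::Generated(vec![0u8; 16].into()));
/// assert_eq!(constants.len(), 2); // `Generated` data is never deduplicated.
/// ```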
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
    /// [VCodeConstantData::Generated].
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction context
    /// which makes it difficult to de-duplicate--if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
        assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            3 * size_of::<usize>()
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}