cranelift_codegen/isa/aarch64/inst/emit.rs

//! AArch64 ISA: binary code emission.

use cranelift_control::ControlPlane;

use crate::ir::{self, types::*};
use crate::isa::aarch64::inst::*;
use crate::trace;

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
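///
/// As an illustrative sketch (the exact lowering depends on the offset and
/// access type): an `FPOffset` whose offset fits in a signed 9-bit immediate
/// becomes a single `Unscaled` amode with no helper instructions; an offset
/// that fits a scaled unsigned 12-bit immediate becomes `UnsignedOffset`;
/// anything larger is loaded into the spill temporary register and addressed
/// via `RegExtended`.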
pub fn mem_finalize(
    sink: Option<&mut MachBuffer<Inst>>,
    mem: &AMode,
    access_ty: Type,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset { off, .. }
        | &AMode::SPOffset { off }
        | &AMode::FPOffset { off }
        | &AMode::IncomingArg { off }
        | &AMode::SlotOffset { off } => {
            let basereg = match mem {
                &AMode::RegOffset { rn, .. } => rn,
                &AMode::SPOffset { .. }
                | &AMode::SlotOffset { .. }
                | &AMode::IncomingArg { .. } => stack_reg(),
                &AMode::FPOffset { .. } => fp_reg(),
                _ => unreachable!(),
            };
            let off = match mem {
                &AMode::IncomingArg { .. } => {
                    let frame_layout = state.frame_layout();
                    i64::from(
                        frame_layout.setup_area_size
                            + frame_layout.tail_args_size
                            + frame_layout.clobber_size
                            + frame_layout.fixed_frame_storage_size
                            + frame_layout.outgoing_args_size,
                    ) - off
                }
                &AMode::SlotOffset { .. } => {
                    let adj = i64::from(state.frame_layout().outgoing_args_size);
                    trace!(
                        "mem_finalize: slot offset {} + adj {} -> {}",
                        off,
                        adj,
                        off + adj
                    );
                    off + adj
                }
                _ => off,
            };

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled { rn: basereg, simm9 };
                (smallvec![], mem)
            } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
                let mem = AMode::UnsignedOffset {
                    rn: basereg,
                    uimm12,
                };
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                (
                    Inst::load_constant(tmp, off as u64, &mut |_| tmp),
                    AMode::RegExtended {
                        rn: basereg,
                        rm: tmp.to_reg(),
                        extendop: ExtendOp::SXTX,
                    },
                )
            }
        }

        AMode::Const { addr } => {
            let sink = match sink {
                Some(sink) => sink,
                None => return (smallvec![], mem.clone()),
            };
            let label = sink.get_label_for_constant(*addr);
            let label = MemLabel::Mach(label);
            (smallvec![], AMode::Label { label })
        }

        _ => (smallvec![], mem.clone()),
    }
}

//=============================================================================
// Instructions and subcomponents: emission

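/// Extract the 5-bit hardware encoding of an integer register.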
pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Int);
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Float);
    u32::from(m.to_real_reg().unwrap().hw_enc())
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

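/// Encode a three-register ALU instruction: `bits_31_21` lands in bits 31..21
/// and `bits_15_10` in bits 15..10, with rm, rn, and rd packed into bits
/// 20..16, 9..5, and 4..0 respectively.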
pub(crate) fn enc_arith_rrr(
    bits_31_21: u32,
    bits_15_10: u32,
    rd: Writable<Reg>,
    rn: Reg,
    rm: Reg,
) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

/// Set the size bit of an instruction.
fn enc_op_size(op: u32, size: OperandSize) -> u32 {
    (op & !(1 << 31)) | (size.sf_bit() << 31)
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
            size,
        ),
        CondBrKind::NotZero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
            size,
        ),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

fn enc_test_bit_and_branch(
    kind: TestBitAndBranchKind,
    taken: BranchTarget,
    reg: Reg,
    bit: u8,
) -> u32 {
    assert!(bit < 64);
    let op_31 = u32::from(bit >> 5);
    let op_23_19 = u32::from(bit & 0b11111);
    let op_30_24 = 0b0110110
        | match kind {
            TestBitAndBranchKind::Z => 0,
            TestBitAndBranchKind::NZ => 1,
        };
    (op_31 << 31)
        | (op_30_24 << 24)
        | (op_23_19 << 19)
        | (taken.as_offset14_or_zero() << 5)
        | machreg_to_gpr(reg)
}

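/// Encode a MOVN/MOVZ move-wide immediate: `op` selects the variant in bits
/// 30..29, the 2-bit shift count (in units of 16 bits) goes in bits 22..21,
/// and the 16-bit immediate chunk in bits 20..5.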
fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    assert!(imm.shift <= 0b11);
    let op = match op {
        MoveWideOp::MovN => 0b00,
        MoveWideOp::MovZ => 0b10,
    };
    0x12800000
        | size.sf_bit() << 31
        | op << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    assert!(imm.shift <= 0b11);
    0x72800000
        | size.sf_bit() << 31
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

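/// Encode a load/store-pair instruction: the scaled signed 7-bit offset goes
/// in bits 21..15, with rt2, rn, and rt in bits 14..10, 9..5, and 4..0.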
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b00010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b10010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

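/// Encode a conditional-select instruction; `op` and `o2` together pick the
/// variant (CSEL, CSINC, CSINV, or CSNEG), with the condition in bits 15..12.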
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
    debug_assert_eq!(op & 0b1, op);
    debug_assert_eq!(o2 & 0b1, o2);
    0b100_11010100_00000_0000_00_00000_00000
        | (op << 30)
        | (machreg_to_gpr(rm) << 16)
        | (cond.bits() << 12)
        | (o2 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_00_00000_0_0000
        | size.sf_bit() << 31
        | machreg_to_gpr(rm) << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

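/// Encode a bitfield-move instruction (SBFM/BFM/UBFM, selected by `opc`),
/// with `immr` in bits 21..16 and `imms` in bits 15..10.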
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

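/// Encoding of `DMB ISH` (data memory barrier, inner shareable domain).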
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

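/// Encode an acquire-release atomic read-modify-write from the LSE extension
/// (the `LDADDAL`/`SWPAL` family): the operand size goes in bits 31..30 and
/// the operation is selected by `bit15` and `op`.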
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
    assert!(machreg_to_gpr(rt.to_reg()) != 31);
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    let bit15 = match op {
        AtomicRMWOp::Swp => 0b1,
        _ => 0b0,
    };
    let op = match op {
        AtomicRMWOp::Add => 0b000,
        AtomicRMWOp::Clr => 0b001,
        AtomicRMWOp::Eor => 0b010,
        AtomicRMWOp::Set => 0b011,
        AtomicRMWOp::Smax => 0b100,
        AtomicRMWOp::Smin => 0b101,
        AtomicRMWOp::Umax => 0b110,
        AtomicRMWOp::Umin => 0b111,
        AtomicRMWOp::Swp => 0b000,
    };
    0b00_111_000_111_00000_0_000_00_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs) << 16)
        | bit15 << 15
        | (op << 12)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    frame_layout: FrameLayout,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.user_stack_map = None;
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR {
                alu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                debug_assert!(match alu_op {
                    ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
                    _ => true,
                });
                let top11 = match alu_op {
                    ALUOp::Add => 0b00001011_000,
                    ALUOp::Adc => 0b00011010_000,
                    ALUOp::AdcS => 0b00111010_000,
                    ALUOp::Sub => 0b01001011_000,
                    ALUOp::Sbc => 0b01011010_000,
                    ALUOp::SbcS => 0b01111010_000,
                    ALUOp::Orr => 0b00101010_000,
                    ALUOp::And => 0b00001010_000,
                    ALUOp::AndS => 0b01101010_000,
                    ALUOp::Eor => 0b01001010_000,
                    ALUOp::OrrNot => 0b00101010_001,
                    ALUOp::AndNot => 0b00001010_001,
                    ALUOp::EorNot => 0b01001010_001,
                    ALUOp::AddS => 0b00101011_000,
                    ALUOp::SubS => 0b01101011_000,
                    ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
                    ALUOp::RotR | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };

                let top11 = top11 | size.sf_bit() << 10;
                let bit15_10 = match alu_op {
                    ALUOp::SDiv => 0b000011,
                    ALUOp::UDiv => 0b000010,
                    ALUOp::RotR => 0b001011,
                    ALUOp::Lsr => 0b001001,
                    ALUOp::Asr => 0b001010,
                    ALUOp::Lsl => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                size,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd => (0b0_00_11011_000, 0),
                    ALUOp3::MSub => (0b0_00_11011_000, 1),
                    ALUOp3::UMAddL => {
                        debug_assert!(size == OperandSize::Size32);
                        (0b1_00_11011_1_01, 0)
                    }
                    ALUOp3::SMAddL => {
                        debug_assert!(size == OperandSize::Size32);
                        (0b1_00_11011_0_01, 0)
                    }
                };
                let top11 = top11 | size.sf_bit() << 10;
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                size,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add => 0b000_10001,
                    ALUOp::Sub => 0b010_10001,
                    ALUOp::AddS => 0b001_10001,
                    ALUOp::SubS => 0b011_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top8 = top8 | size.sf_bit() << 7;
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                size,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr => (0b001_100100, false),
                    ALUOp::And => (0b000_100100, false),
                    ALUOp::AndS => (0b011_100100, false),
                    ALUOp::Eor => (0b010_100100, false),
                    ALUOp::OrrNot => (0b001_100100, true),
                    ALUOp::AndNot => (0b000_100100, true),
                    ALUOp::EorNot => (0b010_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top9 = top9 | size.sf_bit() << 8;
                let imml = if inv { imml.invert() } else { *imml };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                size,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
                let (top10, immr, imms) = match alu_op {
                    ALUOp::RotR => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Lsl => {
                        let bits = if size.is64() { 64 } else { 32 };
                        (
                            0b0101001100,
                            u32::from((bits - amt) % bits),
                            u32::from(bits - 1 - amt),
                        )
                    }
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
                let imms = match alu_op {
                    ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
                    _ => imms,
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b000_01011000,
                    ALUOp::AddS => 0b001_01011000,
                    ALUOp::Sub => 0b010_01011000,
                    ALUOp::SubS => 0b011_01011000,
                    ALUOp::Orr => 0b001_01010000,
                    ALUOp::And => 0b000_01010000,
                    ALUOp::AndS => 0b011_01010000,
                    ALUOp::Eor => 0b010_01010000,
                    ALUOp::OrrNot => 0b001_01010001,
                    ALUOp::EorNot => 0b010_01010001,
                    ALUOp::AndNot => 0b000_01010001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | size.sf_bit() << 10;
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                size,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b00001011001,
                    ALUOp::Sub => 0b01001011001,
                    ALUOp::AddS => 0b00101011001,
                    ALUOp::SubS => 0b01101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | size.sf_bit() << 10;
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR {
                op, size, rd, rn, ..
            } => {
                let (op1, op2) = match op {
                    BitOp::RBit => (0b00000, 0b000000),
                    BitOp::Clz => (0b00000, 0b000100),
                    BitOp::Cls => (0b00000, 0b000101),
                    BitOp::Rev16 => (0b00000, 0b000001),
                    BitOp::Rev32 => (0b00000, 0b000010),
                    BitOp::Rev64 => (0b00000, 0b000011),
                };
                sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad16 { rd, ref mem, flags }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let op = match self {
                    Inst::ULoad8 { .. } => 0b0011100001,
                    Inst::SLoad8 { .. } => 0b0011100010,
                    Inst::ULoad16 { .. } => 0b0111100001,
                    Inst::SLoad16 { .. } => 0b0111100010,
                    Inst::ULoad32 { .. } => 0b1011100001,
                    Inst::SLoad32 { .. } => 0b1011100010,
                    Inst::ULoad64 { .. } => 0b1111100001,
                    Inst::FpuLoad16 { .. } => 0b0111110001,
                    Inst::FpuLoad32 { .. } => 0b1011110001,
                    Inst::FpuLoad64 { .. } => 0b1111110001,
                    Inst::FpuLoad128 { .. } => 0b0011110011,
                    _ => unreachable!(),
                };
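                // For example, `ULoad8` with an `Unscaled` amode emits this
                // base opcode directly (an LDURB), while the `UnsignedOffset`
                // arm below ORs in bit 24 via `enc_ldst_uimm12`, yielding the
                // scaled-immediate LDRB form.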
992
993                if let Some(trap_code) = flags.trap_code() {
994                    // Register the offset at which the actual load instruction starts.
995                    sink.add_trap(trap_code);
996                }
997
998                match &mem {
999                    &AMode::Unscaled { rn, simm9 } => {
1000                        let reg = rn;
1001                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1002                    }
1003                    &AMode::UnsignedOffset { rn, uimm12 } => {
1004                        let reg = rn;
1005                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1006                    }
1007                    &AMode::RegReg { rn, rm } => {
1008                        let r1 = rn;
1009                        let r2 = rm;
1010                        sink.put4(enc_ldst_reg(
1011                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1012                        ));
1013                    }
1014                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1015                        let r1 = rn;
1016                        let r2 = rm;
1017                        let extendop = match &mem {
1018                            &AMode::RegScaled { .. } => None,
1019                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1020                            _ => unreachable!(),
1021                        };
1022                        sink.put4(enc_ldst_reg(
1023                            op, r1, r2, /* scaled = */ true, extendop, rd,
1024                        ));
1025                    }
1026                    &AMode::RegExtended { rn, rm, extendop } => {
1027                        let r1 = rn;
1028                        let r2 = rm;
1029                        sink.put4(enc_ldst_reg(
1030                            op,
1031                            r1,
1032                            r2,
1033                            /* scaled = */ false,
1034                            Some(extendop),
1035                            rd,
1036                        ));
1037                    }
1038                    &AMode::Label { ref label } => {
1039                        let offset = match label {
1040                            // cast i32 to u32 (two's-complement)
1041                            MemLabel::PCRel(off) => *off as u32,
1042                            // Emit a relocation into the `MachBuffer`
1043                            // for the label that's being loaded from and
1044                            // encode an address of 0 in its place which will
1045                            // get filled in by relocation resolution later on.
1046                            MemLabel::Mach(label) => {
1047                                sink.use_label_at_offset(
1048                                    sink.cur_offset(),
1049                                    *label,
1050                                    LabelUse::Ldr19,
1051                                );
1052                                0
1053                            }
1054                        } / 4;
1055                        assert!(offset < (1 << 19));
1056                        match self {
1057                            &Inst::ULoad32 { .. } => {
1058                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
1059                            }
1060                            &Inst::SLoad32 { .. } => {
1061                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
1062                            }
1063                            &Inst::FpuLoad32 { .. } => {
1064                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
1065                            }
1066                            &Inst::ULoad64 { .. } => {
1067                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
1068                            }
1069                            &Inst::FpuLoad64 { .. } => {
1070                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
1071                            }
1072                            &Inst::FpuLoad128 { .. } => {
1073                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
1074                            }
1075                            _ => panic!("Unsupported size for LDR from constant pool!"),
1076                        }
1077                    }
1078                    &AMode::SPPreIndexed { simm9 } => {
1079                        let reg = stack_reg();
1080                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1081                    }
1082                    &AMode::SPPostIndexed { simm9 } => {
1083                        let reg = stack_reg();
1084                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1085                    }
1086                    // Eliminated by `mem_finalize()` above.
1087                    &AMode::SPOffset { .. }
1088                    | &AMode::FPOffset { .. }
1089                    | &AMode::IncomingArg { .. }
1090                    | &AMode::SlotOffset { .. }
1091                    | &AMode::Const { .. }
1092                    | &AMode::RegOffset { .. } => {
1093                        panic!("Should not see {mem:?} here!")
1094                    }
1095                }
1096            }
1097
1098            &Inst::Store8 { rd, ref mem, flags }
1099            | &Inst::Store16 { rd, ref mem, flags }
1100            | &Inst::Store32 { rd, ref mem, flags }
1101            | &Inst::Store64 { rd, ref mem, flags }
1102            | &Inst::FpuStore16 { rd, ref mem, flags }
1103            | &Inst::FpuStore32 { rd, ref mem, flags }
1104            | &Inst::FpuStore64 { rd, ref mem, flags }
1105            | &Inst::FpuStore128 { rd, ref mem, flags } => {
1106                let mem = mem.clone();
1107                let access_ty = self.mem_type().unwrap();
1108                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
1109
1110                for inst in mem_insts.into_iter() {
1111                    inst.emit(sink, emit_info, state);
1112                }
1113
1114                let op = match self {
1115                    Inst::Store8 { .. } => 0b0011100000,
1116                    Inst::Store16 { .. } => 0b0111100000,
1117                    Inst::Store32 { .. } => 0b1011100000,
1118                    Inst::Store64 { .. } => 0b1111100000,
1119                    Inst::FpuStore16 { .. } => 0b0111110000,
1120                    Inst::FpuStore32 { .. } => 0b1011110000,
1121                    Inst::FpuStore64 { .. } => 0b1111110000,
1122                    Inst::FpuStore128 { .. } => 0b0011110010,
1123                    _ => unreachable!(),
1124                };
1125
1126                if let Some(trap_code) = flags.trap_code() {
1127                    // Register the offset at which the actual store instruction starts.
1128                    sink.add_trap(trap_code);
1129                }
1130
1131                match &mem {
1132                    &AMode::Unscaled { rn, simm9 } => {
1133                        let reg = rn;
1134                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1135                    }
1136                    &AMode::UnsignedOffset { rn, uimm12 } => {
1137                        let reg = rn;
1138                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1139                    }
1140                    &AMode::RegReg { rn, rm } => {
1141                        let r1 = rn;
1142                        let r2 = rm;
1143                        sink.put4(enc_ldst_reg(
1144                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1145                        ));
1146                    }
1147                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1148                        let r1 = rn;
1149                        let r2 = rm;
1150                        let extendop = match &mem {
1151                            &AMode::RegScaled { .. } => None,
1152                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1153                            _ => unreachable!(),
1154                        };
1155                        sink.put4(enc_ldst_reg(
1156                            op, r1, r2, /* scaled = */ true, extendop, rd,
1157                        ));
1158                    }
1159                    &AMode::RegExtended { rn, rm, extendop } => {
1160                        let r1 = rn;
1161                        let r2 = rm;
1162                        sink.put4(enc_ldst_reg(
1163                            op,
1164                            r1,
1165                            r2,
1166                            /* scaled = */ false,
1167                            Some(extendop),
1168                            rd,
1169                        ));
1170                    }
1171                    &AMode::Label { .. } => {
1172                        panic!("Store to a MemLabel not implemented!");
1173                    }
1174                    &AMode::SPPreIndexed { simm9 } => {
1175                        let reg = stack_reg();
1176                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1177                    }
1178                    &AMode::SPPostIndexed { simm9 } => {
1179                        let reg = stack_reg();
1180                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1181                    }
1182                    // Eliminated by `mem_finalize()` above.
1183                    &AMode::SPOffset { .. }
1184                    | &AMode::FPOffset { .. }
1185                    | &AMode::IncomingArg { .. }
1186                    | &AMode::SlotOffset { .. }
1187                    | &AMode::Const { .. }
1188                    | &AMode::RegOffset { .. } => {
1189                        panic!("Should not see {mem:?} here!")
1190                    }
1191                }
1192            }
1193
1194            &Inst::StoreP64 {
1195                rt,
1196                rt2,
1197                ref mem,
1198                flags,
1199            } => {
1200                let mem = mem.clone();
1201                if let Some(trap_code) = flags.trap_code() {
1202                    // Register the offset at which the actual store instruction starts.
1203                    sink.add_trap(trap_code);
1204                }
1205                match &mem {
1206                    &PairAMode::SignedOffset { reg, simm7 } => {
1207                        assert_eq!(simm7.scale_ty, I64);
1208                        sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1209                    }
1210                    &PairAMode::SPPreIndexed { simm7 } => {
1211                        assert_eq!(simm7.scale_ty, I64);
1212                        let reg = stack_reg();
1213                        sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
1214                    }
1215                    &PairAMode::SPPostIndexed { simm7 } => {
1216                        assert_eq!(simm7.scale_ty, I64);
1217                        let reg = stack_reg();
1218                        sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
1219                    }
1220                }
1221            }
1222            &Inst::LoadP64 {
1223                rt,
1224                rt2,
1225                ref mem,
1226                flags,
1227            } => {
1228                let rt = rt.to_reg();
1229                let rt2 = rt2.to_reg();
1230                let mem = mem.clone();
1231                if let Some(trap_code) = flags.trap_code() {
1232                    // Register the offset at which the actual load instruction starts.
1233                    sink.add_trap(trap_code);
1234                }
1235
1236                match &mem {
1237                    &PairAMode::SignedOffset { reg, simm7 } => {
1238                        assert_eq!(simm7.scale_ty, I64);
1239                        sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1240                    }
1241                    &PairAMode::SPPreIndexed { simm7 } => {
1242                        assert_eq!(simm7.scale_ty, I64);
1243                        let reg = stack_reg();
1244                        sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
1245                    }
1246                    &PairAMode::SPPostIndexed { simm7 } => {
1247                        assert_eq!(simm7.scale_ty, I64);
1248                        let reg = stack_reg();
1249                        sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
1250                    }
1251                }
1252            }
1253            &Inst::FpuLoadP64 {
1254                rt,
1255                rt2,
1256                ref mem,
1257                flags,
1258            }
1259            | &Inst::FpuLoadP128 {
1260                rt,
1261                rt2,
1262                ref mem,
1263                flags,
1264            } => {
1265                let rt = rt.to_reg();
1266                let rt2 = rt2.to_reg();
1267                let mem = mem.clone();
1268
1269                if let Some(trap_code) = flags.trap_code() {
1270                    // Register the offset at which the actual load instruction starts.
1271                    sink.add_trap(trap_code);
1272                }
1273
1274                let opc = match self {
1275                    &Inst::FpuLoadP64 { .. } => 0b01,
1276                    &Inst::FpuLoadP128 { .. } => 0b10,
1277                    _ => unreachable!(),
1278                };
1279
1280                match &mem {
1281                    &PairAMode::SignedOffset { reg, simm7 } => {
1282                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1283                        sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1284                    }
1285                    &PairAMode::SPPreIndexed { simm7 } => {
1286                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1287                        let reg = stack_reg();
1288                        sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
1289                    }
1290                    &PairAMode::SPPostIndexed { simm7 } => {
1291                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1292                        let reg = stack_reg();
1293                        sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
1294                    }
1295                }
1296            }
1297            &Inst::FpuStoreP64 {
1298                rt,
1299                rt2,
1300                ref mem,
1301                flags,
1302            }
1303            | &Inst::FpuStoreP128 {
1304                rt,
1305                rt2,
1306                ref mem,
1307                flags,
1308            } => {
1309                let mem = mem.clone();
1310
1311                if let Some(trap_code) = flags.trap_code() {
1312                    // Register the offset at which the actual store instruction starts.
1313                    sink.add_trap(trap_code);
1314                }
1315
1316                let opc = match self {
1317                    &Inst::FpuStoreP64 { .. } => 0b01,
1318                    &Inst::FpuStoreP128 { .. } => 0b10,
1319                    _ => unreachable!(),
1320                };
1321
1322                match &mem {
1323                    &PairAMode::SignedOffset { reg, simm7 } => {
1324                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1325                        sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1326                    }
1327                    &PairAMode::SPPreIndexed { simm7 } => {
1328                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1329                        let reg = stack_reg();
1330                        sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
1331                    }
1332                    &PairAMode::SPPostIndexed { simm7 } => {
1333                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1334                        let reg = stack_reg();
1335                        sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
1336                    }
1337                }
1338            }
1339            &Inst::Mov { size, rd, rm } => {
1340                assert!(rd.to_reg().class() == rm.class());
1341                assert!(rm.class() == RegClass::Int);
1342
1343                match size {
1344                    OperandSize::Size64 => {
1345                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1346                        // should never MOV to XZR.
1347                        assert!(rd.to_reg() != stack_reg());
1348
1349                        if rm == stack_reg() {
1350                            // We can't use ORR here, so use an `add rd, sp, #0` instead.
1351                            let imm12 = Imm12::maybe_from_u64(0).unwrap();
1352                            sink.put4(enc_arith_rr_imm12(
1353                                0b100_10001,
1354                                imm12.shift_bits(),
1355                                imm12.imm_bits(),
1356                                rm,
1357                                rd,
1358                            ));
1359                        } else {
1360                            // Encoded as ORR rd, rm, zero.
                            sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
                        }
                    }
                    OperandSize::Size32 => {
                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                        // should never MOV to XZR.
                        assert!(machreg_to_gpr(rd.to_reg()) != 31);
                        // Encoded as ORR rd, rm, zero.
                        sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
                    }
                }
            }
            &Inst::MovFromPReg { rd, rm } => {
                let rm: Reg = rm.into();
                debug_assert!([
                    regs::fp_reg(),
                    regs::stack_reg(),
                    regs::link_reg(),
                    regs::pinned_reg()
                ]
                .contains(&rm));
                assert!(rm.class() == RegClass::Int);
                assert!(rd.to_reg().class() == rm.class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
            &Inst::MovToPReg { rd, rm } => {
                let rd: Writable<Reg> = Writable::from_reg(rd.into());
                debug_assert!([
                    regs::fp_reg(),
                    regs::stack_reg(),
                    regs::link_reg(),
                    regs::pinned_reg()
                ]
                .contains(&rd.to_reg()));
                assert!(rd.to_reg().class() == RegClass::Int);
                assert!(rm.class() == rd.to_reg().class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
            &Inst::MovWide { op, rd, imm, size } => {
                sink.put4(enc_move_wide(op, rd, imm, size));
            }
            &Inst::MovK { rd, rn, imm, size } => {
                debug_assert_eq!(rn, rd.to_reg());
                sink.put4(enc_movk(rd, imm, size));
            }
            &Inst::CSel { rd, rn, rm, cond } => {
                sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
            }
            &Inst::CSNeg { rd, rn, rm, cond } => {
                sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
            }
            &Inst::CSet { rd, cond } => {
                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
            }
            &Inst::CSetm { rd, cond } => {
                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
            }
            &Inst::CCmp {
                size,
                rn,
                rm,
                nzcv,
                cond,
            } => {
                sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
            }
            &Inst::AtomicRMW {
                ty,
                op,
                rs,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
            }
            &Inst::AtomicRMWLoop { ty, op, flags, .. } => {
                /* Emit this:
                     again:
                      ldaxr{,b,h}  x/w27, [x25]
                      // maybe sign extend
                      op          x28, x27, x26 // op is add,sub,and,orr,eor
                      stlxr{,b,h}  w24, x/w28, [x25]
                      cbnz        x24, again

                   Operand conventions:
                      IN:  x25 (addr), x26 (2nd arg for op)
                      OUT: x27 (old value), x24 (trashed), x28 (trashed)

                   It is unfortunate that, per the ARM documentation, x28 cannot be used for
                   both the store-data and success-flag operands of stlxr.  This causes the
                   instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
                   instead for the success-flag.
                */
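                // As an illustration (not emitted verbatim), an I64 atomic add
                // comes out of the code below as:
                //   again:
                //     ldaxr x27, [x25]
                //     add   x28, x27, x26
                //     stlxr w24, x28, [x25]
                //     cbnz  w24, again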
                // TODO: We should not hardcode registers here; a better idea would be
                // to pass some scratch registers in the `AtomicRMWLoop`
                // pseudo-instruction and use those.
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let x28wr = writable_xreg(28);
                let again_label = sink.get_label();

                // again:
                sink.bind_label(again_label, &mut state.ctrl_plane);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
                let size = OperandSize::from_ty(ty);
                let sign_ext = match op {
                    AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
                        I16 => Some((ExtendOp::SXTH, 16)),
                        I8 => Some((ExtendOp::SXTB, 8)),
                        _ => None,
                    },
                    _ => None,
                };

                // sxt{b|h} the loaded result if necessary.
                if let Some((_, from_bits)) = sign_ext {
                    Inst::Extend {
                        rd: x27wr,
                        rn: x27,
                        signed: true,
                        from_bits,
                        to_bits: size.bits(),
                    }
                    .emit(sink, emit_info, state);
                }

                match op {
                    AtomicRMWLoopOp::Xchg => {} // do nothing
                    AtomicRMWLoopOp::Nand => {
                        // and x28, x27, x26
                        // mvn x28, x28

                        Inst::AluRRR {
                            alu_op: ALUOp::And,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);

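                        // `orr_not` with xzr as the first source is the
                        // canonical encoding of `mvn x28, x28` (MVN is an
                        // alias of ORN with the zero register).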
                        Inst::AluRRR {
                            alu_op: ALUOp::OrrNot,
                            size,
                            rd: x28wr,
                            rn: xzr,
                            rm: x28,
                        }
                        .emit(sink, emit_info, state);
                    }
                    AtomicRMWLoopOp::Umin
                    | AtomicRMWLoopOp::Umax
                    | AtomicRMWLoopOp::Smin
                    | AtomicRMWLoopOp::Smax => {
                        // cmp x27, x26 {?sxt}
                        // csel.op x28, x27, x26

                        let cond = match op {
                            AtomicRMWLoopOp::Umin => Cond::Lo,
                            AtomicRMWLoopOp::Umax => Cond::Hi,
                            AtomicRMWLoopOp::Smin => Cond::Lt,
                            AtomicRMWLoopOp::Smax => Cond::Gt,
                            _ => unreachable!(),
                        };

                        if let Some((extendop, _)) = sign_ext {
                            Inst::AluRRRExtend {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                                extendop,
                            }
                            .emit(sink, emit_info, state);
                        } else {
                            Inst::AluRRR {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                            }
                            .emit(sink, emit_info, state);
                        }

                        Inst::CSel {
                            cond,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                    _ => {
                        // add/sub/and/orr/eor x28, x27, x26
                        let alu_op = match op {
                            AtomicRMWLoopOp::Add => ALUOp::Add,
                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
                            AtomicRMWLoopOp::And => ALUOp::And,
                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
                            AtomicRMWLoopOp::Nand
                            | AtomicRMWLoopOp::Umin
                            | AtomicRMWLoopOp::Umax
                            | AtomicRMWLoopOp::Smin
                            | AtomicRMWLoopOp::Smax
                            | AtomicRMWLoopOp::Xchg => unreachable!(),
                        };

                        Inst::AluRRR {
                            alu_op,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                }

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }
                if op == AtomicRMWLoopOp::Xchg {
                    sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
                } else {
                    sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
                }

                // cbnz w24, again
                // Note, we're actually testing x24, and relying on the default zero-high-half
                // rule in the assignment that `stlxr` does.
                let br_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24, OperandSize::Size64),
                ));
                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
            }
            &Inst::AtomicCAS {
                rd,
                rs,
                rt,
                rn,
                ty,
                flags,
            } => {
                debug_assert_eq!(rd.to_reg(), rs);
                let size = match ty {
                    I8 => 0b00,
                    I16 => 0b01,
                    I32 => 0b10,
                    I64 => 0b11,
                    _ => panic!("Unsupported type: {ty}"),
                };

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_cas(size, rd, rt, rn));
            }
            &Inst::AtomicCASLoop { ty, flags, .. } => {
                /* Emit this:
                    again:
                     ldaxr{,b,h} x/w27, [x25]
                     cmp         x27, x/w26 uxt{b,h}
                     b.ne        out
                     stlxr{,b,h} w24, x/w28, [x25]
                     cbnz        x24, again
                    out:

                  Operand conventions:
                     IN:  x25 (addr), x26 (expected value), x28 (replacement value)
                     OUT: x27 (old value), x24 (trashed)
                */
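                // Note: this load/store-exclusive loop is the fallback path;
                // when the target has a single-instruction compare-and-swap
                // (FEAT_LSE), lowering uses `Inst::AtomicCAS` above instead.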
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let xzrwr = writable_zero_reg();
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let again_label = sink.get_label();
                let out_label = sink.get_label();

                // again:
                sink.bind_label(again_label, &mut state.ctrl_plane);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                // ldaxr x27, [x25]
                sink.put4(enc_ldaxr(ty, x27wr, x25));

                // The top 32-bits are zero-extended by the ldaxr so we don't
                // have to use UXTW, just the x-form of the register.
                let (bit21, extend_op) = match ty {
                    I8 => (0b1, 0b000000),
                    I16 => (0b1, 0b001000),
                    _ => (0b0, 0b000000),
                };
                let bits_31_21 = 0b111_01011_000 | bit21;
                // cmp x27, x26 (== subs xzr, x27, x26)
                sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));

                // b.ne out
                let br_out_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(out_label),
                    CondBrKind::Cond(Cond::Ne),
                ));
                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]

                // cbnz w24, again.
                // Note, we're actually testing x24, and relying on the default zero-high-half
                // rule in the assignment that `stlxr` does.
                let br_again_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24, OperandSize::Size64),
                ));
                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            &Inst::LoadAcquire {
                access_ty,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldar(access_ty, rt, rn));
            }
            &Inst::StoreRelease {
                access_ty,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_stlr(access_ty, rt, rn));
            }
            &Inst::Fence {} => {
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::Csdb {} => {
                sink.put4(0xd503229f);
            }
            &Inst::FpuMove32 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove64 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_11110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::FpuExtend { rd, rn, size } => {
                sink.put4(enc_fpurr(
                    0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
                    rd,
                    rn,
                ));
            }
            &Inst::FpuRR {
                fpu_op,
                size,
                rd,
                rn,
            } => {
                let top22 = match fpu_op {
                    FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
                    FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
                    FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
                    FPUOp1::Cvt32To64 => {
                        debug_assert_eq!(size, ScalarSize::Size32);
                        0b000_11110_00_1_000101_10000
                    }
                    FPUOp1::Cvt64To32 => {
                        debug_assert_eq!(size, ScalarSize::Size64);
                        0b000_11110_01_1_000100_10000
                    }
                };
                let top22 = top22 | size.ftype() << 12;
                sink.put4(enc_fpurr(top22, rd, rn));
            }
            &Inst::FpuRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let top22 = match fpu_op {
                    FPUOp2::Add => 0b000_11110_00_1_00000_001010,
                    FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
                    FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
                    FPUOp2::Div => 0b000_11110_00_1_00000_000110,
                    FPUOp2::Max => 0b000_11110_00_1_00000_010010,
                    FPUOp2::Min => 0b000_11110_00_1_00000_010110,
                };
                let top22 = top22 | size.ftype() << 12;
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
                FPUOpRI::UShr32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::UShr64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
            },
            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                debug_assert_eq!(rd.to_reg(), ri);
                match fpu_op {
                    FPUOpRIMod::Sli64(imm) => {
                        debug_assert_eq!(64, imm.lane_size_in_bits);
                        sink.put4(
                            0b01_1_111110_0000000_010101_00000_00000
                                | imm.enc() << 16
                                | machreg_to_vec(rn) << 5
                                | machreg_to_vec(rd.to_reg()),
                        )
                    }
                    FPUOpRIMod::Sli32(imm) => {
                        debug_assert_eq!(32, imm.lane_size_in_bits);
                        sink.put4(
                            0b0_0_1_011110_0000000_010101_00000_00000
                                | imm.enc() << 16
                                | machreg_to_vec(rn) << 5
                                | machreg_to_vec(rd.to_reg()),
                        )
                    }
                }
            }
            &Inst::FpuRRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let top17 = match fpu_op {
                    FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
                    FPUOp3::MSub => 0b000_11111_00_0_00000_1,
                    FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
                    FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
                };
                let top17 = top17 | size.ftype() << 7;
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, size } => {
                let (q, enc_size) = size.enc_size();
                let (u, bits_12_16, size) = match op {
                    VecMisc2::Not => (0b1, 0b00101, 0b00),
                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
                    VecMisc2::Fabs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01111, enc_size)
                    }
                    VecMisc2::Fneg => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01111, enc_size)
                    }
                    VecMisc2::Fsqrt => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11111, enc_size)
                    }
                    VecMisc2::Rev16 => {
                        debug_assert_eq!(size, VectorSize::Size8x16);
                        (0b0, 0b00001, enc_size)
                    }
                    VecMisc2::Rev32 => {
                        debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
                        (0b1, 0b00000, enc_size)
                    }
                    VecMisc2::Rev64 => {
                        debug_assert!(
                            size == VectorSize::Size8x16
                                || size == VectorSize::Size16x8
                                || size == VectorSize::Size32x4
                        );
                        (0b0, 0b00000, enc_size)
                    }
                    VecMisc2::Fcvtzs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11011, enc_size)
                    }
                    VecMisc2::Fcvtzu => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11011, enc_size)
                    }
                    VecMisc2::Scvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Ucvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Frintn => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size & 0b01)
                    }
                    VecMisc2::Frintz => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size)
                    }
                    VecMisc2::Frintm => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size & 0b01)
                    }
                    VecMisc2::Frintp => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size)
                    }
                    VecMisc2::Cnt => {
                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                        (0b0, 0b00101, enc_size)
                    }
                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                    VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
                    VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
                    VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
                    VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
                    VecMisc2::Fcmeq0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmge0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmgt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmle0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmlt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01110, enc_size)
                    }
                };
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x8 => (0b0, 0b00),
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x4 => (0b0, 0b01),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, mut template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
                };
                if size.is_128bits() {
                    template |= 0b1 << 30;
                }
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
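                // For example, a 32-bit right shift by 3 encodes
                // immh:immb = 0b0100_000 | (32 - 3) = 64 - 3; right shifts
                // store (2 * lane-size) - amount in the field, while left
                // shifts store lane-size + amount.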
                let immh_immb = match (size.lane_size(), is_shr) {
                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecShiftImmMod {
                op,
                rd,
                ri,
                rn,
                size,
                imm,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (is_shr, mut template) = match op {
                    VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
                };
                if size.is_128bits() {
                    template |= 0b1 << 30;
                }
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
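                // (Same immh:immb scheme as in `VecShiftImm` above; see the
                // worked example there.)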
                let immh_immb = match (size.lane_size(), is_shr) {
                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
                }
            }
            &Inst::VecTbl { rd, rn, rm } => {
                sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
            }
            &Inst::VecTblExt { rd, ri, rn, rm } => {
                debug_assert_eq!(rd.to_reg(), ri);
                sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
            }
            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
            }
            &Inst::VecTbl2Ext {
                rd,
                ri,
                rn,
                rn2,
                rm,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
            }
            &Inst::FpuCmp { size, rn, rm } => {
                sink.put4(enc_fcmp(size, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
                    // FCVTZS (32/32-bit)
                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
                    // FCVTZU (32/32-bit)
                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
                    // FCVTZS (32/64-bit)
                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
                    // FCVTZU (32/64-bit)
                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
                    // FCVTZS (64/32-bit)
                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
                    // FCVTZU (64/32-bit)
                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
                    // FCVTZS (64/64-bit)
                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
                    // FCVTZU (64/64-bit)
                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
                };
                sink.put4(enc_fputoint(top16, rd, rn));
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let top16 = match op {
                    // SCVTF (32/32-bit)
                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
                    // UCVTF (32/32-bit)
                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
                    // SCVTF (64/32-bit)
                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
                    // UCVTF (64/32-bit)
                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
                    // SCVTF (32/64-bit)
                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
                    // UCVTF (32/64-bit)
                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
                    // SCVTF (64/64-bit)
                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
                    // UCVTF (64/64-bit)
                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
                };
                sink.put4(enc_inttofpu(top16, rd, rn));
            }
            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
                };
                sink.put4(enc_fround(top22, rd, rn));
            }
            &Inst::MovToFpu { rd, rn, size } => {
                let template = match size {
                    ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                sink.put4(
                    0b000_11110_00_1_00_000_000100_00000_00000
                        | size.ftype() << 22
                        | ((imm.enc_bits() as u32) << 13)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
                    ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
                    ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
                    ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
                    _ => panic!("Unexpected scalar FP operand size: {size:?}"),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, size } => {
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => 0b00001,
                    ScalarSize::Size16 => 0b00010,
                    ScalarSize::Size32 => 0b00100,
                    ScalarSize::Size64 => 0b01000,
                    _ => unreachable!(),
                };
                sink.put4(
                    0b0_0_0_01110000_00000_000011_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(lane < 16);
                        0b00001 | (u32::from(lane) << 1)
                    }
                    ScalarSize::Size16 => {
                        assert!(lane < 8);
                        0b00010 | (u32::from(lane) << 2)
                    }
                    ScalarSize::Size32 => {
                        assert!(lane < 4);
                        0b00100 | (u32::from(lane) << 3)
                    }
                    ScalarSize::Size64 => {
                        assert!(lane < 2);
                        0b01000 | (u32::from(lane) << 4)
                    }
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b000_01110000_00000_000001_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let (imm, shift, shift_ones) = imm.value();
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
                let immh = match lane_size {
                    ScalarSize::Size16 => 0b001,
                    ScalarSize::Size32 => 0b010,
                    ScalarSize::Size64 => 0b100,
                    _ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
                };
                let u = match t {
                    VecExtendOp::Sxtl => 0b0,
                    VecExtendOp::Uxtl => 0b1,
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (u, size, bits_12_16) = match op {
                    VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
                    VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
                    VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
                    VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
                    VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };

                let size = match lane_size {
                    ScalarSize::Size8 => 0b00,
                    ScalarSize::Size16 => 0b01,
                    ScalarSize::Size32 => 0b10,
                    _ => panic!("unsupported size: {lane_size:?}"),
                };

                // Floats use a single bit, to encode either half or single.
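                // For example, `fcvtn` narrowing to 16-bit lanes encodes
                // sz == 0 (single to half), while narrowing to 32-bit lanes
                // encodes sz == 1 (double to single).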
2513                let size = match op {
2514                    VecRRNarrowOp::Fcvtn => size >> 1,
2515                    _ => size,
2516                };
2517
2518                let (u, bits_12_16) = match op {
2519                    VecRRNarrowOp::Xtn => (0b0, 0b10010),
2520                    VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
2521                    VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
2522                    VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
2523                    VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
2524                };
2525
2526                sink.put4(enc_vec_rr_misc(
2527                    ((high_half as u32) << 1) | u,
2528                    size,
2529                    bits_12_16,
2530                    rd,
2531                    rn,
2532                ));
2533            }
2534            &Inst::VecMovElement {
2535                rd,
2536                ri,
2537                rn,
2538                dest_idx,
2539                src_idx,
2540                size,
2541            } => {
2542                debug_assert_eq!(rd.to_reg(), ri);
2543                let (imm5, shift) = match size.lane_size() {
2544                    ScalarSize::Size8 => (0b00001, 1),
2545                    ScalarSize::Size16 => (0b00010, 2),
2546                    ScalarSize::Size32 => (0b00100, 3),
2547                    ScalarSize::Size64 => (0b01000, 4),
2548                    _ => unreachable!(),
2549                };
2550                let mask = 0b11111 >> shift;
2551                debug_assert_eq!(dest_idx & mask, dest_idx);
2552                debug_assert_eq!(src_idx & mask, src_idx);
2553                let imm4 = (src_idx as u32) << (shift - 1);
2554                let imm5 = imm5 | ((dest_idx as u32) << shift);
2555                sink.put4(
2556                    0b011_01110000_00000_0_0000_1_00000_00000
2557                        | (imm5 << 16)
2558                        | (imm4 << 11)
2559                        | (machreg_to_vec(rn) << 5)
2560                        | machreg_to_vec(rd.to_reg()),
2561                );
2562            }
2563            &Inst::VecRRPair { op, rd, rn } => {
2564                let bits_12_16 = match op {
2565                    VecPairOp::Addp => 0b11011,
2566                };
2567
2568                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2569            }
2570            &Inst::VecRRRLong {
2571                rd,
2572                rn,
2573                rm,
2574                alu_op,
2575                high_half,
2576            } => {
2577                let (u, size, bit14) = match alu_op {
2578                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2579                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2580                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2581                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2582                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2583                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2584                };
2585                sink.put4(enc_vec_rrr_long(
2586                    high_half as u32,
2587                    u,
2588                    size,
2589                    bit14,
2590                    rm,
2591                    rn,
2592                    rd,
2593                ));
2594            }
2595            &Inst::VecRRRLongMod {
2596                rd,
2597                ri,
2598                rn,
2599                rm,
2600                alu_op,
2601                high_half,
2602            } => {
2603                debug_assert_eq!(rd.to_reg(), ri);
2604                let (u, size, bit14) = match alu_op {
2605                    VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
2606                    VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
2607                    VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
2608                };
2609                sink.put4(enc_vec_rrr_long(
2610                    high_half as u32,
2611                    u,
2612                    size,
2613                    bit14,
2614                    rm,
2615                    rn,
2616                    rd,
2617                ));
2618            }
2619            &Inst::VecRRPairLong { op, rd, rn } => {
2620                let (u, size) = match op {
2621                    VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2622                    VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2623                    VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2624                    VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2625                };
2626
2627                sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
2628            }
2629            &Inst::VecRRR {
2630                rd,
2631                rn,
2632                rm,
2633                alu_op,
2634                size,
2635            } => {
2636                let (q, enc_size) = size.enc_size();
2637                let is_float = match alu_op {
2638                    VecALUOp::Fcmeq
2639                    | VecALUOp::Fcmgt
2640                    | VecALUOp::Fcmge
2641                    | VecALUOp::Fadd
2642                    | VecALUOp::Fsub
2643                    | VecALUOp::Fdiv
2644                    | VecALUOp::Fmax
2645                    | VecALUOp::Fmin
2646                    | VecALUOp::Fmul => true,
2647                    _ => false,
2648                };
2649
                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Umaxp => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b101001)
                    }
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Smin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Umax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Smax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Urhadd => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b000101)
                    }
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );

                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                    VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
                    VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
                    VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
                    VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
                };
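                // The float ops above leave the size bits clear; OR in the
                // floating-point lane-size encoding (the `sz` field at
                // instruction bit 22) for them here.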
                let top11 = if is_float {
                    top11 | size.enc_float_size() << 1
                } else {
                    top11
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (q, _enc_size) = size.enc_size();

                let (top11, bit15_10) = match alu_op {
                    VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUModOp::Fmla => {
                        (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                    VecALUModOp::Fmls => {
                        (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let idx = u32::from(idx);

                let (q, _size) = size.enc_size();
                let o2 = match alu_op {
                    VecALUModOp::Fmla => 0b0,
                    VecALUModOp::Fmls => 0b1,
                    _ => unreachable!(),
                };

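                // The lane index is split across the H and L bits: 32-bit
                // lanes use H:L as a two-bit index, while 64-bit lanes use H
                // alone and require L to be zero.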
                let (h, l) = match size {
                    VectorSize::Size32x4 => {
                        assert!(idx < 4);
                        (idx >> 1, idx & 1)
                    }
                    VectorSize::Size64x2 => {
                        assert!(idx < 2);
                        (idx, 0)
                    }
                    _ => unreachable!(),
                };

                let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
                let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate {
                rd,
                rn,
                size,
                flags,
            } => {
                let (q, size) = size.enc_size();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                      b.cond  else
                      mov     rd, rm
                      b       out
                     else:
                      mov     rd, rn
                     out:

                   Note, we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label, &mut state.ctrl_plane);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            &Inst::MovToNZCV { rn } => {
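                // MSR NZCV, Xn: move the general register into the flags.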
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
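                // MRS Xd, NZCV: move the flags into the general register.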
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And,
                    size: OperandSize::Size32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
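                // Writes to a W register implicitly zero the upper 32 bits,
                // so a 32-bit move suffices here.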
                let mov = Inst::Mov {
                    size: OperandSize::Size32,
                    rd,
                    rm: rn,
                };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
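                // Lower to a bitfield move: SBFM (opc=0b00) for sign
                // extension, UBFM (opc=0b10) for zero extension, extracting
                // bits [from_bits-1:0] of the source.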
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the jump's target is a label, record the use so that a
                // fixup can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
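                // RET: branch to the link register with a return hint.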
                sink.put4(0xd65f03c0);
            }
            &Inst::AuthenticatedRet { key, is_hint } => {
                let (op2, is_hint) = match key {
                    APIKey::AZ => (0b100, true),
                    APIKey::ASP => (0b101, is_hint),
                    APIKey::BZ => (0b110, true),
                    APIKey::BSP => (0b111, is_hint),
                };

                if is_hint {
                    sink.put4(key.enc_auti_hint());
                    Inst::Ret {}.emit(sink, emit_info, state);
                } else {
                    sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
                }
            }
            &Inst::Call { ref info } => {
                let user_stack_map = state.take_stack_map();
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
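                // BL with a zero offset; the Arm64Call relocation above
                // supplies the real target.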
                sink.put4(enc_jump26(0b100101, 0));
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }
                sink.add_call_site();

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }
            &Inst::CallInd { ref info } => {
                let user_stack_map = state.take_stack_map();
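                // BLR: branch with link to the register holding the callee.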
                sink.put4(
                    0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
                );
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }
                sink.add_call_site();

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }
            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                // Note: this is not `Inst::Jump { .. }.emit(..)` because we
                // have different metadata in this case: we don't have a label
                // for the target, but rather a function relocation.
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b000101, 0));
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                Inst::IndirectBr {
                    rn: info.dest,
                    targets: vec![],
                }
                .emit(sink, emit_info, state);
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
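                    // Record the inverted form so that the MachBuffer's
                    // branch folding can flip this branch if the taken target
                    // becomes the fallthrough.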
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TestBitAndBranch {
                taken,
                not_taken,
                kind,
                rn,
                bit,
            } => {
                // Emit the conditional branch first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
                    let inverted =
                        enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                let label = sink.defer_trap(trap_code);
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put_data(Inst::TRAP_OPCODE);
            }
            &Inst::Adr { rd, off } => {
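                // The 21-bit signed immediate limits ADR to a range of
                // +/-1 MiB from the current PC.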
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Adrp { rd, off } => {
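                // Same 21-bit immediate, but counted in 4 KiB pages, giving
                // ADRP a range of +/-4 GiB.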
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adrp(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                default,
                ref targets,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to the default target if the condition code from the
                // prior comparison indicates that the index is out of bounds.
                let br =
                    enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));

                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
                sink.put4(br);

                // Overwrite the index with a zero when the above
                // branch misspeculates (Spectre mitigation). Save the
                // resulting index in rtmp2.
                let inst = Inst::CSel {
                    rd: rtmp2,
                    cond: Cond::Hs,
                    rn: zero_reg(),
                    rm: ridx,
                };
                inst.emit(sink, emit_info, state);
                // Prevent any data value speculation.
                Inst::Csdb.emit(sink, emit_info, state);

                // Load address of jump table.
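                // (The table starts 16 bytes past this ADR: the ADR, the
                // load, the add, and the indirect branch are 4 bytes each.)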
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
                if emit_info.0.is_pic() {
                    // See this Compiler Explorer example for the variations of this sequence
                    // with and without BTI & PAuth:
                    // https://godbolt.org/z/ncqjbbvvn
                    //
                    // Emit the following code:
                    //   adrp    rd, :got:X
                    //   ldr     rd, [rd, :got_lo12:X]

                    // adrp rd, symbol
                    sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
                    let inst = Inst::Adrp { rd, off: 0 };
                    inst.emit(sink, emit_info, state);

                    // ldr rd, [rd, :got_lo12:X]
                    sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::reg(rd.to_reg()),
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                } else {
                    // With absolute offsets we set up a load from a preallocated space, and then jump
                    // over it.
                    //
                    // Emit the following code:
                    //   ldr     rd, #8
                    //   b       #0x10
                    //   <8 byte space>

                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::Label {
                            label: MemLabel::PCRel(8),
                        },
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                    let inst = Inst::Jump {
                        dest: BranchTarget::ResolvedOffset(12),
                    };
                    inst.emit(sink, emit_info, state);
                    sink.add_reloc(Reloc::Abs8, &**name, offset);
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let mem = mem.clone();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended { rn, rm, extendop } => {
                        let r = rn;
                        (r, Some((rm, extendop)), 0)
                    }
                    AMode::Unscaled { rn, simm9 } => {
                        let r = rn;
                        (r, None, simm9.value())
                    }
                    AMode::UnsignedOffset { rn, uimm12 } => {
                        let r = rn;
                        (r, None, uimm12.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
                };
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov {
                            size: OperandSize::Size64,
                            rd,
                            rm: reg,
                        };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::Paci { key } => {
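                // All PACI* variants are encoded in the HINT space, so they
                // execute as NOPs on cores without pointer authentication.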
                let (crm, op2) = match key {
                    APIKey::AZ => (0b0011, 0b000),
                    APIKey::ASP => (0b0011, 0b001),
                    APIKey::BZ => (0b0011, 0b010),
                    APIKey::BSP => (0b0011, 0b011),
                };

                sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
            }
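            // XPACLRI (HINT #7): strip the pointer-authentication code from
            // the link register; also a NOP on cores without PAuth.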
            &Inst::Xpaclri => sink.put4(0xd50320ff),
            &Inst::Bti { targets } => {
                let targets = match targets {
                    BranchTargetType::None => 0b00,
                    BranchTargetType::C => 0b01,
                    BranchTargetType::J => 0b10,
                    BranchTargetType::JC => 0b11,
                };

                sink.put4(0xd503241f | targets << 6);
            }
            &Inst::EmitIsland { needed_space } => {
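                // Reserve four extra bytes for the jump emitted below to skip
                // over the island.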
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }

            &Inst::ElfTlsGetAddr {
                ref symbol,
                rd,
                tmp,
            } => {
                assert_eq!(xreg(0), rd.to_reg());

                // See the original proposal for TLSDESC:
                // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
                //
                // Implement the TLSDESC instruction sequence:
                //   adrp x0, :tlsdesc:tlsvar
                //   ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                //   add  x0, x0, :tlsdesc_lo12:tlsvar
                //   blr  tmp
                //   mrs  tmp, tpidr_el0
                //   add  x0, x0, tmp
                //
                // This is the instruction sequence that GCC emits for ELF
                // general-dynamic TLS relocations on AArch64.
                // See: https://gcc.godbolt.org/z/e4j7MdErh

                // adrp x0, :tlsdesc:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
                Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);

                // ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
                Inst::ULoad64 {
                    rd: tmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // add x0, x0, :tlsdesc_lo12:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
                Inst::AluRRImm12 {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    imm12: Imm12::maybe_from_u64(0).unwrap(),
                }
                .emit(sink, emit_info, state);

                // blr tmp
                sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
                }
                .emit(sink, emit_info, state);

                // mrs tmp, tpidr_el0
                sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));

                // add x0, x0, tmp
                Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    rm: tmp.to_reg(),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
                // Each thread-local variable gets a descriptor whose first
                // xword is a pointer to a function. Calling that function
                // with the descriptor's address in x0 returns the address of
                // the thread-local variable in x0.
                //
                // What we want to emit is, roughly:
                //
                // adrp x0, <label>@TLVPPAGE  ; Load the address of the page of the thread local variable pointer (TLVP)
                // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
                // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
                // blr x1 ; Call the function pointer with the descriptor address in x0
                // ; x0 now contains the TLV address

                assert_eq!(xreg(0), rd.to_reg());
                let rtmp = writable_xreg(1);

                // adrp x0, <label>@TLVPPAGE
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
                sink.put4(0x90000000);

                // ldr x0, [x0, <label>@TLVPPAGEOFF]
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
                sink.put4(0xf9400000);

                // load [x0] into temp register
                Inst::ULoad64 {
                    rd: rtmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // call function pointer in temp register
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(
                        rtmp.to_reg(),
                        CallConv::AppleAarch64,
                    )),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }

            &Inst::DummyUse { .. } => {}

            &Inst::StackProbeLoop { start, end, step } => {
                assert!(emit_info.0.enable_probestack());

                // The loop generated here uses `start` as a counter register
                // to count backwards until negating it exceeds `end`. In
                // other words, `start` is an offset from `sp` that we're
                // testing, and `end` is the maximum size we need to test.
                // The loop looks like:
                //
                //      loop_start:
                //          sub start, start, #step
                //          str wzr, [sp, start]
                //          cmn start, end
                //          b.gt loop_start
                //      loop_end:
                //
                // Note that this loop cannot use the spilltmp and tmp2
                // registers as those are currently used as the input to this
                // loop when generating the instruction. This means that some
                // more flavorful address modes and lowerings need to be
                // avoided.
                //
                // Perhaps someone more clever than I can figure out how to use
                // `subs` or the like and skip the `cmn`, but I can't figure it
                // out at this time.

                let loop_start = sink.get_label();
                sink.bind_label(loop_start, &mut state.ctrl_plane);

                Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub,
                    size: OperandSize::Size64,
                    rd: start,
                    rn: start.to_reg(),
                    imm12: step,
                }
                .emit(sink, emit_info, state);
                Inst::Store32 {
                    rd: regs::zero_reg(),
                    mem: AMode::RegReg {
                        rn: regs::stack_reg(),
                        rm: start.to_reg(),
                    },
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: ALUOp::AddS,
                    size: OperandSize::Size64,
                    rd: regs::writable_zero_reg(),
                    rn: start.to_reg(),
                    rm: end,
                }
                .emit(sink, emit_info, state);

                let loop_end = sink.get_label();
                Inst::CondBr {
                    taken: BranchTarget::Label(loop_start),
                    not_taken: BranchTarget::Label(loop_end),
                    kind: CondBrKind::Cond(Cond::Gt),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(loop_end, &mut state.ctrl_plane);
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!(
            (end_off - start_off) <= Inst::worst_case_size()
                || matches!(self, Inst::EmitIsland { .. }),
            "Worst case size exceeded for {:?}: {}",
            self,
            end_off - start_off
        );

        state.clear_post_insn();
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
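    // Tear the frame down before the tail jump: restore clobbered
    // callee-saves, pop the setup area (fp/lr), and release any
    // over-allocated tail-call argument space.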
    for inst in
        AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout())
    {
        inst.emit(sink, emit_info, state);
    }

    let setup_area_size = state.frame_layout().setup_area_size;
    if setup_area_size > 0 {
        // N.B.: sp is already adjusted to the appropriate place by the
        // clobber-restore code (which also frees the fixed frame). Hence, there
        // is no need for the usual `mov sp, fp` here.

        // `ldp fp, lr, [sp], #16`
        Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::SPPostIndexed {
                // TODO: we could fold the increment for incoming_args_diff here, as long as that
                // value is less than 502*8, by adding it to `setup_area_size`.
                // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
                simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
            },
            flags: MemFlags::trusted(),
        }
        .emit(sink, emit_info, state);
    }

    // Adjust SP to account for the possible over-allocation in the prologue.
    let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
    if incoming_args_diff > 0 {
        for inst in
            AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
        {
            inst.emit(sink, emit_info, state);
        }
    }

    if let Some(key) = info.key {
        sink.put4(key.enc_auti_hint());
    }
}