cranelift_codegen/isa/x64/encoding/
rex.rs

1//! Encodes instructions in the standard x86 encoding mode. This is called
2//! IA-32E mode in the Intel manuals but corresponds to the addition of the
3//! REX-prefix format (hence the name of this module) that allowed encoding
4//! instructions in both compatibility mode (32-bit instructions running on a
5//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
6//!
7//! For all of the routines that take both a memory-or-reg operand (sometimes
8//! called "E" in the Intel documentation, see the Intel Developer's manual,
9//! vol. 2, section A.2) and a reg-only operand ("G" in Intel-ese), the order is
10//! always G first, then E. The term "enc" in the following means "hardware
11//! register encoding number".
12
13use super::ByteSink;
14use crate::isa::x64::inst::args::{Amode, OperandSize};
15use crate::isa::x64::inst::{regs, Inst, LabelUse};
16use crate::machinst::{MachBuffer, Reg, RegClass};
17
/// Reports whether `x`, reinterpreted as a signed 32-bit value and widened to
/// 64 bits, can be reproduced by sign-extending just its low 8 bits.
///
/// Equivalently: the signed value of `x` lies within the range of an `i8`.
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let widened = i64::from(x as i32);
    (i64::from(i8::MIN)..=i64::from(i8::MAX)).contains(&widened)
}
22
/// Reports whether `x`, reinterpreted as a signed 32-bit value, can be
/// reproduced by sign-extending just its low 8 bits.
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let signed = x as i32;
    // Truncate to the low byte, then widen again with sign extension.
    let round_tripped = i32::from(signed as i8);
    signed == round_tripped
}
27
/// Pack the three ModR/M fields into a single byte.
///
/// The layout is `mod` in bits 7..6, the G register encoding in bits 5..3 and
/// the E (`r/m`) field in bits 2..0. Each argument must already be reduced to
/// its field width; this is checked in debug builds only.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mod_field = (m0d & 0b11) << 6;
    let reg_field = (enc_reg_g & 0b111) << 3;
    let rm_field = rm_e & 0b111;
    mod_field | reg_field | rm_field
}
36
/// Pack the three SIB fields into a single byte.
///
/// The layout is the scale (`shift`) in bits 7..6, the index register encoding
/// in bits 5..3 and the base register encoding in bits 2..0. Each argument
/// must already be reduced to its field width; this is checked in debug builds
/// only.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 0b11) << 6;
    let index_field = (enc_index & 0b111) << 3;
    let base_field = enc_base & 0b111;
    scale_field | index_field | base_field
}
44
45/// Get the encoding number of a GPR.
46#[inline(always)]
47pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
48    let reg = reg.into();
49    debug_assert!(reg.is_real(), "reg = {reg:?}");
50    debug_assert_eq!(reg.class(), RegClass::Int);
51    reg.to_real_reg().unwrap().hw_enc()
52}
53
54/// Get the encoding number of any register.
55#[inline(always)]
56pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
57    let reg = reg.into();
58    debug_assert!(reg.is_real());
59    reg.to_real_reg().unwrap().hw_enc()
60}
61
62/// A small bit field to record a REX prefix specification:
63/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
64/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
65#[repr(transparent)]
66#[derive(Clone, Copy)]
67pub struct RexFlags(u8);
68
69impl RexFlags {
70    /// By default, set the W field, and don't always emit.
71    #[inline(always)]
72    pub fn set_w() -> Self {
73        Self(0)
74    }
75
76    /// Creates a new RexPrefix for which the REX.W bit will be cleared.
77    #[inline(always)]
78    pub fn clear_w() -> Self {
79        Self(1)
80    }
81
82    /// True if 64-bit operands are used.
83    #[inline(always)]
84    pub fn must_clear_w(&self) -> bool {
85        (self.0 & 1) != 0
86    }
87
88    /// Require that the REX prefix is emitted.
89    #[inline(always)]
90    pub fn always_emit(&mut self) -> &mut Self {
91        self.0 = self.0 | 2;
92        self
93    }
94
95    /// True if the REX prefix must always be emitted.
96    #[inline(always)]
97    pub fn must_always_emit(&self) -> bool {
98        (self.0 & 2) != 0
99    }
100
101    /// Emit the rex prefix if the referenced register would require it for 8-bit operations.
102    #[inline(always)]
103    pub fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
104        let enc_reg = int_reg_enc(reg);
105        if enc_reg >= 4 && enc_reg <= 7 {
106            self.always_emit();
107        }
108        self
109    }
110
111    /// Emit a unary instruction.
112    #[inline(always)]
113    pub fn emit_one_op<BS: ByteSink + ?Sized>(&self, sink: &mut BS, enc_e: u8) {
114        // Register Operand coded in Opcode Byte
115        // REX.R and REX.X unused
116        // REX.B == 1 accesses r8-r15
117        let w = if self.must_clear_w() { 0 } else { 1 };
118        let r = 0;
119        let x = 0;
120        let b = (enc_e >> 3) & 1;
121        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
122        if rex != 0x40 || self.must_always_emit() {
123            sink.put1(rex);
124        }
125    }
126
127    /// Emit a binary instruction.
128    #[inline(always)]
129    pub fn emit_two_op<BS: ByteSink + ?Sized>(&self, sink: &mut BS, enc_g: u8, enc_e: u8) {
130        let w = if self.must_clear_w() { 0 } else { 1 };
131        let r = (enc_g >> 3) & 1;
132        let x = 0;
133        let b = (enc_e >> 3) & 1;
134        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
135        if rex != 0x40 || self.must_always_emit() {
136            sink.put1(rex);
137        }
138    }
139
140    /// Emit a ternary instruction.
141    #[inline(always)]
142    pub fn emit_three_op<BS: ByteSink + ?Sized>(
143        &self,
144        sink: &mut BS,
145        enc_g: u8,
146        enc_index: u8,
147        enc_base: u8,
148    ) {
149        let w = if self.must_clear_w() { 0 } else { 1 };
150        let r = (enc_g >> 3) & 1;
151        let x = (enc_index >> 3) & 1;
152        let b = (enc_base >> 3) & 1;
153        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
154        if rex != 0x40 || self.must_always_emit() {
155            sink.put1(rex);
156        }
157    }
158}
159
160/// Generate the proper Rex flags for the given operand size.
161impl From<OperandSize> for RexFlags {
162    fn from(size: OperandSize) -> Self {
163        match size {
164            OperandSize::Size64 => RexFlags::set_w(),
165            _ => RexFlags::clear_w(),
166        }
167    }
168}
169/// Generate Rex flags for an OperandSize/register tuple.
170impl From<(OperandSize, Reg)> for RexFlags {
171    fn from((size, reg): (OperandSize, Reg)) -> Self {
172        let mut rex = RexFlags::from(size);
173        if size == OperandSize::Size8 {
174            rex.always_emit_if_8bit_needed(reg);
175        }
176        rex
177    }
178}
179
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
///
/// The default is [`OpcodeMap::None`].
#[derive(Default, PartialEq)]
pub enum OpcodeMap {
    /// No escape bytes precede the opcode.
    #[default]
    None,
    /// The opcode is found in the map selected by the `0F` escape byte.
    _0F,
    /// The opcode is found in the map selected by the `0F 38` escape bytes.
    _0F38,
    /// The opcode is found in the map selected by the `0F 3A` escape bytes.
    _0F3A,
}

impl OpcodeMap {
    /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
    /// formats pack this information as bits in a prefix (e.g. VEX / EVEX).
    ///
    /// Returns the two-bit code for this map: `None` is 0, `_0F` is 1,
    /// `_0F38` is 2 and `_0F3A` is 3.
    pub(crate) fn bits(&self) -> u8 {
        match self {
            OpcodeMap::None => 0b00,
            OpcodeMap::_0F => 0b01,
            OpcodeMap::_0F38 => 0b10,
            OpcodeMap::_0F3A => 0b11,
        }
    }
}
209
210/// We may need to include one or more legacy prefix bytes before the REX prefix.  This enum
211/// covers only the small set of possibilities that we actually need.
212#[derive(PartialEq)]
213pub enum LegacyPrefixes {
214    /// No prefix bytes.
215    None,
216    /// Operand Size Override -- here, denoting "16-bit operation".
217    _66,
218    /// The Lock prefix.
219    _F0,
220    /// Operand size override and Lock.
221    _66F0,
222    /// REPNE, but no specific meaning here -- is just an opcode extension.
223    _F2,
224    /// REP/REPE, but no specific meaning here -- is just an opcode extension.
225    _F3,
226    /// Operand size override and same effect as F3.
227    _66F3,
228}
229
230impl LegacyPrefixes {
231    /// Emit the legacy prefix as bytes (e.g. in REX instructions).
232    #[inline(always)]
233    pub(crate) fn emit<BS: ByteSink + ?Sized>(&self, sink: &mut BS) {
234        match self {
235            Self::_66 => sink.put1(0x66),
236            Self::_F0 => sink.put1(0xF0),
237            Self::_66F0 => {
238                // I don't think the order matters, but in any case, this is the same order that
239                // the GNU assembler uses.
240                sink.put1(0x66);
241                sink.put1(0xF0);
242            }
243            Self::_F2 => sink.put1(0xF2),
244            Self::_F3 => sink.put1(0xF3),
245            Self::_66F3 => {
246                sink.put1(0x66);
247                sink.put1(0xF3);
248            }
249            Self::None => (),
250        }
251    }
252
253    /// Emit the legacy prefix as bits (e.g. for EVEX instructions).
254    #[inline(always)]
255    pub(crate) fn bits(&self) -> u8 {
256        match self {
257            Self::None => 0b00,
258            Self::_66 => 0b01,
259            Self::_F3 => 0b10,
260            Self::_F2 => 0b11,
261            _ => panic!(
262                "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
263            ),
264        }
265    }
266}
267
268impl Default for LegacyPrefixes {
269    fn default() -> Self {
270        Self::None
271    }
272}
273
274/// This is the core 'emit' function for instructions that reference memory.
275///
276/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
277/// create and emit:
278/// - first the legacy prefixes, if any
279/// - then the REX prefix, if needed
280/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
281/// - then the MOD/RM byte,
282/// - then optionally, a SIB byte,
283/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
284///
285/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
286/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
287/// instructions will require their own emitter functions.
288///
289/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
290///
291/// The opcodes are written bigendianly for the convenience of callers.  For example, if the opcode
292/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
293/// 0xF3_0F_27 and `num_opcodes` == 3.
294///
295/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
296/// `rex` can specify special handling for the REX prefix.  By default, the REX prefix will
297/// indicate a 64-bit operation and will be deleted if it is redundant (0x40).  Note that for a
298/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
299/// indicate a 64-bit operation.
300pub(crate) fn emit_std_enc_mem(
301    sink: &mut MachBuffer<Inst>,
302    prefixes: LegacyPrefixes,
303    opcodes: u32,
304    mut num_opcodes: usize,
305    enc_g: u8,
306    mem_e: &Amode,
307    rex: RexFlags,
308    bytes_at_end: u8,
309) {
310    // General comment for this function: the registers in `mem_e` must be
311    // 64-bit integer registers, because they are part of an address
312    // expression.  But `enc_g` can be derived from a register of any class.
313
314    if let Some(trap_code) = mem_e.get_flags().trap_code() {
315        sink.add_trap(trap_code);
316    }
317
318    prefixes.emit(sink);
319
320    // After prefixes, first emit the REX byte depending on the kind of
321    // addressing mode that's being used.
322    match *mem_e {
323        Amode::ImmReg { base, .. } => {
324            let enc_e = int_reg_enc(base);
325            rex.emit_two_op(sink, enc_g, enc_e);
326        }
327
328        Amode::ImmRegRegShift {
329            base: reg_base,
330            index: reg_index,
331            ..
332        } => {
333            let enc_base = int_reg_enc(*reg_base);
334            let enc_index = int_reg_enc(*reg_index);
335            rex.emit_three_op(sink, enc_g, enc_index, enc_base);
336        }
337
338        Amode::RipRelative { .. } => {
339            // note REX.B = 0.
340            rex.emit_two_op(sink, enc_g, 0);
341        }
342    }
343
344    // Now the opcode(s).  These include any other prefixes the caller
345    // hands to us.
346    while num_opcodes > 0 {
347        num_opcodes -= 1;
348        sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
349    }
350
351    // And finally encode the mod/rm bytes and all further information.
352    emit_modrm_sib_disp(sink, enc_g, mem_e, bytes_at_end, None)
353}
354
355pub(crate) fn emit_modrm_sib_disp(
356    sink: &mut MachBuffer<Inst>,
357    enc_g: u8,
358    mem_e: &Amode,
359    bytes_at_end: u8,
360    evex_scaling: Option<i8>,
361) {
362    match *mem_e {
363        Amode::ImmReg { simm32, base, .. } => {
364            let enc_e = int_reg_enc(base);
365            let mut imm = Imm::new(simm32, evex_scaling);
366
367            // Most base registers allow for a single ModRM byte plus an
368            // optional immediate. If rsp is the base register, however, then a
369            // SIB byte must be used.
370            let enc_e_low3 = enc_e & 7;
371            if enc_e_low3 != regs::ENC_RSP {
372                // If the base register is rbp and there's no offset then force
373                // a 1-byte zero offset since otherwise the encoding would be
374                // invalid.
375                if enc_e_low3 == regs::ENC_RBP {
376                    imm.force_immediate();
377                }
378                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
379                imm.emit(sink);
380            } else {
381                // Displacement from RSP is encoded with a SIB byte where
382                // the index and base are both encoded as RSP's encoding of
383                // 0b100. This special encoding means that the index register
384                // isn't used and the base is 0b100 with or without a
385                // REX-encoded 4th bit (e.g. rsp or r12)
386                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
387                sink.put1(0b00_100_100);
388                imm.emit(sink);
389            }
390        }
391
392        Amode::ImmRegRegShift {
393            simm32,
394            base: reg_base,
395            index: reg_index,
396            shift,
397            ..
398        } => {
399            let enc_base = int_reg_enc(*reg_base);
400            let enc_index = int_reg_enc(*reg_index);
401
402            // Encoding of ModRM/SIB bytes don't allow the index register to
403            // ever be rsp. Note, though, that the encoding of r12, whose three
404            // lower bits match the encoding of rsp, is explicitly allowed with
405            // REX bytes so only rsp is disallowed.
406            assert!(enc_index != regs::ENC_RSP);
407
408            // If the offset is zero then there is no immediate. Note, though,
409            // that if the base register's lower three bits are `101` then an
410            // offset must be present. This is a special case in the encoding of
411            // the SIB byte and requires an explicit displacement with rbp/r13.
412            let mut imm = Imm::new(simm32, evex_scaling);
413            if enc_base & 7 == regs::ENC_RBP {
414                imm.force_immediate();
415            }
416
417            // With the above determined encode the ModRM byte, then the SIB
418            // byte, then any immediate as necessary.
419            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
420            sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
421            imm.emit(sink);
422        }
423
424        Amode::RipRelative { ref target } => {
425            // RIP-relative is mod=00, rm=101.
426            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
427
428            let offset = sink.cur_offset();
429            sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
430            // N.B.: some instructions (XmmRmRImm format for example)
431            // have bytes *after* the RIP-relative offset. The
432            // addressed location is relative to the end of the
433            // instruction, but the relocation is nominally relative
434            // to the end of the u32 field. So, to compensate for
435            // this, we emit a negative extra offset in the u32 field
436            // initially, and the relocation will add to it.
437            sink.put4(-(i32::from(bytes_at_end)) as u32);
438        }
439    }
440}
441
442#[derive(Copy, Clone)]
443enum Imm {
444    None,
445    Imm8(i8),
446    Imm32(i32),
447}
448
449impl Imm {
450    /// Classifies the 32-bit immediate `val` as how this can be encoded
451    /// with ModRM/SIB bytes.
452    ///
453    /// For `evex_scaling` according to Section 2.7.5 of Intel's manual:
454    ///
455    /// > EVEX-encoded instructions always use a compressed displacement scheme
456    /// > by multiplying disp8 in conjunction with a scaling factor N that is
457    /// > determined based on the vector length, the value of EVEX.b bit
458    /// > (embedded broadcast) and the input element size of the instruction
459    ///
460    /// The `evex_scaling` factor provided here is `Some(N)` for EVEX
461    /// instructions.  This is taken into account where the `Imm` value
462    /// contained is the raw byte offset.
463    fn new(val: i32, evex_scaling: Option<i8>) -> Imm {
464        if val == 0 {
465            return Imm::None;
466        }
467        match evex_scaling {
468            Some(scaling) => {
469                if val % i32::from(scaling) == 0 {
470                    let scaled = val / i32::from(scaling);
471                    if low8_will_sign_extend_to_32(scaled as u32) {
472                        return Imm::Imm8(scaled as i8);
473                    }
474                }
475                Imm::Imm32(val)
476            }
477            None => match i8::try_from(val) {
478                Ok(val) => Imm::Imm8(val),
479                Err(_) => Imm::Imm32(val),
480            },
481        }
482    }
483
484    /// Forces `Imm::None` to become `Imm::Imm8(0)`, used for special cases
485    /// where some base registers require an immediate.
486    fn force_immediate(&mut self) {
487        if let Imm::None = self {
488            *self = Imm::Imm8(0);
489        }
490    }
491
492    /// Returns the two "mod" bits present at the upper bits of the mod/rm
493    /// byte.
494    fn m0d(&self) -> u8 {
495        match self {
496            Imm::None => 0b00,
497            Imm::Imm8(_) => 0b01,
498            Imm::Imm32(_) => 0b10,
499        }
500    }
501
502    fn emit<BS: ByteSink + ?Sized>(&self, sink: &mut BS) {
503        match self {
504            Imm::None => {}
505            Imm::Imm8(n) => sink.put1(*n as u8),
506            Imm::Imm32(n) => sink.put4(*n as u32),
507        }
508    }
509}
510
511/// This is the core 'emit' function for instructions that do not reference memory.
512///
513/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
514/// operand is a register rather than memory.  Hence it is much simpler.
515pub(crate) fn emit_std_enc_enc<BS: ByteSink + ?Sized>(
516    sink: &mut BS,
517    prefixes: LegacyPrefixes,
518    opcodes: u32,
519    mut num_opcodes: usize,
520    enc_g: u8,
521    enc_e: u8,
522    rex: RexFlags,
523) {
524    // EncG and EncE can be derived from registers of any class, and they
525    // don't even have to be from the same class.  For example, for an
526    // integer-to-FP conversion insn, one might be RegClass::I64 and the other
527    // RegClass::V128.
528
529    // The legacy prefixes.
530    prefixes.emit(sink);
531
532    // The rex byte.
533    rex.emit_two_op(sink, enc_g, enc_e);
534
535    // All other prefixes and opcodes.
536    while num_opcodes > 0 {
537        num_opcodes -= 1;
538        sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
539    }
540
541    // Now the mod/rm byte.  The instruction we're generating doesn't access
542    // memory, so there is no SIB byte or immediate -- we're done.
543    sink.put1(encode_modrm(0b11, enc_g & 7, enc_e & 7));
544}
545
546// These are merely wrappers for the above two functions that facilitate passing
547// actual `Reg`s rather than their encodings.
548
549pub(crate) fn emit_std_reg_mem(
550    sink: &mut MachBuffer<Inst>,
551    prefixes: LegacyPrefixes,
552    opcodes: u32,
553    num_opcodes: usize,
554    reg_g: Reg,
555    mem_e: &Amode,
556    rex: RexFlags,
557    bytes_at_end: u8,
558) {
559    let enc_g = reg_enc(reg_g);
560    emit_std_enc_mem(
561        sink,
562        prefixes,
563        opcodes,
564        num_opcodes,
565        enc_g,
566        mem_e,
567        rex,
568        bytes_at_end,
569    );
570}
571
572pub(crate) fn emit_std_reg_reg<BS: ByteSink + ?Sized>(
573    sink: &mut BS,
574    prefixes: LegacyPrefixes,
575    opcodes: u32,
576    num_opcodes: usize,
577    reg_g: Reg,
578    reg_e: Reg,
579    rex: RexFlags,
580) {
581    let enc_g = reg_enc(reg_g);
582    let enc_e = reg_enc(reg_e);
583    emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
584}
585
586/// Write a suitable number of bits from an imm64 to the sink.
587pub(crate) fn emit_simm<BS: ByteSink + ?Sized>(sink: &mut BS, size: u8, simm32: u32) {
588    match size {
589        8 | 4 => sink.put4(simm32),
590        2 => sink.put2(simm32 as u16),
591        1 => sink.put1(simm32 as u8),
592        _ => unreachable!(),
593    }
594}