pulley_interpreter/lib.rs
1//! The pulley bytecode for fast interpreters.
2
3#![cfg_attr(docsrs, feature(doc_auto_cfg))]
4#![cfg_attr(pulley_tail_calls, feature(explicit_tail_calls))]
5#![cfg_attr(pulley_tail_calls, allow(incomplete_features, unstable_features))]
6#![deny(missing_docs)]
7#![no_std]
8
9#[cfg(feature = "std")]
10#[macro_use]
11extern crate std;
12
13#[cfg(feature = "decode")]
14extern crate alloc;
15
16/// Calls the given macro with each opcode.
17///
18/// # Instruction Guidelines
19///
20/// We're inventing an instruction set here which naturally brings a whole set
21/// of design questions. Note that this is explicitly intended to be only ever
22/// used for Pulley, which has a different set of design constraints than
23/// other instruction sets (e.g. general-purpose CPU ISAs). Some examples of
24/// constraints for Pulley are:
25///
26/// * Instructions must be portable to many architectures.
27/// * The Pulley ISA is mostly target-independent as the compilation target is
28/// currently only parameterized on pointer width and endianness.
29/// * Pulley instructions should be a balance of time-to-decode and code size. For
30/// example, super fancy bit-packing tricks might be tough to decode in
31/// software but might be worthwhile if the pattern is quite common and greatly reduces
32/// the size of bytecode. There's not a hard-and-fast answer here, but a
33/// balance to be made.
34/// * Many "macro ops" are present to reduce the size of compiled bytecode so
35/// there is a wide set of duplicate functionality between opcodes (and this
36/// is expected).
37///
38/// Given all this it's also useful to have a set of guidelines used to name and
39/// develop Pulley instructions. As of the time of this writing it's still
40/// pretty early days for Pulley so some of these guidelines may change over
41/// time. Additionally instructions don't necessarily all follow these
42/// conventions and that may also change over time. With that in mind, here's a
43/// rough set of guidelines:
44///
45/// * Most instructions are prefixed with `x`, `f`, or `v`, indicating which
46/// type of register they're operating on. (e.g. `xadd32` operates on the `x`
47/// integer registers and `fadd32` operates on the `f` float registers).
48///
49/// * Most instructions are suffixed or otherwise contain the bit width they're
50/// operating on. For example `xadd32` is a 32-bit addition.
51///
52/// * If an instruction operates on signed or unsigned data (such as division
53/// and remainder), then the instruction is suffixed with `_s` or `_u`.
54///
55/// * Instructions operate on either 32 or 64-bit parts of a register.
56/// Instructions modifying only 32-bits of a register always modify the "low"
57/// part of a register and leave the upper part unmodified. This is intended
58/// to help 32-bit platforms where if most operations are 32-bit there's no
59/// need for extra instructions to sign or zero extend and modify the upper
60/// half of the register.
61///
62/// * Binops use `BinaryOperands<T>` for the destination and argument registers.
63///
64/// * Instructions operating on memory contain a few pieces of information:
65///
66/// ```text
67/// xload16le_u32_offset32
68/// │└─┬┘└┤└┤ └┬┘ └──┬───┘
69/// │ │ │ │ │ ▼
70/// │ │ │ │ │ addressing mode
71/// │ │ │ │ ▼
72/// │ │ │ │ width of register modified + sign-extension (optional)
73/// │ │ │ ▼
74/// │ │ │ endianness of the operation (le/be)
75/// │ │ ▼
76/// │ │ bit-width of the operation
77/// │ ▼
78/// │ what's happening (load/store)
79/// ▼
80/// register being operated on (x/f/z)
81/// ```
82///
83/// More guidelines might get added here over time, and if you have any
84/// questions feel free to raise them and we can try to add them here as well!
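///
/// # Example
///
/// A consumer defines a `macro_rules!` macro that matches the entry grammar
/// shown above (doc attributes, then `snake_name = CamelName { field: Type, ... };`)
/// and passes that macro's name to `for_each_op!`. The following is a minimal,
/// hypothetical sketch (the names `count_ops` and `OPCODE_COUNT` are
/// illustrative only and not part of this crate) which simply counts how many
/// opcodes are defined:
///
/// ```ignore
/// macro_rules! count_ops {
///     ($(
///         $( #[$attr:meta] )*
///         $snake_name:ident = $camel_name:ident $( {
///             $( $field:ident : $field_ty:ty ),*
///         } )? ;
///     )*) => {
///         /// Total number of (non-extended) Pulley opcodes.
///         pub const OPCODE_COUNT: usize =
///             0 $( + { let _ = stringify!($snake_name); 1 } )*;
///     };
/// }
///
/// pulley_interpreter::for_each_op!(count_ops);
/// ```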
85#[macro_export]
86macro_rules! for_each_op {
87 ( $macro:ident ) => {
88 $macro! {
89 /// Transfer control to the address in the `lr` register.
90 ret = Ret;
91
92 /// Transfer control to the PC at the given offset and set the `lr`
93 /// register to the PC just after this instruction.
94 ///
95 /// This instruction generally assumes that the Pulley ABI is being
96 /// respected where arguments are in argument registers (starting at
97 /// x0 for integer arguments) and results are in result registers.
98 /// This instruction itself assumes that all arguments are already in
99 /// their registers. Subsequent instructions below enable moving
100 /// arguments into the correct registers as part of the same call
101 /// instruction.
102 call = Call { offset: PcRelOffset };
103 /// Like `call`, but also `x0 = arg1`
104 call1 = Call1 { arg1: XReg, offset: PcRelOffset };
105 /// Like `call`, but also `x0, x1 = arg1, arg2`
106 call2 = Call2 { arg1: XReg, arg2: XReg, offset: PcRelOffset };
107 /// Like `call`, but also `x0, x1, x2 = arg1, arg2, arg3`
108 call3 = Call3 { arg1: XReg, arg2: XReg, arg3: XReg, offset: PcRelOffset };
109 /// Like `call`, but also `x0, x1, x2, x3 = arg1, arg2, arg3, arg4`
110 call4 = Call4 { arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg, offset: PcRelOffset };
111
112 /// Transfer control to the PC in `reg` and set `lr` to the PC just
113 /// after this instruction.
114 call_indirect = CallIndirect { reg: XReg };
115
116 /// Unconditionally transfer control to the PC at the given offset.
117 jump = Jump { offset: PcRelOffset };
118
119 /// Unconditionally transfer control to the PC in the specified
120 /// register.
121 xjump = XJump { reg: XReg };
122
123 /// Conditionally transfer control to the given PC offset if
124 /// `low32(cond)` contains a non-zero value.
125 br_if32 = BrIf { cond: XReg, offset: PcRelOffset };
126
127 /// Conditionally transfer control to the given PC offset if
128 /// `low32(cond)` contains a zero value.
129 br_if_not32 = BrIfNot { cond: XReg, offset: PcRelOffset };
130
131 /// Branch if `a == b`.
132 br_if_xeq32 = BrIfXeq32 { a: XReg, b: XReg, offset: PcRelOffset };
133 /// Branch if `a != b`.
134 br_if_xneq32 = BrIfXneq32 { a: XReg, b: XReg, offset: PcRelOffset };
135 /// Branch if signed `a < b`.
136 br_if_xslt32 = BrIfXslt32 { a: XReg, b: XReg, offset: PcRelOffset };
137 /// Branch if signed `a <= b`.
138 br_if_xslteq32 = BrIfXslteq32 { a: XReg, b: XReg, offset: PcRelOffset };
139 /// Branch if unsigned `a < b`.
140 br_if_xult32 = BrIfXult32 { a: XReg, b: XReg, offset: PcRelOffset };
141 /// Branch if unsigned `a <= b`.
142 br_if_xulteq32 = BrIfXulteq32 { a: XReg, b: XReg, offset: PcRelOffset };
143 /// Branch if `a == b`.
144 br_if_xeq64 = BrIfXeq64 { a: XReg, b: XReg, offset: PcRelOffset };
145 /// Branch if `a != b`.
146 br_if_xneq64 = BrIfXneq64 { a: XReg, b: XReg, offset: PcRelOffset };
147 /// Branch if signed `a < b`.
148 br_if_xslt64 = BrIfXslt64 { a: XReg, b: XReg, offset: PcRelOffset };
149 /// Branch if signed `a <= b`.
150 br_if_xslteq64 = BrIfXslteq64 { a: XReg, b: XReg, offset: PcRelOffset };
151 /// Branch if unsigned `a < b`.
152 br_if_xult64 = BrIfXult64 { a: XReg, b: XReg, offset: PcRelOffset };
153 /// Branch if unsigned `a <= b`.
154 br_if_xulteq64 = BrIfXulteq64 { a: XReg, b: XReg, offset: PcRelOffset };
155
156 /// Branch if `a == b`.
157 br_if_xeq32_i8 = BrIfXeq32I8 { a: XReg, b: i8, offset: PcRelOffset };
158 /// Branch if `a == b`.
159 br_if_xeq32_i32 = BrIfXeq32I32 { a: XReg, b: i32, offset: PcRelOffset };
160 /// Branch if `a != b`.
161 br_if_xneq32_i8 = BrIfXneq32I8 { a: XReg, b: i8, offset: PcRelOffset };
162 /// Branch if `a != b`.
163 br_if_xneq32_i32 = BrIfXneq32I32 { a: XReg, b: i32, offset: PcRelOffset };
164 /// Branch if signed `a < b`.
165 br_if_xslt32_i8 = BrIfXslt32I8 { a: XReg, b: i8, offset: PcRelOffset };
166 /// Branch if signed `a < b`.
167 br_if_xslt32_i32 = BrIfXslt32I32 { a: XReg, b: i32, offset: PcRelOffset };
168 /// Branch if signed `a > b`.
169 br_if_xsgt32_i8 = BrIfXsgt32I8 { a: XReg, b: i8, offset: PcRelOffset };
170 /// Branch if signed `a > b`.
171 br_if_xsgt32_i32 = BrIfXsgt32I32 { a: XReg, b: i32, offset: PcRelOffset };
172 /// Branch if signed `a <= b`.
173 br_if_xslteq32_i8 = BrIfXslteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
174 /// Branch if signed `a <= b`.
175 br_if_xslteq32_i32 = BrIfXslteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
176 /// Branch if signed `a >= b`.
177 br_if_xsgteq32_i8 = BrIfXsgteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
178 /// Branch if signed `a >= b`.
179 br_if_xsgteq32_i32 = BrIfXsgteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
180 /// Branch if unsigned `a < b`.
181 br_if_xult32_u8 = BrIfXult32U8 { a: XReg, b: u8, offset: PcRelOffset };
182 /// Branch if unsigned `a < b`.
183 br_if_xult32_u32 = BrIfXult32U32 { a: XReg, b: u32, offset: PcRelOffset };
184 /// Branch if unsigned `a <= b`.
185 br_if_xulteq32_u8 = BrIfXulteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
186 /// Branch if unsigned `a <= b`.
187 br_if_xulteq32_u32 = BrIfXulteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
188 /// Branch if unsigned `a > b`.
189 br_if_xugt32_u8 = BrIfXugt32U8 { a: XReg, b: u8, offset: PcRelOffset };
190 /// Branch if unsigned `a > b`.
191 br_if_xugt32_u32 = BrIfXugt32U32 { a: XReg, b: u32, offset: PcRelOffset };
192 /// Branch if unsigned `a >= b`.
193 br_if_xugteq32_u8 = BrIfXugteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
194 /// Branch if unsigned `a >= b`.
195 br_if_xugteq32_u32 = BrIfXugteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
196
197 /// Branch if `a == b`.
198 br_if_xeq64_i8 = BrIfXeq64I8 { a: XReg, b: i8, offset: PcRelOffset };
199 /// Branch if `a == b`.
200 br_if_xeq64_i32 = BrIfXeq64I32 { a: XReg, b: i32, offset: PcRelOffset };
201 /// Branch if `a != b`.
202 br_if_xneq64_i8 = BrIfXneq64I8 { a: XReg, b: i8, offset: PcRelOffset };
203 /// Branch if `a != b`.
204 br_if_xneq64_i32 = BrIfXneq64I32 { a: XReg, b: i32, offset: PcRelOffset };
205 /// Branch if signed `a < b`.
206 br_if_xslt64_i8 = BrIfXslt64I8 { a: XReg, b: i8, offset: PcRelOffset };
207 /// Branch if signed `a < b`.
208 br_if_xslt64_i32 = BrIfXslt64I32 { a: XReg, b: i32, offset: PcRelOffset };
209 /// Branch if signed `a > b`.
210 br_if_xsgt64_i8 = BrIfXsgt64I8 { a: XReg, b: i8, offset: PcRelOffset };
211 /// Branch if signed `a > b`.
212 br_if_xsgt64_i32 = BrIfXsgt64I32 { a: XReg, b: i32, offset: PcRelOffset };
213 /// Branch if signed `a <= b`.
214 br_if_xslteq64_i8 = BrIfXslteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
215 /// Branch if signed `a <= b`.
216 br_if_xslteq64_i32 = BrIfXslteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
217 /// Branch if signed `a >= b`.
218 br_if_xsgteq64_i8 = BrIfXsgteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
219 /// Branch if signed `a >= b`.
220 br_if_xsgteq64_i32 = BrIfXsgteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
221 /// Branch if unsigned `a < b`.
222 br_if_xult64_u8 = BrIfXult64U8 { a: XReg, b: u8, offset: PcRelOffset };
223 /// Branch if unsigned `a < b`.
224 br_if_xult64_u32 = BrIfXult64U32 { a: XReg, b: u32, offset: PcRelOffset };
225 /// Branch if unsigned `a <= b`.
226 br_if_xulteq64_u8 = BrIfXulteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
227 /// Branch if unsigned `a <= b`.
228 br_if_xulteq64_u32 = BrIfXulteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
229 /// Branch if unsigned `a > b`.
230 br_if_xugt64_u8 = BrIfXugt64U8 { a: XReg, b: u8, offset: PcRelOffset };
231 /// Branch if unsigned `a > b`.
232 br_if_xugt64_u32 = BrIfXugt64U32 { a: XReg, b: u32, offset: PcRelOffset };
233 /// Branch if unsigned `a >= b`.
234 br_if_xugteq64_u8 = BrIfXugteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
235 /// Branch if unsigned `a >= b`.
236 br_if_xugteq64_u32 = BrIfXugteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
237
238 /// Branch to the label indicated by `low32(idx)`.
239 ///
240 /// After this instruction are `amt` instances of `PcRelOffset`
241 /// and `idx` selects which one will be branched to. The value
242 /// of `idx` is clamped to `amt - 1` (i.e. the last offset is the
243 /// "default" one).
244 br_table32 = BrTable32 { idx: XReg, amt: u32 };
245
246 /// Move between `x` registers.
247 xmov = Xmov { dst: XReg, src: XReg };
248
249 /// Set `dst = 0`
250 xzero = Xzero { dst: XReg };
251 /// Set `dst = 1`
252 xone = Xone { dst: XReg };
253 /// Set `dst = sign_extend(imm8)`.
254 xconst8 = Xconst8 { dst: XReg, imm: i8 };
255 /// Set `dst = sign_extend(imm16)`.
256 xconst16 = Xconst16 { dst: XReg, imm: i16 };
257 /// Set `dst = sign_extend(imm32)`.
258 xconst32 = Xconst32 { dst: XReg, imm: i32 };
259 /// Set `dst = imm64`.
260 xconst64 = Xconst64 { dst: XReg, imm: i64 };
261
262 /// 32-bit wrapping addition: `low32(dst) = low32(src1) + low32(src2)`.
263 ///
264 /// The upper 32-bits of `dst` are unmodified.
265 xadd32 = Xadd32 { operands: BinaryOperands<XReg> };
266 /// Same as `xadd32` but `src2` is a zero-extended 8-bit immediate.
267 xadd32_u8 = Xadd32U8 { dst: XReg, src1: XReg, src2: u8 };
268 /// Same as `xadd32` but `src2` is a 32-bit immediate.
269 xadd32_u32 = Xadd32U32 { dst: XReg, src1: XReg, src2: u32 };
270
271 /// 64-bit wrapping addition: `dst = src1 + src2`.
272 xadd64 = Xadd64 { operands: BinaryOperands<XReg> };
273 /// Same as `xadd64` but `src2` is a zero-extended 8-bit immediate.
274 xadd64_u8 = Xadd64U8 { dst: XReg, src1: XReg, src2: u8 };
275 /// Same as `xadd64` but `src2` is a zero-extended 32-bit immediate.
276 xadd64_u32 = Xadd64U32 { dst: XReg, src1: XReg, src2: u32 };
277
278 /// `low32(dst) = low32(src1) * low32(src2) + low32(src3)`
279 xmadd32 = Xmadd32 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
280 /// `dst = src1 * src2 + src3`
281 xmadd64 = Xmadd64 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
282
283 /// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`.
284 ///
285 /// The upper 32-bits of `dst` are unmodified.
286 xsub32 = Xsub32 { operands: BinaryOperands<XReg> };
287 /// Same as `xsub32` but `src2` is a zero-extended 8-bit immediate.
288 xsub32_u8 = Xsub32U8 { dst: XReg, src1: XReg, src2: u8 };
289 /// Same as `xsub32` but `src2` is a 32-bit immediate.
290 xsub32_u32 = Xsub32U32 { dst: XReg, src1: XReg, src2: u32 };
291
292 /// 64-bit wrapping subtraction: `dst = src1 - src2`.
293 xsub64 = Xsub64 { operands: BinaryOperands<XReg> };
294 /// Same as `xsub64` but `src2` is a zero-extended 8-bit immediate.
295 xsub64_u8 = Xsub64U8 { dst: XReg, src1: XReg, src2: u8 };
296 /// Same as `xsub64` but `src2` is a zero-extended 32-bit immediate.
297 xsub64_u32 = Xsub64U32 { dst: XReg, src1: XReg, src2: u32 };
298
299 /// `low32(dst) = low32(src1) * low32(src2)`
300 xmul32 = XMul32 { operands: BinaryOperands<XReg> };
301 /// Same as `xmul32` but `src2` is a sign-extended 8-bit immediate.
302 xmul32_s8 = Xmul32S8 { dst: XReg, src1: XReg, src2: i8 };
303 /// Same as `xmul32` but `src2` is a sign-extended 32-bit immediate.
304 xmul32_s32 = Xmul32S32 { dst: XReg, src1: XReg, src2: i32 };
305
306 /// `dst = src1 * src2`
307 xmul64 = XMul64 { operands: BinaryOperands<XReg> };
308 /// Same as `xmul64` but `src2` is a sign-extended 8-bit immediate.
309 xmul64_s8 = Xmul64S8 { dst: XReg, src1: XReg, src2: i8 };
310 /// Same as `xmul64` but `src2` is a sign-extended 32-bit immediate.
311 xmul64_s32 = Xmul64S32 { dst: XReg, src1: XReg, src2: i32 };
312
313 /// `low32(dst) = trailing_zeros(low32(src))`
314 xctz32 = Xctz32 { dst: XReg, src: XReg };
315 /// `dst = trailing_zeros(src)`
316 xctz64 = Xctz64 { dst: XReg, src: XReg };
317
318 /// `low32(dst) = leading_zeros(low32(src))`
319 xclz32 = Xclz32 { dst: XReg, src: XReg };
320 /// `dst = leading_zeros(src)`
321 xclz64 = Xclz64 { dst: XReg, src: XReg };
322
323 /// `low32(dst) = count_ones(low32(src))`
324 xpopcnt32 = Xpopcnt32 { dst: XReg, src: XReg };
325 /// `dst = count_ones(src)`
326 xpopcnt64 = Xpopcnt64 { dst: XReg, src: XReg };
327
328 /// `low32(dst) = rotate_left(low32(src1), low32(src2))`
329 xrotl32 = Xrotl32 { operands: BinaryOperands<XReg> };
330 /// `dst = rotate_left(src1, src2)`
331 xrotl64 = Xrotl64 { operands: BinaryOperands<XReg> };
332
333 /// `low32(dst) = rotate_right(low32(src1), low32(src2))`
334 xrotr32 = Xrotr32 { operands: BinaryOperands<XReg> };
335 /// `dst = rotate_right(src1, src2)`
336 xrotr64 = Xrotr64 { operands: BinaryOperands<XReg> };
337
338 /// `low32(dst) = low32(src1) << low5(src2)`
339 xshl32 = Xshl32 { operands: BinaryOperands<XReg> };
340 /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
341 xshr32_s = Xshr32S { operands: BinaryOperands<XReg> };
342 /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
343 xshr32_u = Xshr32U { operands: BinaryOperands<XReg> };
344 /// `dst = src1 << low6(src2)`
345 xshl64 = Xshl64 { operands: BinaryOperands<XReg> };
346 /// `dst = src1 >> low6(src2)` (signed)
347 xshr64_s = Xshr64S { operands: BinaryOperands<XReg> };
348 /// `dst = src1 >> low6(src2)` (unsigned)
349 xshr64_u = Xshr64U { operands: BinaryOperands<XReg> };
350
351 /// `low32(dst) = low32(src1) << low5(src2)`
352 xshl32_u6 = Xshl32U6 { operands: BinaryOperands<XReg, XReg, U6> };
353 /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
354 xshr32_s_u6 = Xshr32SU6 { operands: BinaryOperands<XReg, XReg, U6> };
355 /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
356 xshr32_u_u6 = Xshr32UU6 { operands: BinaryOperands<XReg, XReg, U6> };
357 /// `dst = src1 << low6(src2)`
358 xshl64_u6 = Xshl64U6 { operands: BinaryOperands<XReg, XReg, U6> };
359 /// `dst = src1 >> low6(src2)` (signed)
360 xshr64_s_u6 = Xshr64SU6 { operands: BinaryOperands<XReg, XReg, U6> };
361 /// `dst = src1 >> low6(src2)` (unsigned)
362 xshr64_u_u6 = Xshr64UU6 { operands: BinaryOperands<XReg, XReg, U6> };
363
364 /// `low32(dst) = -low32(src)`
365 xneg32 = Xneg32 { dst: XReg, src: XReg };
366 /// `dst = -src`
367 xneg64 = Xneg64 { dst: XReg, src: XReg };
368
369 /// `low32(dst) = src1 == src2`
370 xeq64 = Xeq64 { operands: BinaryOperands<XReg> };
371 /// `low32(dst) = src1 != src2`
372 xneq64 = Xneq64 { operands: BinaryOperands<XReg> };
373 /// `low32(dst) = src1 < src2` (signed)
374 xslt64 = Xslt64 { operands: BinaryOperands<XReg> };
375 /// `low32(dst) = src1 <= src2` (signed)
376 xslteq64 = Xslteq64 { operands: BinaryOperands<XReg> };
377 /// `low32(dst) = src1 < src2` (unsigned)
378 xult64 = Xult64 { operands: BinaryOperands<XReg> };
379 /// `low32(dst) = src1 <= src2` (unsigned)
380 xulteq64 = Xulteq64 { operands: BinaryOperands<XReg> };
381 /// `low32(dst) = low32(src1) == low32(src2)`
382 xeq32 = Xeq32 { operands: BinaryOperands<XReg> };
383 /// `low32(dst) = low32(src1) != low32(src2)`
384 xneq32 = Xneq32 { operands: BinaryOperands<XReg> };
385 /// `low32(dst) = low32(src1) < low32(src2)` (signed)
386 xslt32 = Xslt32 { operands: BinaryOperands<XReg> };
387 /// `low32(dst) = low32(src1) <= low32(src2)` (signed)
388 xslteq32 = Xslteq32 { operands: BinaryOperands<XReg> };
389 /// `low32(dst) = low32(src1) < low32(src2)` (unsigned)
390 xult32 = Xult32 { operands: BinaryOperands<XReg> };
391 /// `low32(dst) = low32(src1) <= low32(src2)` (unsigned)
392 xulteq32 = Xulteq32 { operands: BinaryOperands<XReg> };
393
394 /// `low32(dst) = zext(*(ptr + offset))`
395 xload8_u32_offset32 = XLoad8U32Offset32 { dst: XReg, ptr: XReg, offset: i32 };
396 /// `low32(dst) = sext(*(ptr + offset))`
397 xload8_s32_offset32 = XLoad8S32Offset32 { dst: XReg, ptr: XReg, offset: i32 };
398 /// `low32(dst) = zext(*(ptr + offset))`
399 xload16le_u32_offset32 = XLoad16LeU32Offset32 { dst: XReg, ptr: XReg, offset: i32 };
400 /// `low32(dst) = sext(*(ptr + offset))`
401 xload16le_s32_offset32 = XLoad16LeS32Offset32 { dst: XReg, ptr: XReg, offset: i32 };
402 /// `low32(dst) = *(ptr + offset)`
403 xload32le_offset32 = XLoad32LeOffset32 { dst: XReg, ptr: XReg, offset: i32 };
404
405 /// `dst = zext(*(ptr + offset))`
406 xload8_u64_offset32 = XLoad8U64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
407 /// `dst = sext(*(ptr + offset))`
408 xload8_s64_offset32 = XLoad8S64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
409 /// `dst = zext(*(ptr + offset))`
410 xload16le_u64_offset32 = XLoad16LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
411 /// `dst = sext(*(ptr + offset))`
412 xload16le_s64_offset32 = XLoad16LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
413 /// `dst = zext(*(ptr + offset))`
414 xload32le_u64_offset32 = XLoad32LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
415 /// `dst = sext(*(ptr + offset))`
416 xload32le_s64_offset32 = XLoad32LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
417 /// `dst = *(ptr + offset)`
418 xload64le_offset32 = XLoad64LeOffset32 { dst: XReg, ptr: XReg, offset: i32 };
419
420 /// `*(ptr + offset) = low8(src)`
421 xstore8_offset32 = XStore8Offset32 { ptr: XReg, offset: i32, src: XReg };
422 /// `*(ptr + offset) = low16(src)`
423 xstore16le_offset32 = XStore16LeOffset32 { ptr: XReg, offset: i32, src: XReg };
424 /// `*(ptr + offset) = low32(src)`
425 xstore32le_offset32 = XStore32LeOffset32 { ptr: XReg, offset: i32, src: XReg };
426 /// `*(ptr + offset) = low64(src)`
427 xstore64le_offset32 = XStore64LeOffset32 { ptr: XReg, offset: i32, src: XReg };
428
429 /// `low32(dst) = zext(*(ptr + offset))`
430 xload8_u32_offset8 = XLoad8U32Offset8 { dst: XReg, ptr: XReg, offset: u8 };
431 /// `low32(dst) = sext(*(ptr + offset))`
432 xload8_s32_offset8 = XLoad8S32Offset8 { dst: XReg, ptr: XReg, offset: u8 };
433 /// `low32(dst) = zext(*(ptr + offset))`
434 xload16le_u32_offset8 = XLoad16LeU32Offset8 { dst: XReg, ptr: XReg, offset: u8 };
435 /// `low32(dst) = sext(*(ptr + offset))`
436 xload16le_s32_offset8 = XLoad16LeS32Offset8 { dst: XReg, ptr: XReg, offset: u8 };
437 /// `low32(dst) = *(ptr + offset)`
438 xload32le_offset8 = XLoad32LeOffset8 { dst: XReg, ptr: XReg, offset: u8 };
439
440 /// `dst = zext(*(ptr + offset))`
441 xload8_u64_offset8 = XLoad8U64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
442 /// `dst = sext(*(ptr + offset))`
443 xload8_s64_offset8 = XLoad8S64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
444 /// `dst = zext(*(ptr + offset))`
445 xload16le_u64_offset8 = XLoad16LeU64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
446 /// `dst = sext(*(ptr + offset))`
447 xload16le_s64_offset8 = XLoad16LeS64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
448 /// `dst = zext(*(ptr + offset))`
449 xload32le_u64_offset8 = XLoad32LeU64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
450 /// `dst = sext(*(ptr + offset))`
451 xload32le_s64_offset8 = XLoad32LeS64Offset8 { dst: XReg, ptr: XReg, offset: u8 };
452 /// `dst = *(ptr + offset)`
453 xload64le_offset8 = XLoad64LeOffset8 { dst: XReg, ptr: XReg, offset: u8 };
454
455 /// `*(ptr + offset) = low8(src)`
456 xstore8_offset8 = XStore8Offset8 { ptr: XReg, offset: u8, src: XReg };
457 /// `*(ptr + offset) = low16(src)`
458 xstore16le_offset8 = XStore16LeOffset8 { ptr: XReg, offset: u8, src: XReg };
459 /// `*(ptr + offset) = low32(src)`
460 xstore32le_offset8 = XStore32LeOffset8 { ptr: XReg, offset: u8, src: XReg };
461 /// `*(ptr + offset) = low64(src)`
462 xstore64le_offset8 = XStore64LeOffset8 { ptr: XReg, offset: u8, src: XReg };
463
464 // wasm addressing modes
465 //
466 // g32 = 32-bit guest, arithmetic is zero-extending the `addr`
467 // to the host pointer width.
468
469 /// `low32(dst) = zext_8_32(*(base + zext(addr) + offset))`
470 xload8_u32_g32 = XLoad8U32G32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
471 /// `low32(dst) = sext_8_32(*(base + zext(addr) + offset))`
472 xload8_s32_g32 = XLoad8S32G32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
473 /// `low32(dst) = zext_16_32(*(base + zext(addr) + offset))`
474 xload16le_u32_g32 = XLoad16LeU32G32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
475 /// `low32(dst) = sext_16_32(*(base + zext(addr) + offset))`
476 xload16le_s32_g32 = XLoad16LeS32G32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
477 /// `low32(dst) = *(base + zext(addr) + offset)`
478 xload32le_g32 = XLoad32LeG32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
479 /// `dst = *(base + zext(addr) + offset)`
480 xload64le_g32 = XLoad64LeG32 { dst: XReg, base: XReg, addr: XReg, offset: u8 };
481
482 /// `*(base + zext(addr) + offset) = low8(src)`
483 xstore8_g32 = XStore8G32 { base: XReg, addr: XReg, offset: u8, src: XReg };
484 /// `*(base + zext(addr) + offset) = low16(src)`
485 xstore16le_g32 = XStore16LeG32 { base: XReg, addr: XReg, offset: u8, src: XReg };
486 /// `*(base + zext(addr) + offset) = low32(src)`
487 xstore32le_g32 = XStore32LeG32 { base: XReg, addr: XReg, offset: u8, src: XReg };
488 /// `*(base + zext(addr) + offset) = src`
489 xstore64le_g32 = XStore64LeG32 { base: XReg, addr: XReg, offset: u8, src: XReg };
490
491 /// `push lr; push fp; fp = sp`
492 push_frame = PushFrame ;
493 /// `sp = fp; pop fp; pop lr`
494 pop_frame = PopFrame ;
495
496 /// Macro-instruction to enter a function, allocate some stack, and
497 /// then save some registers.
498 ///
499 /// This is equivalent to `push_frame`, `stack_alloc32 amt`, then
500 /// saving all of `regs` to the top of the stack just allocated.
501 push_frame_save = PushFrameSave { amt: u16, regs: UpperRegSet<XReg> };
502 /// Inverse of `push_frame_save`. Restores `regs` from the top of
503 /// the stack, then runs `stack_free32 amt`, then runs `pop_frame`.
504 pop_frame_restore = PopFrameRestore { amt: u16, regs: UpperRegSet<XReg> };
505
506 /// `sp = sp.checked_sub(amt)`
507 stack_alloc32 = StackAlloc32 { amt: u32 };
508
509 /// `sp = sp + amt`
510 stack_free32 = StackFree32 { amt: u32 };
511
512 /// `dst = zext(low8(src))`
513 zext8 = Zext8 { dst: XReg, src: XReg };
514 /// `dst = zext(low16(src))`
515 zext16 = Zext16 { dst: XReg, src: XReg };
516 /// `dst = zext(low32(src))`
517 zext32 = Zext32 { dst: XReg, src: XReg };
518 /// `dst = sext(low8(src))`
519 sext8 = Sext8 { dst: XReg, src: XReg };
520 /// `dst = sext(low16(src))`
521 sext16 = Sext16 { dst: XReg, src: XReg };
522 /// `dst = sext(low32(src))`
523 sext32 = Sext32 { dst: XReg, src: XReg };
524
525 /// `low32(dst) = |low32(src)|`
526 xabs32 = XAbs32 { dst: XReg, src: XReg };
527 /// `dst = |src|`
528 xabs64 = XAbs64 { dst: XReg, src: XReg };
529
530 /// `low32(dst) = low32(src1) / low32(src2)` (signed)
531 xdiv32_s = XDiv32S { operands: BinaryOperands<XReg> };
532
533 /// `dst = src1 / src2` (signed)
534 xdiv64_s = XDiv64S { operands: BinaryOperands<XReg> };
535
536 /// `low32(dst) = low32(src1) / low32(src2)` (unsigned)
537 xdiv32_u = XDiv32U { operands: BinaryOperands<XReg> };
538
539 /// `dst = src1 / src2` (unsigned)
540 xdiv64_u = XDiv64U { operands: BinaryOperands<XReg> };
541
542 /// `low32(dst) = low32(src1) % low32(src2)` (signed)
543 xrem32_s = XRem32S { operands: BinaryOperands<XReg> };
544
545 /// `dst = src1 % src2` (signed)
546 xrem64_s = XRem64S { operands: BinaryOperands<XReg> };
547
548 /// `low32(dst) = low32(src1) % low32(src2)` (unsigned)
549 xrem32_u = XRem32U { operands: BinaryOperands<XReg> };
550
551 /// `dst = src1 % src2` (unsigned)
552 xrem64_u = XRem64U { operands: BinaryOperands<XReg> };
553
554 /// `low32(dst) = low32(src1) & low32(src2)`
555 xband32 = XBand32 { operands: BinaryOperands<XReg> };
556 /// Same as `xband32` but `src2` is a sign-extended 8-bit immediate.
557 xband32_s8 = Xband32S8 { dst: XReg, src1: XReg, src2: i8 };
558 /// Same as `xband32` but `src2` is a sign-extended 32-bit immediate.
559 xband32_s32 = Xband32S32 { dst: XReg, src1: XReg, src2: i32 };
560 /// `dst = src1 & src2`
561 xband64 = XBand64 { operands: BinaryOperands<XReg> };
562 /// Same as `xband64` but `src2` is a sign-extended 8-bit immediate.
563 xband64_s8 = Xband64S8 { dst: XReg, src1: XReg, src2: i8 };
564 /// Same as `xband64` but `src2` is a sign-extended 32-bit immediate.
565 xband64_s32 = Xband64S32 { dst: XReg, src1: XReg, src2: i32 };
566 /// `low32(dst) = low32(src1) | low32(src2)`
567 xbor32 = XBor32 { operands: BinaryOperands<XReg> };
568 /// Same as `xbor32` but `src2` is a sign-extended 8-bit immediate.
569 xbor32_s8 = Xbor32S8 { dst: XReg, src1: XReg, src2: i8 };
570 /// Same as `xbor32` but `src2` is a sign-extended 32-bit immediate.
571 xbor32_s32 = Xbor32S32 { dst: XReg, src1: XReg, src2: i32 };
572 /// `dst = src1 | src2`
573 xbor64 = XBor64 { operands: BinaryOperands<XReg> };
574 /// Same as `xbor64` but `src2` is a sign-extended 8-bit immediate.
575 xbor64_s8 = Xbor64S8 { dst: XReg, src1: XReg, src2: i8 };
576 /// Same as `xbor64` but `src2` is a sign-extended 32-bit immediate.
577 xbor64_s32 = Xbor64S32 { dst: XReg, src1: XReg, src2: i32 };
578
579 /// `low32(dst) = low32(src1) ^ low32(src2)`
580 xbxor32 = XBxor32 { operands: BinaryOperands<XReg> };
581 /// Same as `xbxor32` but `src2` is a sign-extended 8-bit immediate.
582 xbxor32_s8 = Xbxor32S8 { dst: XReg, src1: XReg, src2: i8 };
583 /// Same as `xbxor32` but `src2` is a sign-extended 32-bit immediate.
584 xbxor32_s32 = Xbxor32S32 { dst: XReg, src1: XReg, src2: i32 };
585 /// `dst = src1 ^ src2`
586 xbxor64 = XBxor64 { operands: BinaryOperands<XReg> };
587 /// Same as `xbxor64` but `src2` is a sign-extended 8-bit immediate.
588 xbxor64_s8 = Xbxor64S8 { dst: XReg, src1: XReg, src2: i8 };
589 /// Same as `xbxor64` but `src2` is a sign-extended 32-bit immediate.
590 xbxor64_s32 = Xbxor64S32 { dst: XReg, src1: XReg, src2: i32 };
591
592 /// `low32(dst) = !low32(src)`
593 xbnot32 = XBnot32 { dst: XReg, src: XReg };
594 /// `dst = !src`
595 xbnot64 = XBnot64 { dst: XReg, src: XReg };
596
597 /// `low32(dst) = min(low32(src1), low32(src2))` (unsigned)
598 xmin32_u = Xmin32U { operands: BinaryOperands<XReg> };
599 /// `low32(dst) = min(low32(src1), low32(src2))` (signed)
600 xmin32_s = Xmin32S { operands: BinaryOperands<XReg> };
601 /// `low32(dst) = max(low32(src1), low32(src2))` (unsigned)
602 xmax32_u = Xmax32U { operands: BinaryOperands<XReg> };
603 /// `low32(dst) = max(low32(src1), low32(src2))` (signed)
604 xmax32_s = Xmax32S { operands: BinaryOperands<XReg> };
605 /// `dst = min(src1, src2)` (unsigned)
606 xmin64_u = Xmin64U { operands: BinaryOperands<XReg> };
607 /// `dst = min(src1, src2)` (signed)
608 xmin64_s = Xmin64S { operands: BinaryOperands<XReg> };
609 /// `dst = max(src1, src2)` (unsigned)
610 xmax64_u = Xmax64U { operands: BinaryOperands<XReg> };
611 /// `dst = max(src1, src2)` (signed)
612 xmax64_s = Xmax64S { operands: BinaryOperands<XReg> };
613
614 /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
615 xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
616 /// `dst = low32(cond) ? if_nonzero : if_zero`
617 xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
618
619 /// `trapif(addr > bound - size)` (unsigned)
620 xbc32_bound_trap = XBc32BoundTrap {
621 addr: XReg,
622 bound: XReg,
623 size: u8
624 };
625 /// `trapif(addr > *(bound_ptr + bound_off) - size)` (unsigned)
626 ///
627 /// Note that the load from `bound_ptr + bound_off` reads a
628 /// host-native-endian pointer-sized value.
629 xbc32_boundne_trap = XBc32BoundNeTrap {
630 addr: XReg,
631 bound_ptr: XReg,
632 bound_off: u8,
633 size: u8
634 };
635 /// `trapif(addr >= bound_ptr)` (unsigned)
636 xbc32_strict_bound_trap = XBc32StrictBoundTrap {
637 addr: XReg,
638 bound: XReg
639 };
640 /// `trapif(addr >= *(bound_ptr + bound_off))` (unsigned)
641 xbc32_strict_boundne_trap = XBc32StrictBoundNeTrap {
642 addr: XReg,
643 bound_ptr: XReg,
644 bound_off: u8
645 };
646 }
647 };
648}
649
650/// Calls the given macro with each extended opcode.
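///
/// These entries use the same `snake_name = CamelName { ... };` grammar as
/// `for_each_op!`, so a consumer macro written against that grammar (for
/// example the hypothetical `count_ops` sketch in `for_each_op!`'s
/// documentation) can also be invoked here, e.g.
/// `pulley_interpreter::for_each_extended_op!(count_ops)`.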
651#[macro_export]
652macro_rules! for_each_extended_op {
653 ( $macro:ident ) => {
654 $macro! {
655 /// Raise a trap.
656 trap = Trap;
657
658 /// Do nothing.
659 nop = Nop;
660
661 /// A special opcode to halt interpreter execution and yield control
662 /// back to the host.
663 ///
664 /// This opcode results in `DoneReason::CallIndirectHost` where the
665 /// `id` here is shepherded along to the embedder. It's up to the
666 /// embedder to determine what to do with the `id` and the current
667 /// state of registers and the stack.
668 ///
669 /// In Wasmtime this is used to implement interpreter-to-host calls.
670 /// This is modeled as a `call` instruction where the first
671 /// parameter is the native function pointer to invoke and all
672 /// remaining parameters for the native function are in following
673 /// parameter positions (e.g. `x1`, `x2`, ...). The results of the
674 /// host call are then store in `x0`.
675 ///
676 /// Handling this in Wasmtime is done through a "relocation" which
677 /// is resolved at link-time when raw bytecode from Cranelift is
678 /// assembled into the final object that Wasmtime will interpret.
679 call_indirect_host = CallIndirectHost { id: u8 };
680
681 /// Gets the special "fp" register and moves it into `dst`.
682 xmov_fp = XmovFp { dst: XReg };
683
684 /// Gets the special "lr" register and moves it into `dst`.
685 xmov_lr = XmovLr { dst: XReg };
686
687 /// `dst = byteswap(low32(src))`
688 bswap32 = Bswap32 { dst: XReg, src: XReg };
689 /// `dst = byteswap(src)`
690 bswap64 = Bswap64 { dst: XReg, src: XReg };
691
692 /// 32-bit checked unsigned addition: `low32(dst) = low32(src1) +
693 /// low32(src2)`.
694 ///
695 /// The upper 32-bits of `dst` are unmodified. Traps if the addition
696 /// overflows.
697 xadd32_uoverflow_trap = Xadd32UoverflowTrap { operands: BinaryOperands<XReg> };
698
699 /// 64-bit checked unsigned addition: `dst = src1 + src2`.
700 xadd64_uoverflow_trap = Xadd64UoverflowTrap { operands: BinaryOperands<XReg> };
701
702 /// `dst = high64(src1 * src2)` (signed)
703 xmulhi64_s = XMulHi64S { operands: BinaryOperands<XReg> };
704 /// `dst = high64(src1 * src2)` (unsigned)
705 xmulhi64_u = XMulHi64U { operands: BinaryOperands<XReg> };
706
707 /// `low32(dst) = if low32(src) == 0 { 0 } else { -1 }`
708 xbmask32 = Xbmask32 { dst: XReg, src: XReg };
709 /// `dst = if src == 0 { 0 } else { -1 }`
710 xbmask64 = Xbmask64 { dst: XReg, src: XReg };
711
712 /// `dst = zext(*(ptr + offset))`
713 xload16be_u64_offset32 = XLoad16BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
714 /// `dst = sext(*(ptr + offset))`
715 xload16be_s64_offset32 = XLoad16BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
716 /// `dst = zext(*(ptr + offset))`
717 xload32be_u64_offset32 = XLoad32BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
718 /// `dst = sext(*(ptr + offset))`
719 xload32be_s64_offset32 = XLoad32BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 };
720 /// `dst = *(ptr + offset)`
721 xload64be_offset32 = XLoad64BeOffset32 { dst: XReg, ptr: XReg, offset: i32 };
722
723 /// `*(ptr + offset) = low16(src)`
724 xstore16be_offset32 = XStore16BeOffset32 { ptr: XReg, offset: i32, src: XReg };
725 /// `*(ptr + offset) = low32(src)`
726 xstore32be_offset32 = XStore32BeOffset32 { ptr: XReg, offset: i32, src: XReg };
727 /// `*(ptr + offset) = low64(src)`
728 xstore64be_offset32 = XStore64BeOffset32 { ptr: XReg, offset: i32, src: XReg };
729
730 /// `low32(dst) = zext(*(ptr + offset))`
731 fload32be_offset32 = Fload32BeOffset32 { dst: FReg, ptr: XReg, offset: i32 };
732 /// `dst = *(ptr + offset)`
733 fload64be_offset32 = Fload64BeOffset32 { dst: FReg, ptr: XReg, offset: i32 };
734 /// `*(ptr + offset) = low32(src)`
735 fstore32be_offset32 = Fstore32BeOffset32 { ptr: XReg, offset: i32, src: FReg };
736 /// `*(ptr + offset) = src`
737 fstore64be_offset32 = Fstore64BeOffset32 { ptr: XReg, offset: i32, src: FReg };
738
739 /// `low32(dst) = zext(*(ptr + offset))`
740 fload32le_offset32 = Fload32LeOffset32 { dst: FReg, ptr: XReg, offset: i32 };
741 /// `dst = *(ptr + offset)`
742 fload64le_offset32 = Fload64LeOffset32 { dst: FReg, ptr: XReg, offset: i32 };
743 /// `*(ptr + offset) = low32(src)`
744 fstore32le_offset32 = Fstore32LeOffset32 { ptr: XReg, offset: i32, src: FReg };
745 /// `*(ptr + offset) = src`
746 fstore64le_offset32 = Fstore64LeOffset32 { ptr: XReg, offset: i32, src: FReg };
747
748 /// `dst = *(ptr + offset)`
749 vload128le_offset32 = VLoad128Offset32 { dst: VReg, ptr: XReg, offset: i32 };
750 /// `*(ptr + offset) = src`
751 vstore128le_offset32 = Vstore128LeOffset32 { ptr: XReg, offset: i32, src: VReg };
752
753 /// Move between `f` registers.
754 fmov = Fmov { dst: FReg, src: FReg };
755 /// Move between `v` registers.
756 vmov = Vmov { dst: VReg, src: VReg };
757
758 /// `low32(dst) = bitcast low32(src) as i32`
759 bitcast_int_from_float_32 = BitcastIntFromFloat32 { dst: XReg, src: FReg };
760 /// `dst = bitcast src as i64`
761 bitcast_int_from_float_64 = BitcastIntFromFloat64 { dst: XReg, src: FReg };
762 /// `low32(dst) = bitcast low32(src) as f32`
763 bitcast_float_from_int_32 = BitcastFloatFromInt32 { dst: FReg, src: XReg };
764 /// `dst = bitcast src as f64`
765 bitcast_float_from_int_64 = BitcastFloatFromInt64 { dst: FReg, src: XReg };
766
767 /// `low32(dst) = bits`
768 fconst32 = FConst32 { dst: FReg, bits: u32 };
769 /// `dst = bits`
770 fconst64 = FConst64 { dst: FReg, bits: u64 };
771
772 /// `low32(dst) = zext(src1 == src2)`
773 feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg };
774 /// `low32(dst) = zext(src1 != src2)`
775 fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg };
776 /// `low32(dst) = zext(src1 < src2)`
777 flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg };
778 /// `low32(dst) = zext(src1 <= src2)`
779 flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg };
780 /// `low32(dst) = zext(src1 == src2)`
781 feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg };
782 /// `low32(dst) = zext(src1 != src2)`
783 fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg };
784 /// `low32(dst) = zext(src1 < src2)`
785 flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg };
786 /// `low32(dst) = zext(src1 <= src2)`
787 flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg };
788
789 /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
790 fselect32 = FSelect32 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
791 /// `dst = low32(cond) ? if_nonzero : if_zero`
792 fselect64 = FSelect64 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
793
794 /// `low32(dst) = demote(src)`
795 f32_from_f64 = F32FromF64 { dst: FReg, src: FReg };
796 /// `dst = promote(low32(src))`
797 f64_from_f32 = F64FromF32 { dst: FReg, src: FReg };
798
799 /// `low32(dst) = checked_f32_from_signed(low32(src))`
800 f32_from_x32_s = F32FromX32S { dst: FReg, src: XReg };
801 /// `low32(dst) = checked_f32_from_unsigned(low32(src))`
802 f32_from_x32_u = F32FromX32U { dst: FReg, src: XReg };
803 /// `low32(dst) = checked_f32_from_signed(src)`
804 f32_from_x64_s = F32FromX64S { dst: FReg, src: XReg };
805 /// `low32(dst) = checked_f32_from_unsigned(src)`
806 f32_from_x64_u = F32FromX64U { dst: FReg, src: XReg };
807 /// `dst = checked_f64_from_signed(low32(src))`
808 f64_from_x32_s = F64FromX32S { dst: FReg, src: XReg };
809 /// `dst = checked_f64_from_unsigned(low32(src))`
810 f64_from_x32_u = F64FromX32U { dst: FReg, src: XReg };
811 /// `dst = checked_f64_from_signed(src)`
812 f64_from_x64_s = F64FromX64S { dst: FReg, src: XReg };
813 /// `dst = checked_f64_from_unsigned(src)`
814 f64_from_x64_u = F64FromX64U { dst: FReg, src: XReg };
815
816 /// `low32(dst) = checked_signed_from_f32(low32(src))`
817 x32_from_f32_s = X32FromF32S { dst: XReg, src: FReg };
818 /// `low32(dst) = checked_unsigned_from_f32(low32(src))`
819 x32_from_f32_u = X32FromF32U { dst: XReg, src: FReg };
820 /// `low32(dst) = checked_signed_from_f64(src)`
821 x32_from_f64_s = X32FromF64S { dst: XReg, src: FReg };
822 /// `low32(dst) = checked_unsigned_from_f64(src)`
823 x32_from_f64_u = X32FromF64U { dst: XReg, src: FReg };
824 /// `dst = checked_signed_from_f32(low32(src))`
825 x64_from_f32_s = X64FromF32S { dst: XReg, src: FReg };
826 /// `dst = checked_unsigned_from_f32(low32(src))`
827 x64_from_f32_u = X64FromF32U { dst: XReg, src: FReg };
828 /// `dst = checked_signed_from_f64(src)`
829 x64_from_f64_s = X64FromF64S { dst: XReg, src: FReg };
830 /// `dst = checked_unsigned_from_f64(src)`
831 x64_from_f64_u = X64FromF64U { dst: XReg, src: FReg };
832
833 /// `low32(dst) = saturating_signed_from_f32(low32(src))`
834 x32_from_f32_s_sat = X32FromF32SSat { dst: XReg, src: FReg };
835 /// `low32(dst) = saturating_unsigned_from_f32(low32(src))`
836 x32_from_f32_u_sat = X32FromF32USat { dst: XReg, src: FReg };
837 /// `low32(dst) = saturating_signed_from_f64(src)`
838 x32_from_f64_s_sat = X32FromF64SSat { dst: XReg, src: FReg };
839 /// `low32(dst) = saturating_unsigned_from_f64(src)`
840 x32_from_f64_u_sat = X32FromF64USat { dst: XReg, src: FReg };
841 /// `dst = saturating_signed_from_f32(low32(src))`
842 x64_from_f32_s_sat = X64FromF32SSat { dst: XReg, src: FReg };
843 /// `dst = saturating_unsigned_from_f32(low32(src))`
844 x64_from_f32_u_sat = X64FromF32USat { dst: XReg, src: FReg };
845 /// `dst = saturating_signed_from_f64(src)`
846 x64_from_f64_s_sat = X64FromF64SSat { dst: XReg, src: FReg };
847 /// `dst = saturating_unsigned_from_f64(src)`
848 x64_from_f64_u_sat = X64FromF64USat { dst: XReg, src: FReg };
849
850 /// `low32(dst) = copysign(low32(src1), low32(src2))`
851 fcopysign32 = FCopySign32 { operands: BinaryOperands<FReg> };
852 /// `dst = copysign(src1, src2)`
853 fcopysign64 = FCopySign64 { operands: BinaryOperands<FReg> };
854
855 /// `low32(dst) = low32(src1) + low32(src2)`
856 fadd32 = Fadd32 { operands: BinaryOperands<FReg> };
857 /// `low32(dst) = low32(src1) - low32(src2)`
858 fsub32 = Fsub32 { operands: BinaryOperands<FReg> };
859 /// `low128(dst) = low128(src1) - low128(src2)`
860 vsubf32x4 = Vsubf32x4 { operands: BinaryOperands<VReg> };
861 /// `low32(dst) = low32(src1) * low32(src2)`
862 fmul32 = Fmul32 { operands: BinaryOperands<FReg> };
863 /// `low128(dst) = low128(src1) * low128(src2)`
864 vmulf32x4 = Vmulf32x4 { operands: BinaryOperands<VReg> };
865 /// `low32(dst) = low32(src1) / low32(src2)`
866 fdiv32 = Fdiv32 { operands: BinaryOperands<FReg> };
867 /// `low128(dst) = low128(src1) / low128(src2)`
868 vdivf32x4 = Vdivf32x4 { operands: BinaryOperands<VReg> };
869 /// `low32(dst) = ieee_maximum(low32(src1), low32(src2))`
870 fmaximum32 = Fmaximum32 { operands: BinaryOperands<FReg> };
871 /// `low32(dst) = ieee_minimum(low32(src1), low32(src2))`
872 fminimum32 = Fminimum32 { operands: BinaryOperands<FReg> };
873 /// `low32(dst) = ieee_trunc(low32(src))`
874 ftrunc32 = Ftrunc32 { dst: FReg, src: FReg };
875 /// `low128(dst) = ieee_trunc(low128(src))`
876 vtrunc32x4 = Vtrunc32x4 { dst: VReg, src: VReg };
877 /// `low128(dst) = ieee_trunc(low128(src))`
878 vtrunc64x2 = Vtrunc64x2 { dst: VReg, src: VReg };
879 /// `low32(dst) = ieee_floor(low32(src))`
880 ffloor32 = Ffloor32 { dst: FReg, src: FReg };
881 /// `low128(dst) = ieee_floor(low128(src))`
882 vfloor32x4 = Vfloor32x4 { dst: VReg, src: VReg };
883 /// `low128(dst) = ieee_floor(low128(src))`
884 vfloor64x2 = Vfloor64x2 { dst: VReg, src: VReg };
885 /// `low32(dst) = ieee_ceil(low32(src))`
886 fceil32 = Fceil32 { dst: FReg, src: FReg };
887 /// `low128(dst) = ieee_ceil(low128(src))`
888 vceil32x4 = Vceil32x4 { dst: VReg, src: VReg };
889 /// `low128(dst) = ieee_ceil(low128(src))`
890 vceil64x2 = Vceil64x2 { dst: VReg, src: VReg };
891 /// `low32(dst) = ieee_nearest(low32(src))`
892 fnearest32 = Fnearest32 { dst: FReg, src: FReg };
893 /// `low32(dst) = ieee_sqrt(low32(src))`
894 fsqrt32 = Fsqrt32 { dst: FReg, src: FReg };
895 /// `low128(dst) = ieee_sqrt(low128(src))`
896 vsqrt32x4 = Vsqrt32x4 { dst: VReg, src: VReg };
897 /// `low128(dst) = ieee_sqrt(low128(src))`
898 vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
899 /// `low32(dst) = -low32(src)`
900 fneg32 = Fneg32 { dst: FReg, src: FReg };
901 /// `low128(dst) = -low128(src)`
902 vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
903 /// `low32(dst) = |low32(src)|`
904 fabs32 = Fabs32 { dst: FReg, src: FReg };
905
906 /// `dst = src1 + src2`
907 fadd64 = Fadd64 { operands: BinaryOperands<FReg> };
908 /// `dst = src1 - src2`
909 fsub64 = Fsub64 { operands: BinaryOperands<FReg> };
910 /// `dst = src1 * src2`
911 fmul64 = Fmul64 { operands: BinaryOperands<FReg> };
912 /// `dst = src1 / src2`
913 fdiv64 = Fdiv64 { operands: BinaryOperands<FReg> };
914 /// `dst = src1 / src2`
915 vdivf64x2 = VDivF64x2 { operands: BinaryOperands<VReg> };
916 /// `dst = ieee_maximum(src1, src2)`
917 fmaximum64 = Fmaximum64 { operands: BinaryOperands<FReg> };
918 /// `dst = ieee_minimum(src1, src2)`
919 fminimum64 = Fminimum64 { operands: BinaryOperands<FReg> };
920 /// `dst = ieee_trunc(src)`
921 ftrunc64 = Ftrunc64 { dst: FReg, src: FReg };
922 /// `dst = ieee_floor(src)`
923 ffloor64 = Ffloor64 { dst: FReg, src: FReg };
924 /// `dst = ieee_ceil(src)`
925 fceil64 = Fceil64 { dst: FReg, src: FReg };
926 /// `dst = ieee_nearest(src)`
927 fnearest64 = Fnearest64 { dst: FReg, src: FReg };
928 /// `low128(dst) = ieee_nearest(low128(src))`
929 vnearest32x4 = Vnearest32x4 { dst: VReg, src: VReg };
930 /// `low128(dst) = ieee_nearest(low128(src))`
931 vnearest64x2 = Vnearest64x2 { dst: VReg, src: VReg };
932 /// `dst = ieee_sqrt(src)`
933 fsqrt64 = Fsqrt64 { dst: FReg, src: FReg };
934 /// `dst = -src`
935 fneg64 = Fneg64 { dst: FReg, src: FReg };
936 /// `dst = |src|`
937 fabs64 = Fabs64 { dst: FReg, src: FReg };
938
939 /// `dst = imm`
940 vconst128 = Vconst128 { dst: VReg, imm: u128 };
941
942 /// `dst = src1 + src2`
943 vaddi8x16 = VAddI8x16 { operands: BinaryOperands<VReg> };
944 /// `dst = src1 + src2`
945 vaddi16x8 = VAddI16x8 { operands: BinaryOperands<VReg> };
946 /// `dst = src1 + src2`
947 vaddi32x4 = VAddI32x4 { operands: BinaryOperands<VReg> };
948 /// `dst = src1 + src2`
949 vaddi64x2 = VAddI64x2 { operands: BinaryOperands<VReg> };
950 /// `dst = src1 + src2`
951 vaddf32x4 = VAddF32x4 { operands: BinaryOperands<VReg> };
952 /// `dst = src1 + src2`
953 vaddf64x2 = VAddF64x2 { operands: BinaryOperands<VReg> };
954
955 /// `dst = saturating_add(src1, src2)`
956 vaddi8x16_sat = VAddI8x16Sat { operands: BinaryOperands<VReg> };
957 /// `dst = saturating_add(src1, src2)`
958 vaddu8x16_sat = VAddU8x16Sat { operands: BinaryOperands<VReg> };
959 /// `dst = saturating_add(src1, src2)`
960 vaddi16x8_sat = VAddI16x8Sat { operands: BinaryOperands<VReg> };
961 /// `dst = saturating_add(src1, src2)`
962 vaddu16x8_sat = VAddU16x8Sat { operands: BinaryOperands<VReg> };
963
964 /// `dst = [src1[0] + src1[1], ..., src2[6] + src2[7]]`
965 vaddpairwisei16x8_s = VAddpairwiseI16x8S { operands: BinaryOperands<VReg> };
966 /// `dst = [src1[0] + src1[1], ..., src2[2] + src2[3]]`
967 vaddpairwisei32x4_s = VAddpairwiseI32x4S { operands: BinaryOperands<VReg> };
968
969 /// `dst = src1 << src2`
970 vshli8x16 = VShlI8x16 { operands: BinaryOperands<VReg, VReg, XReg> };
971 /// `dst = src1 << src2`
972 vshli16x8 = VShlI16x8 { operands: BinaryOperands<VReg, VReg, XReg> };
973 /// `dst = src1 << src2`
974 vshli32x4 = VShlI32x4 { operands: BinaryOperands<VReg, VReg, XReg> };
975 /// `dst = src1 << src2`
976 vshli64x2 = VShlI64x2 { operands: BinaryOperands<VReg, VReg, XReg> };
977 /// `dst = src1 >> src2` (signed)
978 vshri8x16_s = VShrI8x16S { operands: BinaryOperands<VReg, VReg, XReg> };
979 /// `dst = src1 >> src2` (signed)
980 vshri16x8_s = VShrI16x8S { operands: BinaryOperands<VReg, VReg, XReg> };
981 /// `dst = src1 >> src2` (signed)
982 vshri32x4_s = VShrI32x4S { operands: BinaryOperands<VReg, VReg, XReg> };
983 /// `dst = src1 >> src2` (signed)
984 vshri64x2_s = VShrI64x2S { operands: BinaryOperands<VReg, VReg, XReg> };
985 /// `dst = src1 >> src2` (unsigned)
986 vshri8x16_u = VShrI8x16U { operands: BinaryOperands<VReg, VReg, XReg> };
987 /// `dst = src1 >> src2` (unsigned)
988 vshri16x8_u = VShrI16x8U { operands: BinaryOperands<VReg, VReg, XReg> };
989 /// `dst = src1 >> src2` (unsigned)
990 vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
991 /// `dst = src1 >> src2` (unsigned)
992 vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };
993
994 /// `dst = splat(low8(src))`
995 vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
996 /// `dst = splat(low16(src))`
997 vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
998 /// `dst = splat(low32(src))`
999 vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
1000 /// `dst = splat(src)`
1001 vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
1002 /// `dst = splat(low32(src))`
1003 vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
1004 /// `dst = splat(src)`
1005 vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
1006
1007 /// Load the 64-bit source as i8x8 and sign-extend to i16x8.
1008 vload8x8_s_offset32 = VLoad8x8SOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1009 /// Load the 64-bit source as u8x8 and zero-extend to i16x8.
1010 vload8x8_u_offset32 = VLoad8x8UOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1011 /// Load the 64-bit source as i16x4 and sign-extend to i32x4.
1012 vload16x4le_s_offset32 = VLoad16x4LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1013 /// Load the 64-bit source as u16x4 and zero-extend to i32x4.
1014 vload16x4le_u_offset32 = VLoad16x4LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1015 /// Load the 64-bit source as i32x2 and sign-extend to i64x2.
1016 vload32x2le_s_offset32 = VLoad32x2LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1017 /// Load the 64-bit source as u32x2 and zero-extend to i64x2.
1018 vload32x2le_u_offset32 = VLoad32x2LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
1019
1020 /// `dst = src1 & src2`
1021 vband128 = VBand128 { operands: BinaryOperands<VReg> };
1022 /// `dst = src1 | src2`
1023 vbor128 = VBor128 { operands: BinaryOperands<VReg> };
1024 /// `dst = src1 ^ src2`
1025 vbxor128 = VBxor128 { operands: BinaryOperands<VReg> };
1026 /// `dst = !src1`
1027 vbnot128 = VBnot128 { dst: VReg, src: VReg };
1028 /// `dst = (c & x) | (!c & y)`
1029 vbitselect128 = VBitselect128 { dst: VReg, c: VReg, x: VReg, y: VReg };
1030 /// Collect high bits of each lane into the low 32-bits of the
1031 /// destination.
1032 vbitmask8x16 = Vbitmask8x16 { dst: XReg, src: VReg };
1033 /// Collect high bits of each lane into the low 32-bits of the
1034 /// destination.
1035 vbitmask16x8 = Vbitmask16x8 { dst: XReg, src: VReg };
1036 /// Collect high bits of each lane into the low 32-bits of the
1037 /// destination.
1038 vbitmask32x4 = Vbitmask32x4 { dst: XReg, src: VReg };
1039 /// Collect high bits of each lane into the low 32-bits of the
1040 /// destination.
1041 vbitmask64x2 = Vbitmask64x2 { dst: XReg, src: VReg };
1042 /// Store whether all lanes are nonzero in `dst`.
1043 valltrue8x16 = Valltrue8x16 { dst: XReg, src: VReg };
1044 /// Store whether all lanes are nonzero in `dst`.
1045 valltrue16x8 = Valltrue16x8 { dst: XReg, src: VReg };
1046 /// Store whether all lanes are nonzero in `dst`.
1047 valltrue32x4 = Valltrue32x4 { dst: XReg, src: VReg };
1048 /// Store whether all lanes are nonzero in `dst`.
1049 valltrue64x2 = Valltrue64x2 { dst: XReg, src: VReg };
1050 /// Store whether any lanes are nonzero in `dst`.
1051 vanytrue8x16 = Vanytrue8x16 { dst: XReg, src: VReg };
1052 /// Store whether any lanes are nonzero in `dst`.
1053 vanytrue16x8 = Vanytrue16x8 { dst: XReg, src: VReg };
1054 /// Store whether any lanes are nonzero in `dst`.
1055 vanytrue32x4 = Vanytrue32x4 { dst: XReg, src: VReg };
1056 /// Store whether any lanes are nonzero in `dst`.
1057 vanytrue64x2 = Vanytrue64x2 { dst: XReg, src: VReg };
1058
1059 /// Int-to-float conversion (same as `f32_from_x32_s`)
1060 vf32x4_from_i32x4_s = VF32x4FromI32x4S { dst: VReg, src: VReg };
1061 /// Int-to-float conversion (same as `f32_from_x32_u`)
1062 vf32x4_from_i32x4_u = VF32x4FromI32x4U { dst: VReg, src: VReg };
1063 /// Int-to-float conversion (same as `f64_from_x64_s`)
1064 vf64x2_from_i64x2_s = VF64x2FromI64x2S { dst: VReg, src: VReg };
1065 /// Int-to-float conversion (same as `f64_from_x64_u`)
1066 vf64x2_from_i64x2_u = VF64x2FromI64x2U { dst: VReg, src: VReg };
1067 /// Float-to-int conversion (same as `x32_from_f32_s`)
1068 vi32x4_from_f32x4_s = VI32x4FromF32x4S { dst: VReg, src: VReg };
1069 /// Float-to-int conversion (same as `x32_from_f32_u`)
1070 vi32x4_from_f32x4_u = VI32x4FromF32x4U { dst: VReg, src: VReg };
1071 /// Float-to-int conversion (same as `x64_from_f64_s`)
1072 vi64x2_from_f64x2_s = VI64x2FromF64x2S { dst: VReg, src: VReg };
1073 /// Float-to-int conversion (same as `x64_from_f64_u`)
1074 vi64x2_from_f64x2_u = VI64x2FromF64x2U { dst: VReg, src: VReg };
1075
1076 /// Widens the low lanes of the input vector, as signed, to twice
1077 /// the width.
1078 vwidenlow8x16_s = VWidenLow8x16S { dst: VReg, src: VReg };
1079 /// Widens the low lanes of the input vector, as unsigned, to twice
1080 /// the width.
1081 vwidenlow8x16_u = VWidenLow8x16U { dst: VReg, src: VReg };
1082 /// Widens the low lanes of the input vector, as signed, to twice
1083 /// the width.
1084 vwidenlow16x8_s = VWidenLow16x8S { dst: VReg, src: VReg };
1085 /// Widens the low lanes of the input vector, as unsigned, to twice
1086 /// the width.
1087 vwidenlow16x8_u = VWidenLow16x8U { dst: VReg, src: VReg };
1088 /// Widens the low lanes of the input vector, as signed, to twice
1089 /// the width.
1090 vwidenlow32x4_s = VWidenLow32x4S { dst: VReg, src: VReg };
1091 /// Widens the low lanes of the input vector, as unsigned, to twice
1092 /// the width.
1093 vwidenlow32x4_u = VWidenLow32x4U { dst: VReg, src: VReg };
1094 /// Widens the high lanes of the input vector, as signed, to twice
1095 /// the width.
1096 vwidenhigh8x16_s = VWidenHigh8x16S { dst: VReg, src: VReg };
1097 /// Widens the high lanes of the input vector, as unsigned, to twice
1098 /// the width.
1099 vwidenhigh8x16_u = VWidenHigh8x16U { dst: VReg, src: VReg };
1100 /// Widens the high lanes of the input vector, as signed, to twice
1101 /// the width.
1102 vwidenhigh16x8_s = VWidenHigh16x8S { dst: VReg, src: VReg };
1103 /// Widens the high lanes of the input vector, as unsigned, to twice
1104 /// the width.
1105 vwidenhigh16x8_u = VWidenHigh16x8U { dst: VReg, src: VReg };
1106 /// Widens the high lanes of the input vector, as signed, to twice
1107 /// the width.
1108 vwidenhigh32x4_s = VWidenHigh32x4S { dst: VReg, src: VReg };
1109 /// Widens the high lanes of the input vector, as unsigned, to twice
1110 /// the width.
1111 vwidenhigh32x4_u = VWidenHigh32x4U { dst: VReg, src: VReg };
1112
1113 /// Narrows the two 16x8 vectors, assuming all input lanes are
1114 /// signed, to half the width. Narrowing is signed and saturating.
1115 vnarrow16x8_s = Vnarrow16x8S { operands: BinaryOperands<VReg> };
1116 /// Narrows the two 16x8 vectors, assuming all input lanes are
1117 /// signed, to half the width. Narrowing is unsigned and saturating.
1118 vnarrow16x8_u = Vnarrow16x8U { operands: BinaryOperands<VReg> };
1119 /// Narrows the two 32x4 vectors, assuming all input lanes are
1120 /// signed, to half the width. Narrowing is signed and saturating.
1121 vnarrow32x4_s = Vnarrow32x4S { operands: BinaryOperands<VReg> };
1122 /// Narrows the two 32x4 vectors, assuming all input lanes are
1123 /// signed, to half the width. Narrowing is unsigned and saturating.
1124 vnarrow32x4_u = Vnarrow32x4U { operands: BinaryOperands<VReg> };
1125 /// Narrows the two 64x2 vectors, assuming all input lanes are
1126 /// signed, to half the width. Narrowing is signed and saturating.
1127 vnarrow64x2_s = Vnarrow64x2S { operands: BinaryOperands<VReg> };
1128 /// Narrows the two 64x2 vectors, assuming all input lanes are
1129 /// signed, to half the width. Narrowing is unsigned and saturating.
1130 vnarrow64x2_u = Vnarrow64x2U { operands: BinaryOperands<VReg> };
1131 /// Narrows the two 64x2 vectors, assuming all input lanes are
1132 /// unsigned, to half the width. Narrowing is unsigned and saturating.
1133 vunarrow64x2_u = Vunarrow64x2U { operands: BinaryOperands<VReg> };
1134 /// Promotes the low two lanes of the f32x4 input to f64x2.
1135 vfpromotelow = VFpromoteLow { dst: VReg, src: VReg };
1136 /// Demotes the two f64x2 lanes to f32x2 and then extends with two
1137 /// more zero lanes.
            vfdemote = VFdemote { dst: VReg, src: VReg };

            /// `dst = src1 - src2`
            vsubi8x16 = VSubI8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 - src2`
            vsubi16x8 = VSubI16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 - src2`
            vsubi32x4 = VSubI32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 - src2`
            vsubi64x2 = VSubI64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 - src2`
            vsubf64x2 = VSubF64x2 { operands: BinaryOperands<VReg> };

            /// `dst = saturating_sub(src1, src2)`
            vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands<VReg> };
            /// `dst = saturating_sub(src1, src2)`
            vsubu8x16_sat = VSubU8x16Sat { operands: BinaryOperands<VReg> };
            /// `dst = saturating_sub(src1, src2)`
            vsubi16x8_sat = VSubI16x8Sat { operands: BinaryOperands<VReg> };
            /// `dst = saturating_sub(src1, src2)`
            vsubu16x8_sat = VSubU16x8Sat { operands: BinaryOperands<VReg> };

            /// `dst = src1 * src2`
            vmuli8x16 = VMulI8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 * src2`
            vmuli16x8 = VMulI16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 * src2`
            vmuli32x4 = VMulI32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 * src2`
            vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 * src2`
            vmulf64x2 = VMulF64x2 { operands: BinaryOperands<VReg> };

            /// `dst = signed_saturate((src1 * src2 + (1 << (Q - 1))) >> Q)`
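            ///
            /// For example, assuming `Q = 15` for these 16-bit lanes (a Q15
            /// fixed-point rounding multiply), with arbitrary sample values:
            ///
            /// ```text
            /// src1 lane = 0x4000 (0.5 in Q15)
            /// src2 lane = 0x2000 (0.25 in Q15)
            /// dst  lane = sat((0x4000 * 0x2000 + 0x4000) >> 15) = 0x1000 (0.125)
            /// ```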
            vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };

            /// `dst = count_ones(src)`
            vpopcnt8x16 = VPopcnt8x16 { dst: VReg, src: VReg };

            /// `low32(dst) = zext(src[lane])`
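            ///
            /// A small sketch with an arbitrary lane value, showing the
            /// zero-extension into the low 32 bits of the destination:
            ///
            /// ```text
            /// src[lane] (u8): 0xff
            /// low32(dst)    : 0x0000_00ff
            /// ```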
            xextractv8x16 = XExtractV8x16 { dst: XReg, src: VReg, lane: u8 };
            /// `low32(dst) = zext(src[lane])`
            xextractv16x8 = XExtractV16x8 { dst: XReg, src: VReg, lane: u8 };
            /// `low32(dst) = src[lane]`
            xextractv32x4 = XExtractV32x4 { dst: XReg, src: VReg, lane: u8 };
            /// `dst = src[lane]`
            xextractv64x2 = XExtractV64x2 { dst: XReg, src: VReg, lane: u8 };
            /// `low32(dst) = src[lane]`
            fextractv32x4 = FExtractV32x4 { dst: FReg, src: VReg, lane: u8 };
            /// `dst = src[lane]`
            fextractv64x2 = FExtractV64x2 { dst: FReg, src: VReg, lane: u8 };

            /// `dst = src1; dst[lane] = src2`
            vinsertx8 = VInsertX8 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
            /// `dst = src1; dst[lane] = src2`
            vinsertx16 = VInsertX16 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
            /// `dst = src1; dst[lane] = src2`
            vinsertx32 = VInsertX32 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
            /// `dst = src1; dst[lane] = src2`
            vinsertx64 = VInsertX64 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
            /// `dst = src1; dst[lane] = src2`
            vinsertf32 = VInsertF32 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };
            /// `dst = src1; dst[lane] = src2`
            vinsertf64 = VInsertF64 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };

            /// `dst = src1 == src2`
            veq8x16 = Veq8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneq8x16 = Vneq8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (signed)
            vslt8x16 = Vslt8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (signed)
            vslteq8x16 = Vslteq8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (unsigned)
            vult8x16 = Vult8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (unsigned)
            vulteq8x16 = Vulteq8x16 { operands: BinaryOperands<VReg> };
            /// `dst = src1 == src2`
            veq16x8 = Veq16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneq16x8 = Vneq16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (signed)
            vslt16x8 = Vslt16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (signed)
            vslteq16x8 = Vslteq16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (unsigned)
            vult16x8 = Vult16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (unsigned)
            vulteq16x8 = Vulteq16x8 { operands: BinaryOperands<VReg> };
            /// `dst = src1 == src2`
            veq32x4 = Veq32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneq32x4 = Vneq32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (signed)
            vslt32x4 = Vslt32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (signed)
            vslteq32x4 = Vslteq32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (unsigned)
            vult32x4 = Vult32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (unsigned)
            vulteq32x4 = Vulteq32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 == src2`
            veq64x2 = Veq64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneq64x2 = Vneq64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (signed)
            vslt64x2 = Vslt64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (signed)
            vslteq64x2 = Vslteq64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2` (unsigned)
            vult64x2 = Vult64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2` (unsigned)
            vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };

            /// `dst = -src`
            vneg8x16 = Vneg8x16 { dst: VReg, src: VReg };
            /// `dst = -src`
            vneg16x8 = Vneg16x8 { dst: VReg, src: VReg };
            /// `dst = -src`
            vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
            /// `dst = -src`
            vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
            /// `dst = -src`
            vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };

            /// `dst = min(src1, src2)` (signed)
            vmin8x16_s = Vmin8x16S { operands: BinaryOperands<VReg> };
            /// `dst = min(src1, src2)` (unsigned)
            vmin8x16_u = Vmin8x16U { operands: BinaryOperands<VReg> };
            /// `dst = min(src1, src2)` (signed)
            vmin16x8_s = Vmin16x8S { operands: BinaryOperands<VReg> };
            /// `dst = min(src1, src2)` (unsigned)
            vmin16x8_u = Vmin16x8U { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (signed)
            vmax8x16_s = Vmax8x16S { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (unsigned)
            vmax8x16_u = Vmax8x16U { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (signed)
            vmax16x8_s = Vmax16x8S { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (unsigned)
            vmax16x8_u = Vmax16x8U { operands: BinaryOperands<VReg> };

            /// `dst = min(src1, src2)` (signed)
            vmin32x4_s = Vmin32x4S { operands: BinaryOperands<VReg> };
            /// `dst = min(src1, src2)` (unsigned)
            vmin32x4_u = Vmin32x4U { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (signed)
            vmax32x4_s = Vmax32x4S { operands: BinaryOperands<VReg> };
            /// `dst = max(src1, src2)` (unsigned)
            vmax32x4_u = Vmax32x4U { operands: BinaryOperands<VReg> };

            /// `dst = |src|`
            vabs8x16 = Vabs8x16 { dst: VReg, src: VReg };
            /// `dst = |src|`
            vabs16x8 = Vabs16x8 { dst: VReg, src: VReg };
            /// `dst = |src|`
            vabs32x4 = Vabs32x4 { dst: VReg, src: VReg };
            /// `dst = |src|`
            vabs64x2 = Vabs64x2 { dst: VReg, src: VReg };

            /// `dst = |src|`
            vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
            /// `dst = |src|`
            vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg };
            /// `dst = ieee_maximum(src1, src2)`
            vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands<VReg> };
            /// `dst = ieee_maximum(src1, src2)`
            vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands<VReg> };
            /// `dst = ieee_minimum(src1, src2)`
            vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
            /// `dst = ieee_minimum(src1, src2)`
            vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };

            /// `dst = shuffle(src1, src2, mask)`
            vshuffle = VShuffle { dst: VReg, src1: VReg, src2: VReg, mask: u128 };

            /// `dst = swizzle(src1, src2)`
            vswizzlei8x16 = Vswizzlei8x16 { operands: BinaryOperands<VReg> };

            /// `dst = (src1 + src2 + 1) // 2`
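            ///
            /// A quick worked example with arbitrary lane values: averaging 5
            /// and 6 gives `(5 + 6 + 1) // 2 = 6`, i.e. the rounding average
            /// rounds up on ties.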
            vavground8x16 = Vavground8x16 { operands: BinaryOperands<VReg> };
            /// `dst = (src1 + src2 + 1) // 2`
            vavground16x8 = Vavground16x8 { operands: BinaryOperands<VReg> };

            /// `dst = src1 == src2`
            veqf32x4 = VeqF32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneqf32x4 = VneqF32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2`
            vltf32x4 = VltF32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2`
            vlteqf32x4 = VlteqF32x4 { operands: BinaryOperands<VReg> };
            /// `dst = src1 == src2`
            veqf64x2 = VeqF64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 != src2`
            vneqf64x2 = VneqF64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 < src2`
            vltf64x2 = VltF64x2 { operands: BinaryOperands<VReg> };
            /// `dst = src1 <= src2`
            vlteqf64x2 = VlteqF64x2 { operands: BinaryOperands<VReg> };

            /// `dst = ieee_fma(a, b, c)`
            vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg };
            /// `dst = ieee_fma(a, b, c)`
            vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg };

            /// `dst = low32(cond) ? if_nonzero : if_zero`
            vselect = Vselect { dst: VReg, cond: XReg, if_nonzero: VReg, if_zero: VReg };

            /// `dst_hi:dst_lo = lhs_hi:lhs_lo + rhs_hi:rhs_lo`
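            ///
            /// A sketch of the carry behavior with arbitrary example values:
            ///
            /// ```text
            /// lhs = 0x0000_0000_0000_0001 : 0xffff_ffff_ffff_ffff
            /// rhs = 0x0000_0000_0000_0000 : 0x0000_0000_0000_0001
            /// dst = 0x0000_0000_0000_0002 : 0x0000_0000_0000_0000
            /// ```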
            xadd128 = Xadd128 {
                dst_lo: XReg,
                dst_hi: XReg,
                lhs_lo: XReg,
                lhs_hi: XReg,
                rhs_lo: XReg,
                rhs_hi: XReg
            };
            /// `dst_hi:dst_lo = lhs_hi:lhs_lo - rhs_hi:rhs_lo`
            xsub128 = Xsub128 {
                dst_lo: XReg,
                dst_hi: XReg,
                lhs_lo: XReg,
                lhs_hi: XReg,
                rhs_lo: XReg,
                rhs_hi: XReg
            };
            /// `dst_hi:dst_lo = sext(lhs) * sext(rhs)`
            xwidemul64_s = Xwidemul64S {
                dst_lo: XReg,
                dst_hi: XReg,
                lhs: XReg,
                rhs: XReg
            };
            /// `dst_hi:dst_lo = zext(lhs) * zext(rhs)`
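            ///
            /// For illustration, an arbitrary example of the unsigned widening
            /// product of two 64-bit values:
            ///
            /// ```text
            /// lhs = 0xffff_ffff_ffff_ffff
            /// rhs = 0x0000_0000_0000_0002
            /// dst_hi:dst_lo = 0x0000_0000_0000_0001 : 0xffff_ffff_ffff_fffe
            /// ```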
            xwidemul64_u = Xwidemul64U {
                dst_lo: XReg,
                dst_hi: XReg,
                lhs: XReg,
                rhs: XReg
            };
        }
    };
}

#[cfg(feature = "decode")]
pub mod decode;
#[cfg(feature = "disas")]
pub mod disas;
#[cfg(feature = "encode")]
pub mod encode;
#[cfg(feature = "interp")]
pub mod interp;
#[cfg(feature = "profile")]
pub mod profile;
#[cfg(all(not(feature = "profile"), feature = "interp"))]
mod profile_disabled;
#[cfg(all(not(feature = "profile"), feature = "interp"))]
use profile_disabled as profile;

pub mod regs;
pub use regs::*;

pub mod imms;
pub use imms::*;

pub mod op;
pub use op::*;

pub mod opcode;
pub use opcode::*;

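/// Internal helper for the `encode`/`decode` features: panics via
/// `unreachable!()` when debug assertions are enabled, and otherwise lowers to
/// `core::hint::unreachable_unchecked()`.
///
/// # Safety
///
/// Callers must guarantee that this function is never actually reached.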
#[cfg(any(feature = "encode", feature = "decode"))]
pub(crate) unsafe fn unreachable_unchecked<T>() -> T {
    #[cfg(debug_assertions)]
    unreachable!();

    #[cfg(not(debug_assertions))]
    unsafe {
        core::hint::unreachable_unchecked()
    }
}