pulley_interpreter/
decode.rs

1//! Decoding support for pulley bytecode.
2
3use core::ptr::NonNull;
4
5use alloc::vec::Vec;
6use cranelift_bitset::scalar::ScalarBitSetStorage;
7use cranelift_bitset::ScalarBitSet;
8
9use crate::imms::*;
10use crate::opcode::*;
11use crate::regs::*;
12
13/// Either an `Ok(T)` or an `Err(DecodingError)`.
14pub type Result<T, E = DecodingError> = core::result::Result<T, E>;
15
/// An error when decoding Pulley bytecode.
///
/// Every variant carries the `position` (a byte offset into the bytecode
/// stream) that the stream reported when it created the error.
pub enum DecodingError {
    /// Reached the end of the bytecode stream before we finished decoding a
    /// single bytecode.
    UnexpectedEof {
        /// The position in the bytecode stream where this error occurred.
        position: usize,
    },

    /// Found an invalid opcode.
    InvalidOpcode {
        /// The position in the bytecode stream where this error occurred.
        position: usize,
        /// The invalid opcode that was found.
        code: u8,
    },

    /// Found an invalid extended opcode.
    InvalidExtendedOpcode {
        /// The position in the bytecode stream where this error occurred.
        position: usize,
        /// The invalid extended opcode that was found.
        code: u16,
    },

    /// Found an invalid register.
    InvalidReg {
        /// The position in the bytecode stream where this error occurred.
        position: usize,
        /// The invalid register that was found.
        reg: u8,
    },
}

impl core::fmt::Debug for DecodingError {
    /// Delegates to the `Display` implementation, so `{:?}` and `{}` render
    /// the same text.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Display::fmt(self, f)
    }
}

impl core::fmt::Display for DecodingError {
    /// Writes a one-line description of the error; all numbers are printed in
    /// `0x`-prefixed hexadecimal.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::UnexpectedEof { position } => {
                write!(f, "unexpected end-of-file at bytecode offset {position:#x}")
            }
            Self::InvalidOpcode { position, code } => {
                write!(
                    f,
                    "found invalid opcode {code:#x} at bytecode offset {position:#x}"
                )
            }
            // Worded differently from `InvalidOpcode` so that the two kinds of
            // error can be told apart from the message alone.
            Self::InvalidExtendedOpcode { position, code } => {
                write!(
                    f,
                    "found invalid extended opcode {code:#x} at bytecode offset {position:#x}"
                )
            }
            Self::InvalidReg { position, reg } => {
                write!(
                    f,
                    "found invalid register {reg:#x} at bytecode offset {position:#x}"
                )
            }
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for DecodingError {}
86
/// A source of Pulley bytecode bytes that the decoder pulls from.
///
/// This module provides two implementations:
///
/// * `SafeBytecodeStream` reads from a `&[u8]` and is written entirely in
///   safe code.
///
/// * `UnsafeBytecodeStream` reads through a raw pointer without any checks.
///   It is wildly unsafe: invalid bytecode, or even valid bytecode encoding a
///   program that does not itself preserve memory safety, leads to memory
///   unsafety.
pub trait BytecodeStream: Copy {
    /// The error this stream produces when an operation is invalid.
    type Error;

    /// Read the next `N` bytes from the stream and advance the stream's
    /// position past them.
    fn read<const N: usize>(&mut self) -> Result<[u8; N], Self::Error>;

    /// Build an "unexpected end-of-stream" error for the current position.
    fn unexpected_eof(&self) -> Self::Error;

    /// Build an "invalid opcode" error for the current position.
    fn invalid_opcode(&self, code: u8) -> Self::Error;

    /// Build an "invalid extended opcode" error for the current position.
    fn invalid_extended_opcode(&self, code: u16) -> Self::Error;

    /// Build an "invalid register" error for the current position.
    fn invalid_reg(&self, reg: u8) -> Self::Error;
}
119
/// A 100% safe implementation of a bytecode stream.
///
/// The stream holds the bytes that have not been read yet together with a
/// count of how many bytes have already been consumed.
#[derive(Clone, Copy, Debug)]
pub struct SafeBytecodeStream<'a> {
    /// The bytecode that has not been read yet.
    bytecode: &'a [u8],
    /// The number of bytes consumed so far, i.e. the offset of `bytecode`
    /// within the slice the stream was created from.
    position: usize,
}

impl<'a> SafeBytecodeStream<'a> {
    /// Create a new `SafeBytecodeStream` from the given slice and with an
    /// initial position pointing at the start of the slice.
    pub fn new(bytecode: &'a [u8]) -> Self {
        Self {
            bytecode,
            position: 0,
        }
    }

    /// Get this stream's current position, measured in bytes from the start
    /// of the slice it was created from.
    pub fn position(&self) -> usize {
        self.position
    }

    /// Get the bytecode that this stream has not read yet.
    ///
    /// The returned slice borrows from the underlying bytecode (lifetime
    /// `'a`), not from the stream value, so it may outlive the stream.
    pub fn as_slice(&self) -> &'a [u8] {
        self.bytecode
    }
}
149
150impl BytecodeStream for SafeBytecodeStream<'_> {
151    fn read<const N: usize>(&mut self) -> Result<[u8; N], Self::Error> {
152        let (bytes, rest) = self
153            .bytecode
154            .split_first_chunk()
155            .ok_or_else(|| self.unexpected_eof())?;
156        self.bytecode = rest;
157        self.position += N;
158        Ok(*bytes)
159    }
160
161    type Error = DecodingError;
162
163    fn unexpected_eof(&self) -> Self::Error {
164        DecodingError::UnexpectedEof {
165            position: self.position,
166        }
167    }
168
169    fn invalid_opcode(&self, code: u8) -> Self::Error {
170        DecodingError::InvalidOpcode {
171            position: self.position - 1,
172            code,
173        }
174    }
175
176    fn invalid_extended_opcode(&self, code: u16) -> Self::Error {
177        DecodingError::InvalidExtendedOpcode {
178            position: self.position,
179            code,
180        }
181    }
182
183    fn invalid_reg(&self, reg: u8) -> Self::Error {
184        DecodingError::InvalidReg {
185            position: self.position,
186            reg,
187        }
188    }
189}
190
/// A type with no variants, so no value of it can ever exist at runtime.
#[derive(Debug)]
pub enum Uninhabited {}
194
/// An unsafe bytecode stream.
///
/// This wraps a raw, non-null pointer to bytecode located somewhere in
/// memory.
#[derive(Clone, Copy, Debug)]
pub struct UnsafeBytecodeStream(NonNull<u8>);

impl UnsafeBytecodeStream {
    /// Construct a new `UnsafeBytecodeStream` pointing at the given PC.
    ///
    /// # Safety
    ///
    /// `pc` must point to valid Pulley bytecode, and the caller must make sure
    /// the returned stream is only ever used to access that valid bytecode.
    /// For example, after an instruction that unconditionally jumps to a new
    /// PC, the stream must not be used to read the bytes following the jump,
    /// because nothing guarantees that memory belongs to the bytecode.
    pub unsafe fn new(pc: NonNull<u8>) -> Self {
        Self(pc)
    }

    /// Get a new `UnsafeBytecodeStream` pointing at the bytecode located at
    /// the given relative offset from this stream's current position.
    ///
    /// # Safety
    ///
    /// Same as the `new` constructor. May only be used when the address at
    /// `self.as_ptr() + offset` is guaranteed to contain valid Pulley
    /// bytecode.
    pub unsafe fn offset(&self, offset: isize) -> Self {
        // SAFETY: the caller guarantees the target address holds valid
        // bytecode, per this function's contract.
        let target = unsafe { self.0.as_ptr().offset(offset) };
        // SAFETY: relies on the same caller guarantee; an address holding
        // valid bytecode is not null.
        let target = unsafe { NonNull::new_unchecked(target) };
        UnsafeBytecodeStream(target)
    }

    /// Get this stream's underlying raw pointer.
    pub fn as_ptr(&self) -> NonNull<u8> {
        self.0
    }
}
234
235impl BytecodeStream for UnsafeBytecodeStream {
236    fn read<const N: usize>(&mut self) -> Result<[u8; N], Self::Error> {
237        let bytes = unsafe { self.0.cast::<[u8; N]>().as_ptr().read() };
238        self.0 = unsafe { NonNull::new_unchecked(self.0.as_ptr().add(N)) };
239        Ok(bytes)
240    }
241
242    type Error = Uninhabited;
243
244    fn unexpected_eof(&self) -> Self::Error {
245        unsafe { crate::unreachable_unchecked() }
246    }
247
248    fn invalid_opcode(&self, _code: u8) -> Self::Error {
249        unsafe { crate::unreachable_unchecked() }
250    }
251
252    fn invalid_extended_opcode(&self, _code: u16) -> Self::Error {
253        unsafe { crate::unreachable_unchecked() }
254    }
255
256    fn invalid_reg(&self, _reg: u8) -> Self::Error {
257        unsafe { crate::unreachable_unchecked() }
258    }
259}
260
261/// Anything that can be decoded from a bytecode stream, e.g. opcodes,
262/// immediates, registers, etc...
263pub trait Decode: Sized {
264    /// Decode this type from the given bytecode stream.
265    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
266    where
267        T: BytecodeStream;
268}
269
270impl Decode for u8 {
271    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
272    where
273        T: BytecodeStream,
274    {
275        bytecode.read::<1>().map(|a| a[0])
276    }
277}
278
279impl Decode for u16 {
280    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
281    where
282        T: BytecodeStream,
283    {
284        Ok(u16::from_le_bytes(bytecode.read()?))
285    }
286}
287
288impl Decode for u32 {
289    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
290    where
291        T: BytecodeStream,
292    {
293        Ok(u32::from_le_bytes(bytecode.read()?))
294    }
295}
296
297impl Decode for u64 {
298    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
299    where
300        T: BytecodeStream,
301    {
302        Ok(u64::from_le_bytes(bytecode.read()?))
303    }
304}
305
306impl Decode for u128 {
307    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
308    where
309        T: BytecodeStream,
310    {
311        Ok(u128::from_le_bytes(bytecode.read()?))
312    }
313}
314
315impl Decode for i8 {
316    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
317    where
318        T: BytecodeStream,
319    {
320        bytecode.read::<1>().map(|a| a[0] as i8)
321    }
322}
323
324impl Decode for i16 {
325    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
326    where
327        T: BytecodeStream,
328    {
329        Ok(i16::from_le_bytes(bytecode.read()?))
330    }
331}
332
333impl Decode for i32 {
334    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
335    where
336        T: BytecodeStream,
337    {
338        Ok(i32::from_le_bytes(bytecode.read()?))
339    }
340}
341
342impl Decode for i64 {
343    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
344    where
345        T: BytecodeStream,
346    {
347        Ok(i64::from_le_bytes(bytecode.read()?))
348    }
349}
350
351impl Decode for i128 {
352    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
353    where
354        T: BytecodeStream,
355    {
356        Ok(i128::from_le_bytes(bytecode.read()?))
357    }
358}
359
360impl Decode for XReg {
361    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
362    where
363        T: BytecodeStream,
364    {
365        let byte = u8::decode(bytecode)?;
366        XReg::new(byte).ok_or_else(|| bytecode.invalid_reg(byte))
367    }
368}
369
370impl Decode for FReg {
371    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
372    where
373        T: BytecodeStream,
374    {
375        let byte = u8::decode(bytecode)?;
376        FReg::new(byte).ok_or_else(|| bytecode.invalid_reg(byte))
377    }
378}
379
380impl Decode for VReg {
381    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
382    where
383        T: BytecodeStream,
384    {
385        let byte = u8::decode(bytecode)?;
386        VReg::new(byte).ok_or_else(|| bytecode.invalid_reg(byte))
387    }
388}
389
390impl Decode for PcRelOffset {
391    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
392    where
393        T: BytecodeStream,
394    {
395        i32::decode(bytecode).map(|x| Self::from(x))
396    }
397}
398
399impl Decode for Opcode {
400    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
401    where
402        T: BytecodeStream,
403    {
404        let byte = u8::decode(bytecode)?;
405        match Opcode::new(byte) {
406            Some(v) => Ok(v),
407            None => Err(bytecode.invalid_opcode(byte)),
408        }
409    }
410}
411
412impl Decode for ExtendedOpcode {
413    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
414    where
415        T: BytecodeStream,
416    {
417        let word = u16::decode(bytecode)?;
418        match ExtendedOpcode::new(word) {
419            Some(v) => Ok(v),
420            None => Err(bytecode.invalid_extended_opcode(word)),
421        }
422    }
423}
424
425impl<D: Reg, S1: Reg, S2: Reg> Decode for BinaryOperands<D, S1, S2> {
426    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
427    where
428        T: BytecodeStream,
429    {
430        u16::decode(bytecode).map(|bits| Self::from_bits(bits))
431    }
432}
433
434impl<D: Reg, S1: Reg> Decode for BinaryOperands<D, S1, U6> {
435    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
436    where
437        T: BytecodeStream,
438    {
439        u16::decode(bytecode).map(|bits| Self::from_bits(bits))
440    }
441}
442
443impl<S: Decode + ScalarBitSetStorage> Decode for ScalarBitSet<S> {
444    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
445    where
446        T: BytecodeStream,
447    {
448        S::decode(bytecode).map(ScalarBitSet::from)
449    }
450}
451
452impl<R: Reg + Decode> Decode for UpperRegSet<R> {
453    fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
454    where
455        T: BytecodeStream,
456    {
457        ScalarBitSet::decode(bytecode).map(Self::from)
458    }
459}
460
461/// A Pulley bytecode decoder.
462///
463/// Does not materialize bytecode instructions, instead all decoding methods are
464/// given an `OpVisitor` implementation and the appropriate visitor methods are
465/// called upon decoding an instruction. This minimizes the amount of times we
466/// branch on the opcode, avoids constructing temporary storage, and plays well
467/// with our variable-length instruction encoding.
468#[derive(Default)]
469pub struct Decoder {
470    _private: (),
471}
472
473impl Decoder {
474    /// Create a new decoder.
475    pub fn new() -> Self {
476        Self::default()
477    }
478
479    /// Decode all instructions in the visitor's bytecode stream.
480    ///
481    /// The associated visitor method is invoked after each instruction is
482    /// decoded.
483    pub fn decode_all<'a, V>(visitor: &mut V) -> Result<Vec<V::Return>>
484    where
485        V: OpVisitor<BytecodeStream = SafeBytecodeStream<'a>> + ExtendedOpVisitor,
486    {
487        let mut decoder = Decoder::new();
488        let mut results = Vec::new();
489
490        while !visitor.bytecode().as_slice().is_empty() {
491            results.push(decoder.decode_one(visitor)?);
492        }
493
494        Ok(results)
495    }
496}
497
/// An `OpVisitor` combinator that runs one visitor and then another.
pub struct SequencedVisitor<'a, F, V1, V2> {
    /// Combines the two sub-visitors' results into one.
    join: F,
    /// The visitor that runs first.
    v1: &'a mut V1,
    /// The visitor that runs second.
    v2: &'a mut V2,
}

impl<'a, F, V1, V2> SequencedVisitor<'a, F, V1, V2> {
    /// Create a new sequenced visitor.
    ///
    /// `join` merges the result of `v1` with the result of `v2` so that the
    /// combined visitor can return a single value.
    pub fn new(join: F, v1: &'a mut V1, v2: &'a mut V2) -> Self {
        Self { join, v1, v2 }
    }
}
514
515macro_rules! define_decoder {
516    (
517        $(
518            $( #[$attr:meta] )*
519                $snake_name:ident = $name:ident $( {
520                $(
521                    $( #[$field_attr:meta] )*
522                    $field:ident : $field_ty:ty
523                ),*
524            } )? ;
525        )*
526    ) => {
527        impl Decoder {
528            /// Decode one instruction from the visitor's bytestream.
529            ///
530            /// Upon decoding, the visitor's associated callback is invoked and
531            /// the results returned.
532            #[inline(always)]
533            pub fn decode_one<V>(
534                &mut self,
535                visitor: &mut V,
536            ) -> Result<V::Return, <V::BytecodeStream as BytecodeStream>::Error>
537            where
538                V: OpVisitor + ExtendedOpVisitor,
539            {
540                visitor.before_visit();
541
542                let byte = u8::decode(visitor.bytecode())?;
543                let opcode = Opcode::new(byte).ok_or_else(|| {
544                    visitor.bytecode().invalid_opcode(byte)
545                })?;
546
547                match opcode {
548                    $(
549                        Opcode::$name => {
550                            $(
551                                $(
552                                    let $field = <$field_ty>::decode(
553                                        visitor.bytecode(),
554                                    )?;
555                                )*
556                            )?
557
558                            let ret = visitor.$snake_name($( $( $field ),* )?);
559                            visitor.after_visit();
560                            Ok(ret)
561                        },
562                    )*
563                    Opcode::ExtendedOp => {
564                        decode_one_extended(visitor)
565                    }
566                }
567            }
568        }
569
570        /// Callbacks upon decoding instructions from bytecode.
571        ///
572        /// Implement this trait for your type, give an instance of your type to
573        /// a `Decoder` method, and the `Decoder` will invoke the associated
574        /// method for each instruction that it decodes. For example, if the
575        /// `Decoder` decodes an `xadd32` instruction, then it will invoke the
576        /// `xadd32` visitor method, passing along any decoded immediates,
577        /// operands, etc... as arguments.
578        pub trait OpVisitor {
579            /// The type of this visitor's bytecode stream.
580            type BytecodeStream: BytecodeStream;
581
582            /// Get this visitor's underlying bytecode stream.
583            fn bytecode(&mut self) -> &mut Self::BytecodeStream;
584
585            /// The type of values returned by each visitor method.
586            type Return;
587
588            /// A callback invoked before starting to decode an instruction.
589            ///
590            /// Does nothing by default.
591            fn before_visit(&mut self) {}
592
593            /// A callback invoked after an instruction has been completely
594            /// decoded.
595            ///
596            /// Does nothing by default.
597            fn after_visit(&mut self) {}
598
599            $(
600                $( #[$attr] )*
601                fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return;
602            )*
603        }
604
605        impl<F, T, V1, V2> OpVisitor for SequencedVisitor<'_, F, V1, V2>
606        where
607            F: FnMut(V1::Return, V2::Return) -> T,
608            V1: OpVisitor,
609            V2: OpVisitor<BytecodeStream = V1::BytecodeStream>,
610        {
611            type BytecodeStream = V1::BytecodeStream;
612
613            fn bytecode(&mut self) -> &mut Self::BytecodeStream {
614                self.v1.bytecode()
615            }
616
617            type Return = T;
618
619            fn before_visit(&mut self) {
620                self.v1.before_visit();
621                self.v2.before_visit();
622            }
623
624            fn after_visit(&mut self) {
625                *self.v2.bytecode() = *self.v1.bytecode();
626                self.v1.after_visit();
627                self.v2.after_visit();
628            }
629
630            $(
631                $( #[$attr] )*
632                fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return {
633                    let a = self.v1.$snake_name( $( $( $field , )* )? );
634                    let b = self.v2.$snake_name( $( $( $field , )* )? );
635                    (self.join)(a, b)
636                }
637            )*
638        }
639    };
640}
641for_each_op!(define_decoder);
642
643macro_rules! define_extended_decoder {
644    (
645        $(
646            $( #[$attr:meta] )*
647                $snake_name:ident = $name:ident $( {
648                $(
649                    $( #[$field_attr:meta] )*
650                    $field:ident : $field_ty:ty
651                ),*
652            } )? ;
653        )*
654    ) => {
655        /// Like `OpVisitor` but for extended operations.
656        pub trait ExtendedOpVisitor: OpVisitor {
657            $(
658                $( #[$attr] )*
659                fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return;
660            )*
661        }
662
663        fn decode_one_extended<V>(
664            visitor: &mut V,
665        ) -> Result<V::Return, <V::BytecodeStream as BytecodeStream>::Error>
666        where
667            V: ExtendedOpVisitor,
668        {
669            let code = u16::decode(visitor.bytecode())?;
670            let opcode = ExtendedOpcode::new(code).ok_or_else(|| {
671                visitor.bytecode().invalid_extended_opcode(code)
672            })?;
673
674            match opcode {
675                $(
676                    ExtendedOpcode::$name => {
677                        $(
678                            $(
679                                let $field = <$field_ty>::decode(
680                                    visitor.bytecode(),
681                                )?;
682                            )*
683                        )?
684
685                        let ret = visitor.$snake_name($( $( $field ),* )?);
686                        visitor.after_visit();
687                        Ok(ret)
688                    }
689                )*
690            }
691        }
692
693
694        impl<F, T, V1, V2> ExtendedOpVisitor for SequencedVisitor<'_, F, V1, V2>
695        where
696            F: FnMut(V1::Return, V2::Return) -> T,
697            V1: ExtendedOpVisitor,
698            V2: ExtendedOpVisitor<BytecodeStream = V1::BytecodeStream>,
699        {
700            $(
701                $( #[$attr] )*
702                fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return {
703                    let a = self.v1.$snake_name( $( $( $field , )* )? );
704                    let b = self.v2.$snake_name( $( $( $field , )* )? );
705                    (self.join)(a, b)
706                }
707            )*
708        }
709    };
710}
711for_each_extended_op!(define_extended_decoder);
712
713/// Functions for decoding the operands of an instruction, assuming the opcode
714/// has already been decoded.
715pub mod operands {
716    use super::*;
717
718    macro_rules! define_operands_decoder {
719        (
720            $(
721                $( #[$attr:meta] )*
722                    $snake_name:ident = $name:ident $( {
723                    $(
724                        $( #[$field_attr:meta] )*
725                        $field:ident : $field_ty:ty
726                    ),*
727                } )? ;
728            )*
729        ) => {
730            $(
731                #[allow(unused_variables, reason = "macro-generated")]
732                #[allow(missing_docs, reason = "macro-generated")]
733                pub fn $snake_name<T: BytecodeStream>(pc: &mut T) -> Result<($($($field_ty,)*)?), T::Error> {
734                    Ok((($($((<$field_ty>::decode(pc))?,)*)?)))
735                }
736            )*
737        };
738    }
739
740    for_each_op!(define_operands_decoder);
741
742    /// Decode an extended opcode from `pc` to match the payload of the
743    /// "extended" opcode.
744    pub fn extended<T: BytecodeStream>(pc: &mut T) -> Result<(ExtendedOpcode,), T::Error> {
745        Ok((ExtendedOpcode::decode(pc)?,))
746    }
747
748    for_each_extended_op!(define_operands_decoder);
749}