read_fonts/tables/glyf/bytecode/
decode.rs

1//! TrueType bytecode decoder.
2
3use super::{InlineOperands, Instruction, Opcode};
4
/// An error returned by [`Decoder::decode`] if the end of the bytecode
/// stream is reached unexpectedly.
///
/// This is a unit type: it carries no information beyond the fact that
/// decoding ran out of bytes.
#[derive(Copy, Clone, Debug)]
pub struct DecodeError;

impl std::fmt::Display for DecodeError {
    /// Writes a fixed, human readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("unexpected end of bytecode")
    }
}

// Implementing the standard error trait lets callers box this type as
// `dyn Error` and propagate it with `?` into generic error types.
impl std::error::Error for DecodeError {}
15
/// Decodes instructions from TrueType bytecode.
///
/// The decoder is a cheap, copyable cursor: it borrows the bytecode and
/// tracks the offset of the next instruction to read.
#[derive(Copy, Clone, Debug)]
pub struct Decoder<'a> {
    /// The bytecode for the program.
    pub bytecode: &'a [u8],
    /// The "program counter" or current offset into the bytecode.
    pub pc: usize,
}
24
25impl<'a> Decoder<'a> {
26    /// Creates a new decoder for the given bytecode and program counter.
27    pub fn new(bytecode: &'a [u8], pc: usize) -> Self {
28        Self { bytecode, pc }
29    }
30
31    /// Decodes the next instruction.
32    ///
33    /// Returns `None` at the end of the bytecode stream.
34    pub fn decode(&mut self) -> Option<Result<Instruction<'a>, DecodeError>> {
35        let opcode = Opcode::from_byte(*self.bytecode.get(self.pc)?);
36        Some(self.decode_inner(opcode))
37    }
38
39    fn decode_inner(&mut self, opcode: Opcode) -> Result<Instruction<'a>, DecodeError> {
40        let mut opcode_len = opcode.len();
41        let mut count_len = 0;
42        // If the opcode length is negative the next byte contains the number
43        // of inline operands and |opcode_len| is the size of each operand.
44        // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/truetype/ttinterp.c#L7046>
45        if opcode_len < 0 {
46            let inline_count = *self.bytecode.get(self.pc + 1).ok_or(DecodeError)?;
47            opcode_len = opcode_len.abs() * inline_count as i32 + 2;
48            count_len = 1;
49        }
50        let opcode_len = opcode_len as usize;
51        let pc = self.pc;
52        let next_pc = pc + opcode_len;
53        // Skip opcode and potential inline operand count byte.
54        let inline_start = pc + 1 + count_len;
55        let inline_size = next_pc - inline_start;
56        let mut inline_operands = InlineOperands::default();
57        if inline_size > 0 {
58            inline_operands.bytes = self
59                .bytecode
60                .get(inline_start..inline_start + inline_size)
61                .ok_or(DecodeError)?;
62            inline_operands.is_words = opcode.is_push_words();
63        }
64        self.pc += opcode_len;
65        Ok(Instruction {
66            opcode,
67            inline_operands,
68            pc,
69        })
70    }
71}
72
73/// Returns an iterator that yields all instructions in the given bytecode
74/// starting at the specified program counter.
75pub fn decode_all(
76    bytecode: &[u8],
77    pc: usize,
78) -> impl Iterator<Item = Result<Instruction<'_>, DecodeError>> + '_ + Clone {
79    let mut decoder = Decoder::new(bytecode, pc);
80    std::iter::from_fn(move || decoder.decode())
81}
82
83#[cfg(test)]
84mod tests {
85    use super::Opcode;
86
87    #[test]
88    fn mixed_ops() {
89        let mut enc = Encoder::default();
90        // intermix push and non-push ops of various sizes to test boundary
91        // conditions
92        let cases: &[(Opcode, &[i16])] = &[
93            (Opcode::PUSHB100, &[1, 2, 3, 255, 5]),
94            (Opcode::PUSHW010, &[-1, 4508, -3]),
95            (Opcode::IUP0, &[]),
96            (Opcode::NPUSHB, &[55; 255]),
97            (Opcode::MDRP00110, &[]),
98            (Opcode::NPUSHW, &[i16::MIN; 32]),
99            (Opcode::LOOPCALL, &[]),
100            (Opcode::FLIPOFF, &[]),
101            (
102                Opcode::PUSHW011,
103                &[i16::MIN, i16::MIN / 2, i16::MAX, i16::MAX / 2],
104            ),
105            (Opcode::GETVARIATION, &[]),
106        ];
107        for (opcode, values) in cases {
108            if !values.is_empty() {
109                enc.encode_push(values);
110            } else {
111                enc.encode(*opcode);
112            }
113        }
114        let all_ins = super::decode_all(&enc.0, 0)
115            .map(|ins| ins.unwrap())
116            .collect::<Vec<_>>();
117        for (ins, (expected_opcode, expected_values)) in all_ins.iter().zip(cases) {
118            assert_eq!(ins.opcode, *expected_opcode);
119            let values = ins
120                .inline_operands
121                .values()
122                .map(|v| v as i16)
123                .collect::<Vec<_>>();
124            assert_eq!(&values, expected_values);
125        }
126    }
127
128    #[test]
129    fn non_push_ops() {
130        // test decoding of all single byte (non-push) opcodes
131        let non_push_ops: Vec<_> = (0..=255)
132            .filter(|b| !Opcode::from_byte(*b).is_push())
133            .collect();
134        let decoded: Vec<_> = super::decode_all(&non_push_ops, 0)
135            .map(|ins| ins.unwrap().opcode as u8)
136            .collect();
137        assert_eq!(non_push_ops, decoded);
138    }
139
140    #[test]
141    fn real_bytecode() {
142        // taken from NotoSerif-Regular, glyph Rturnedsmall, gid 1272
143        let bytecode = [
144            181, 5, 1, 9, 3, 1, 76, 75, 176, 45, 80, 88, 64, 35, 0, 3, 0, 9, 7, 3, 9, 105, 6, 4, 2,
145            1, 1, 2, 97, 5, 1, 2, 2, 109, 77, 11, 8, 2, 7, 7, 0, 95, 10, 1, 0, 0, 107, 0, 78, 27,
146            64, 41, 0, 7, 8, 0, 8, 7, 114, 0, 3, 0, 9, 8, 3, 9, 105, 6, 4, 2, 1, 1, 2, 97, 5, 1, 2,
147            2, 109, 77, 11, 1, 8, 8, 0, 95, 10, 1, 0, 0, 107, 0, 78, 89, 64, 31, 37, 36, 1, 0, 40,
148            38, 36, 44, 37, 44, 34, 32, 27, 25, 24, 23, 22, 20, 17, 16, 12, 10, 9, 8, 0, 35, 1, 35,
149            12, 13, 22, 43,
150        ];
151        // comments below contain the ttx assembly
152        let expected = [
153            // PUSHB[ ]	/* 6 values pushed */
154            // 5 1 9 3 1 76
155            "PUSHB[5] 5 1 9 3 1 76",
156            // MPPEM[ ]	/* MeasurePixelPerEm */
157            "MPPEM",
158            // PUSHB[ ]	/* 1 value pushed */
159            // 45
160            "PUSHB[0] 45",
161            // LT[ ]	/* LessThan */
162            "LT",
163            // IF[ ]	/* If */
164            "IF",
165            //   NPUSHB[ ]	/* 35 values pushed */
166            //   0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7
167            //   7 0 95 10 1 0 0 107 0 78
168            "NPUSHB 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7 7 0 95 10 1 0 0 107 0 78",
169            // ELSE[ ]	/* Else */
170            "ELSE",
171            //   NPUSHB[ ]	/* 41 values pushed */
172            //   0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2
173            //   2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78
174            "NPUSHB 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78",
175            // EIF[ ]	/* EndIf */
176            "EIF",
177            // NPUSHB[ ]	/* 31 values pushed */
178            // 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0
179            // 35 1 35 12 13 22
180            "NPUSHB 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0 35 1 35 12 13 22",
181            // CALL[ ]	/* CallFunction */
182            "CALL",
183        ];
184        let decoded: Vec<_> = super::decode_all(&bytecode, 0)
185            .map(|ins| ins.unwrap())
186            .collect();
187        let decoded_asm: Vec<_> = decoded.iter().map(|ins| ins.to_string()).collect();
188        assert_eq!(decoded_asm, expected);
189    }
190
191    /// Simple encoder used for testing.
192    #[derive(Default)]
193    struct Encoder(Vec<u8>);
194
195    impl Encoder {
196        pub fn encode(&mut self, opcode: Opcode) {
197            assert!(!opcode.is_push(), "use the encode_push method instead");
198            self.0.push(opcode as u8);
199        }
200
201        pub fn encode_push(&mut self, values: &[i16]) {
202            if values.is_empty() {
203                return;
204            }
205            let is_bytes = values.iter().all(|&x| x >= 0 && x <= u8::MAX as _);
206            if values.len() < 256 {
207                if is_bytes {
208                    if values.len() <= 8 {
209                        let opcode =
210                            Opcode::from_byte(Opcode::PUSHB000 as u8 + values.len() as u8 - 1);
211                        self.0.push(opcode as u8);
212                    } else {
213                        self.0.push(Opcode::NPUSHB as _);
214                        self.0.push(values.len() as _);
215                    }
216                    self.0.extend(values.iter().map(|&x| x as u8));
217                } else {
218                    if values.len() <= 8 {
219                        let opcode =
220                            Opcode::from_byte(Opcode::PUSHW000 as u8 + values.len() as u8 - 1);
221                        self.0.push(opcode as u8);
222                    } else {
223                        self.0.push(Opcode::NPUSHW as _);
224                        self.0.push(values.len() as _)
225                    }
226                    for &value in values {
227                        let value = value as u16;
228                        self.0.push((value >> 8) as _);
229                        self.0.push((value & 0xFF) as _);
230                    }
231                }
232            } else {
233                panic!("too many values to push in a single instruction");
234            }
235        }
236    }
237}