solana_sbpf/
assembler.rs

1#![allow(clippy::arithmetic_side_effects)]
2// Copyright 2017 Rich Lane <lanerl@gmail.com>
3//
4// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
5// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! This module translates eBPF assembly language to binary.
9
10use self::InstructionType::{
11    AluBinary, AluUnary, CallImm, CallReg, Endian, JumpConditional, JumpUnconditional, LoadDwImm,
12    LoadReg, NoOperand, StoreImm, StoreReg, Syscall,
13};
14use crate::{
15    asm_parser::{
16        parse,
17        Operand::{Integer, Label, Memory, Register},
18        Statement,
19    },
20    ebpf::{self, Insn},
21    elf::Executable,
22    program::{BuiltinProgram, FunctionRegistry, SBPFVersion},
23    vm::ContextObject,
24};
25use std::collections::HashMap;
26
27#[cfg(not(feature = "shuttle-test"))]
28use std::sync::Arc;
29
30#[cfg(feature = "shuttle-test")]
31use shuttle::sync::Arc;
32
/// Operand shape expected by an assembly mnemonic.
///
/// The assembler looks up each mnemonic to get one of these variants plus a
/// base opcode, then matches the variant against the parsed operand list to
/// decide how the instruction fields are filled in.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum InstructionType {
    /// Two-operand ALU operation: `dst, src` (register) or `dst, imm`.
    AluBinary,
    /// Single-register ALU operation: `dst`.
    AluUnary,
    /// 64-bit immediate load: `dst, imm`; assembled into two instruction slots.
    LoadDwImm,
    /// Load from memory into a register: `dst, [src+off]`.
    LoadReg,
    /// Store an immediate to memory: `[dst+off], imm`.
    StoreImm,
    /// Store a register to memory: `[dst+off], src`.
    StoreReg,
    /// Unconditional jump to an integer offset or a label.
    JumpUnconditional,
    /// Conditional jump: `dst, src|imm, off|label`.
    JumpConditional,
    /// Syscall named by a label (hashed into the immediate) or given as an integer.
    Syscall,
    /// Call to an integer offset or a label.
    CallImm,
    /// Call through a register.
    CallReg,
    /// Byte-swap of a register; the payload is the bit width and is emitted as
    /// the instruction's immediate.
    Endian(i64),
    /// Instruction that takes no operands.
    NoOperand,
}
49
50fn make_instruction_map(sbpf_version: SBPFVersion) -> HashMap<String, (InstructionType, u8)> {
51    let mut result = HashMap::new();
52
53    let alu_binary_ops = [
54        ("add", ebpf::BPF_ADD),
55        ("sub", ebpf::BPF_SUB),
56        ("mul", ebpf::BPF_MUL),
57        ("div", ebpf::BPF_DIV),
58        ("or", ebpf::BPF_OR),
59        ("and", ebpf::BPF_AND),
60        ("lsh", ebpf::BPF_LSH),
61        ("rsh", ebpf::BPF_RSH),
62        ("mod", ebpf::BPF_MOD),
63        ("xor", ebpf::BPF_XOR),
64        ("mov", ebpf::BPF_MOV),
65        ("arsh", ebpf::BPF_ARSH),
66        ("hor", ebpf::BPF_HOR),
67    ];
68
69    let mem_classes = [
70        (
71            "ldx",
72            LoadReg,
73            ebpf::BPF_MEM | ebpf::BPF_LDX,
74            ebpf::BPF_ALU32_LOAD | ebpf::BPF_X,
75        ),
76        (
77            "st",
78            StoreImm,
79            ebpf::BPF_MEM | ebpf::BPF_ST,
80            ebpf::BPF_ALU64_STORE | ebpf::BPF_K,
81        ),
82        (
83            "stx",
84            StoreReg,
85            ebpf::BPF_MEM | ebpf::BPF_STX,
86            ebpf::BPF_ALU64_STORE | ebpf::BPF_X,
87        ),
88    ];
89    let mem_sizes = [
90        ("b", ebpf::BPF_B, ebpf::BPF_1B),
91        ("h", ebpf::BPF_H, ebpf::BPF_2B),
92        ("w", ebpf::BPF_W, ebpf::BPF_4B),
93        ("dw", ebpf::BPF_DW, ebpf::BPF_8B),
94    ];
95
96    let jump_conditions = [
97        ("jeq", ebpf::BPF_JEQ),
98        ("jgt", ebpf::BPF_JGT),
99        ("jge", ebpf::BPF_JGE),
100        ("jlt", ebpf::BPF_JLT),
101        ("jle", ebpf::BPF_JLE),
102        ("jset", ebpf::BPF_JSET),
103        ("jne", ebpf::BPF_JNE),
104        ("jsgt", ebpf::BPF_JSGT),
105        ("jsge", ebpf::BPF_JSGE),
106        ("jslt", ebpf::BPF_JSLT),
107        ("jsle", ebpf::BPF_JSLE),
108    ];
109
110    {
111        let mut entry = |name: &str, inst_type: InstructionType, opc: u8| {
112            result.insert(name.to_string(), (inst_type, opc))
113        };
114
115        if sbpf_version == SBPFVersion::V0 {
116            entry("exit", NoOperand, ebpf::EXIT);
117            entry("return", NoOperand, ebpf::EXIT);
118        } else {
119            entry("exit", NoOperand, ebpf::RETURN);
120            entry("return", NoOperand, ebpf::RETURN);
121        }
122
123        // Miscellaneous.
124        entry("ja", JumpUnconditional, ebpf::JA);
125        entry(
126            "syscall",
127            Syscall,
128            if sbpf_version == SBPFVersion::V0 {
129                ebpf::CALL_IMM
130            } else {
131                ebpf::SYSCALL
132            },
133        );
134        entry("call", CallImm, ebpf::CALL_IMM);
135        entry("callx", CallReg, ebpf::CALL_REG);
136        entry("lddw", LoadDwImm, ebpf::LD_DW_IMM);
137
138        // AluUnary.
139        entry("neg", AluUnary, ebpf::NEG64);
140        entry("neg32", AluUnary, ebpf::NEG32);
141        entry("neg64", AluUnary, ebpf::NEG64);
142
143        // AluBinary.
144        for &(name, opc) in &alu_binary_ops {
145            entry(name, AluBinary, ebpf::BPF_ALU64_STORE | opc);
146            entry(&format!("{name}32"), AluBinary, ebpf::BPF_ALU32_LOAD | opc);
147            entry(&format!("{name}64"), AluBinary, ebpf::BPF_ALU64_STORE | opc);
148        }
149
150        // Product Quotient Remainder.
151        entry(
152            "lmul",
153            AluBinary,
154            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_LMUL,
155        );
156        entry(
157            "lmul64",
158            AluBinary,
159            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_LMUL,
160        );
161        entry("lmul32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_LMUL);
162        entry(
163            "uhmul",
164            AluBinary,
165            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UHMUL,
166        );
167        entry(
168            "uhmul64",
169            AluBinary,
170            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UHMUL,
171        );
172        entry(
173            "shmul",
174            AluBinary,
175            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SHMUL,
176        );
177        entry(
178            "shmul64",
179            AluBinary,
180            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SHMUL,
181        );
182        entry(
183            "udiv",
184            AluBinary,
185            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UDIV,
186        );
187        entry(
188            "udiv64",
189            AluBinary,
190            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UDIV,
191        );
192        entry("udiv32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_UDIV);
193        entry(
194            "urem",
195            AluBinary,
196            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UREM,
197        );
198        entry(
199            "urem64",
200            AluBinary,
201            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UREM,
202        );
203        entry("urem32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_UREM);
204        entry(
205            "sdiv",
206            AluBinary,
207            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SDIV,
208        );
209        entry(
210            "sdiv64",
211            AluBinary,
212            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SDIV,
213        );
214        entry("sdiv32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_SDIV);
215        entry(
216            "srem",
217            AluBinary,
218            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SREM,
219        );
220        entry(
221            "srem64",
222            AluBinary,
223            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SREM,
224        );
225        entry("srem32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_SREM);
226
227        // Memory
228        if sbpf_version.move_memory_instruction_classes() {
229            for &(prefix, class, _, opcode) in &mem_classes {
230                for &(suffix, _, size) in &mem_sizes {
231                    entry(&format!("{prefix}{suffix}"), class, opcode | size);
232                }
233            }
234        } else {
235            for &(prefix, class, opcode, _) in &mem_classes {
236                for &(suffix, size, _) in &mem_sizes {
237                    entry(&format!("{prefix}{suffix}"), class, opcode | size);
238                }
239            }
240        }
241
242        // JumpConditional.
243        for &(name, condition) in &jump_conditions {
244            entry(name, JumpConditional, ebpf::BPF_JMP | condition);
245        }
246
247        // Endian.
248        for &size in &[16, 32, 64] {
249            entry(&format!("be{size}"), Endian(size), ebpf::BE);
250            entry(&format!("le{size}"), Endian(size), ebpf::LE);
251        }
252    }
253
254    result
255}
256
257fn insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String> {
258    if !(0..16).contains(&dst) {
259        return Err(format!("Invalid destination register {dst}"));
260    }
261    if !(0..16).contains(&src) {
262        return Err(format!("Invalid source register {src}"));
263    }
264    if off < i16::MIN as i64 || off > i16::MAX as i64 {
265        return Err(format!("Invalid offset {off}"));
266    }
267    if imm < i32::MIN as i64 || imm > i32::MAX as i64 {
268        return Err(format!("Invalid immediate {imm}"));
269    }
270    Ok(Insn {
271        ptr: 0,
272        opc,
273        dst: dst as u8,
274        src: src as u8,
275        off: off as i16,
276        imm,
277    })
278}
279
/// Converts a label into a jump offset relative to the instruction at `insn_ptr`.
///
/// The offset is measured from the instruction following `insn_ptr`, i.e.
/// `target - insn_ptr - 1`. Returns `Err("Label not found …")` when `label`
/// is not present in `labels`.
fn resolve_label(
    insn_ptr: usize,
    labels: &HashMap<&str, usize>,
    label: &str,
) -> Result<i64, String> {
    match labels.get(label) {
        Some(&target_pc) => Ok(target_pc as i64 - insn_ptr as i64 - 1),
        None => Err(format!("Label not found {label}")),
    }
}
290
291/// Parse assembly source and translate to binary.
292///
293/// # Examples
294///
295/// ```
296/// use solana_sbpf::{assembler::assemble, program::BuiltinProgram, vm::Config};
297/// use test_utils::TestContextObject;
298/// let executable = assemble::<TestContextObject>(
299///    "add64 r1, 0x605
300///     mov64 r2, 0x32
301///     mov64 r1, r0
302///     be16 r0
303///     neg64 r2
304///     exit",
305///     std::sync::Arc::new(BuiltinProgram::new_mock()),
306/// ).unwrap();
307/// let program = executable.get_text_bytes().1;
308/// println!("{:?}", program);
309/// # assert_eq!(program,
310/// #            &[0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
311/// #              0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
312/// #              0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
313/// #              0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
314/// #              0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
315/// #              0x9d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
316/// ```
317///
318/// This will produce the following output:
319///
320/// ```test
321/// [0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
322///  0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
323///  0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
324///  0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
325///  0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
326///  0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
327/// ```
328pub fn assemble<C: ContextObject>(
329    src: &str,
330    loader: Arc<BuiltinProgram<C>>,
331) -> Result<Executable<C>, String> {
332    let sbpf_version = *loader.get_config().enabled_sbpf_versions.end();
333
334    let statements = parse(src)?;
335    let instruction_map = make_instruction_map(sbpf_version);
336    let mut insn_ptr = 0;
337    let mut function_registry = FunctionRegistry::default();
338    let mut labels = HashMap::new();
339    labels.insert("entrypoint", 0);
340    for statement in statements.iter() {
341        match statement {
342            Statement::Label { name } => {
343                if name.starts_with("function_") || name == "entrypoint" {
344                    function_registry
345                        .register_function(insn_ptr as u32, name.as_bytes(), insn_ptr)
346                        .map_err(|_| format!("Label hash collision {name}"))?;
347                }
348                labels.insert(name.as_str(), insn_ptr);
349            }
350            Statement::Directive { name, operands } =>
351            {
352                #[allow(clippy::single_match)]
353                match (name.as_str(), operands.as_slice()) {
354                    ("fill", [Integer(repeat), Integer(_value)]) => {
355                        insn_ptr += *repeat as usize;
356                    }
357                    _ => {}
358                }
359            }
360            Statement::Instruction { name, .. } => {
361                insn_ptr += if name == "lddw" { 2 } else { 1 };
362            }
363        }
364    }
365    insn_ptr = 0;
366    let mut instructions: Vec<Insn> = Vec::new();
367    for statement in statements.iter() {
368        match statement {
369            Statement::Label { .. } => {}
370            Statement::Directive { name, operands } =>
371            {
372                #[allow(clippy::single_match)]
373                match (name.as_str(), operands.as_slice()) {
374                    ("fill", [Integer(repeat), Integer(value)]) => {
375                        for _ in 0..*repeat {
376                            instructions.push(Insn {
377                                ptr: insn_ptr,
378                                opc: *value as u8,
379                                dst: (*value >> 8) as u8 & 0xF,
380                                src: (*value >> 12) as u8 & 0xF,
381                                off: (*value >> 16) as u16 as i16,
382                                imm: (*value >> 32) as u32 as i64,
383                            });
384                            insn_ptr += 1;
385                        }
386                    }
387                    _ => return Err(format!("Invalid directive {name:?}")),
388                }
389            }
390            Statement::Instruction { name, operands } => {
391                let name = name.as_str();
392                match instruction_map.get(name) {
393                    Some(&(inst_type, opc)) => {
394                        let mut insn = match (inst_type, operands.as_slice()) {
395                            (AluBinary, [Register(dst), Register(src)]) => {
396                                insn(opc | ebpf::BPF_X, *dst, *src, 0, 0)
397                            }
398                            (AluBinary, [Register(dst), Integer(imm)]) => {
399                                insn(opc | ebpf::BPF_K, *dst, 0, 0, *imm)
400                            }
401                            (AluUnary, [Register(dst)]) => insn(opc, *dst, 0, 0, 0),
402                            (LoadReg, [Register(dst), Memory(src, off)])
403                            | (StoreReg, [Memory(dst, off), Register(src)]) => {
404                                insn(opc, *dst, *src, *off, 0)
405                            }
406                            (StoreImm, [Memory(dst, off), Integer(imm)]) => {
407                                insn(opc, *dst, 0, *off, *imm)
408                            }
409                            (NoOperand, []) => insn(opc, 0, 0, 0, 0),
410                            (JumpUnconditional, [Integer(off)]) => insn(opc, 0, 0, *off, 0),
411                            (JumpConditional, [Register(dst), Register(src), Integer(off)]) => {
412                                insn(opc | ebpf::BPF_X, *dst, *src, *off, 0)
413                            }
414                            (JumpConditional, [Register(dst), Integer(imm), Integer(off)]) => {
415                                insn(opc | ebpf::BPF_K, *dst, 0, *off, *imm)
416                            }
417                            (JumpUnconditional, [Label(label)]) => {
418                                insn(opc, 0, 0, resolve_label(insn_ptr, &labels, label)?, 0)
419                            }
420                            (CallImm, [Integer(imm)]) => {
421                                let instr_imm = if sbpf_version.static_syscalls() {
422                                    *imm
423                                } else {
424                                    *imm + insn_ptr as i64 + 1
425                                };
426                                let target_pc = *imm + insn_ptr as i64 + 1;
427                                let label = format!("function_{}", target_pc as usize);
428                                function_registry
429                                    .register_function(
430                                        target_pc as u32,
431                                        label.as_bytes(),
432                                        target_pc as usize,
433                                    )
434                                    .map_err(|_| format!("Label hash collision {name}"))?;
435                                insn(opc, 0, 0, 0, instr_imm)
436                            }
437                            (CallReg, [Register(dst)]) => {
438                                if sbpf_version.callx_uses_src_reg() {
439                                    insn(opc, 0, *dst, 0, 0)
440                                } else {
441                                    insn(opc, 0, 0, 0, *dst)
442                                }
443                            }
444                            (JumpConditional, [Register(dst), Register(src), Label(label)]) => {
445                                insn(
446                                    opc | ebpf::BPF_X,
447                                    *dst,
448                                    *src,
449                                    resolve_label(insn_ptr, &labels, label)?,
450                                    0,
451                                )
452                            }
453                            (JumpConditional, [Register(dst), Integer(imm), Label(label)]) => insn(
454                                opc | ebpf::BPF_K,
455                                *dst,
456                                0,
457                                resolve_label(insn_ptr, &labels, label)?,
458                                *imm,
459                            ),
460                            (Syscall, [Label(label)]) => insn(
461                                opc,
462                                0,
463                                0,
464                                0,
465                                ebpf::hash_symbol_name(label.as_bytes()) as i32 as i64,
466                            ),
467                            (Syscall, [Integer(imm)]) => insn(opc, 0, 0, 0, *imm),
468                            (CallImm, [Label(label)]) => {
469                                let label: &str = label;
470                                let mut target_pc = *labels
471                                    .get(label)
472                                    .ok_or_else(|| format!("Label not found {label}"))?
473                                    as i64;
474                                if sbpf_version.static_syscalls() {
475                                    target_pc = target_pc - insn_ptr as i64 - 1;
476                                }
477                                insn(opc, 0, 1, 0, target_pc)
478                            }
479                            (Endian(size), [Register(dst)]) => insn(opc, *dst, 0, 0, size),
480                            (LoadDwImm, [Register(dst), Integer(imm)]) => {
481                                insn(opc, *dst, 0, 0, (*imm << 32) >> 32)
482                            }
483                            _ => Err(format!("Unexpected operands: {operands:?}")),
484                        }?;
485                        insn.ptr = insn_ptr;
486                        instructions.push(insn);
487                        insn_ptr += 1;
488                        if let LoadDwImm = inst_type {
489                            if let Integer(imm) = operands[1] {
490                                instructions.push(Insn {
491                                    ptr: insn_ptr,
492                                    imm: imm >> 32,
493                                    ..Insn::default()
494                                });
495                                insn_ptr += 1;
496                            }
497                        }
498                    }
499                    None => return Err(format!("Invalid instruction {name:?}")),
500                }
501            }
502        }
503    }
504    let program = instructions
505        .iter()
506        .flat_map(|insn| insn.to_vec())
507        .collect::<Vec<_>>();
508    Executable::<C>::from_text_bytes(&program, loader, sbpf_version, function_registry)
509        .map_err(|err| format!("Executable constructor {err:?}"))
510}