// polkavm_linker/program_from_elf.rs

use polkavm_common::abi::{MemoryMapBuilder, VM_CODE_ADDRESS_ALIGNMENT, VM_MAX_PAGE_SIZE, VM_MIN_PAGE_SIZE};
use polkavm_common::cast::cast;
use polkavm_common::program::{
    self, FrameKind, Instruction, InstructionSet, LineProgramOp, Opcode, ProgramBlob, ProgramCounter, ProgramSymbol,
};
use polkavm_common::utils::{align_to_next_page_u32, align_to_next_page_u64};
use polkavm_common::varint;
use polkavm_common::writer::{ProgramBlobBuilder, Writer};

use core::ops::Range;
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use std::sync::Arc;

use crate::dwarf::Location;
use crate::elf::{Elf, Section, SectionIndex};
use crate::fast_range_map::RangeMap;
use crate::riscv::DecoderConfig;
use crate::riscv::Reg as RReg;
use crate::riscv::{AtomicKind, BranchKind, CmovKind, Inst, LoadKind, RegImmKind, StoreKind};

#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[repr(u8)]
enum Reg {
    // The registers supported by the VM.
    RA = 0,
    SP = 1,
    T0 = 2,
    T1 = 3,
    T2 = 4,
    S0 = 5,
    S1 = 6,
    A0 = 7,
    A1 = 8,
    A2 = 9,
    A3 = 10,
    A4 = 11,
    A5 = 12,

    // Extra fake registers. These will be stripped away before the final codegen pass.
    E0 = 13,
    E1 = 14,
    E2 = 15,
    E3 = 16,
}

impl Reg {
    fn to_usize(self) -> usize {
        self as usize
    }
}

impl From<polkavm_common::program::Reg> for Reg {
    fn from(reg: polkavm_common::program::Reg) -> Reg {
        use polkavm_common::program::Reg as R;
        match reg {
            R::RA => Reg::RA,
            R::SP => Reg::SP,
            R::T0 => Reg::T0,
            R::T1 => Reg::T1,
            R::T2 => Reg::T2,
            R::S0 => Reg::S0,
            R::S1 => Reg::S1,
            R::A0 => Reg::A0,
            R::A1 => Reg::A1,
            R::A2 => Reg::A2,
            R::A3 => Reg::A3,
            R::A4 => Reg::A4,
            R::A5 => Reg::A5,
        }
    }
}

impl From<polkavm_common::program::RawReg> for Reg {
    fn from(reg: polkavm_common::program::RawReg) -> Reg {
        reg.get().into()
    }
}

impl From<polkavm_common::program::RawReg> for RegImm {
    fn from(reg: polkavm_common::program::RawReg) -> RegImm {
        RegImm::Reg(reg.get().into())
    }
}

impl Reg {
    pub const fn from_usize(value: usize) -> Option<Reg> {
        match value {
            0 => Some(Reg::RA),
            1 => Some(Reg::SP),
            2 => Some(Reg::T0),
            3 => Some(Reg::T1),
            4 => Some(Reg::T2),
            5 => Some(Reg::S0),
            6 => Some(Reg::S1),
            7 => Some(Reg::A0),
            8 => Some(Reg::A1),
            9 => Some(Reg::A2),
            10 => Some(Reg::A3),
            11 => Some(Reg::A4),
            12 => Some(Reg::A5),
            13 => Some(Reg::E0),
            14 => Some(Reg::E1),
            15 => Some(Reg::E2),
            16 => Some(Reg::E3),
            _ => None,
        }
    }

    pub const fn name(self) -> &'static str {
        use Reg::*;
        match self {
            RA => "ra",
            SP => "sp",
            T0 => "t0",
            T1 => "t1",
            T2 => "t2",
            S0 => "s0",
            S1 => "s1",
            A0 => "a0",
            A1 => "a1",
            A2 => "a2",
            A3 => "a3",
            A4 => "a4",
            A5 => "a5",

            E0 => "e0",
            E1 => "e1",
            E2 => "e2",
            E3 => "e3",
        }
    }

    fn fake_register_index(self) -> Option<usize> {
        match self {
            Reg::E0 => Some(0),
            Reg::E1 => Some(1),
            Reg::E2 => Some(2),
            Reg::E3 => Some(3),
            _ => None,
        }
    }

    const ALL: [Reg; 17] = {
        use Reg::*;
        [RA, SP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, E0, E1, E2, E3]
    };

    const FAKE: [Reg; 4] = { [Reg::E0, Reg::E1, Reg::E2, Reg::E3] };
    const INPUT_REGS: [Reg; 9] = [Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5, Reg::T0, Reg::T1, Reg::T2];
    const OUTPUT_REGS: [Reg; 2] = [Reg::A0, Reg::A1];
}
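
// A minimal sanity test (not part of the original source): `from_usize` and
// `to_usize` should round-trip for every register, including the fake ones,
// and reject out-of-range indexes.
#[test]
fn test_reg_index_round_trip() {
    for reg in Reg::ALL {
        assert_eq!(Reg::from_usize(reg.to_usize()), Some(reg));
    }
    assert_eq!(Reg::from_usize(Reg::ALL.len()), None);
}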

polkavm_common::static_assert!(Reg::INPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_INPUT_REGS);
polkavm_common::static_assert!(Reg::OUTPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_OUTPUT_REGS);

#[derive(Debug)]
pub enum ProgramFromElfErrorKind {
    FailedToParseElf(object::read::Error),
    FailedToParseDwarf(gimli::Error),
    FailedToParseProgram(program::ProgramParseError),
    UnsupportedSection(String),
    UnsupportedInstruction { section: String, offset: u64, instruction: u32 },
    UnsupportedRegister { reg: RReg },

    Other(Cow<'static, str>),
}

impl From<object::read::Error> for ProgramFromElfError {
    fn from(error: object::read::Error) -> Self {
        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(error))
    }
}

impl From<gimli::Error> for ProgramFromElfError {
    fn from(error: gimli::Error) -> Self {
        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseDwarf(error))
    }
}

impl From<program::ProgramParseError> for ProgramFromElfError {
    fn from(error: program::ProgramParseError) -> Self {
        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseProgram(error))
    }
}

#[derive(Debug)]
pub struct ProgramFromElfError(ProgramFromElfErrorKind);

impl From<ProgramFromElfErrorKind> for ProgramFromElfError {
    fn from(kind: ProgramFromElfErrorKind) -> Self {
        Self(kind)
    }
}

impl ProgramFromElfError {
    pub(crate) fn other(error: impl Into<Cow<'static, str>>) -> Self {
        Self(ProgramFromElfErrorKind::Other(error.into()))
    }
}

impl std::error::Error for ProgramFromElfError {}

impl core::fmt::Display for ProgramFromElfError {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        match &self.0 {
            ProgramFromElfErrorKind::FailedToParseElf(error) => write!(fmt, "failed to parse ELF file: {}", error),
            ProgramFromElfErrorKind::FailedToParseDwarf(error) => write!(fmt, "failed to parse DWARF: {}", error),
            ProgramFromElfErrorKind::FailedToParseProgram(error) => write!(fmt, "{}", error),
            ProgramFromElfErrorKind::UnsupportedSection(section) => write!(fmt, "unsupported section: {}", section),
            ProgramFromElfErrorKind::UnsupportedInstruction {
                section,
                offset,
                instruction,
            } => {
                write!(
                    fmt,
                    "unsupported instruction in section '{section}' at offset 0x{offset:x}: 0x{instruction:08x}"
                )
            }
            ProgramFromElfErrorKind::UnsupportedRegister { reg } => write!(fmt, "unsupported register: {reg}"),
            ProgramFromElfErrorKind::Other(message) => fmt.write_str(message),
        }
    }
}

fn cast_reg_non_zero(reg: RReg) -> Result<Option<Reg>, ProgramFromElfError> {
    use RReg::*;
    match reg {
        Zero => Ok(None),
        RA => Ok(Some(Reg::RA)),
        SP => Ok(Some(Reg::SP)),
        T0 => Ok(Some(Reg::T0)),
        T1 => Ok(Some(Reg::T1)),
        T2 => Ok(Some(Reg::T2)),
        S0 => Ok(Some(Reg::S0)),
        S1 => Ok(Some(Reg::S1)),
        A0 => Ok(Some(Reg::A0)),
        A1 => Ok(Some(Reg::A1)),
        A2 => Ok(Some(Reg::A2)),
        A3 => Ok(Some(Reg::A3)),
        A4 => Ok(Some(Reg::A4)),
        A5 => Ok(Some(Reg::A5)),
        GP | TP | A6 | A7 | S2 | S3 | S4 | S5 | S6 | S7 | S8 | S9 | S10 | S11 | T3 | T4 | T5 | T6 => {
            Err(ProgramFromElfErrorKind::UnsupportedRegister { reg }.into())
        }
    }
}

fn cast_reg_any(reg: RReg) -> Result<RegImm, ProgramFromElfError> {
    Ok(cast_reg_non_zero(reg)?.map_or(RegImm::Imm(0), RegImm::Reg))
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub(crate) struct Source {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset_range: AddressRange,
}

impl core::fmt::Display for Source {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(
            fmt,
            "<{}+{}..{}>",
            self.section_index, self.offset_range.start, self.offset_range.end
        )
    }
}

impl Source {
    fn begin(&self) -> SectionTarget {
        SectionTarget {
            section_index: self.section_index,
            offset: self.offset_range.start,
        }
    }

    // Steps through the range in 2-byte increments, presumably because the
    // smallest RISC-V instruction (with the "C" extension) is 2 bytes, so this
    // visits every possible instruction boundary.
    fn iter(&'_ self) -> impl Iterator<Item = SectionTarget> + '_ {
        (self.offset_range.start..self.offset_range.end)
            .step_by(2)
            .map(|offset| SectionTarget {
                section_index: self.section_index,
                offset,
            })
    }
}

// TODO: Use smallvec.
#[derive(Clone, Debug)]
struct SourceStack(Vec<Source>);

impl core::fmt::Display for SourceStack {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("[")?;
        let mut is_first = true;
        for source in &self.0 {
            if is_first {
                is_first = false;
            } else {
                fmt.write_str(", ")?;
            }
            source.fmt(fmt)?;
        }
        fmt.write_str("]")
    }
}

impl SourceStack {
    fn as_slice(&self) -> &[Source] {
        &self.0
    }

    fn top(&self) -> &Source {
        &self.0[0]
    }

    fn overlay_on_top_of(&self, stack: &SourceStack) -> Self {
        let mut vec = Vec::with_capacity(self.0.len() + stack.0.len());
        vec.extend(self.0.iter().copied());
        vec.extend(stack.0.iter().copied());

        SourceStack(vec)
    }

    fn overlay_on_top_of_inplace(&mut self, stack: &SourceStack) {
        self.0.extend(stack.0.iter().copied());
    }

    fn display(&self, section_to_function_name: &BTreeMap<SectionTarget, &str>) -> String {
        use core::fmt::Write;

        let mut out = String::new();
        out.push('[');
        let mut is_first = true;
        for source in &self.0 {
            if is_first {
                is_first = false;
            } else {
                out.push_str(", ");
            }
            write!(&mut out, "{}", source).unwrap();
            if let Some((origin, name)) = section_to_function_name.range(..=source.begin()).next_back() {
                if origin.section_index == source.section_index {
                    write!(&mut out, " \"{name}\"+{}", source.offset_range.start - origin.offset).unwrap();
                }
            }
        }
        out.push(']');
        out
    }
}

impl From<Source> for SourceStack {
    fn from(source: Source) -> Self {
        SourceStack(vec![source])
    }
}

#[derive(Clone, Debug)]
struct EndOfBlock<T> {
    source: SourceStack,
    instruction: ControlInst<T>,
}

impl<T> EndOfBlock<T> {
    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<EndOfBlock<U>, E> {
        Ok(EndOfBlock {
            source: self.source,
            instruction: self.instruction.map_target(map)?,
        })
    }
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct AddressRange {
    pub(crate) start: u64,
    pub(crate) end: u64,
}

impl AddressRange {
    pub(crate) fn is_empty(&self) -> bool {
        self.end == self.start
    }

    pub(crate) const fn is_overlapping(&self, other: &AddressRange) -> bool {
        !(self.end <= other.start || self.start >= other.end)
    }
}
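
// A small sanity test (not part of the original source) for the half-open
// overlap check above: ranges that merely touch do not overlap.
#[test]
fn test_address_range_overlap() {
    let a = AddressRange::from(0..10);
    let b = AddressRange::from(10..20);
    let c = AddressRange::from(5..15);
    assert!(!a.is_overlapping(&b));
    assert!(a.is_overlapping(&c));
    assert!(c.is_overlapping(&b));
}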

impl core::fmt::Display for AddressRange {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
    }
}

impl core::fmt::Debug for AddressRange {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
    }
}

impl From<Range<u64>> for AddressRange {
    fn from(range: Range<u64>) -> Self {
        AddressRange {
            start: range.start,
            end: range.end,
        }
    }
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct SectionTarget {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset: u64,
}

impl core::fmt::Display for SectionTarget {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(fmt, "<{}+{}>", self.section_index, self.offset)
    }
}

impl core::fmt::Debug for SectionTarget {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(fmt, "<{}+{}>", self.section_index, self.offset)
    }
}

impl From<SectionTarget> for SectionIndex {
    fn from(target: SectionTarget) -> Self {
        target.section_index
    }
}

fn extract_delimited<'a>(str: &mut &'a str, prefix: &str, suffix: &str) -> Option<(&'a str, &'a str)> {
    let original = *str;
    let start_of_prefix = str.find(prefix)?;
    let start = start_of_prefix + prefix.len();
    let end = str[start..].find(suffix)? + start;
    *str = &str[end + suffix.len()..];
    Some((&original[..start_of_prefix], &original[start..end]))
}

#[test]
fn test_extract_delimited() {
    let mut str = "foo <section #1234+567> bar";
    assert_eq!(extract_delimited(&mut str, "<section #", ">").unwrap(), ("foo ", "1234+567"));
    assert_eq!(str, " bar");
}

impl SectionTarget {
    fn fmt_human_readable<H>(&self, elf: &Elf<H>) -> String
    where
        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
    {
        Self::make_human_readable_in_debug_string(elf, &self.to_string())
    }

    fn make_human_readable_in_debug_string<H>(elf: &Elf<H>, mut str: &str) -> String
    where
        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
    {
        // A hack-ish way to make nested `Debug` error messages more readable by replacing
        // raw section indexes and offsets with a more human readable string.

        let mut output = String::new();
        while let Some((prefix, chunk)) = extract_delimited(&mut str, "<section #", ">") {
            output.push_str(prefix);

            let mut iter = chunk.split('+');
            if let Some(section_index) = iter.next().and_then(|s| s.parse::<usize>().ok()) {
                if let Some(offset) = iter.next().and_then(|s| s.parse::<u64>().ok()) {
                    if let Some(section) = elf.section_by_raw_index(section_index) {
                        use core::fmt::Write;

                        let symbol = elf.symbols().find(|symbol| {
                            let Ok((symbol_section, symbol_offset)) = symbol.section_and_offset() else {
                                return false;
                            };
                            symbol_section.index().raw() == section_index
                                && offset >= symbol_offset
                                && offset < (symbol_offset + symbol.size())
                        });

                        let section_name = section.name();
                        write!(&mut output, "<section #{section_index}+{offset} ('{section_name}'").unwrap();
                        if let Some(symbol) = symbol {
                            if let Some(symbol_name) = symbol.name() {
                                write!(
                                    &mut output,
                                    ": '{}'+{}",
                                    symbol_name,
                                    offset - symbol.section_and_offset().unwrap().1
                                )
                                .unwrap();
                            }
                        }
                        output.push_str(")>");
                        continue;
                    }
                }
            }
            output.push_str(chunk);
        }

        output.push_str(str);
        output
    }

    fn add(self, offset: u64) -> Self {
        SectionTarget {
            section_index: self.section_index,
            offset: self.offset + offset,
        }
    }

    fn map_offset_i64(self, cb: impl FnOnce(i64) -> i64) -> Self {
        let offset = self.offset as i64;
        SectionTarget {
            section_index: self.section_index,
            offset: cb(offset) as u64,
        }
    }
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[repr(transparent)]
struct BlockTarget {
    block_index: usize,
}

impl BlockTarget {
    fn from_raw(block_index: usize) -> Self {
        BlockTarget { block_index }
    }

    fn index(self) -> usize {
        self.block_index
    }
}

#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum AnyTarget {
    Data(SectionTarget),
    Code(BlockTarget),
}

#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum RegImm {
    Reg(Reg),
    Imm(i32),
}

impl RegImm {
    fn map_register(self, mut map: impl FnMut(Reg) -> Reg) -> RegImm {
        match self {
            RegImm::Reg(reg) => RegImm::Reg(map(reg)),
            RegImm::Imm(value) => RegImm::Imm(value),
        }
    }
}

impl From<Reg> for RegImm {
    fn from(reg: Reg) -> Self {
        RegImm::Reg(reg)
    }
}

impl From<i32> for RegImm {
    fn from(value: i32) -> Self {
        RegImm::Imm(value)
    }
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum BasicInst<T> {
    LoadAbsolute {
        kind: LoadKind,
        dst: Reg,
        target: SectionTarget,
    },
    StoreAbsolute {
        kind: StoreKind,
        src: RegImm,
        target: SectionTarget,
    },
    LoadIndirect {
        kind: LoadKind,
        dst: Reg,
        base: Reg,
        offset: i32,
    },
    StoreIndirect {
        kind: StoreKind,
        src: RegImm,
        base: Reg,
        offset: i32,
    },
    LoadAddress {
        dst: Reg,
        target: T,
    },
    // This is supposed to load the address from the GOT, instead of loading it directly as an immediate.
    LoadAddressIndirect {
        dst: Reg,
        target: T,
    },
    LoadImmediate {
        dst: Reg,
        imm: i32,
    },
    LoadImmediate64 {
        dst: Reg,
        imm: i64,
    },
    MoveReg {
        dst: Reg,
        src: Reg,
    },
    Reg {
        kind: RegKind,
        dst: Reg,
        src: Reg,
    },
    RegReg {
        kind: RegRegKind,
        dst: Reg,
        src1: Reg,
        src2: Reg,
    },
    AnyAny {
        kind: AnyAnyKind,
        dst: Reg,
        src1: RegImm,
        src2: RegImm,
    },
    Cmov {
        kind: CmovKind,
        dst: Reg,
        src: RegImm,
        cond: Reg,
    },
    Ecalli {
        nth_import: usize,
    },
    Sbrk {
        dst: Reg,
        size: Reg,
    },
    Memset,
    Nop,
}

#[derive(Copy, Clone)]
enum OpKind {
    Read,
    Write,
    ReadWrite,
}

impl<T> BasicInst<T> {
    fn is_nop(&self) -> bool {
        match self {
            BasicInst::MoveReg { dst, src } => dst == src,
            BasicInst::Nop => true,
            _ => false,
        }
    }

    fn src_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAbsolute { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { src, .. } | BasicInst::Reg { src, .. } => RegMask::from(src),
            BasicInst::StoreAbsolute { src, .. } => RegMask::from(src),
            BasicInst::LoadIndirect { base, .. } => RegMask::from(base),
            BasicInst::StoreIndirect { src, base, .. } => RegMask::from(src) | RegMask::from(base),
            BasicInst::RegReg { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            BasicInst::AnyAny { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            // `dst` counts as a source here too, since the move is conditional
            // (`map_register` maps it as `OpKind::ReadWrite`).
            BasicInst::Cmov { dst, src, cond, .. } => RegMask::from(dst) | RegMask::from(src) | RegMask::from(cond),
            BasicInst::Ecalli { nth_import } => imports[nth_import].src_mask(),
            BasicInst::Sbrk { size, .. } => RegMask::from(size),
            BasicInst::Memset => RegMask::from(Reg::A0) | RegMask::from(Reg::A1) | RegMask::from(Reg::A2),
        }
    }

    fn dst_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop | BasicInst::StoreAbsolute { .. } | BasicInst::StoreIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { dst, .. }
            | BasicInst::LoadImmediate { dst, .. }
            | BasicInst::LoadImmediate64 { dst, .. }
            | BasicInst::LoadAbsolute { dst, .. }
            | BasicInst::LoadAddress { dst, .. }
            | BasicInst::LoadAddressIndirect { dst, .. }
            | BasicInst::LoadIndirect { dst, .. }
            | BasicInst::RegReg { dst, .. }
            | BasicInst::Cmov { dst, .. }
            | BasicInst::Reg { dst, .. }
            | BasicInst::AnyAny { dst, .. } => RegMask::from(dst),
            BasicInst::Ecalli { nth_import } => imports[nth_import].dst_mask(),
            BasicInst::Sbrk { dst, .. } => RegMask::from(dst),
            BasicInst::Memset { .. } => RegMask::from(Reg::A0) | RegMask::from(Reg::A2),
        }
    }

    fn has_side_effects(&self, config: &Config) -> bool {
        match *self {
            BasicInst::Sbrk { .. }
            | BasicInst::Ecalli { .. }
            | BasicInst::StoreAbsolute { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Memset { .. } => true,
            BasicInst::LoadAbsolute { .. } | BasicInst::LoadIndirect { .. } => !config.elide_unnecessary_loads,
            BasicInst::Nop
            | BasicInst::MoveReg { .. }
            | BasicInst::Reg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::AnyAny { .. } => false,
        }
    }

    fn map_register(self, mut map: impl FnMut(Reg, OpKind) -> Reg) -> Option<Self> {
        // Note: ALWAYS map the inputs first; otherwise `regalloc2` might break!
        match self {
            BasicInst::LoadImmediate { dst, imm } => Some(BasicInst::LoadImmediate {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadImmediate64 { dst, imm } => Some(BasicInst::LoadImmediate64 {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadAbsolute { kind, dst, target } => Some(BasicInst::LoadAbsolute {
                kind,
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::StoreAbsolute { kind, src, target } => Some(BasicInst::StoreAbsolute {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                target,
            }),
            BasicInst::LoadAddress { dst, target } => Some(BasicInst::LoadAddress {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadAddressIndirect { dst, target } => Some(BasicInst::LoadAddressIndirect {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadIndirect { kind, dst, base, offset } => Some(BasicInst::LoadIndirect {
                kind,
                base: map(base, OpKind::Read),
                dst: map(dst, OpKind::Write),
                offset,
            }),
            BasicInst::StoreIndirect { kind, src, base, offset } => Some(BasicInst::StoreIndirect {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                base: map(base, OpKind::Read),
                offset,
            }),
            BasicInst::Reg { kind, dst, src } => Some(BasicInst::Reg {
                kind,
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::RegReg { kind, dst, src1, src2 } => Some(BasicInst::RegReg {
                kind,
                src1: map(src1, OpKind::Read),
                src2: map(src2, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::AnyAny { kind, dst, src1, src2 } => Some(BasicInst::AnyAny {
                kind,
                src1: src1.map_register(|reg| map(reg, OpKind::Read)),
                src2: src2.map_register(|reg| map(reg, OpKind::Read)),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::MoveReg { dst, src } => Some(BasicInst::MoveReg {
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Cmov { kind, dst, src, cond } => Some(BasicInst::Cmov {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                cond: map(cond, OpKind::Read),
                dst: map(dst, OpKind::ReadWrite),
            }),
            BasicInst::Ecalli { .. } => None,
            BasicInst::Sbrk { dst, size } => Some(BasicInst::Sbrk {
                size: map(size, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Memset => {
                assert_eq!(map(Reg::A1, OpKind::Read), Reg::A1);
                assert_eq!(map(Reg::A0, OpKind::ReadWrite), Reg::A0);
                assert_eq!(map(Reg::A2, OpKind::ReadWrite), Reg::A2);
                Some(BasicInst::Memset)
            }
            BasicInst::Nop => Some(BasicInst::Nop),
        }
    }

    fn operands(&self, imports: &[Import]) -> impl Iterator<Item = (Reg, OpKind)>
    where
        T: Clone,
    {
        let mut list = [None, None, None, None, None, None, None, None];
        let mut length = 0;
        // Abuse the `map_register` to avoid matching on everything again.
        let is_special_instruction = self
            .clone()
            .map_register(|reg, kind| {
                list[length] = Some((reg, kind));
                length += 1;
                reg
            })
            .is_none();

        if is_special_instruction {
            assert_eq!(length, 0);

            let BasicInst::Ecalli { nth_import } = *self else { unreachable!() };
            let import = &imports[nth_import];

            for reg in import.src_mask() {
                list[length] = Some((reg, OpKind::Read));
                length += 1;
            }

            for reg in import.dst_mask() {
                list[length] = Some((reg, OpKind::Write));
                length += 1;
            }
        };

        let mut seen_dst = false;
        list.into_iter().take_while(|reg| reg.is_some()).flatten().map(move |(reg, kind)| {
            let is_dst = matches!(kind, OpKind::Write | OpKind::ReadWrite);

            // Sanity check to make sure inputs always come before outputs, so that `regalloc2` doesn't break.
            if seen_dst {
                assert!(is_dst);
            }
            seen_dst |= is_dst;

            (reg, kind)
        })
    }

    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<BasicInst<U>, E> {
        Ok(match self {
            BasicInst::MoveReg { dst, src } => BasicInst::MoveReg { dst, src },
            BasicInst::LoadImmediate { dst, imm } => BasicInst::LoadImmediate { dst, imm },
            BasicInst::LoadImmediate64 { dst, imm } => BasicInst::LoadImmediate64 { dst, imm },
            BasicInst::LoadAbsolute { kind, dst, target } => BasicInst::LoadAbsolute { kind, dst, target },
            BasicInst::StoreAbsolute { kind, src, target } => BasicInst::StoreAbsolute { kind, src, target },
            BasicInst::LoadAddress { dst, target } => BasicInst::LoadAddress { dst, target: map(target)? },
            BasicInst::LoadAddressIndirect { dst, target } => BasicInst::LoadAddressIndirect { dst, target: map(target)? },
            BasicInst::LoadIndirect { kind, dst, base, offset } => BasicInst::LoadIndirect { kind, dst, base, offset },
            BasicInst::StoreIndirect { kind, src, base, offset } => BasicInst::StoreIndirect { kind, src, base, offset },
            BasicInst::Reg { kind, dst, src } => BasicInst::Reg { kind, dst, src },
            BasicInst::RegReg { kind, dst, src1, src2 } => BasicInst::RegReg { kind, dst, src1, src2 },
            BasicInst::AnyAny { kind, dst, src1, src2 } => BasicInst::AnyAny { kind, dst, src1, src2 },
            BasicInst::Cmov { kind, dst, src, cond } => BasicInst::Cmov { kind, dst, src, cond },
            BasicInst::Ecalli { nth_import } => BasicInst::Ecalli { nth_import },
            BasicInst::Sbrk { dst, size } => BasicInst::Sbrk { dst, size },
            BasicInst::Memset => BasicInst::Memset,
            BasicInst::Nop => BasicInst::Nop,
        })
    }

    fn target(&self) -> (Option<SectionTarget>, Option<T>)
    where
        T: Copy,
    {
        match self {
            BasicInst::LoadAbsolute { target, .. } | BasicInst::StoreAbsolute { target, .. } => (Some(*target), None),
            BasicInst::LoadAddress { target, .. } | BasicInst::LoadAddressIndirect { target, .. } => (None, Some(*target)),
            BasicInst::Nop
            | BasicInst::MoveReg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadIndirect { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Reg { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::AnyAny { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::Sbrk { .. }
            | BasicInst::Memset { .. }
            | BasicInst::Ecalli { .. } => (None, None),
        }
    }
}
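
// A minimal sanity test (not part of the original source): moving a register
// to itself is a no-op, while a real move is not.
#[test]
fn test_move_reg_is_nop_only_for_same_register() {
    let same: BasicInst<()> = BasicInst::MoveReg { dst: Reg::A0, src: Reg::A0 };
    let different: BasicInst<()> = BasicInst::MoveReg { dst: Reg::A0, src: Reg::A1 };
    assert!(same.is_nop());
    assert!(!different.is_nop());
}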

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ControlInst<T> {
    Jump {
        target: T,
    },
    Call {
        ra: Reg,
        target: T,
        target_return: T,
    },
    JumpIndirect {
        base: Reg,
        offset: i64,
    },
    CallIndirect {
        ra: Reg,
        base: Reg,
        offset: i64,
        target_return: T,
    },
    Branch {
        kind: BranchKind,
        src1: RegImm,
        src2: RegImm,
        target_true: T,
        target_false: T,
    },
    Unimplemented,
}

impl<T> ControlInst<T> {
    fn src_mask(&self) -> RegMask {
        match *self {
            ControlInst::Jump { .. } | ControlInst::Call { .. } | ControlInst::Unimplemented => RegMask::empty(),
            ControlInst::JumpIndirect { base, .. } | ControlInst::CallIndirect { base, .. } => RegMask::from(base),
            ControlInst::Branch { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
        }
    }

    fn dst_mask(&self) -> RegMask {
        match *self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Branch { .. } | ControlInst::Unimplemented => {
                RegMask::empty()
            }
            ControlInst::Call { ra, .. } | ControlInst::CallIndirect { ra, .. } => RegMask::from(ra),
        }
    }

    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<ControlInst<U>, E> {
        Ok(match self {
            ControlInst::Jump { target } => ControlInst::Jump { target: map(target)? },
            ControlInst::Call { ra, target, target_return } => ControlInst::Call {
                ra,
                target: map(target)?,
                target_return: map(target_return)?,
            },
            ControlInst::JumpIndirect { base, offset } => ControlInst::JumpIndirect { base, offset },
            ControlInst::CallIndirect {
                ra,
                base,
                offset,
                target_return,
            } => ControlInst::CallIndirect {
                ra,
                base,
                offset,
                target_return: map(target_return)?,
            },
            ControlInst::Branch {
                kind,
                src1,
                src2,
                target_true,
                target_false,
            } => ControlInst::Branch {
                kind,
                src1,
                src2,
                target_true: map(target_true)?,
                target_false: map(target_false)?,
            },
            ControlInst::Unimplemented => ControlInst::Unimplemented,
        })
    }

    fn targets(&self) -> [Option<&T>; 2] {
        match self {
            ControlInst::Jump { target, .. } => [Some(target), None],
            ControlInst::Call { target, target_return, .. } => [Some(target), Some(target_return)],
            ControlInst::CallIndirect { target_return, .. } => [Some(target_return), None],
            ControlInst::Branch {
                target_true, target_false, ..
            } => [Some(target_true), Some(target_false)],
            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => [None, None],
        }
    }

    fn fallthrough_target(&self) -> Option<T>
    where
        T: Copy,
    {
        match self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
            ControlInst::Branch { target_false: target, .. }
            | ControlInst::Call { target_return: target, .. }
            | ControlInst::CallIndirect { target_return: target, .. } => Some(*target),
        }
    }

    fn fallthrough_target_mut(&mut self) -> Option<&mut T> {
        match self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
            ControlInst::Branch { target_false: target, .. }
            | ControlInst::Call { target_return: target, .. }
            | ControlInst::CallIndirect { target_return: target, .. } => Some(target),
        }
    }
}
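
// Another small sanity test (not part of the original source): only branches
// and calls can fall through; plain and indirect jumps cannot.
#[test]
fn test_fallthrough_targets() {
    let jump: ControlInst<u32> = ControlInst::Jump { target: 0 };
    let call: ControlInst<u32> = ControlInst::Call {
        ra: Reg::RA,
        target: 1,
        target_return: 2,
    };
    assert_eq!(jump.fallthrough_target(), None);
    assert_eq!(call.fallthrough_target(), Some(2));
}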

#[derive(Copy, Clone, Debug)]
enum InstExt<BasicT, ControlT> {
    Basic(BasicInst<BasicT>),
    Control(ControlInst<ControlT>),
}

impl<BasicT, ControlT> InstExt<BasicT, ControlT> {
    fn nop() -> Self {
        InstExt::Basic(BasicInst::Nop)
    }
}

impl<BasicT, ControlT> From<BasicInst<BasicT>> for InstExt<BasicT, ControlT> {
    fn from(inst: BasicInst<BasicT>) -> Self {
        InstExt::Basic(inst)
    }
}

impl<BasicT, ControlT> From<ControlInst<ControlT>> for InstExt<BasicT, ControlT> {
    fn from(inst: ControlInst<ControlT>) -> Self {
        InstExt::Control(inst)
    }
}

#[derive(Debug)]
struct BasicBlock<BasicT, ControlT> {
    target: BlockTarget,
    source: Source,
    ops: Vec<(SourceStack, BasicInst<BasicT>)>,
    next: EndOfBlock<ControlT>,
}

impl<BasicT, ControlT> BasicBlock<BasicT, ControlT> {
    fn new(target: BlockTarget, source: Source, ops: Vec<(SourceStack, BasicInst<BasicT>)>, next: EndOfBlock<ControlT>) -> Self {
        Self { target, source, ops, next }
    }
}

fn split_function_name(name: &str) -> (String, String) {
    let (with_hash, without_hash) = if let Ok(name) = rustc_demangle::try_demangle(name) {
        (name.to_string(), format!("{:#}", name))
    } else {
        (name.to_owned(), name.to_owned())
    };

    // Ideally we'd parse the symbol into an actual AST and use that,
    // but that's a lot of work, so for now let's just do it like this.
    //
    // Here we want to split the symbol into two parts: the namespace, and the name + hash.
    // The idea being that multiple symbols most likely share the namespace, allowing us to
    // deduplicate those strings in the output blob.
    //
    // For example, this symbol:
    //   _ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$7reserve21do_reserve_and_handle17hddecba91f804dbebE
    // can be demangled into these:
    //   with_hash    = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle::hddecba91f804dbeb"
    //   without_hash = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle"
    //
    // So what we want is to split it in two like this:
    //   prefix = "alloc::raw_vec::RawVec<T,A>::reserve"
    //   suffix = "do_reserve_and_handle::hddecba91f804dbeb"

    if with_hash.contains("::") {
        let suffix_index = {
            let mut found = None;
            let mut depth = 0;
            let mut last = '\0';
            let mut index = without_hash.len();
            for ch in without_hash.chars().rev() {
                if ch == '>' {
                    depth += 1;
                } else if ch == '<' {
                    depth -= 1;
                } else if ch == ':' && depth == 0 && last == ':' {
                    found = Some(index + 1);
                    break;
                }

                last = ch;
                index -= ch.len_utf8();
            }

            found
        };

        if let Some(suffix_index) = suffix_index {
            let prefix = &with_hash[..suffix_index - 2];
            let suffix = &with_hash[suffix_index..];
            return (prefix.to_owned(), suffix.to_owned());
        } else {
            log::warn!("Failed to split symbol: {:?}", with_hash);
        }
    }

    (String::new(), with_hash)
}
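
// A sanity test (not part of the original source) using the exact example
// given in the comment above.
#[test]
fn test_split_function_name() {
    let (prefix, suffix) =
        split_function_name("_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$7reserve21do_reserve_and_handle17hddecba91f804dbebE");
    assert_eq!(prefix, "alloc::raw_vec::RawVec<T,A>::reserve");
    assert_eq!(suffix, "do_reserve_and_handle::hddecba91f804dbeb");
}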

#[derive(Clone, Debug)]
enum DataRef {
    Section { section_index: SectionIndex, range: Range<usize> },
    Padding(usize),
}

impl DataRef {
    fn size(&self) -> usize {
        match self {
            Self::Section { range, .. } => range.len(),
            Self::Padding(size) => *size,
        }
    }
}

#[derive(Debug)]
struct MemoryConfig {
    ro_data: Vec<DataRef>,
    rw_data: Vec<DataRef>,
    ro_data_size: u32,
    rw_data_size: u32,
    min_stack_size: u32,
}

fn get_padding(memory_end: u64, align: u64) -> Option<u64> {
    let misalignment = memory_end % align;
    if misalignment == 0 {
        None
    } else {
        Some(align - misalignment)
    }
}
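
// A sanity check (not part of the original source) of the padding calculation:
// the returned padding brings `memory_end` up to the next multiple of `align`.
#[test]
fn test_get_padding() {
    assert_eq!(get_padding(0x1000, 0x1000), None);
    assert_eq!(get_padding(0x1003, 0x1000), Some(0xffd));
    assert_eq!(get_padding(0x1fff, 0x1000), Some(1));
}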

fn process_sections<H>(
    elf: &Elf<H>,
    current_address: &mut u64,
    chunks: &mut Vec<DataRef>,
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    sections: impl IntoIterator<Item = SectionIndex>,
) -> u64
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    for section_index in sections {
        let section = elf.section_by_index(section_index);
        assert!(section.size() >= section.data().len() as u64);

        if let Some(padding) = get_padding(*current_address, section.align()) {
            *current_address += padding;
            chunks.push(DataRef::Padding(padding as usize));
        }

        let section_name = section.name();
        let section_base_address = *current_address;
        base_address_for_section.insert(section.index(), section_base_address);

        *current_address += section.size();
        if !section.data().is_empty() {
            chunks.push(DataRef::Section {
                section_index: section.index(),
                range: 0..section.data().len(),
            });
        }

        let padding = section.size() - section.data().len() as u64;
        if padding > 0 {
            chunks.push(DataRef::Padding(padding.try_into().expect("overflow")))
        }

        log::trace!(
            "Found section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}/0x{:x}",
            section_name,
            section.original_address(),
            section.original_address() + section.size(),
            section_base_address,
            section_base_address + section.size(),
            section.data().len(),
            section.size(),
        );
    }

    let size_in_memory: u64 = chunks.iter().map(|chunk| chunk.size() as u64).sum();
    while let Some(DataRef::Padding(..)) = chunks.last() {
        chunks.pop();
    }

    *current_address = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), *current_address).expect("overflow");
    // Add a guard page between this section and the next one.
    *current_address += u64::from(VM_MAX_PAGE_SIZE);

    size_in_memory
}

#[allow(clippy::too_many_arguments)]
fn extract_memory_config<H>(
    elf: &Elf<H>,
    sections_ro_data: &[SectionIndex],
    sections_rw_data: &[SectionIndex],
    sections_bss: &[SectionIndex],
    sections_min_stack_size: &[SectionIndex],
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    mut min_stack_size: u32,
) -> Result<MemoryConfig, ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    let mut current_address = u64::from(VM_MAX_PAGE_SIZE);

    let mut ro_data = Vec::new();
    let mut rw_data = Vec::new();
    let ro_data_address = current_address;
    let ro_data_size = process_sections(
        elf,
        &mut current_address,
        &mut ro_data,
        base_address_for_section,
        sections_ro_data.iter().copied(),
    );
    let rw_data_address = current_address;
    let rw_data_size = process_sections(
        elf,
        &mut current_address,
        &mut rw_data,
        base_address_for_section,
        sections_rw_data.iter().copied().chain(sections_bss.iter().copied()),
    );

    for &section_index in sections_min_stack_size {
        let section = elf.section_by_index(section_index);
        let data = section.data();
        if data.len() % 4 != 0 {
            return Err(ProgramFromElfError::other(format!("section '{}' has invalid size", section.name())));
        }

        for xs in data.chunks_exact(4) {
            let value = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
            min_stack_size = core::cmp::max(min_stack_size, value);
        }
    }

    let min_stack_size =
        align_to_next_page_u32(VM_MIN_PAGE_SIZE, min_stack_size).ok_or(ProgramFromElfError::other("out of range size for the stack"))?;

    log::trace!("Configured minimum stack size: 0x{min_stack_size:x}");

    let ro_data_size = u32::try_from(ro_data_size).expect("overflow");
    let rw_data_size = u32::try_from(rw_data_size).expect("overflow");

    // Sanity check that the memory configuration is actually valid.
    {
        let rw_data_size_physical: u64 = rw_data.iter().map(|x| x.size() as u64).sum();
        let rw_data_size_physical = u32::try_from(rw_data_size_physical).expect("overflow");
        assert!(rw_data_size_physical <= rw_data_size);

        let config = match MemoryMapBuilder::new(VM_MAX_PAGE_SIZE)
            .ro_data_size(ro_data_size)
            .rw_data_size(rw_data_size)
            .stack_size(min_stack_size)
            .build()
        {
            Ok(config) => config,
            Err(error) => {
                return Err(ProgramFromElfError::other(error));
            }
        };

        assert_eq!(u64::from(config.ro_data_address()), ro_data_address);
        assert_eq!(u64::from(config.rw_data_address()), rw_data_address);
    }

    let memory_config = MemoryConfig {
        ro_data,
        rw_data,
        ro_data_size,
        rw_data_size,
        min_stack_size,
    };

    Ok(memory_config)
}

#[derive(Clone, PartialEq, Eq, Debug, Hash)]
struct ExternMetadata {
    index: Option<u32>,
    symbol: Vec<u8>,
    input_regs: u8,
    output_regs: u8,
}

#[derive(Clone, PartialEq, Eq, Debug)]
struct Export {
    location: SectionTarget,
    metadata: ExternMetadata,
}

fn extract_exports<H>(
    elf: &Elf<H>,
    relocations: &BTreeMap<SectionTarget, RelocationKind>,
    section: &Section,
) -> Result<Vec<Export>, ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    let mut b = polkavm_common::elf::Reader::from(section.data());
    let mut exports = Vec::new();
    loop {
        let Ok(version) = b.read_byte() else { break };

        if version != 1 {
            return Err(ProgramFromElfError::other(format!(
                "failed to parse export metadata: unsupported export metadata version: {}",
                version
            )));
        }

        let metadata = {
            let location = SectionTarget {
                section_index: section.index(),
                offset: b.offset() as u64,
            };

            // Ignore the address as written; we'll just use the relocations instead.
            let address = if elf.is_64() { b.read_u64() } else { b.read_u32().map(u64::from) };
            let address = address.map_err(|error| ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)))?;

            let Some(relocation) = relocations.get(&location) else {
                return Err(ProgramFromElfError::other(format!(
                    "found an export without a relocation for a pointer to the metadata at {location} (found address = 0x{address:x})"
                )));
            };

            let target = match relocation {
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U64,
                } if elf.is_64() => target,
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U32,
                } if !elf.is_64() => target,
                _ => {
                    return Err(ProgramFromElfError::other(format!(
                        "found an export with an unexpected relocation at {location}: {relocation:?}"
                    )));
                }
            };

            parse_extern_metadata(elf, relocations, *target)?
        };

        let location = SectionTarget {
            section_index: section.index(),
            offset: b.offset() as u64,
        };

        // Ignore the address as written; we'll just use the relocations instead.
        let error = if elf.is_64() { b.read_u64().err() } else { b.read_u32().err() };

        if let Some(error) = error {
            return Err(ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)));
        }

        let Some(relocation) = relocations.get(&location) else {
            return Err(ProgramFromElfError::other(format!(
                "found an export without a relocation for a pointer to the code at {location}"
            )));
        };

        let target = match relocation {
            RelocationKind::Abs {
                target,
                size: RelocationSize::U64,
            } if elf.is_64() => target,
            RelocationKind::Abs {
                target,
                size: RelocationSize::U32,
            } if !elf.is_64() => target,
            _ => {
                return Err(ProgramFromElfError::other(format!(
                    "found an export with an unexpected relocation at {location}: {relocation:?}"
                )));
            }
        };

        exports.push(Export {
            location: *target,
            metadata,
        });
    }

    Ok(exports)
}

#[derive(Clone, Debug)]
struct Import {
    metadata: ExternMetadata,
}

impl core::ops::Deref for Import {
    type Target = ExternMetadata;
    fn deref(&self) -> &Self::Target {
        &self.metadata
    }
}

impl Import {
    fn src(&'_ self) -> impl Iterator<Item = Reg> + '_ {
        assert!(self.metadata.input_regs as usize <= Reg::INPUT_REGS.len());
        Reg::INPUT_REGS
            .into_iter()
            .take(self.metadata.input_regs as usize)
            .chain(core::iter::once(Reg::SP))
    }

    fn src_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.src() {
            mask.insert(reg);
        }

        mask
    }

    #[allow(clippy::unused_self)]
    fn dst(&self) -> impl Iterator<Item = Reg> {
        assert!(self.metadata.output_regs as usize <= Reg::OUTPUT_REGS.len());
        [Reg::T0, Reg::T1, Reg::T2, Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5].into_iter()
    }

    fn dst_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.dst() {
            mask.insert(reg);
        }

        mask
    }
}
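
// A sanity test (not part of the original source): the source registers of an
// import are its argument registers followed by the stack pointer.
#[test]
fn test_import_sources_include_stack_pointer() {
    let import = Import {
        metadata: ExternMetadata {
            index: None,
            symbol: b"example".to_vec(),
            input_regs: 2,
            output_regs: 1,
        },
    };
    assert_eq!(import.src().collect::<Vec<_>>(), vec![Reg::A0, Reg::A1, Reg::SP]);
}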
1462
1463fn parse_extern_metadata_impl<H>(
1464    elf: &Elf<H>,
1465    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1466    target: SectionTarget,
1467) -> Result<ExternMetadata, String>
1468where
1469    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1470{
1471    let section = elf.section_by_index(target.section_index);
1472    let mut b = polkavm_common::elf::Reader::from(section.data());
1473
1474    // Skip `sh_offset` bytes:
1475    let _ = b.read(target.offset as usize)?;
1476
1477    let version = b.read_byte()?;
1478    if version != 1 && version != 2 {
1479        return Err(format!("unsupported extern metadata version: '{version}' (expected '1' or '2')"));
1480    }
1481
1482    let flags = b.read_u32()?;
1483    let symbol_length = b.read_u32()?;
1484    let Some(symbol_relocation) = relocations.get(&SectionTarget {
1485        section_index: section.index(),
1486        offset: b.offset() as u64,
1487    }) else {
1488        return Err("missing relocation for the symbol".into());
1489    };
1490
1491    // Ignore the address as written; we'll just use the relocations instead.
1492    if elf.is_64() {
1493        b.read_u64()?;
1494    } else {
1495        b.read_u32()?;
1496    };
1497
1498    let symbol_location = match symbol_relocation {
1499        RelocationKind::Abs {
1500            target,
1501            size: RelocationSize::U64,
1502        } if elf.is_64() => target,
1503        RelocationKind::Abs {
1504            target,
1505            size: RelocationSize::U32,
1506        } if !elf.is_64() => target,
1507        _ => return Err(format!("unexpected relocation for the symbol: {symbol_relocation:?}")),
1508    };
1509
1510    let Some(symbol) = elf
1511        .section_by_index(symbol_location.section_index)
1512        .data()
1513        .get(symbol_location.offset as usize..symbol_location.offset.saturating_add(u64::from(symbol_length)) as usize)
1514    else {
1515        return Err("symbol out of bounds".into());
1516    };
1517
1518    let input_regs = b.read_byte()?;
1519    if input_regs as usize > Reg::INPUT_REGS.len() {
1520        return Err(format!("too many input registers: {input_regs}"));
1521    }
1522
1523    let output_regs = b.read_byte()?;
1524    if output_regs as usize > Reg::OUTPUT_REGS.len() {
1525        return Err(format!("too many output registers: {output_regs}"));
1526    }
1527
1528    let index = if version >= 2 {
1529        let has_index = b.read_byte()?;
1530        let index = b.read_u32()?;
1531        if has_index > 0 {
1532            Some(index)
1533        } else {
1534            None
1535        }
1536    } else {
1537        None
1538    };
1539
1540    if flags != 0 {
1541        return Err(format!("found unsupported flags: 0x{flags:x}"));
1542    }
1543
1544    Ok(ExternMetadata {
1545        index,
1546        symbol: symbol.to_owned(),
1547        input_regs,
1548        output_regs,
1549    })
1550}
1551
1552fn parse_extern_metadata<H>(
1553    elf: &Elf<H>,
1554    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1555    target: SectionTarget,
1556) -> Result<ExternMetadata, ProgramFromElfError>
1557where
1558    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1559{
1560    parse_extern_metadata_impl(elf, relocations, target)
1561        .map_err(|error| ProgramFromElfError::other(format!("failed to parse extern metadata: {}", error)))
1562}
1563
1564fn check_imports_and_assign_indexes(imports: &mut Vec<Import>, used_imports: &HashSet<usize>) -> Result<(), ProgramFromElfError> {
1565    let mut import_by_symbol: HashMap<Vec<u8>, usize> = HashMap::new();
1566    for (nth_import, import) in imports.iter().enumerate() {
1567        if let Some(&old_nth_import) = import_by_symbol.get(&import.metadata.symbol) {
1568            let old_import = &imports[old_nth_import];
1569            if import.metadata == old_import.metadata {
1570                continue;
1571            }
1572
1573            return Err(ProgramFromElfError::other(format!(
1574                "duplicate imports with the same symbol yet different prototype: {}",
1575                ProgramSymbol::new(&*import.metadata.symbol)
1576            )));
1577        }
1578
1579        import_by_symbol.insert(import.metadata.symbol.clone(), nth_import);
1580    }
1581
1582    if imports.iter().any(|import| import.metadata.index.is_some()) {
1583        let mut import_by_index: HashMap<u32, ExternMetadata> = HashMap::new();
1584        let mut max_index = 0;
1585        for import in &*imports {
1586            if let Some(index) = import.metadata.index {
1587                if let Some(old_metadata) = import_by_index.get(&index) {
1588                    if *old_metadata != import.metadata {
1589                        return Err(ProgramFromElfError::other(format!(
1590                            "duplicate imports with the same index yet different prototypes: {}, {}",
1591                            ProgramSymbol::new(&*old_metadata.symbol),
1592                            ProgramSymbol::new(&*import.metadata.symbol)
1593                        )));
1594                    }
1595                } else {
1596                    import_by_index.insert(index, import.metadata.clone());
1597                }
1598
1599                max_index = core::cmp::max(max_index, index);
1600            } else {
1601                return Err(ProgramFromElfError::other(format!(
1602                    "import without a specified index: {}",
1603                    ProgramSymbol::new(&*import.metadata.symbol)
1604                )));
1605            }
1606        }
1607
1608        // If there are any holes in the indexes then insert dummy imports.
1609        for index in 0..max_index {
1610            if !import_by_index.contains_key(&index) {
1611                imports.push(Import {
1612                    metadata: ExternMetadata {
1613                        index: Some(index),
1614                        symbol: Vec::new(),
1615                        input_regs: 0,
1616                        output_regs: 0,
1617                    },
1618                })
1619            }
1620        }
1621    } else {
1622        let mut ordered: Vec<_> = used_imports.iter().copied().collect();
1623        ordered.sort_by(|&a, &b| imports[a].metadata.symbol.cmp(&imports[b].metadata.symbol));
1624
1625        for (assigned_index, &nth_import) in ordered.iter().enumerate() {
1626            imports[nth_import].metadata.index = Some(assigned_index as u32);
1627        }
1628    }
1629
1630    for import in imports {
1631        log::debug!(
1632            "Import: '{}', index = {:?}, input regs = {}, output regs = {}",
1633            String::from_utf8_lossy(&import.metadata.symbol),
1634            import.metadata.index,
1635            import.metadata.input_regs,
1636            import.metadata.output_regs
1637        );
1638    }
1639
1640    Ok(())
1641}
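
// A minimal, test-only sketch (with made-up symbols) of the fallback path in
// `check_imports_and_assign_indexes`: when no import carries an explicit
// index, indexes are assigned deterministically by symbol order.
#[test]
fn imports_without_explicit_indexes_are_assigned_in_symbol_order() {
    let mut imports = vec![
        Import {
            metadata: ExternMetadata {
                index: None,
                symbol: b"beta".to_vec(),
                input_regs: 0,
                output_regs: 0,
            },
        },
        Import {
            metadata: ExternMetadata {
                index: None,
                symbol: b"alpha".to_vec(),
                input_regs: 0,
                output_regs: 0,
            },
        },
    ];

    let used_imports: HashSet<usize> = [0, 1].into_iter().collect();
    check_imports_and_assign_indexes(&mut imports, &used_imports).unwrap();

    // "alpha" sorts before "beta", so it is assigned index 0.
    assert_eq!(imports[0].metadata.index, Some(1));
    assert_eq!(imports[1].metadata.index, Some(0));
}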
1642
1643fn get_relocation_target<H>(elf: &Elf<H>, relocation: &crate::elf::Relocation) -> Result<Option<SectionTarget>, ProgramFromElfError>
1644where
1645    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1646{
1647    match relocation.target() {
1648        object::RelocationTarget::Absolute => {
1649            if let object::RelocationFlags::Elf { r_type } = relocation.flags() {
1650                if r_type == object::elf::R_RISCV_NONE {
1651                    // GNU ld apparently turns R_RISCV_ALIGN and R_RISCV_RELAX into these.
1652                    return Ok(None);
1653                }
1654            }
1655            // Example of such relocation:
1656            //   Offset     Info    Type                Sym. Value  Symbol's Name + Addend
1657            //   00060839  00000001 R_RISCV_32                        0
1658            //
1659            // So far I've only seen these emitted for `.debug_info`.
1660            //
1661            // I'm not entirely sure what the point of these is, as they don't point to any
1662            // symbol and have an addend of zero.
1663            assert_eq!(relocation.addend(), 0);
1664            assert!(!relocation.has_implicit_addend());
1665            Ok(None)
1666        }
1667        object::RelocationTarget::Symbol(target_symbol_index) => {
1668            let target_symbol = elf
1669                .symbol_by_index(target_symbol_index)
1670                .map_err(|error| ProgramFromElfError::other(format!("failed to fetch relocation target: {}", error)))?;
1671
1672            let (section, offset) = target_symbol.section_and_offset()?;
1673            log::trace!(
1674                "Fetched relocation target: target section = \"{}\", target symbol = \"{}\" ({}), symbol offset = 0x{:x} + 0x{:x}",
1675                section.name(),
1676                target_symbol.name().unwrap_or(""),
1677                target_symbol_index.0,
1678                offset,
1679                relocation.addend(),
1680            );
1681
1682            let Some(offset) = offset.checked_add_signed(relocation.addend()) else {
1683                return Err(ProgramFromElfError::other(
1684                    "failed to add addend to the symbol's offset due to overflow",
1685                ));
1686            };
1687
1688            Ok(Some(SectionTarget {
1689                section_index: section.index(),
1690                offset,
1691            }))
1692        }
1693        _ => Err(ProgramFromElfError::other(format!(
1694            "unsupported target for relocation: {:?}",
1695            relocation
1696        ))),
1697    }
1698}
1699
1700enum MinMax {
1701    MaxSigned,
1702    MinSigned,
1703    MaxUnsigned,
1704    MinUnsigned,
1705
1706    MaxSigned64,
1707    MinSigned64,
1708    MaxUnsigned64,
1709    MinUnsigned64,
1710}
1711
1712fn emit_minmax(
1713    kind: MinMax,
1714    dst: Reg,
1715    src1: Option<Reg>,
1716    src2: Option<Reg>,
1717    tmp: Reg,
1718    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1719) {
1720    // This is supposed to emit something like this:
1721    //   tmp = src1 ? src2
1722    //   dst = src1
1723    //   dst = src2 if tmp == 0
1724
1725    assert_ne!(dst, tmp);
1726    assert_ne!(Some(tmp), src1);
1727    assert_ne!(Some(tmp), src2);
1728    assert_ne!(Some(dst), src2);
1729
1730    let (cmp_src1, cmp_src2, cmp_kind) = match kind {
1731        MinMax::MinUnsigned => (src1, src2, AnyAnyKind::SetLessThanUnsigned32),
1732        MinMax::MaxUnsigned => (src2, src1, AnyAnyKind::SetLessThanUnsigned32),
1733        MinMax::MinSigned => (src1, src2, AnyAnyKind::SetLessThanSigned32),
1734        MinMax::MaxSigned => (src2, src1, AnyAnyKind::SetLessThanSigned32),
1735        MinMax::MinUnsigned64 => (src1, src2, AnyAnyKind::SetLessThanUnsigned64),
1736        MinMax::MaxUnsigned64 => (src2, src1, AnyAnyKind::SetLessThanUnsigned64),
1737        MinMax::MinSigned64 => (src1, src2, AnyAnyKind::SetLessThanSigned64),
1738        MinMax::MaxSigned64 => (src2, src1, AnyAnyKind::SetLessThanSigned64),
1739    };
1740
1741    emit(InstExt::Basic(BasicInst::AnyAny {
1742        kind: cmp_kind,
1743        dst: tmp,
1744        src1: cmp_src1.map_or(RegImm::Imm(0), RegImm::Reg),
1745        src2: cmp_src2.map_or(RegImm::Imm(0), RegImm::Reg),
1746    }));
1747
1748    if let Some(src1) = src1 {
1749        emit(InstExt::Basic(BasicInst::MoveReg { dst, src: src1 }));
1750    } else {
1751        emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
1752    }
1753
1754    emit(InstExt::Basic(BasicInst::Cmov {
1755        kind: CmovKind::EqZero,
1756        dst,
1757        src: src2.map_or(RegImm::Imm(0), RegImm::Reg),
1758        cond: tmp,
1759    }));
1760}
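
// A minimal usage sketch (the register choices are made up): lowering a signed
// 32-bit `max` through `emit_minmax` should yield exactly the three-step
// compare/move/cmov sequence described in the comment inside it.
#[test]
fn emit_minmax_emits_the_three_instruction_sequence() {
    let mut ops: Vec<InstExt<SectionTarget, SectionTarget>> = Vec::new();
    emit_minmax(MinMax::MaxSigned, Reg::A0, Some(Reg::A1), Some(Reg::A2), Reg::E2, |inst| {
        ops.push(inst)
    });

    assert_eq!(ops.len(), 3);
    assert!(matches!(ops[0], InstExt::Basic(BasicInst::AnyAny { .. })));
    assert!(matches!(ops[1], InstExt::Basic(BasicInst::MoveReg { .. })));
    assert!(matches!(ops[2], InstExt::Basic(BasicInst::Cmov { .. })));
}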
1761
1762fn resolve_simple_zero_register_usage(
1763    kind: crate::riscv::RegRegKind,
1764    dst: Reg,
1765    src1: RReg,
1766    src2: RReg,
1767    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1768) -> bool {
1769    use crate::riscv::RegRegKind as K;
1770    if kind == K::OrInverted && src1 == RReg::Zero && src2 != RReg::Zero {
1771        emit(InstExt::Basic(BasicInst::AnyAny {
1772            kind: AnyAnyKind::Xor32,
1773            dst,
1774            src1: RegImm::Imm(!0),
1775            src2: cast_reg_any(src2).unwrap(),
1776        }));
1777        return true;
1778    }
1779
1780    if kind == K::Xnor && src1 == RReg::Zero && src2 != RReg::Zero {
1781        emit(InstExt::Basic(BasicInst::AnyAny {
1782            kind: AnyAnyKind::Xor32,
1783            dst,
1784            src1: RegImm::Imm(!0),
1785            src2: cast_reg_any(src2).unwrap(),
1786        }));
1787        return true;
1788    }
1789
1790    if kind == K::Xnor && src1 != RReg::Zero && src2 == RReg::Zero {
1791        emit(InstExt::Basic(BasicInst::AnyAny {
1792            kind: AnyAnyKind::Xor32,
1793            dst,
1794            src1: cast_reg_any(src1).unwrap(),
1795            src2: RegImm::Imm(!0),
1796        }));
1797        return true;
1798    }
1799
1800    if (kind == K::Minimum || kind == K::Maximum) && (src1 == RReg::Zero || src2 == RReg::Zero) {
1801        if src1 == RReg::Zero && src2 == RReg::Zero {
1802            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
1803            return true;
1804        }
1805
1806        let tmp = Reg::E2;
1807        let src1 = cast_reg_any(src1).unwrap();
1808        let src2 = cast_reg_any(src2).unwrap();
1809        let (kind, cmp_src1, cmp_src2) = match kind {
1810            K::Minimum => (AnyAnyKind::SetLessThanSigned32, src1, src2),
1811            K::Maximum => (AnyAnyKind::SetLessThanSigned32, src2, src1),
1812            _ => unreachable!(),
1813        };
1814
1815        emit(InstExt::Basic(BasicInst::AnyAny {
1816            kind,
1817            dst: tmp,
1818            src1: cmp_src1,
1819            src2: cmp_src2,
1820        }));
1821
1822        match src1 {
1823            RegImm::Reg(src) => emit(InstExt::Basic(BasicInst::MoveReg { dst, src })),
1824            RegImm::Imm(imm) => emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm })),
1825        }
1826
1827        emit(InstExt::Basic(BasicInst::Cmov {
1828            kind: CmovKind::EqZero,
1829            dst,
1830            src: src2,
1831            cond: tmp,
1832        }));
1833
1834        return true;
1835    }
1836
1837    if matches!(kind, K::RotateLeft32AndSignExtend | K::RotateRight32AndSignExtend) && src1 != RReg::Zero && src2 == RReg::Zero {
1838        emit(InstExt::Basic(BasicInst::AnyAny {
1839            kind: AnyAnyKind::Add32AndSignExtend,
1840            dst,
1841            src1: cast_reg_any(src1).unwrap(),
1842            src2: RegImm::Imm(0),
1843        }));
1844        return true;
1845    }
1846
1847    false
1848}
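
// Another small, test-only sketch: `xnor dst, zero, ra` folds to `!ra`, which
// the helper above rewrites into a single XOR against an all-ones immediate.
#[test]
fn xnor_with_a_zero_register_becomes_a_single_xor() {
    let mut ops: Vec<InstExt<SectionTarget, SectionTarget>> = Vec::new();
    let handled = resolve_simple_zero_register_usage(
        crate::riscv::RegRegKind::Xnor,
        Reg::A0,
        RReg::Zero,
        RReg::RA,
        |inst| ops.push(inst),
    );

    assert!(handled);
    assert_eq!(ops.len(), 1);
    assert!(matches!(
        ops[0],
        InstExt::Basic(BasicInst::AnyAny { kind: AnyAnyKind::Xor32, .. })
    ));
}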
1849
1850fn emit_or_combine_byte(
1851    location: SectionTarget,
1852    dst: Reg,
1853    src: Reg,
1854    rv64: bool,
1855    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1856) {
1857    let op_reg = dst;
1858    let cmp_reg = Reg::E1;
1859    let tmp_reg = Reg::E2;
1860    let mask_reg = if dst != src { src } else { Reg::E3 };
1861    let range = if rv64 { 0..64 } else { 0..32 };
1862
1863    log::warn!("Emulating orc.b at {:?} with an instruction sequence", location);
1864
1865    if dst != src {
1866        emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
1867    }
1868
1869    // Loop:
1870    // mov tmp, op
1871    // shl mask, 8
1872    // or tmp, mask
1873    // test op, mask
1874    // cmov.neq op, tmp
1875
1876    for iter in range.step_by(8) {
1877        emit(InstExt::Basic(BasicInst::MoveReg { dst: tmp_reg, src: op_reg }));
1878
1879        if iter == 0 {
1880            emit(InstExt::Basic(BasicInst::LoadImmediate { dst: mask_reg, imm: 0xff }));
1881        } else {
1882            emit(InstExt::Basic(BasicInst::AnyAny {
1883                kind: if rv64 {
1884                    AnyAnyKind::ShiftLogicalLeft64
1885                } else {
1886                    AnyAnyKind::ShiftLogicalLeft32
1887                },
1888                dst: mask_reg,
1889                src1: RegImm::Reg(mask_reg),
1890                src2: RegImm::Imm(8),
1891            }));
1892        }
1893
1894        emit(InstExt::Basic(BasicInst::AnyAny {
1895            kind: if rv64 { AnyAnyKind::Or64 } else { AnyAnyKind::Or32 },
1896            dst: tmp_reg,
1897            src1: RegImm::Reg(tmp_reg),
1898            src2: RegImm::Reg(mask_reg),
1899        }));
1900
1901        emit(InstExt::Basic(BasicInst::AnyAny {
1902            kind: if rv64 { AnyAnyKind::And64 } else { AnyAnyKind::And32 },
1903            dst: cmp_reg,
1904            src1: RegImm::Reg(op_reg),
1905            src2: RegImm::Reg(mask_reg),
1906        }));
1907
1908        emit(InstExt::Basic(BasicInst::Cmov {
1909            kind: CmovKind::NotEqZero,
1910            dst: op_reg,
1911            src: RegImm::Reg(tmp_reg),
1912            cond: cmp_reg,
1913        }));
1914    }
1915}
1916
1917fn convert_instruction<H>(
1918    elf: &Elf<H>,
1919    section: &Section,
1920    current_location: SectionTarget,
1921    instruction: Inst,
1922    instruction_size: u64,
1923    rv64: bool,
1924    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1925) -> Result<(), ProgramFromElfError>
1926where
1927    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1928{
1929    match instruction {
1930        Inst::LoadUpperImmediate { dst, value } => {
1931            let Some(dst) = cast_reg_non_zero(dst)? else {
1932                emit(InstExt::nop());
1933                return Ok(());
1934            };
1935
1936            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: value as i32 }));
1937            Ok(())
1938        }
1939        Inst::JumpAndLink { dst, target } => {
1940            let target = SectionTarget {
1941                section_index: section.index(),
1942                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
1943            };
1944
1945            if target.offset > section.size() {
1946                return Err(ProgramFromElfError::other("out of range JAL instruction"));
1947            }
1948
1949            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
1950                let target_return = current_location.add(instruction_size);
1951                ControlInst::Call {
1952                    ra: dst,
1953                    target,
1954                    target_return,
1955                }
1956            } else {
1957                ControlInst::Jump { target }
1958            };
1959
1960            emit(InstExt::Control(next));
1961            Ok(())
1962        }
1963        Inst::Branch { kind, src1, src2, target } => {
1964            let src1 = cast_reg_any(src1)?;
1965            let src2 = cast_reg_any(src2)?;
1966
1967            let target_true = SectionTarget {
1968                section_index: section.index(),
1969                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
1970            };
1971
1972            if target_true.offset > section.size() {
1973                return Err(ProgramFromElfError::other("out of range unrelocated branch"));
1974            }
1975
1976            let target_false = current_location.add(instruction_size);
1977            emit(InstExt::Control(ControlInst::Branch {
1978                kind,
1979                src1,
1980                src2,
1981                target_true,
1982                target_false,
1983            }));
1984            Ok(())
1985        }
1986        Inst::JumpAndLinkRegister { dst, base, value } => {
1987            let Some(base) = cast_reg_non_zero(base)? else {
1988                return Err(ProgramFromElfError::other("found an unrelocated JALR instruction"));
1989            };
1990
1991            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
1992                let target_return = current_location.add(instruction_size);
1993                ControlInst::CallIndirect {
1994                    ra: dst,
1995                    base,
1996                    offset: value.into(),
1997                    target_return,
1998                }
1999            } else {
2000                ControlInst::JumpIndirect {
2001                    base,
2002                    offset: value.into(),
2003                }
2004            };
2005
2006            emit(InstExt::Control(next));
2007            Ok(())
2008        }
2009        Inst::Unimplemented => {
2010            emit(InstExt::Control(ControlInst::Unimplemented));
2011            Ok(())
2012        }
2013        Inst::FenceI | Inst::Fence { .. } => {
2014            emit(InstExt::Basic(BasicInst::Nop));
2015            Ok(())
2016        }
2017        Inst::Load { kind, dst, base, offset } => {
2018            let Some(base) = cast_reg_non_zero(base)? else {
2019                return Err(ProgramFromElfError::other("found an unrelocated absolute load"));
2020            };
2021
2022            // LLVM riscv-enable-dead-defs pass may rewrite dst to the zero register.
2023            match cast_reg_non_zero(dst)? {
2024                Some(dst) => emit(InstExt::Basic(BasicInst::LoadIndirect { kind, dst, base, offset })),
2025                None => emit(InstExt::Basic(BasicInst::Nop)),
2026            }
2027
2028            Ok(())
2029        }
2030        Inst::Store { kind, src, base, offset } => {
2031            let Some(base) = cast_reg_non_zero(base)? else {
2032                return Err(ProgramFromElfError::other("found an unrelocated absolute store"));
2033            };
2034
2035            let src = cast_reg_any(src)?;
2036            emit(InstExt::Basic(BasicInst::StoreIndirect { kind, src, base, offset }));
2037            Ok(())
2038        }
2039        Inst::RegImm { kind, dst, src, imm } => {
2040            let Some(dst) = cast_reg_non_zero(dst)? else {
2041                emit(InstExt::nop());
2042                return Ok(());
2043            };
2044
2045            let src = cast_reg_any(src)?;
2046            let kind = match kind {
2047                RegImmKind::Add32 => AnyAnyKind::Add32,
2048                RegImmKind::Add32AndSignExtend => AnyAnyKind::Add32AndSignExtend,
2049                RegImmKind::Add64 => AnyAnyKind::Add64,
2050                RegImmKind::And32 => AnyAnyKind::And32,
2051                RegImmKind::And64 => AnyAnyKind::And64,
2052                RegImmKind::Or32 => AnyAnyKind::Or32,
2053                RegImmKind::Or64 => AnyAnyKind::Or64,
2054                RegImmKind::Xor32 => AnyAnyKind::Xor32,
2055                RegImmKind::Xor64 => AnyAnyKind::Xor64,
2056                RegImmKind::SetLessThanUnsigned32 => AnyAnyKind::SetLessThanUnsigned32,
2057                RegImmKind::SetLessThanUnsigned64 => AnyAnyKind::SetLessThanUnsigned64,
2058                RegImmKind::SetLessThanSigned32 => AnyAnyKind::SetLessThanSigned32,
2059                RegImmKind::SetLessThanSigned64 => AnyAnyKind::SetLessThanSigned64,
2060                RegImmKind::ShiftLogicalLeft32 => AnyAnyKind::ShiftLogicalLeft32,
2061                RegImmKind::ShiftLogicalLeft32AndSignExtend => AnyAnyKind::ShiftLogicalLeft32AndSignExtend,
2062                RegImmKind::ShiftLogicalLeft64 => AnyAnyKind::ShiftLogicalLeft64,
2063                RegImmKind::ShiftLogicalRight32 => AnyAnyKind::ShiftLogicalRight32,
2064                RegImmKind::ShiftLogicalRight32AndSignExtend => AnyAnyKind::ShiftLogicalRight32AndSignExtend,
2065                RegImmKind::ShiftLogicalRight64 => AnyAnyKind::ShiftLogicalRight64,
2066                RegImmKind::ShiftArithmeticRight32 => AnyAnyKind::ShiftArithmeticRight32,
2067                RegImmKind::ShiftArithmeticRight32AndSignExtend => AnyAnyKind::ShiftArithmeticRight32AndSignExtend,
2068                RegImmKind::ShiftArithmeticRight64 => AnyAnyKind::ShiftArithmeticRight64,
2069                RegImmKind::RotateRight32 => AnyAnyKind::RotateRight32,
2070                RegImmKind::RotateRight32AndSignExtend => AnyAnyKind::RotateRight32AndSignExtend,
2071                RegImmKind::RotateRight64 => AnyAnyKind::RotateRight64,
2072            };
2073
2074            match src {
2075                RegImm::Imm(0) => {
2076                    // The optimizer can take care of this later, but doing it early here is more efficient.
2077                    emit(InstExt::Basic(BasicInst::LoadImmediate {
2078                        dst,
2079                        imm: OperationKind::from(kind)
2080                            .apply_const(0, cast(imm).to_i64_sign_extend())
2081                            .try_into()
2082                            .expect("load immediate overflow"),
2083                    }));
2084                }
2085                RegImm::Reg(src) if imm == 0 && matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) => {
2086                    emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2087                }
2088                _ => {
2089                    emit(InstExt::Basic(BasicInst::AnyAny {
2090                        kind,
2091                        dst,
2092                        src1: src,
2093                        src2: imm.into(),
2094                    }));
2095                }
2096            }
2097
2098            Ok(())
2099        }
2100        Inst::Reg { kind, dst, src } => {
2101            let Some(dst) = cast_reg_non_zero(dst)? else {
2102                emit(InstExt::nop());
2103                return Ok(());
2104            };
2105
2106            use crate::riscv::RegKind as K;
2107
2108            let Some(src) = cast_reg_non_zero(src)? else {
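                // The source is the zero register, so the result is a compile-time
                // constant: clz/ctz of zero yield the full bit width, while popcount,
                // byte-reverse, orc.b and the sign/zero extensions of zero are all 0.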
2109                let imm = match kind {
2110                    K::CountLeadingZeroBits32 | K::CountTrailingZeroBits32 => 32,
2111                    K::CountLeadingZeroBits64 | K::CountTrailingZeroBits64 => 64,
2112                    K::CountSetBits32 | K::CountSetBits64 => 0,
2113                    K::ReverseByte => 0,
2114                    K::OrCombineByte => 0,
2115                    K::SignExtend8 | K::SignExtend16 | K::ZeroExtend16 => 0,
2116                };
2117
2118                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm }));
2119                return Ok(());
2120            };
2121
2122            let kind = match kind {
2123                K::CountLeadingZeroBits32 => RegKind::CountLeadingZeroBits32,
2124                K::CountLeadingZeroBits64 => RegKind::CountLeadingZeroBits64,
2125                K::CountSetBits32 => RegKind::CountSetBits32,
2126                K::CountSetBits64 => RegKind::CountSetBits64,
2127                K::CountTrailingZeroBits32 => RegKind::CountTrailingZeroBits32,
2128                K::CountTrailingZeroBits64 => RegKind::CountTrailingZeroBits64,
2129                K::ReverseByte => RegKind::ReverseByte,
2130                K::SignExtend8 => RegKind::SignExtend8,
2131                K::SignExtend16 => RegKind::SignExtend16,
2132                K::ZeroExtend16 => RegKind::ZeroExtend16,
2133                K::OrCombineByte => {
2134                    emit_or_combine_byte(current_location, dst, src, rv64, &mut emit);
2135                    return Ok(());
2136                }
2137            };
2138
2139            emit(InstExt::Basic(BasicInst::Reg { kind, dst, src }));
2140
2141            Ok(())
2142        }
2143        Inst::RegReg { kind, dst, src1, src2 } => {
2144            let Some(dst) = cast_reg_non_zero(dst)? else {
2145                emit(InstExt::nop());
2146                return Ok(());
2147            };
2148
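            // For operations that can encode an immediate operand the zero
            // register simply lowers to an immediate 0.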
2149            macro_rules! anyany {
2150                ($kind:ident) => {
2151                    BasicInst::AnyAny {
2152                        kind: AnyAnyKind::$kind,
2153                        dst,
2154                        src1: cast_reg_any(src1)?,
2155                        src2: cast_reg_any(src2)?,
2156                    }
2157                };
2158            }
2159
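            // These operations cannot encode an immediate, so a zero register is
            // handled by constant-folding the operation; if folding fails the
            // instruction is rejected as invalid.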
2160            macro_rules! regreg {
2161                ($kind:ident) => {
2162                    match (cast_reg_non_zero(src1)?, cast_reg_non_zero(src2)?) {
2163                        (Some(src1), Some(src2)) => BasicInst::RegReg {
2164                            kind: RegRegKind::$kind,
2165                            dst,
2166                            src1,
2167                            src2,
2168                        },
2169                        (lhs, rhs) => {
2170                            let lhs = lhs
2171                                .map(|reg| RegValue::InputReg {
2172                                    reg,
2173                                    source_block: BlockTarget::from_raw(0),
2174                                    bits_used: u64::MAX,
2175                                })
2176                                .unwrap_or(RegValue::Constant(0));
2177
2178                            let rhs = rhs
2179                                .map(|reg| RegValue::InputReg {
2180                                    reg,
2181                                    source_block: BlockTarget::from_raw(0),
2182                                    bits_used: u64::MAX,
2183                                })
2184                                .unwrap_or(RegValue::Constant(0));
2185
2186                            match OperationKind::from(RegRegKind::$kind).apply(elf, lhs, rhs) {
2187                                Some(RegValue::Constant(imm)) => {
2188                                    let imm: i32 = imm.try_into().expect("immediate operand overflow");
2189                                    BasicInst::LoadImmediate { dst, imm }
2190                                }
2191                                Some(RegValue::InputReg { reg, .. }) => BasicInst::MoveReg { dst, src: reg },
2192                                _ => {
2193                                    return Err(ProgramFromElfError::other(format!(
2194                                        "found a {:?} instruction using a zero register",
2195                                        kind
2196                                    )))
2197                                }
2198                            }
2199                        }
2200                    }
2201                };
2202            }
2203
2204            if resolve_simple_zero_register_usage(kind, dst, src1, src2, &mut emit) {
2205                // The helper has already emitted a replacement sequence.
2206                return Ok(());
2207            }
2208
2209            use crate::riscv::RegRegKind as K;
2210            let instruction = match kind {
2211                K::Add32 => anyany!(Add32),
2212                K::Add32AndSignExtend => anyany!(Add32AndSignExtend),
2213                K::Add64 => anyany!(Add64),
2214                K::Sub32 => anyany!(Sub32),
2215                K::Sub32AndSignExtend => anyany!(Sub32AndSignExtend),
2216                K::Sub64 => anyany!(Sub64),
2217                K::And32 => anyany!(And32),
2218                K::And64 => anyany!(And64),
2219                K::Or32 => anyany!(Or32),
2220                K::Or64 => anyany!(Or64),
2221                K::Xor32 => anyany!(Xor32),
2222                K::Xor64 => anyany!(Xor64),
2223                K::SetLessThanUnsigned32 => anyany!(SetLessThanUnsigned32),
2224                K::SetLessThanUnsigned64 => anyany!(SetLessThanUnsigned64),
2225                K::SetLessThanSigned32 => anyany!(SetLessThanSigned32),
2226                K::SetLessThanSigned64 => anyany!(SetLessThanSigned64),
2227                K::ShiftLogicalLeft32 => anyany!(ShiftLogicalLeft32),
2228                K::ShiftLogicalLeft32AndSignExtend => anyany!(ShiftLogicalLeft32AndSignExtend),
2229                K::ShiftLogicalLeft64 => anyany!(ShiftLogicalLeft64),
2230                K::ShiftLogicalRight32 => anyany!(ShiftLogicalRight32),
2231                K::ShiftLogicalRight32AndSignExtend => anyany!(ShiftLogicalRight32AndSignExtend),
2232                K::ShiftLogicalRight64 => anyany!(ShiftLogicalRight64),
2233                K::ShiftArithmeticRight32 => anyany!(ShiftArithmeticRight32),
2234                K::ShiftArithmeticRight32AndSignExtend => anyany!(ShiftArithmeticRight32AndSignExtend),
2235                K::ShiftArithmeticRight64 => anyany!(ShiftArithmeticRight64),
2236                K::Mul32 => anyany!(Mul32),
2237                K::Mul32AndSignExtend => anyany!(Mul32AndSignExtend),
2238                K::Mul64 => anyany!(Mul64),
2239                K::MulUpperSignedSigned32 => regreg!(MulUpperSignedSigned32),
2240                K::MulUpperSignedSigned64 => regreg!(MulUpperSignedSigned64),
2241                K::MulUpperUnsignedUnsigned32 => regreg!(MulUpperUnsignedUnsigned32),
2242                K::MulUpperUnsignedUnsigned64 => regreg!(MulUpperUnsignedUnsigned64),
2243                K::MulUpperSignedUnsigned32 => regreg!(MulUpperSignedUnsigned32),
2244                K::MulUpperSignedUnsigned64 => regreg!(MulUpperSignedUnsigned64),
2245                K::Div32 => regreg!(Div32),
2246                K::Div32AndSignExtend => regreg!(Div32AndSignExtend),
2247                K::Div64 => regreg!(Div64),
2248                K::DivUnsigned32 => regreg!(DivUnsigned32),
2249                K::DivUnsigned32AndSignExtend => regreg!(DivUnsigned32AndSignExtend),
2250                K::DivUnsigned64 => regreg!(DivUnsigned64),
2251                K::Rem32 => regreg!(Rem32),
2252                K::Rem32AndSignExtend => regreg!(Rem32AndSignExtend),
2253                K::Rem64 => regreg!(Rem64),
2254                K::RemUnsigned32 => regreg!(RemUnsigned32),
2255                K::RemUnsigned32AndSignExtend => regreg!(RemUnsigned32AndSignExtend),
2256                K::RemUnsigned64 => regreg!(RemUnsigned64),
2257
2258                K::AndInverted => regreg!(AndInverted),
2259                K::OrInverted => regreg!(OrInverted),
2260                K::Xnor => regreg!(Xnor),
2261                K::Maximum => regreg!(Maximum),
2262                K::MaximumUnsigned => regreg!(MaximumUnsigned),
2263                K::Minimum => regreg!(Minimum),
2264                K::MinimumUnsigned => regreg!(MinimumUnsigned),
2265                K::RotateLeft32 => regreg!(RotateLeft32),
2266                K::RotateLeft32AndSignExtend => regreg!(RotateLeft32AndSignExtend),
2267                K::RotateLeft64 => regreg!(RotateLeft64),
2268                K::RotateRight32 => anyany!(RotateRight32),
2269                K::RotateRight32AndSignExtend => anyany!(RotateRight32AndSignExtend),
2270                K::RotateRight64 => anyany!(RotateRight64),
2271            };
2272
2273            emit(InstExt::Basic(instruction));
2274            Ok(())
2275        }
2276        Inst::AddUpperImmediateToPc { .. } => Err(ProgramFromElfError::other(format!(
2277            "found an unrelocated auipc instruction at offset {} in section '{}'; is the program compiled with relocations?",
2278            current_location.offset,
2279            section.name()
2280        ))),
2281        Inst::Ecall => Err(ProgramFromElfError::other(
2282            "found a bare ecall instruction; those are not supported",
2283        )),
2284        Inst::Cmov { kind, dst, src, cond, .. } => {
2285            let Some(dst) = cast_reg_non_zero(dst)? else {
2286                emit(InstExt::Basic(BasicInst::Nop));
2287                return Ok(());
2288            };
2289
2290            match cast_reg_non_zero(cond)? {
2291                Some(cond) => {
2292                    emit(InstExt::Basic(BasicInst::Cmov {
2293                        kind,
2294                        dst,
2295                        src: cast_reg_any(src)?,
2296                        cond,
2297                    }));
2298                }
2299                None => match kind {
2300                    CmovKind::EqZero => {
2301                        if let Some(src) = cast_reg_non_zero(src)? {
2302                            emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2303                        } else {
2304                            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2305                        }
2306                    }
2307                    CmovKind::NotEqZero => {
2308                        emit(InstExt::nop());
2309                    }
2310                },
2311            };
2312
2313            Ok(())
2314        }
2315        Inst::LoadReserved32 { dst, src, .. } => {
2316            let Some(dst) = cast_reg_non_zero(dst)? else {
2317                return Err(ProgramFromElfError::other(
2318                    "found an atomic load with a zero register as the destination",
2319                ));
2320            };
2321
2322            let Some(src) = cast_reg_non_zero(src)? else {
2323                return Err(ProgramFromElfError::other(
2324                    "found an atomic load with a zero register as the source",
2325                ));
2326            };
2327
2328            emit(InstExt::Basic(BasicInst::LoadIndirect {
2329                kind: LoadKind::I32,
2330                dst,
2331                base: src,
2332                offset: 0,
2333            }));
2334
2335            Ok(())
2336        }
2337        Inst::LoadReserved64 { dst, src, .. } if rv64 => {
2338            let Some(dst) = cast_reg_non_zero(dst)? else {
2339                return Err(ProgramFromElfError::other(
2340                    "found an atomic load with a zero register as the destination",
2341                ));
2342            };
2343
2344            let Some(src) = cast_reg_non_zero(src)? else {
2345                return Err(ProgramFromElfError::other(
2346                    "found an atomic load with a zero register as the source",
2347                ));
2348            };
2349
2350            emit(InstExt::Basic(BasicInst::LoadIndirect {
2351                kind: LoadKind::U64,
2352                dst,
2353                base: src,
2354                offset: 0,
2355            }));
2356
2357            Ok(())
2358        }
2359        Inst::StoreConditional32 { src, addr, dst, .. } => {
2360            let Some(addr) = cast_reg_non_zero(addr)? else {
2361                return Err(ProgramFromElfError::other(
2362                    "found an atomic store with a zero register as the address",
2363                ));
2364            };
2365
2366            let src = cast_reg_any(src)?;
2367            emit(InstExt::Basic(BasicInst::StoreIndirect {
2368                kind: StoreKind::U32,
2369                src,
2370                base: addr,
2371                offset: 0,
2372            }));
2373
2374            if let Some(dst) = cast_reg_non_zero(dst)? {
2375                // The store always succeeds, so write zero here.
2376                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2377            }
2378
2379            Ok(())
2380        }
2381        Inst::StoreConditional64 { src, addr, dst, .. } if rv64 => {
2382            let Some(addr) = cast_reg_non_zero(addr)? else {
2383                return Err(ProgramFromElfError::other(
2384                    "found an atomic store with a zero register as the address",
2385                ));
2386            };
2387
2388            let src = cast_reg_any(src)?;
2389            emit(InstExt::Basic(BasicInst::StoreIndirect {
2390                kind: StoreKind::U64,
2391                src,
2392                base: addr,
2393                offset: 0,
2394            }));
2395
2396            if let Some(dst) = cast_reg_non_zero(dst)? {
2397                // The store always succeeds, so write zero here.
2398                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2399            }
2400
2401            Ok(())
2402        }
2403        Inst::LoadReserved64 { .. } | Inst::StoreConditional64 { .. } => {
2404            unreachable!("64-bit instruction in a 32-bit program: {instruction:?}");
2405        }
2406        Inst::Atomic {
2407            kind,
2408            dst: old_value,
2409            addr,
2410            src: operand,
2411            ..
2412        } => {
2413            let Some(addr) = cast_reg_non_zero(addr)? else {
2414                return Err(ProgramFromElfError::other(
2415                    "found an atomic operation with a zero register as the address",
2416                ));
2417            };
2418
2419            let is_64_bit = match kind {
2420                AtomicKind::Swap32
2421                | AtomicKind::Add32
2422                | AtomicKind::And32
2423                | AtomicKind::Or32
2424                | AtomicKind::Xor32
2425                | AtomicKind::MaxSigned32
2426                | AtomicKind::MinSigned32
2427                | AtomicKind::MaxUnsigned32
2428                | AtomicKind::MinUnsigned32 => false,
2429
2430                AtomicKind::Swap64
2431                | AtomicKind::Add64
2432                | AtomicKind::MaxSigned64
2433                | AtomicKind::MinSigned64
2434                | AtomicKind::MaxUnsigned64
2435                | AtomicKind::MinUnsigned64
2436                | AtomicKind::And64
2437                | AtomicKind::Or64
2438                | AtomicKind::Xor64 => true,
2439            };
2440
2441            let mut operand = cast_reg_non_zero(operand)?;
2442            if rv64 && !is_64_bit {
2443                // Sign-extend the operand's lower 32 bits so that any garbage in the upper bits is ignored.
2444                if let Some(src) = operand {
2445                    emit(InstExt::Basic(BasicInst::AnyAny {
2446                        kind: AnyAnyKind::ShiftLogicalLeft64,
2447                        dst: Reg::E3,
2448                        src1: RegImm::Reg(src),
2449                        src2: RegImm::Imm(32),
2450                    }));
2451                    emit(InstExt::Basic(BasicInst::AnyAny {
2452                        kind: AnyAnyKind::ShiftArithmeticRight64,
2453                        dst: Reg::E3,
2454                        src1: RegImm::Reg(Reg::E3),
2455                        src2: RegImm::Imm(32),
2456                    }));
2457                    operand = Some(Reg::E3);
2458                }
2459            }
2460            let operand_regimm = operand.map_or(RegImm::Imm(0), RegImm::Reg);
2461            let (old_value, new_value, output) = match cast_reg_non_zero(old_value)? {
2462                None => (Reg::E0, Reg::E0, None),
2463                Some(old_value) if old_value == addr || Some(old_value) == operand => (Reg::E0, Reg::E1, Some(old_value)),
2464                Some(old_value) => (old_value, Reg::E0, None),
2465            };
2466
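            // From here on the atomic is lowered into a plain load + ALU op + store
            // sequence; like the store-conditionals above (which always succeed),
            // this relies on the program being executed single-threaded.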
2467            emit(InstExt::Basic(BasicInst::LoadIndirect {
2468                kind: if is_64_bit { LoadKind::U64 } else { LoadKind::I32 },
2469                dst: old_value,
2470                base: addr,
2471                offset: 0,
2472            }));
2473
2474            match kind {
2475                AtomicKind::Swap64 => {
2476                    emit(InstExt::Basic(BasicInst::AnyAny {
2477                        kind: AnyAnyKind::Add64,
2478                        dst: new_value,
2479                        src1: operand_regimm,
2480                        src2: RegImm::Imm(0),
2481                    }));
2482                }
2483                AtomicKind::Swap32 => {
2484                    emit(InstExt::Basic(BasicInst::AnyAny {
2485                        kind: AnyAnyKind::Add32,
2486                        dst: new_value,
2487                        src1: operand_regimm,
2488                        src2: RegImm::Imm(0),
2489                    }));
2490                }
2491                AtomicKind::Add64 => {
2492                    emit(InstExt::Basic(BasicInst::AnyAny {
2493                        kind: AnyAnyKind::Add64,
2494                        dst: new_value,
2495                        src1: old_value.into(),
2496                        src2: operand_regimm,
2497                    }));
2498                }
2499                AtomicKind::Add32 => {
2500                    emit(InstExt::Basic(BasicInst::AnyAny {
2501                        kind: AnyAnyKind::Add32,
2502                        dst: new_value,
2503                        src1: old_value.into(),
2504                        src2: operand_regimm,
2505                    }));
2506                }
2507                AtomicKind::And64 => {
2508                    emit(InstExt::Basic(BasicInst::AnyAny {
2509                        kind: AnyAnyKind::And64,
2510                        dst: new_value,
2511                        src1: old_value.into(),
2512                        src2: operand_regimm,
2513                    }));
2514                }
2515                AtomicKind::And32 => {
2516                    emit(InstExt::Basic(BasicInst::AnyAny {
2517                        kind: AnyAnyKind::And32,
2518                        dst: new_value,
2519                        src1: old_value.into(),
2520                        src2: operand_regimm,
2521                    }));
2522                }
2523                AtomicKind::Or64 => {
2524                    emit(InstExt::Basic(BasicInst::AnyAny {
2525                        kind: AnyAnyKind::Or64,
2526                        dst: new_value,
2527                        src1: old_value.into(),
2528                        src2: operand_regimm,
2529                    }));
2530                }
2531                AtomicKind::Or32 => {
2532                    emit(InstExt::Basic(BasicInst::AnyAny {
2533                        kind: AnyAnyKind::Or32,
2534                        dst: new_value,
2535                        src1: old_value.into(),
2536                        src2: operand_regimm,
2537                    }));
2538                }
2539                AtomicKind::Xor64 => {
2540                    emit(InstExt::Basic(BasicInst::AnyAny {
2541                        kind: AnyAnyKind::Xor64,
2542                        dst: new_value,
2543                        src1: old_value.into(),
2544                        src2: operand_regimm,
2545                    }));
2546                }
2547                AtomicKind::Xor32 => {
2548                    emit(InstExt::Basic(BasicInst::AnyAny {
2549                        kind: AnyAnyKind::Xor32,
2550                        dst: new_value,
2551                        src1: old_value.into(),
2552                        src2: operand_regimm,
2553                    }));
2554                }
2555                AtomicKind::MaxSigned32 => {
2556                    emit_minmax(MinMax::MaxSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2557                }
2558                AtomicKind::MinSigned32 => {
2559                    emit_minmax(MinMax::MinSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2560                }
2561                AtomicKind::MaxUnsigned32 => {
2562                    emit_minmax(MinMax::MaxUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2563                }
2564                AtomicKind::MinUnsigned32 => {
2565                    emit_minmax(MinMax::MinUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2566                }
2567                AtomicKind::MaxSigned64 => {
2568                    emit_minmax(MinMax::MaxSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2569                }
2570                AtomicKind::MinSigned64 => {
2571                    emit_minmax(MinMax::MinSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2572                }
2573                AtomicKind::MaxUnsigned64 => {
2574                    emit_minmax(MinMax::MaxUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2575                }
2576                AtomicKind::MinUnsigned64 => {
2577                    emit_minmax(MinMax::MinUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2578                }
2579            }
2580
2581            emit(InstExt::Basic(BasicInst::StoreIndirect {
2582                kind: if is_64_bit { StoreKind::U64 } else { StoreKind::U32 },
2583                src: new_value.into(),
2584                base: addr,
2585                offset: 0,
2586            }));
2587
2588            if let Some(output) = output {
2589                emit(InstExt::Basic(BasicInst::MoveReg {
2590                    dst: output,
2591                    src: old_value,
2592                }));
2593            }
2594
2595            Ok(())
2596        }
2597    }
2598}
2599
2600/// Read `n` bytes in `text` at `relative_offset` where `n` is
2601/// the length of the instruction at `relative_offset`.
2602///
2603/// # Panics
2604/// - Valid RISC-V instructions are either 2 or 4 bytes long; a misaligned
2605///   `relative_offset` is considered an internal error.
2606/// - `relative_offset` is expected to be in bounds of `text`.
2607///
2608/// # Returns
2609/// The instruction length and the raw instruction.
2610fn read_instruction_bytes(text: &[u8], relative_offset: usize) -> (u64, u32) {
2611    assert!(
2612        relative_offset % VM_CODE_ADDRESS_ALIGNMENT as usize == 0,
2613        "internal error: misaligned instruction read: 0x{relative_offset:08x}"
2614    );
2615
2616    if Inst::is_compressed(text[relative_offset]) {
2617        (2, u32::from(u16::from_le_bytes([text[relative_offset], text[relative_offset + 1]])))
2618    } else {
2619        (
2620            4,
2621            u32::from_le_bytes([
2622                text[relative_offset],
2623                text[relative_offset + 1],
2624                text[relative_offset + 2],
2625                text[relative_offset + 3],
2626            ]),
2627        )
2628    }
2629}
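
// A small test of the helper above using two well-known encodings: RISC-V marks
// full-width instructions by setting the two lowest bits of the first byte, so
// 0x0001 (`c.nop`) reads as 2 bytes and 0x00000013 (`addi x0, x0, 0`) as 4.
#[test]
fn read_instruction_bytes_handles_compressed_and_full_width_instructions() {
    let text = [0x01, 0x00, 0x13, 0x00, 0x00, 0x00];
    assert_eq!(read_instruction_bytes(&text, 0), (2, 0x0001));
    assert_eq!(read_instruction_bytes(&text, 2), (4, 0x0000_0013));
}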
2630
2631#[allow(clippy::too_many_arguments)]
2632fn parse_code_section<H>(
2633    elf: &Elf<H>,
2634    section: &Section,
2635    decoder_config: &DecoderConfig,
2636    relocations: &BTreeMap<SectionTarget, RelocationKind>,
2637    imports: &mut Vec<Import>,
2638    metadata_to_nth_import: &mut HashMap<ExternMetadata, usize>,
2639    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
2640    output: &mut Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
2641) -> Result<(), ProgramFromElfError>
2642where
2643    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
2644{
2645    let section_index = section.index();
2646    let section_name = section.name();
2647    let text = &section.data();
2648
2649    if text.len() % VM_CODE_ADDRESS_ALIGNMENT as usize != 0 {
2650        return Err(ProgramFromElfError::other(format!(
2651            "size of section '{section_name}' is not divisible by 2"
2652        )));
2653    }
2654
2655    output.reserve(text.len() / 4);
2656    let mut relative_offset = 0;
2657    while relative_offset < text.len() {
2658        let current_location = SectionTarget {
2659            section_index: section.index(),
2660            offset: relative_offset.try_into().expect("overflow"),
2661        };
2662
2663        let (inst_size, raw_inst) = read_instruction_bytes(text, relative_offset);
2664
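        // funct3 selectors for the polkavm-specific pseudoinstructions living in
        // the custom-0 opcode space; they're matched against the unpacked R-type
        // fields below.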
2665        const FUNC3_ECALLI: u32 = 0b000;
2666        const FUNC3_SBRK: u32 = 0b001;
2667        const FUNC3_MEMSET: u32 = 0b010;
2668
2669        if crate::riscv::R(raw_inst).unpack() == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero) {
2670            let initial_offset = relative_offset as u64;
2671            let pointer_size = if elf.is_64() { 8 } else { 4 };
2672
2673            // `ret` can be 2 bytes long, so (on 32-bit): 4 (ecalli) + 4 (pointer) + 2 (ret) = 10
2674            if relative_offset + pointer_size + 6 > text.len() {
2675                return Err(ProgramFromElfError::other("truncated ecalli instruction"));
2676            }
2677
2678            let target_location = current_location.add(4);
2679            relative_offset += 4 + pointer_size;
2680
2681            let Some(relocation) = relocations.get(&target_location) else {
2682                return Err(ProgramFromElfError::other(format!(
2683                    "found an external call without a relocation for a pointer to metadata at {target_location}"
2684                )));
2685            };
2686
2687            let metadata_location = match relocation {
2688                RelocationKind::Abs {
2689                    target,
2690                    size: RelocationSize::U64,
2691                } if elf.is_64() => target,
2692                RelocationKind::Abs {
2693                    target,
2694                    size: RelocationSize::U32,
2695                } if !elf.is_64() => target,
2696                _ => {
2697                    return Err(ProgramFromElfError::other(format!(
2698                        "found an external call with an unexpected relocation at {target_location}: {relocation:?}"
2699                    )));
2700                }
2701            };
2702
2703            let metadata = parse_extern_metadata(elf, relocations, *metadata_location)?;
2704
2705            // The same import can be inlined in multiple places, so deduplicate those here.
2706            let nth_import = match metadata_to_nth_import.entry(metadata) {
2707                std::collections::hash_map::Entry::Vacant(entry) => {
2708                    let nth_import = imports.len();
2709                    imports.push(Import {
2710                        metadata: entry.key().clone(),
2711                    });
2712                    entry.insert(nth_import);
2713                    nth_import
2714                }
2715                std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
2716            };
2717
2718            output.push((
2719                Source {
2720                    section_index,
2721                    offset_range: AddressRange::from(initial_offset..relative_offset as u64),
2722                },
2723                InstExt::Basic(BasicInst::Ecalli { nth_import }),
2724            ));
2725
2726            const INST_RET: Inst = Inst::JumpAndLinkRegister {
2727                dst: RReg::Zero,
2728                base: RReg::RA,
2729                value: 0,
2730            };
2731
2732            let (next_inst_size, next_raw_inst) = read_instruction_bytes(text, relative_offset);
2733
2734            if Inst::decode(decoder_config, next_raw_inst) != Some(INST_RET) {
2735                return Err(ProgramFromElfError::other("external call shim doesn't end with a 'ret'"));
2736            }
2737
2738            output.push((
2739                Source {
2740                    section_index,
2741                    offset_range: AddressRange::from(relative_offset as u64..relative_offset as u64 + next_inst_size),
2742                },
2743                InstExt::Control(ControlInst::JumpIndirect { base: Reg::RA, offset: 0 }),
2744            ));
2745
2746            relative_offset += next_inst_size as usize;
2747            continue;
2748        }
2749
2750        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_SBRK, 0, dst, size, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
2751            let Some(dst) = cast_reg_non_zero(dst)? else {
2752                return Err(ProgramFromElfError::other(
2753                    "found an 'sbrk' instruction with the zero register as the destination",
2754                ));
2755            };
2756
2757            let Some(size) = cast_reg_non_zero(size)? else {
2758                return Err(ProgramFromElfError::other(
2759                    "found an 'sbrk' instruction with the zero register as the size",
2760                ));
2761            };
2762
2763            output.push((
2764                Source {
2765                    section_index,
2766                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
2767                },
2768                InstExt::Basic(BasicInst::Sbrk { dst, size }),
2769            ));
2770
2771            relative_offset += inst_size as usize;
2772            continue;
2773        }
2774
2775        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_MEMSET, 0, RReg::Zero, RReg::Zero, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
2776            output.push((
2777                Source {
2778                    section_index,
2779                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
2780                },
2781                InstExt::Basic(BasicInst::Memset),
2782            ));
2783
2784            relative_offset += inst_size as usize;
2785            continue;
2786        }
2787
2788        let source = Source {
2789            section_index,
2790            offset_range: AddressRange::from(relative_offset as u64..relative_offset as u64 + inst_size),
2791        };
2792
2793        relative_offset += inst_size as usize;
2794
2795        let Some(original_inst) = Inst::decode(decoder_config, raw_inst) else {
2796            return Err(ProgramFromElfErrorKind::UnsupportedInstruction {
2797                section: section.name().into(),
2798                offset: current_location.offset,
2799                instruction: raw_inst,
2800            }
2801            .into());
2802        };
2803
2804        if let Some(inst) = instruction_overrides.remove(&current_location) {
2805            output.push((source, inst));
2806        } else {
2807            // For some reason (compiler bug?) *very rarely* we encounter AUIPC instructions
2808            // without any relocation attached to them, so let's handle them manually.
2809            if let Inst::AddUpperImmediateToPc {
2810                dst: base_upper,
2811                value: value_upper,
2812            } = original_inst
2813            {
2814                if relative_offset < text.len() {
2815                    let (next_inst_size, next_inst) = read_instruction_bytes(text, relative_offset);
2816                    let next_inst = Inst::decode(decoder_config, next_inst);
2817
2818                    if let Some(Inst::JumpAndLinkRegister { dst: ra_dst, base, value }) = next_inst {
2819                        if base == ra_dst && base == base_upper {
2820                            if let Some(ra) = cast_reg_non_zero(ra_dst)? {
2821                                let offset = (relative_offset as i32 - inst_size as i32)
2822                                    .wrapping_add(value)
2823                                    .wrapping_add(value_upper as i32);
2824                                if offset >= 0 && offset < section.data().len() as i32 {
2825                                    output.push((
2826                                        source,
2827                                        InstExt::Control(ControlInst::Call {
2828                                            ra,
2829                                            target: SectionTarget {
2830                                                section_index,
2831                                                offset: u64::from(cast(offset).to_unsigned()),
2832                                            },
2833                                            target_return: current_location.add(inst_size + next_inst_size),
2834                                        }),
2835                                    ));
2836
2837                                    relative_offset += next_inst_size as usize;
2838                                    continue;
2839                                }
2840                            }
2841                        }
2842                    }
2843                }
2844            }
2845
2846            let original_length = output.len();
2847            convert_instruction(elf, section, current_location, original_inst, inst_size, elf.is_64(), |inst| {
2848                output.push((source, inst));
2849            })?;
2850
2851            // We need to always emit at least one instruction (even if it's a NOP) to handle potential jumps.
2852            assert_ne!(
2853                output.len(),
2854                original_length,
2855                "internal error: no instructions were emitted for instruction {original_inst:?} in section {section_name}"
2856            );
2857        }
2858    }
2859
2860    Ok(())
2861}
2862
2863fn split_code_into_basic_blocks<H>(
2864    elf: &Elf<H>,
2865    #[allow(unused_variables)] section_to_function_name: &BTreeMap<SectionTarget, &str>,
2866    jump_targets: &HashSet<SectionTarget>,
2867    instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
2868) -> Result<Vec<BasicBlock<SectionTarget, SectionTarget>>, ProgramFromElfError>
2869where
2870    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
2871{
2872    #[cfg(test)]
2873    let _ = elf;
2874
2875    let mut blocks: Vec<BasicBlock<SectionTarget, SectionTarget>> = Vec::new();
2876    let mut current_block: Vec<(SourceStack, BasicInst<SectionTarget>)> = Vec::new();
2877    let mut block_start_opt = None;
2878    let mut last_source_in_block = None;
2879    #[cfg(not(test))]
2880    let mut current_symbol = "";
2881    for (source, op) in instructions {
2882        // TODO: This panics because we use a dummy ELF in tests; fix it.
2883        #[cfg(not(test))]
2884        {
2885            if let Some(name) = section_to_function_name.get(&source.begin()) {
2886                current_symbol = name;
2887            }
2888            log::trace!(
2889                "Instruction at {source} (0x{:x}) \"{current_symbol}\": {op:?}",
2890                elf.section_by_index(source.section_index).original_address() + source.offset_range.start
2891            );
2892        }
2893
2894        if let Some(last_source_in_block) = last_source_in_block {
2895            // Handle the case where we've emitted multiple instructions from a single RISC-V instruction.
2896            if source == last_source_in_block {
2897                let InstExt::Basic(instruction) = op else { unreachable!() };
2898                current_block.push((source.into(), instruction));
2899                continue;
2900            }
2901        }
2902
2903        assert!(source.offset_range.start < source.offset_range.end);
2904
2905        let is_jump_target = jump_targets.contains(&source.begin());
2906        let (block_section, block_start) = if !is_jump_target {
2907            // Make sure nothing wants to jump into the middle of this instruction.
2908            assert!((source.offset_range.start..source.offset_range.end)
2909                .step_by(2)
2910                .skip(1)
2911                .all(|offset| !jump_targets.contains(&SectionTarget {
2912                    section_index: source.section_index,
2913                    offset
2914                })));
2915
2916            if let Some((block_section, block_start)) = block_start_opt {
2917                // We're in a block that's reachable by a jump.
2918                (block_section, block_start)
2919            } else {
2920                // Nothing can possibly jump here, so just skip this instruction.
2921                log::trace!("Skipping dead instruction at {}: {:?}", source.begin(), op);
2922                continue;
2923            }
2924        } else {
2925            // Control flow can jump to this instruction.
2926            if let Some((block_section, block_start)) = block_start_opt.take() {
2927                // End the current basic block to prevent a jump into the middle of it.
2928                if !current_block.is_empty() {
2929                    let block_index = BlockTarget::from_raw(blocks.len());
2930                    let block_source = Source {
2931                        section_index: block_section,
2932                        offset_range: (block_start..source.offset_range.start).into(),
2933                    };
2934
2935                    let last_instruction_source = current_block.last().unwrap().0.as_slice()[0];
2936                    assert_eq!(last_instruction_source.section_index, block_section);
2937
2938                    let end_of_block_source = Source {
2939                        section_index: block_section,
2940                        offset_range: (last_instruction_source.offset_range.start..source.offset_range.start).into(),
2941                    };
2942
2943                    assert!(block_source.offset_range.start < block_source.offset_range.end);
2944                    assert!(end_of_block_source.offset_range.start < end_of_block_source.offset_range.end);
2945
2946                    log::trace!("Emitting block (due to a potential jump): {}", block_source.begin());
2947                    blocks.push(BasicBlock::new(
2948                        block_index,
2949                        block_source,
2950                        core::mem::take(&mut current_block),
2951                        EndOfBlock {
2952                            source: end_of_block_source.into(),
2953                            instruction: ControlInst::Jump { target: source.begin() },
2954                        },
2955                    ));
2956                }
2957            }
2958
2959            block_start_opt = Some((source.section_index, source.offset_range.start));
2960            (source.section_index, source.offset_range.start)
2961        };
2962
2963        match op {
2964            InstExt::Control(instruction) => {
2965                last_source_in_block = None;
2966                block_start_opt = None;
2967
2968                let block_index = BlockTarget::from_raw(blocks.len());
2969                let block_source = Source {
2970                    section_index: block_section,
2971                    offset_range: (block_start..source.offset_range.end).into(),
2972                };
2973
2974                log::trace!("Emitting block (due to a control instruction): {}", block_source.begin());
2975                blocks.push(BasicBlock::new(
2976                    block_index,
2977                    block_source,
2978                    core::mem::take(&mut current_block),
2979                    EndOfBlock {
2980                        source: source.into(),
2981                        instruction,
2982                    },
2983                ));
2984
2985                if let ControlInst::Branch { target_false, .. } = instruction {
2986                    if !cfg!(test) {
2987                        if source.section_index != target_false.section_index {
2988                            return Err(ProgramFromElfError::other("found a branch with a fallthrough to another section"));
2989                        }
2990                        assert_eq!(source.offset_range.end, target_false.offset);
2991                    }
2992                    block_start_opt = Some((block_section, source.offset_range.end));
2993                }
2994            }
2995            InstExt::Basic(instruction) => {
2996                last_source_in_block = Some(source);
2997                current_block.push((source.into(), instruction));
2998            }
2999        }
3000    }
3001
3002    if !current_block.is_empty() {
3003        return Err(ProgramFromElfError::other(
3004            "code doesn't end with a control-flow-affecting instruction",
3005        ));
3006    }
3007
3008    Ok(blocks)
3009}
3010
3011fn build_section_to_block_map(
3012    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3013) -> Result<HashMap<SectionTarget, BlockTarget>, ProgramFromElfError> {
3014    let mut section_to_block = HashMap::new();
3015    for (block_index, block) in blocks.iter().enumerate() {
3016        let section_target = block.source.begin();
3017        let block_target = BlockTarget::from_raw(block_index);
3018        if section_to_block.insert(section_target, block_target).is_some() {
3019            return Err(ProgramFromElfError::other("found two or more basic blocks with the same location"));
3020        }
3021    }
3022
3023    Ok(section_to_block)
3024}
3025
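/// Rewrites every `SectionTarget` into an `AnyTarget`: targets inside data
/// sections become `AnyTarget::Data`, targets that coincide with the start of a
/// basic block become `AnyTarget::Code`, and anything else is rejected, since a
/// jump into the middle of a block would be unrepresentable from here on.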
3026fn resolve_basic_block_references(
3027    data_sections_set: &HashSet<SectionIndex>,
3028    section_to_block: &HashMap<SectionTarget, BlockTarget>,
3029    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3030) -> Result<Vec<BasicBlock<AnyTarget, BlockTarget>>, ProgramFromElfError> {
3031    let mut output = Vec::with_capacity(blocks.len());
3032    for block in blocks {
3033        let mut ops = Vec::with_capacity(block.ops.len());
3034        for (source, op) in &block.ops {
3035            let map = |target: SectionTarget| {
3036                if data_sections_set.contains(&target.section_index) {
3037                    Ok(AnyTarget::Data(target))
3038                } else if let Some(&target) = section_to_block.get(&target) {
3039                    Ok(AnyTarget::Code(target))
3040                } else {
3041                    return Err(ProgramFromElfError::other(format!(
3042                        "found a basic instruction which neither points to a data section nor resolves to any basic block: {source:?}, {op:?}",
3043                    )));
3044                }
3045            };
3046
3047            let op = op.map_target(map)?;
3048            ops.push((source.clone(), op));
3049        }
3050
3051        let Ok(next) = block
3052            .next
3053            .clone()
3054            .map_target(|section_target| section_to_block.get(&section_target).copied().ok_or(()))
3055        else {
3056            return Err(ProgramFromElfError::other(format!(
3057                "found a control instruction at the end of the block at {block_source} whose target doesn't resolve to any basic block: {next:?}",
3058                block_source = block.source,
3059                next = block.next.instruction,
3060            )));
3061        };
3062
3063        output.push(BasicBlock::new(block.target, block.source, ops, next));
3064    }
3065
3066    Ok(output)
3067}
3068
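/// Mark-and-sweep garbage collection over the reachability graph. The mark
/// phase seeds the worklists with every exported or always-reachable item and
/// transitively follows code-to-code, code-to-data and data-to-{code, data}
/// references; the sweep phase retains only the marked entries and prunes
/// dangling back-references from the survivors. Returns `true` if anything was
/// collected.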
3069fn garbage_collect_reachability(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &mut ReachabilityGraph) -> bool {
3070    let mut queue_code = VecSet::new();
3071    let mut queue_data = VecSet::new();
3072    for (block_target, reachability) in &reachability_graph.for_code {
3073        if reachability.always_reachable_or_exported() {
3074            queue_code.push(*block_target);
3075        }
3076    }
3077
3078    for (data_target, reachability) in &reachability_graph.for_data {
3079        if reachability.always_reachable_or_exported() {
3080            queue_data.push(*data_target);
3081        }
3082    }
3083
3084    while !queue_code.is_empty() || !queue_data.is_empty() {
3085        while let Some(block_target) = queue_code.pop_unique() {
3086            each_reference(&all_blocks[block_target.index()], |ext| match ext {
3087                ExtRef::Jump(target) | ExtRef::Address(target) => queue_code.push(target),
3088                ExtRef::DataAddress(target) => queue_data.push(target),
3089            });
3090        }
3091
3092        while let Some(data_target) = queue_data.pop_unique() {
3093            if let Some(list) = reachability_graph.code_references_in_data_section.get(&data_target) {
3094                for &target in list {
3095                    queue_code.push(target);
3096                }
3097            }
3098
3099            if let Some(list) = reachability_graph.data_references_in_data_section.get(&data_target) {
3100                for &target in list {
3101                    queue_data.push(target);
3102                }
3103            }
3104        }
3105    }
3106
3107    let set_code = queue_code.into_set();
3108    let set_data = queue_data.into_set();
3109    if set_code.len() == reachability_graph.for_code.len() && set_data.len() == reachability_graph.for_data.len() {
3110        return false;
3111    }
3112
3113    log::debug!(
3114        "Code reachability garbage collection: {} -> {}",
3115        reachability_graph.for_code.len(),
3116        set_code.len()
3117    );
3118    reachability_graph.for_code.retain(|block_target, reachability| {
3119        reachability.reachable_from.retain(|inner_key| set_code.contains(inner_key));
3120        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
3121        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
3122        if !set_code.contains(block_target) {
3123            assert!(!reachability.always_reachable);
3124            log::trace!("  Garbage collected: {block_target:?}");
3125            false
3126        } else {
3127            true
3128        }
3129    });
3130
3131    assert_eq!(reachability_graph.for_code.len(), set_code.len());
3132
3133    log::debug!(
3134        "Data reachability garbage collection: {} -> {}",
3135        reachability_graph.for_data.len(),
3136        set_data.len()
3137    );
3138    reachability_graph.for_data.retain(|data_target, reachability| {
3139        assert!(reachability.reachable_from.is_empty());
3140        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
3141        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
3142        if !set_data.contains(data_target) {
3143            assert!(!reachability.always_reachable);
3144            log::trace!("  Garbage collected: {data_target:?}");
3145            false
3146        } else {
3147            true
3148        }
3149    });
3150
3151    reachability_graph.code_references_in_data_section.retain(|data_target, list| {
3152        if !set_data.contains(data_target) {
3153            false
3154        } else {
3155            assert!(list.iter().all(|block_target| set_code.contains(block_target)));
3156            true
3157        }
3158    });
3159
3160    reachability_graph.data_references_in_data_section.retain(|data_target, list| {
3161        if !set_data.contains(data_target) {
3162            false
3163        } else {
3164            assert!(list.iter().all(|next_data_target| set_data.contains(next_data_target)));
3165            true
3166        }
3167    });
3168
3169    assert_eq!(reachability_graph.for_data.len(), set_data.len());
3170    true
3171}
3172
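/// Detaches a single, already-unreachable code block from the graph. Every
/// outgoing reference of `current` is removed from its target's back-reference
/// sets, and any target that becomes unreachable as a result is pushed onto
/// `queue_code` or `queue_data`; the cascading drain loop lives in the callers
/// further below.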
3173fn remove_unreachable_code_impl(
3174    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3175    reachability_graph: &mut ReachabilityGraph,
3176    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3177    queue_code: &mut VecSet<BlockTarget>,
3178    queue_data: &mut VecSet<SectionIndex>,
3179    current: BlockTarget,
3180) {
3181    assert!(reachability_graph.for_code.get(&current).unwrap().is_unreachable());
3182    log::trace!("Removing {current:?} from the graph...");
3183
3184    each_reference(&all_blocks[current.index()], |ext| match ext {
3185        ExtRef::Jump(target) => {
3186            log::trace!("{target:?} is not reachable from {current:?} anymore");
3187            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
3188            reachability.reachable_from.remove(&current);
3189            if reachability.is_unreachable() {
3190                log::trace!("{target:?} is now unreachable!");
3191                queue_code.push(target)
3192            } else if let Some(ref mut optimize_queue) = optimize_queue {
3193                optimize_queue.push(target);
3194            }
3195        }
3196        ExtRef::Address(target) => {
3197            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
3198            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
3199            reachability.address_taken_in.remove(&current);
3200            if reachability.is_unreachable() {
3201                log::trace!("{target:?} is now unreachable!");
3202                queue_code.push(target)
3203            } else if let Some(ref mut optimize_queue) = optimize_queue {
3204                optimize_queue.push(target);
3205            }
3206        }
3207        ExtRef::DataAddress(target) => {
3208            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
3209            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
3210            reachability.address_taken_in.remove(&current);
3211            if reachability.is_unreachable() {
3212                log::trace!("{target:?} is now unreachable!");
3213                queue_data.push(target);
3214            }
3215        }
3216    });
3217
3218    reachability_graph.for_code.remove(&current);
3219}
3220
3221fn remove_unreachable_data_impl(
3222    reachability_graph: &mut ReachabilityGraph,
3223    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3224    queue_code: &mut VecSet<BlockTarget>,
3225    queue_data: &mut VecSet<SectionIndex>,
3226    current: SectionIndex,
3227) {
3228    assert!(reachability_graph.for_data.get(&current).unwrap().is_unreachable());
3229    log::trace!("Removing {current:?} from the graph...");
3230
3231    let code_refs = reachability_graph.code_references_in_data_section.remove(&current);
3232    let data_refs = reachability_graph.data_references_in_data_section.remove(&current);
3233
3234    if let Some(list) = code_refs {
3235        for target in list {
3236            log::trace!("{target:?} is not reachable from {current:?} anymore");
3237            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
3238            reachability.referenced_by_data.remove(&current);
3239            if reachability.is_unreachable() {
3240                log::trace!("{target:?} is now unreachable!");
3241                queue_code.push(target)
3242            } else if let Some(ref mut optimize_queue) = optimize_queue {
3243                optimize_queue.push(target);
3244            }
3245        }
3246    }
3247
3248    if let Some(list) = data_refs {
3249        for target in list {
3250            log::trace!("{target:?} is not reachable from {current:?} anymore");
3251            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
3252            reachability.referenced_by_data.remove(&current);
3253            if reachability.is_unreachable() {
3254                log::trace!("{target:?} is now unreachable!");
3255                queue_data.push(target)
3256            }
3257        }
3258    }
3259
3260    reachability_graph.for_data.remove(&current);
3261}
3262
3263fn remove_code_if_globally_unreachable(
3264    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3265    reachability_graph: &mut ReachabilityGraph,
3266    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3267    block_target: BlockTarget,
3268) {
3269    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3270        return;
3271    };
3272    if !reachability.is_unreachable() {
3273        return;
3274    }
3275
3276    // The block itself is now globally unreachable, so detach it from the graph.
3277    let mut queue_code = VecSet::new();
3278    let mut queue_data = VecSet::new();
3279    remove_unreachable_code_impl(
3280        all_blocks,
3281        reachability_graph,
3282        optimize_queue.as_deref_mut(),
3283        &mut queue_code,
3284        &mut queue_data,
3285        block_target,
3286    );
3287
3288    // If there are other dependencies which are now unreachable then remove them too.
3289    while !queue_code.is_empty() || !queue_data.is_empty() {
3290        while let Some(next) = queue_code.pop_unique() {
3291            remove_unreachable_code_impl(
3292                all_blocks,
3293                reachability_graph,
3294                optimize_queue.as_deref_mut(),
3295                &mut queue_code,
3296                &mut queue_data,
3297                next,
3298            );
3299        }
3300
3301        while let Some(next) = queue_data.pop_unique() {
3302            remove_unreachable_data_impl(
3303                reachability_graph,
3304                optimize_queue.as_deref_mut(),
3305                &mut queue_code,
3306                &mut queue_data,
3307                next,
3308            );
3309        }
3310    }
3311}
3312
3313fn remove_if_data_is_globally_unreachable(
3314    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3315    reachability_graph: &mut ReachabilityGraph,
3316    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3317    data_target: SectionIndex,
3318) {
3319    let Some(reachability) = reachability_graph.for_data.get(&data_target) else {
3320        return;
3321    };
3322    if !reachability.is_unreachable() {
3323        return;
3324    }
3325
3326    let mut queue_code = VecSet::new();
3327    let mut queue_data = VecSet::new();
3328    remove_unreachable_data_impl(
3329        reachability_graph,
3330        optimize_queue.as_deref_mut(),
3331        &mut queue_code,
3332        &mut queue_data,
3333        data_target,
3334    );
3335
3336    // If there are other dependencies which are now unreachable then remove them too.
3337    while !queue_code.is_empty() || !queue_data.is_empty() {
3338        while let Some(next) = queue_code.pop_unique() {
3339            remove_unreachable_code_impl(
3340                all_blocks,
3341                reachability_graph,
3342                optimize_queue.as_deref_mut(),
3343                &mut queue_code,
3344                &mut queue_data,
3345                next,
3346            );
3347        }
3348
3349        while let Some(next) = queue_data.pop_unique() {
3350            remove_unreachable_data_impl(
3351                reachability_graph,
3352                optimize_queue.as_deref_mut(),
3353                &mut queue_code,
3354                &mut queue_data,
3355                next,
3356            );
3357        }
3358    }
3359}
3360
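/// Re-queues a still-reachable block together with its entire neighbourhood
/// (its predecessors, the blocks that take its address, its successors and
/// everything it references), since a change to `block_target` may expose a new
/// optimization opportunity in any of them.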
3361fn add_to_optimize_queue(
3362    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3363    reachability_graph: &ReachabilityGraph,
3364    optimize_queue: &mut VecSet<BlockTarget>,
3365    block_target: BlockTarget,
3366) {
3367    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3368        return;
3369    };
3370    if reachability.is_unreachable() {
3371        return;
3372    }
3373
3374    optimize_queue.push(block_target);
3375
3376    for &previous in &reachability.reachable_from {
3377        optimize_queue.push(previous);
3378    }
3379
3380    for &previous in &reachability.address_taken_in {
3381        optimize_queue.push(previous);
3382    }
3383
3384    for &next in all_blocks[block_target.index()].next.instruction.targets().into_iter().flatten() {
3385        optimize_queue.push(next);
3386    }
3387
3388    each_reference(&all_blocks[block_target.index()], |ext| match ext {
3389        ExtRef::Jump(target) => optimize_queue.push(target),
3390        ExtRef::Address(target) => optimize_queue.push(target),
3391        ExtRef::DataAddress(..) => {}
3392    });
3393}
3394
3395fn perform_nop_elimination(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) {
3396    all_blocks[current.index()].ops.retain(|(_, instruction)| !instruction.is_nop());
3397}
3398
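// A hedged sketch of the rewrites performed below (illustrative block shapes,
// not taken from a real program):
//
//   1. export forwarding: if block A has no ops, ends in `jump B` and carries
//      exports, the exports simply move onto B and A is allowed to die;
//   2. tail inlining: `A: ops_a; jump B` plus `B: ops_b; <next>` becomes
//      `A: ops_a ++ ops_b; <next>`, subject to the `should_inline` heuristic;
//   3. branch threading: a branch whose true target is an empty block that only
//      jumps elsewhere is retargeted directly at that jump's destination.
//
// The first two rewrites are deduplicated through `inline_history`.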
3399#[deny(clippy::as_conversions)]
3400fn perform_inlining(
3401    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3402    reachability_graph: &mut ReachabilityGraph,
3403    exports: &mut [Export],
3404    optimize_queue: Option<&mut VecSet<BlockTarget>>,
3405    inline_history: &mut HashSet<(BlockTarget, BlockTarget)>,
3406    inline_threshold: usize,
3407    current: BlockTarget,
3408) -> bool {
3409    fn is_infinite_loop(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) -> bool {
3410        all_blocks[current.index()].next.instruction == ControlInst::Jump { target: current }
3411    }
3412
3413    fn inline(
3414        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3415        reachability_graph: &mut ReachabilityGraph,
3416        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3417        outer: BlockTarget,
3418        inner: BlockTarget,
3419    ) {
3420        log::trace!("Inlining {inner:?} into {outer:?}...");
3421        log::trace!("  {outer:?} will now end with: {:?}", all_blocks[inner.index()].next.instruction);
3422
3423        if let Some(ref mut optimize_queue) = optimize_queue {
3424            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, outer);
3425            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, inner);
3426        }
3427
3428        // Inlining into ourselves doesn't make sense.
3429        assert_ne!(outer, inner);
3430
3431        // No infinite loops.
3432        assert!(!is_infinite_loop(all_blocks, inner));
3433
3434        // Make sure this block actually goes to the block we're inlining.
3435        assert_eq!(all_blocks[outer.index()].next.instruction, ControlInst::Jump { target: inner });
3436
3437        // The inner block is not reachable from here anymore.
3438        // NOTE: This needs to be done *before* adding the references below,
3439        //       as the inner block might be an infinite loop.
3440        reachability_graph.for_code.get_mut(&inner).unwrap().reachable_from.remove(&outer);
3441
3442        // Everything which the inner block accesses will be reachable from here, so update reachability.
3443        each_reference(&all_blocks[inner.index()], |ext| match ext {
3444            ExtRef::Jump(target) => {
3445                reachability_graph.for_code.entry(target).or_default().reachable_from.insert(outer);
3446            }
3447            ExtRef::Address(target) => {
3448                reachability_graph
3449                    .for_code
3450                    .entry(target)
3451                    .or_default()
3452                    .address_taken_in
3453                    .insert(outer);
3454            }
3455            ExtRef::DataAddress(target) => {
3456                reachability_graph
3457                    .for_data
3458                    .entry(target)
3459                    .or_default()
3460                    .address_taken_in
3461                    .insert(outer);
3462            }
3463        });
3464
3465        // Remove it from the graph if it's globally unreachable now.
3466        remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, inner);
3467
3468        let outer_source = all_blocks[outer.index()].next.source.clone();
3469        let inner_source = all_blocks[inner.index()].next.source.clone();
3470        let inner_code: Vec<_> = all_blocks[inner.index()]
3471            .ops
3472            .iter()
3473            .map(|(inner_source, op)| (outer_source.overlay_on_top_of(inner_source), *op))
3474            .collect();
3475
3476        all_blocks[outer.index()].ops.extend(inner_code);
3477        all_blocks[outer.index()].next.source.overlay_on_top_of_inplace(&inner_source);
3478        all_blocks[outer.index()].next.instruction = all_blocks[inner.index()].next.instruction;
3479    }
3480
3481    fn should_inline(
3482        all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3483        reachability_graph: &ReachabilityGraph,
3484        current: BlockTarget,
3485        target: BlockTarget,
3486        inline_threshold: usize,
3487    ) -> bool {
3488        // Don't inline if it's an infinite loop.
3489        if target == current || is_infinite_loop(all_blocks, target) {
3490            return false;
3491        }
3492
3493        if let Some(fallthrough_target) = all_blocks[target.index()].next.instruction.fallthrough_target() {
3494            if fallthrough_target.index() == target.index() + 1 {
3495                // Do not inline if we'd need to inject a new fallthrough basic block.
3496                return false;
3497            }
3498        }
3499
3500        // Inline if the target block is small enough.
3501        if all_blocks[target.index()].ops.len() <= inline_threshold {
3502            return true;
3503        }
3504
3505        // Inline if the target block is only reachable from here.
3506        if let Some(reachability) = reachability_graph.for_code.get(&target) {
3507            if reachability.is_only_reachable_from(current) {
3508                return true;
3509            }
3510        }
3511
3512        false
3513    }
3514
3515    if !reachability_graph.is_code_reachable(current) {
3516        return false;
3517    }
3518
3519    let block = &all_blocks[current.index()];
3520    match block.next.instruction {
3521        ControlInst::Jump { target } => {
3522            if all_blocks[current.index()].ops.is_empty() && inline_history.insert((current, target)) {
3523                let reachability = reachability_graph.for_code.get_mut(&current).unwrap();
3524                if !reachability.exports.is_empty() {
3525                    let export_indexes = core::mem::take(&mut reachability.exports);
3526                    for &export_index in &export_indexes {
3527                        exports[export_index].location = all_blocks[target.index()].source.begin();
3528                    }
3529                    reachability_graph.for_code.get_mut(&target).unwrap().exports.extend(export_indexes);
3530                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, current);
3531                    return true;
3532                }
3533            }
3534
3535            if should_inline(all_blocks, reachability_graph, current, target, inline_threshold) && inline_history.insert((current, target))
3536            {
3537                inline(all_blocks, reachability_graph, optimize_queue, current, target);
3538                return true;
3539            }
3540        }
3541        ControlInst::Branch {
3542            kind,
3543            src1,
3544            src2,
3545            target_true,
3546            target_false,
3547        } => {
3548            if let ControlInst::Jump { target } = all_blocks[target_true.index()].next.instruction {
3549                if target != target_true && all_blocks[target_true.index()].ops.is_empty() {
3550                    // We're branching to another block which immediately jumps somewhere else.
3551                    // So skip the middle-man and just jump where we want to go directly.
3552                    assert!(reachability_graph
3553                        .for_code
3554                        .get_mut(&target_true)
3555                        .unwrap()
3556                        .reachable_from
3557                        .remove(&current));
3558
3559                    reachability_graph.for_code.get_mut(&target).unwrap().reachable_from.insert(current);
3560                    all_blocks[current.index()].next.instruction = ControlInst::Branch {
3561                        kind,
3562                        src1,
3563                        src2,
3564                        target_true: target,
3565                        target_false,
3566                    };
3567
3568                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, target_true);
3569                    return true;
3570                }
3571            }
3572        }
3573        ControlInst::Call { .. } => unreachable!(),
3574        _ => {}
3575    }
3576
3577    false
3578}
3579
3580fn gather_references(block: &BasicBlock<AnyTarget, BlockTarget>) -> BTreeSet<ExtRef> {
3581    let mut references = BTreeSet::new();
3582    each_reference(block, |ext| {
3583        references.insert(ext);
3584    });
3585    references
3586}
3587
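/// Diffs a block's reference set before and after a rewrite and patches the
/// reachability graph accordingly: edges present only in `old_references` are
/// removed, edges present only in the freshly gathered set are inserted, and
/// every touched target is garbage collected if it ended up unreachable. Note
/// that the call sites gather `old_references` *before* mutating the block.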
3588fn update_references(
3589    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3590    reachability_graph: &mut ReachabilityGraph,
3591    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3592    block_target: BlockTarget,
3593    mut old_references: BTreeSet<ExtRef>,
3594) {
3595    let mut new_references = gather_references(&all_blocks[block_target.index()]);
3596    new_references.retain(|ext| !old_references.remove(ext));
3597
3598    for ext in &old_references {
3599        match ext {
3600            ExtRef::Jump(target) => {
3601                log::trace!("{target:?} is not reachable from {block_target:?} anymore");
3602                reachability_graph
3603                    .for_code
3604                    .get_mut(target)
3605                    .unwrap()
3606                    .reachable_from
3607                    .remove(&block_target);
3608            }
3609            ExtRef::Address(target) => {
3610                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
3611                reachability_graph
3612                    .for_code
3613                    .get_mut(target)
3614                    .unwrap()
3615                    .address_taken_in
3616                    .remove(&block_target);
3617            }
3618            ExtRef::DataAddress(target) => {
3619                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
3620                reachability_graph
3621                    .for_data
3622                    .get_mut(target)
3623                    .unwrap()
3624                    .address_taken_in
3625                    .remove(&block_target);
3626            }
3627        }
3628    }
3629
3630    for ext in &new_references {
3631        match ext {
3632            ExtRef::Jump(target) => {
3633                log::trace!("{target:?} is reachable from {block_target:?}");
3634                reachability_graph
3635                    .for_code
3636                    .get_mut(target)
3637                    .unwrap()
3638                    .reachable_from
3639                    .insert(block_target);
3640            }
3641            ExtRef::Address(target) => {
3642                log::trace!("{target:?}'s address is taken in {block_target:?}");
3643                reachability_graph
3644                    .for_code
3645                    .get_mut(target)
3646                    .unwrap()
3647                    .address_taken_in
3648                    .insert(block_target);
3649            }
3650            ExtRef::DataAddress(target) => {
3651                log::trace!("{target:?}'s address is taken in {block_target:?}");
3652                reachability_graph
3653                    .for_data
3654                    .get_mut(target)
3655                    .unwrap()
3656                    .address_taken_in
3657                    .insert(block_target);
3658            }
3659        }
3660    }
3661
3662    for ext in old_references.into_iter().chain(new_references.into_iter()) {
3663        match ext {
3664            ExtRef::Jump(target) => {
3665                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
3666            }
3667            ExtRef::Address(target) => {
3668                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
3669            }
3670            ExtRef::DataAddress(target) => {
3671                remove_if_data_is_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
3672            }
3673        }
3674    }
3675}
3676
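// Backward liveness analysis on a single block. A hedged worked example with
// made-up instructions (not from a real program): scanning the ops in reverse
// with the live set {a0},
//
//     a1 = a2 + 1    // a1 is dead here and there are no side effects: removed
//     a0 = a0 + a3   // writes a0 (remove), reads a0 and a3 (insert): {a0, a3}
//
// The resulting live-in set is cached in `registers_needed_for_block`, and any
// change to it re-queues the jump predecessors of this block for another pass.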
3677#[deny(clippy::as_conversions)]
3678fn perform_dead_code_elimination(
3679    config: &Config,
3680    imports: &[Import],
3681    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3682    registers_needed_for_block: &mut [RegMask],
3683    reachability_graph: &mut ReachabilityGraph,
3684    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3685    block_target: BlockTarget,
3686) -> bool {
3687    #[allow(clippy::too_many_arguments)]
3688    fn perform_dead_code_elimination_on_block(
3689        config: &Config,
3690        imports: &[Import],
3691        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3692        reachability_graph: &mut ReachabilityGraph,
3693        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3694        modified: &mut bool,
3695        mut registers_needed: RegMask,
3696        block_target: BlockTarget,
3697    ) -> RegMask {
3698        let next_instruction = &all_blocks[block_target.index()].next.instruction;
3699        registers_needed.remove(next_instruction.dst_mask());
3700        registers_needed.insert(next_instruction.src_mask());
3701
3702        let mut dead_code = Vec::new();
3703        for (nth_instruction, (_, op)) in all_blocks[block_target.index()].ops.iter().enumerate().rev() {
3704            let dst_mask = op.dst_mask(imports);
3705            if !op.has_side_effects(config) && (dst_mask & registers_needed) == RegMask::empty() {
3706                // This instruction has no side effects and its result is not used; it's dead.
3707                dead_code.push(nth_instruction);
3708                continue;
3709            }
3710
3711                // A register written by this instruction doesn't need to be live before it...
3712                registers_needed.remove(dst_mask);
3713                // ...unless this instruction also reads it.
3714                registers_needed.insert(op.src_mask(imports));
3715        }
3716
3717        if dead_code.is_empty() {
3718            return registers_needed;
3719        }
3720
3721        *modified = true;
3722        if let Some(ref mut optimize_queue) = optimize_queue {
3723            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, block_target);
3724        }
3725
3726        let references = gather_references(&all_blocks[block_target.index()]);
3727        for nth_instruction in dead_code {
3728            log::trace!(
3729                "Removing dead instruction in {}: {:?}",
3730                all_blocks[block_target.index()].ops[nth_instruction].0,
3731                all_blocks[block_target.index()].ops[nth_instruction].1
3732            );
3733
3734            // Replace it with a NOP.
3735            all_blocks[block_target.index()].ops[nth_instruction].1 = BasicInst::Nop;
3736        }
3737
3738        all_blocks[block_target.index()]
3739            .ops
3740            .retain(|(_, instruction)| !instruction.is_nop());
3741
3742        update_references(all_blocks, reachability_graph, optimize_queue, block_target, references);
3743        registers_needed
3744    }
3745
3746    if !reachability_graph.is_code_reachable(block_target) {
3747        return false;
3748    }
3749
3750    let mut previous_blocks = Vec::new();
3751    for &previous_block in &reachability_graph.for_code.get(&block_target).unwrap().reachable_from {
3752        if previous_block == block_target {
3753            continue;
3754        }
3755
3756        let ControlInst::Jump { target } = all_blocks[previous_block.index()].next.instruction else {
3757            continue;
3758        };
3759        if target == block_target {
3760            previous_blocks.push(previous_block);
3761        }
3762    }
3763
3764    let registers_needed_for_next_block = match all_blocks[block_target.index()].next.instruction {
3765        // If it's going to trap then it's not going to need any of the register values.
3766        ControlInst::Unimplemented => RegMask::empty(),
3767        // If it's a jump then we'll need whatever registers the jump target needs.
3768        ControlInst::Jump { target } => registers_needed_for_block[target.index()],
3769        ControlInst::Branch {
3770            target_true, target_false, ..
3771        } => registers_needed_for_block[target_true.index()] | registers_needed_for_block[target_false.index()],
3772        // ...otherwise assume it'll need all of them.
3773        ControlInst::Call { .. } => unreachable!(),
3774        ControlInst::CallIndirect { .. } | ControlInst::JumpIndirect { .. } => RegMask::all(),
3775    };
3776
3777    let mut modified = false;
3778    let registers_needed_for_this_block = perform_dead_code_elimination_on_block(
3779        config,
3780        imports,
3781        all_blocks,
3782        reachability_graph,
3783        optimize_queue.as_deref_mut(),
3784        &mut modified,
3785        registers_needed_for_next_block,
3786        block_target,
3787    );
3788
3789    if registers_needed_for_block[block_target.index()] != registers_needed_for_this_block {
3790        registers_needed_for_block[block_target.index()] = registers_needed_for_this_block;
3791        if let Some(ref mut optimize_queue) = optimize_queue {
3792            for previous_block in previous_blocks {
3793                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, previous_block);
3794            }
3795        }
3796    }
3797
3798    modified
3799}
3800
3801#[derive(Copy, Clone, PartialEq, Eq, Debug)]
3802pub enum AnyAnyKind {
3803    Add32,
3804    Add32AndSignExtend,
3805    Add64,
3806    Sub32,
3807    Sub32AndSignExtend,
3808    Sub64,
3809    And32,
3810    And64,
3811    Or32,
3812    Or64,
3813    Xor32,
3814    Xor64,
3815    SetLessThanUnsigned32,
3816    SetLessThanUnsigned64,
3817    SetLessThanSigned32,
3818    SetLessThanSigned64,
3819    ShiftLogicalLeft32,
3820    ShiftLogicalLeft32AndSignExtend,
3821    ShiftLogicalLeft64,
3822    ShiftLogicalRight32,
3823    ShiftLogicalRight32AndSignExtend,
3824    ShiftLogicalRight64,
3825    ShiftArithmeticRight32,
3826    ShiftArithmeticRight32AndSignExtend,
3827    ShiftArithmeticRight64,
3828    Mul32,
3829    Mul32AndSignExtend,
3830    Mul64,
3831    RotateRight32,
3832    RotateRight32AndSignExtend,
3833    RotateRight64,
3834}
3835
3836#[derive(Copy, Clone, PartialEq, Eq, Debug)]
3837pub enum RegKind {
3838    CountLeadingZeroBits32,
3839    CountLeadingZeroBits64,
3840    CountSetBits32,
3841    CountSetBits64,
3842    CountTrailingZeroBits32,
3843    CountTrailingZeroBits64,
3844    ReverseByte,
3845    SignExtend8,
3846    SignExtend16,
3847    ZeroExtend16,
3848}
3849
3850#[derive(Copy, Clone, PartialEq, Eq, Debug)]
3851pub enum RegRegKind {
3852    MulUpperSignedSigned32,
3853    MulUpperSignedSigned64,
3854    MulUpperUnsignedUnsigned32,
3855    MulUpperUnsignedUnsigned64,
3856    MulUpperSignedUnsigned32,
3857    MulUpperSignedUnsigned64,
3858    Div32,
3859    Div32AndSignExtend,
3860    Div64,
3861    DivUnsigned32,
3862    DivUnsigned32AndSignExtend,
3863    DivUnsigned64,
3864    Rem32,
3865    Rem32AndSignExtend,
3866    Rem64,
3867    RemUnsigned32,
3868    RemUnsigned32AndSignExtend,
3869    RemUnsigned64,
3870
3871    AndInverted,
3872    OrInverted,
3873    Xnor,
3874    Maximum,
3875    MaximumUnsigned,
3876    Minimum,
3877    MinimumUnsigned,
3878    RotateLeft32,
3879    RotateLeft32AndSignExtend,
3880    RotateLeft64,
3881}
3882
3883#[derive(Copy, Clone, PartialEq, Eq, Debug)]
3884enum OperationKind {
3885    Add32,
3886    Add32AndSignExtend,
3887    Add64,
3888    Sub32,
3889    Sub32AndSignExtend,
3890    Sub64,
3891    And32,
3892    And64,
3893    Or32,
3894    Or64,
3895    Xor32,
3896    Xor64,
3897    SetLessThanUnsigned32,
3898    SetLessThanUnsigned64,
3899    SetLessThanSigned32,
3900    SetLessThanSigned64,
3901    ShiftLogicalLeft32,
3902    ShiftLogicalLeft32AndSignExtend,
3903    ShiftLogicalLeft64,
3904    ShiftLogicalRight32,
3905    ShiftLogicalRight32AndSignExtend,
3906    ShiftLogicalRight64,
3907    ShiftArithmeticRight32,
3908    ShiftArithmeticRight32AndSignExtend,
3909    ShiftArithmeticRight64,
3910
3911    Mul32,
3912    Mul32AndSignExtend,
3913    Mul64,
3914    MulUpperSignedSigned32,
3915    MulUpperSignedSigned64,
3916    MulUpperSignedUnsigned32,
3917    MulUpperSignedUnsigned64,
3918    MulUpperUnsignedUnsigned32,
3919    MulUpperUnsignedUnsigned64,
3920    Div32,
3921    Div32AndSignExtend,
3922    Div64,
3923    DivUnsigned32,
3924    DivUnsigned32AndSignExtend,
3925    DivUnsigned64,
3926    Rem32,
3927    Rem32AndSignExtend,
3928    Rem64,
3929    RemUnsigned32,
3930    RemUnsigned32AndSignExtend,
3931    RemUnsigned64,
3932
3933    Eq32,
3934    Eq64,
3935    NotEq32,
3936    NotEq64,
3937    SetGreaterOrEqualSigned32,
3938    SetGreaterOrEqualSigned64,
3939    SetGreaterOrEqualUnsigned32,
3940    SetGreaterOrEqualUnsigned64,
3941
3942    AndInverted,
3943    OrInverted,
3944    Xnor,
3945    Maximum,
3946    MaximumUnsigned,
3947    Minimum,
3948    MinimumUnsigned,
3949    RotateLeft32,
3950    RotateLeft32AndSignExtend,
3951    RotateLeft64,
3952    RotateRight32,
3953    RotateRight32AndSignExtend,
3954    RotateRight64,
3955}
3956
3957impl From<AnyAnyKind> for OperationKind {
3958    fn from(kind: AnyAnyKind) -> Self {
3959        match kind {
3960            AnyAnyKind::Add32 => Self::Add32,
3961            AnyAnyKind::Add32AndSignExtend => Self::Add32AndSignExtend,
3962            AnyAnyKind::Add64 => Self::Add64,
3963            AnyAnyKind::Sub32 => Self::Sub32,
3964            AnyAnyKind::Sub32AndSignExtend => Self::Sub32AndSignExtend,
3965            AnyAnyKind::Sub64 => Self::Sub64,
3966            AnyAnyKind::And32 => Self::And32,
3967            AnyAnyKind::And64 => Self::And64,
3968            AnyAnyKind::Or32 => Self::Or32,
3969            AnyAnyKind::Or64 => Self::Or64,
3970            AnyAnyKind::Xor32 => Self::Xor32,
3971            AnyAnyKind::Xor64 => Self::Xor64,
3972            AnyAnyKind::SetLessThanUnsigned32 => Self::SetLessThanUnsigned32,
3973            AnyAnyKind::SetLessThanUnsigned64 => Self::SetLessThanUnsigned64,
3974            AnyAnyKind::SetLessThanSigned32 => Self::SetLessThanSigned32,
3975            AnyAnyKind::SetLessThanSigned64 => Self::SetLessThanSigned64,
3976            AnyAnyKind::ShiftLogicalLeft32 => Self::ShiftLogicalLeft32,
3977            AnyAnyKind::ShiftLogicalLeft32AndSignExtend => Self::ShiftLogicalLeft32AndSignExtend,
3978            AnyAnyKind::ShiftLogicalLeft64 => Self::ShiftLogicalLeft64,
3979            AnyAnyKind::ShiftLogicalRight32 => Self::ShiftLogicalRight32,
3980            AnyAnyKind::ShiftLogicalRight32AndSignExtend => Self::ShiftLogicalRight32AndSignExtend,
3981            AnyAnyKind::ShiftLogicalRight64 => Self::ShiftLogicalRight64,
3982            AnyAnyKind::ShiftArithmeticRight32 => Self::ShiftArithmeticRight32,
3983            AnyAnyKind::ShiftArithmeticRight32AndSignExtend => Self::ShiftArithmeticRight32AndSignExtend,
3984            AnyAnyKind::ShiftArithmeticRight64 => Self::ShiftArithmeticRight64,
3985            AnyAnyKind::Mul32 => Self::Mul32,
3986            AnyAnyKind::Mul32AndSignExtend => Self::Mul32AndSignExtend,
3987            AnyAnyKind::Mul64 => Self::Mul64,
3988            AnyAnyKind::RotateRight32 => Self::RotateRight32,
3989            AnyAnyKind::RotateRight32AndSignExtend => Self::RotateRight32AndSignExtend,
3990            AnyAnyKind::RotateRight64 => Self::RotateRight64,
3991        }
3992    }
3993}
3994
3995impl From<RegRegKind> for OperationKind {
3996    fn from(kind: RegRegKind) -> Self {
3997        match kind {
3998            RegRegKind::MulUpperSignedSigned32 => Self::MulUpperSignedSigned32,
3999            RegRegKind::MulUpperSignedSigned64 => Self::MulUpperSignedSigned64,
4000            RegRegKind::MulUpperUnsignedUnsigned32 => Self::MulUpperUnsignedUnsigned32,
4001            RegRegKind::MulUpperUnsignedUnsigned64 => Self::MulUpperUnsignedUnsigned64,
4002            RegRegKind::MulUpperSignedUnsigned32 => Self::MulUpperSignedUnsigned32,
4003            RegRegKind::MulUpperSignedUnsigned64 => Self::MulUpperSignedUnsigned64,
4004            RegRegKind::Div32 => Self::Div32,
4005            RegRegKind::Div32AndSignExtend => Self::Div32AndSignExtend,
4006            RegRegKind::Div64 => Self::Div64,
4007            RegRegKind::DivUnsigned32 => Self::DivUnsigned32,
4008            RegRegKind::DivUnsigned32AndSignExtend => Self::DivUnsigned32AndSignExtend,
4009            RegRegKind::DivUnsigned64 => Self::DivUnsigned64,
4010            RegRegKind::Rem32 => Self::Rem32,
4011            RegRegKind::Rem32AndSignExtend => Self::Rem32AndSignExtend,
4012            RegRegKind::Rem64 => Self::Rem64,
4013            RegRegKind::RemUnsigned32 => Self::RemUnsigned32,
4014            RegRegKind::RemUnsigned32AndSignExtend => Self::RemUnsigned32AndSignExtend,
4015            RegRegKind::RemUnsigned64 => Self::RemUnsigned64,
4016            RegRegKind::AndInverted => Self::AndInverted,
4017            RegRegKind::OrInverted => Self::OrInverted,
4018            RegRegKind::Xnor => Self::Xnor,
4019            RegRegKind::Maximum => Self::Maximum,
4020            RegRegKind::MaximumUnsigned => Self::MaximumUnsigned,
4021            RegRegKind::Minimum => Self::Minimum,
4022            RegRegKind::MinimumUnsigned => Self::MinimumUnsigned,
4023            RegRegKind::RotateLeft32 => Self::RotateLeft32,
4024            RegRegKind::RotateLeft32AndSignExtend => Self::RotateLeft32AndSignExtend,
4025            RegRegKind::RotateLeft64 => Self::RotateLeft64,
4026        }
4027    }
4028}
4029
4030impl From<BranchKind> for OperationKind {
4031    fn from(kind: BranchKind) -> Self {
4032        match kind {
4033            BranchKind::Eq32 => Self::Eq32,
4034            BranchKind::Eq64 => Self::Eq64,
4035            BranchKind::NotEq32 => Self::NotEq32,
4036            BranchKind::NotEq64 => Self::NotEq64,
4037            BranchKind::LessSigned32 => Self::SetLessThanSigned32,
4038            BranchKind::LessSigned64 => Self::SetLessThanSigned64,
4039            BranchKind::GreaterOrEqualSigned32 => Self::SetGreaterOrEqualSigned32,
4040            BranchKind::GreaterOrEqualSigned64 => Self::SetGreaterOrEqualSigned64,
4041            BranchKind::LessUnsigned32 => Self::SetLessThanUnsigned32,
4042            BranchKind::LessUnsigned64 => Self::SetLessThanUnsigned64,
4043            BranchKind::GreaterOrEqualUnsigned32 => Self::SetGreaterOrEqualUnsigned32,
4044            BranchKind::GreaterOrEqualUnsigned64 => Self::SetGreaterOrEqualUnsigned64,
4045        }
4046    }
4047}
4048
4049impl OperationKind {
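    /// Constant-folds one binary operation. A hand-checked example of the
    /// "32-bit operation on a 64-bit value" variants below: with
    /// `lhs = 0x0000_0001_0000_0000` and `rhs = 1`, `Add32AndSignExtend`
    /// truncates both operands to 32 bits (0 and 1), adds them as `i32`, and
    /// sign-extends the result back to `i64`, yielding `1`; the upper half of
    /// `lhs` is discarded by design.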
4050    #[rustfmt::skip]
4051    fn apply_const(self, lhs: i64, rhs: i64) -> i64 {
4052        use polkavm_common::operation::*;
4053        macro_rules! op32 {
4054            (|$lhs:ident, $rhs:ident| $e:expr) => {{
4055                let $lhs: i32 = lhs.try_into().expect("operand overflow");
4056                let $rhs: i32 = rhs.try_into().expect("operand overflow");
4057                let out: i32 = $e;
4058                cast(out).to_i64_sign_extend()
4059            }};
4060        }
4061
4062        macro_rules! op32_on_64 {
4063            (|$lhs:ident, $rhs:ident| $e:expr) => {{
4064                let $lhs: u64 = cast($lhs).to_unsigned();
4065                let $lhs: u32 = cast($lhs).truncate_to_u32();
4066                let $lhs: i32 = cast($lhs).to_signed();
4067                let $rhs: u64 = cast($rhs).to_unsigned();
4068                let $rhs: u32 = cast($rhs).truncate_to_u32();
4069                let $rhs: i32 = cast($rhs).to_signed();
4070                let out: i32 = $e;
4071                cast(out).to_i64_sign_extend()
4072            }};
4073        }
4074
4075        match self {
4076            Self::Add32 => {
4077                op32!(|lhs, rhs| lhs.wrapping_add(rhs))
4078            }
4079            Self::Add32AndSignExtend => {
4080                op32_on_64!(|lhs, rhs| lhs.wrapping_add(rhs))
4081            }
4082            Self::Add64 => {
4083                lhs.wrapping_add(rhs)
4084            },
4085            Self::And32 => {
4086                op32!(|lhs, rhs| lhs & rhs)
4087            }
4088            Self::And64 => {
4089                lhs & rhs
4090            },
4091            Self::Div32 => {
4092                op32!(|lhs, rhs| div(lhs, rhs))
4093            }
4094            Self::Div32AndSignExtend => {
4095                op32_on_64!(|lhs, rhs| div(lhs, rhs))
4096            }
4097            Self::Div64 => {
4098                div64(lhs, rhs)
4099            },
4100            Self::DivUnsigned32 => {
4101                op32!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4102            }
4103            Self::DivUnsigned32AndSignExtend => {
4104                op32_on_64!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4105            }
4106            Self::DivUnsigned64 => {
4107                cast(divu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
4108            },
4109            Self::Eq32 => {
4110                op32!(|lhs, rhs| i32::from(lhs == rhs))
4111            }
4112            Self::Eq64 => {
4113                i64::from(lhs == rhs)
4114            },
4115            Self::Mul32 => {
4116                op32!(|lhs, rhs| lhs.wrapping_mul(rhs))
4117            }
4118            Self::Mul32AndSignExtend => {
4119                op32_on_64!(|lhs, rhs| lhs.wrapping_mul(rhs))
4120            }
4121            Self::Mul64 => {
4122                lhs.wrapping_mul(rhs)
4123            },
4124            Self::MulUpperSignedSigned32 => {
4125                op32!(|lhs, rhs| mulh(lhs, rhs))
4126            },
4127            Self::MulUpperSignedSigned64 => {
4128                mulh64(lhs, rhs)
4129            },
4130            Self::MulUpperSignedUnsigned32 => {
4131                op32!(|lhs, rhs| mulhsu(lhs, cast(rhs).to_unsigned()))
4132            },
4133            Self::MulUpperSignedUnsigned64 => {
4134                mulhsu64(lhs, cast(rhs).to_unsigned())
4135            },
4136            Self::MulUpperUnsignedUnsigned32 => {
4137                op32!(|lhs, rhs| cast(mulhu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4138            },
4139            Self::MulUpperUnsignedUnsigned64 => {
4140                cast(mulhu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
4141            },
4142            Self::NotEq32 => {
4143                op32!(|lhs, rhs| i32::from(lhs != rhs))
4144            },
4145            Self::NotEq64 => {
4146                i64::from(lhs != rhs)
4147            },
4148            Self::Or32 => {
4149                op32!(|lhs, rhs| lhs | rhs)
4150            },
4151            Self::Or64 => {
4152                lhs | rhs
4153            },
4154            Self::Rem32 => {
4155                op32!(|lhs, rhs| rem(lhs, rhs))
4156            },
4157            Self::Rem32AndSignExtend => {
4158                op32_on_64!(|lhs, rhs| rem(lhs, rhs))
4159            },
4160            Self::Rem64 => {
4161                rem64(lhs, rhs)
4162            },
4163            Self::RemUnsigned32 => {
4164                op32!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4165            },
4166            Self::RemUnsigned32AndSignExtend => {
4167                op32_on_64!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4168            }
4169            Self::RemUnsigned64 => {
4170                cast(remu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
4171            },
4172            Self::SetGreaterOrEqualSigned32 => {
4173                op32!(|lhs, rhs| i32::from(lhs >= rhs))
4174            },
4175            Self::SetGreaterOrEqualSigned64 => {
4176                i64::from(lhs >= rhs)
4177            },
4178            Self::SetGreaterOrEqualUnsigned32 => {
4179                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned()))
4180            },
4181            Self::SetGreaterOrEqualUnsigned64 => {
4182                i64::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned())
4183            },
4184            Self::SetLessThanSigned32 => {
4185                op32!(|lhs, rhs| i32::from(lhs < rhs))
4186            },
4187            Self::SetLessThanSigned64 => {
4188                i64::from(lhs < rhs)
4189            },
4190            Self::SetLessThanUnsigned32 => {
4191                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() < cast(rhs).to_unsigned()))
4192            },
4193            Self::SetLessThanUnsigned64 => {
4194                i64::from(cast(lhs).to_unsigned() < cast(rhs).to_unsigned())
4195            },
4196            Self::ShiftArithmeticRight32 => {
4197                op32!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
4198            },
4199            Self::ShiftArithmeticRight32AndSignExtend => {
4200                op32_on_64!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
4201            },
4202            Self::ShiftArithmeticRight64 => {
4203                let rhs = cast(rhs).to_unsigned();
4204                let rhs = cast(rhs).truncate_to_u32();
4205                lhs.wrapping_shr(rhs)
4206            },
4207            Self::ShiftLogicalLeft32 => {
4208                op32!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
4209            },
4210            Self::ShiftLogicalLeft32AndSignExtend => {
4211                op32_on_64!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
4212            },
4213            Self::ShiftLogicalLeft64 => {
4214                let rhs = cast(rhs).to_unsigned();
4215                let rhs = cast(rhs).truncate_to_u32();
4216                (lhs as u64).wrapping_shl(rhs) as i64
4217            },
4218            Self::ShiftLogicalRight32 => {
4219                op32!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
4220            },
4221            Self::ShiftLogicalRight32AndSignExtend => {
4222                op32_on_64!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
4223            }
4224            Self::ShiftLogicalRight64 => {
4225                (lhs as u64).wrapping_shr(rhs as u32) as i64
4226            },
4227            Self::Sub32 => {
4228                op32!(|lhs, rhs| lhs.wrapping_sub(rhs))
4229            },
4230            Self::Sub32AndSignExtend => {
4231                op32_on_64!(|lhs, rhs| lhs.wrapping_sub(rhs))
4232            },
4233            Self::Sub64 => {
4234                lhs.wrapping_sub(rhs)
4235            },
4236            Self::Xor32 => {
4237                op32!(|lhs, rhs| lhs ^ rhs)
4238            },
4239            Self::Xor64 => {
4240                lhs ^ rhs
4241            },
4242            //
4243            // Zbb instructions
4244            //
4245            Self::AndInverted => lhs & (!rhs),
4246            Self::OrInverted => lhs | (!rhs),
4247            Self::Xnor => !(lhs ^ rhs),
4248            Self::Maximum => lhs.max(rhs),
4249            Self::MaximumUnsigned => (lhs as u64).max(rhs as u64) as i64,
4250            Self::Minimum => lhs.min(rhs),
4251            Self::MinimumUnsigned => (lhs as u64).min(rhs as u64) as i64,
4252            Self::RotateLeft32 => {
4253                op32!(|lhs, rhs| lhs.rotate_left(rhs as u32))
4254            },
4255            Self::RotateLeft32AndSignExtend => {
4256                op32_on_64!(|lhs, rhs| lhs.rotate_left(rhs as u32))
4257            },
4258            Self::RotateLeft64 => {
4259                let rhs = cast(rhs).to_unsigned();
4260                let rhs = cast(rhs).truncate_to_u32();
4261                lhs.rotate_left(rhs)
4262            },
4263            Self::RotateRight32 => {
4264                op32!(|lhs, rhs| lhs.rotate_right(rhs as u32))
4265            },
4266            Self::RotateRight32AndSignExtend => {
4267                op32_on_64!(|lhs, rhs| lhs.rotate_right(rhs as u32))
4268            },
4269            Self::RotateRight64 => {
4270                let rhs = cast(rhs).to_unsigned();
4271                let rhs = cast(rhs).truncate_to_u32();
4272                lhs.rotate_right(rhs)
4273            },
4274        }
4275    }
4276
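    // Evaluates this operation over abstract register values. Constant
    // operands are folded via `apply_const`, in-bounds pointer arithmetic on
    // data addresses is tracked symbolically (e.g. `Add32` of
    // `DataAddress(section, 8)` and `Constant(4)` yields
    // `DataAddress(section, 12)`), and algebraic identities such as
    // `x + 0 = x` or `x & 0 = 0` apply even when one operand is unknown.
    // Returns `None` if the result cannot be determined statically.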
4277    fn apply<H>(self, elf: &Elf<H>, lhs: RegValue, rhs: RegValue) -> Option<RegValue>
4278    where
4279        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4280    {
4281        use OperationKind as O;
4282        use RegValue::Constant as C;
4283
4284        #[rustfmt::skip]
4285        let value = match (self, lhs, rhs) {
4286            (_, C(lhs), C(rhs)) => {
4287                C(self.apply_const(lhs, rhs))
4288            },
4289            (O::Add32, RegValue::DataAddress(lhs), C(rhs)) => {
4290                let offset = cast(cast(lhs.offset).to_signed().wrapping_add(rhs)).to_unsigned();
4291                if offset <= elf.section_by_index(lhs.section_index).size() {
4292                    RegValue::DataAddress(SectionTarget {
4293                        section_index: lhs.section_index,
4294                        offset,
4295                    })
4296                } else {
4297                    return None;
4298                }
4299            },
4300            (O::Sub32, RegValue::DataAddress(lhs), C(rhs)) => {
4301                let offset = cast(lhs.offset).to_signed().wrapping_sub(rhs);
4302                if offset >= 0 {
4303                    RegValue::DataAddress(SectionTarget {
4304                        section_index: lhs.section_index,
4305                        offset: cast(offset).to_unsigned(),
4306                    })
4307                } else {
4308                    return None;
4309                }
4310            },
4311
4312            // (x == x) = 1
4313            (O::Eq32,                   lhs, rhs) if lhs == rhs => C(1),
4314            (O::Eq64,                   lhs, rhs) if lhs == rhs => C(1),
4315            // (x != x) = 0
4316            (O::NotEq32,                lhs, rhs) if lhs == rhs => C(0),
4317            (O::NotEq64,                lhs, rhs) if lhs == rhs => C(0),
4318            // x & x = x
4319            (O::And32,                  lhs, rhs) if lhs == rhs => lhs,
4320            (O::And64,                  lhs, rhs) if lhs == rhs => lhs,
4321            // x | x = x
4322            (O::Or32,                   lhs, rhs) if lhs == rhs => lhs,
4323            (O::Or64,                   lhs, rhs) if lhs == rhs => lhs,
4324
4325            // x + 0 = x
4326            (O::Add32,                  lhs, C(0)) => lhs,
4327            (O::Add64,                  lhs, C(0)) => lhs,
4328            // 0 + x = x
4329            (O::Add32,                  C(0), rhs) => rhs,
4330            (O::Add64,                  C(0), rhs) => rhs,
4331            // x | 0 = x
4332            (O::Or32,                   lhs, C(0)) => lhs,
4333            (O::Or64,                   lhs, C(0)) => lhs,
4334            // 0 | x = x
4335            (O::Or32,                   C(0), rhs) => rhs,
4336            (O::Or64,                   C(0), rhs) => rhs,
4337            // x ^ 0 = x
4338            (O::Xor32,                  lhs, C(0)) => lhs,
4339            (O::Xor64,                  lhs, C(0)) => lhs,
4340            // 0 ^ x = x
4341            (O::Xor32,                  C(0), rhs) => rhs,
4342            (O::Xor64,                  C(0), rhs) => rhs,
4343
4344            // x - 0 = x
4345            (O::Sub32,                  lhs, C(0)) => lhs,
4346            (O::Sub64,                  lhs, C(0)) => lhs,
4347            // x << 0 = x
4348            (O::ShiftLogicalLeft32,     lhs, C(0)) => lhs,
4349            (O::ShiftLogicalLeft64,     lhs, C(0)) => lhs,
4350            // x >> 0 = x
4351            (O::ShiftLogicalRight32,    lhs, C(0)) => lhs,
4352            (O::ShiftLogicalRight64,    lhs, C(0)) => lhs,
4353            // x >> 0 = x
4354            (O::ShiftArithmeticRight32, lhs, C(0)) => lhs,
4355            (O::ShiftArithmeticRight64, lhs, C(0)) => lhs,
4356            // x % 0 = x (the RISC-V remainder of division by zero is the dividend)
4357            (O::Rem32,                          lhs, C(0)) => lhs,
4358            (O::Rem64,                          lhs, C(0)) => lhs,
4359            (O::RemUnsigned32,                  lhs, C(0)) => lhs,
4360            (O::RemUnsigned64,                  lhs, C(0)) => lhs,
4361            (O::Rem32AndSignExtend,             lhs, C(0)) => lhs,
4362            (O::RemUnsigned32AndSignExtend,     lhs, C(0)) => lhs,
4363            // 0 % x = 0
4364            (O::Rem32,                          C(0), _) => C(0),
4365            (O::Rem64,                          C(0), _) => C(0),
4366            (O::RemUnsigned32,                  C(0), _) => C(0),
4367            (O::RemUnsigned64,                  C(0), _) => C(0),
4368            (O::Rem32AndSignExtend,             C(0), _) => C(0),
4369            (O::RemUnsigned32AndSignExtend,     C(0), _) => C(0),
4370
4371            // x & 0 = 0
4372            (O::And32,                    _, C(0)) => C(0),
4373            (O::And64,                    _, C(0)) => C(0),
4374            // 0 & x = 0
4375            (O::And32,                    C(0), _) => C(0),
4376            (O::And64,                    C(0), _) => C(0),
4377            // x * 0 = 0
4378            (O::Mul32,                    _, C(0)) => C(0),
4379            (O::Mul64,                    _, C(0)) => C(0),
4380            (O::MulUpperSignedSigned32,   _, C(0)) => C(0),
4381            (O::MulUpperSignedSigned64,   _, C(0)) => C(0),
4382            (O::MulUpperSignedUnsigned32, _, C(0)) => C(0),
4383            (O::MulUpperSignedUnsigned64, _, C(0)) => C(0),
4384            (O::MulUpperUnsignedUnsigned32, _, C(0)) => C(0),
4385            (O::MulUpperUnsignedUnsigned64, _, C(0)) => C(0),
4386            // 0 * x = 0
4387            (O::Mul32,                    C(0), _) => C(0),
4388            (O::Mul64,                    C(0), _) => C(0),
4389            (O::MulUpperSignedSigned32,   C(0), _) => C(0),
4390            (O::MulUpperSignedSigned64,   C(0), _) => C(0),
4391            (O::MulUpperSignedUnsigned32, C(0), _) => C(0),
4392            (O::MulUpperSignedUnsigned64, C(0), _) => C(0),
4393            (O::MulUpperUnsignedUnsigned32, C(0), _) => C(0),
4394            (O::MulUpperUnsignedUnsigned64, C(0), _) => C(0),
4395
4396            // x / 0 = -1 (RISC-V division by zero returns all ones)
4397            (O::Div32,                          _, C(0)) => C(-1),
4398            (O::Div64,                          _, C(0)) => C(-1),
4399            (O::DivUnsigned32,                  _, C(0)) => C(-1),
4400            (O::DivUnsigned64,                  _, C(0)) => C(-1),
4401            (O::Div32AndSignExtend,             _, C(0)) => C(-1),
4402            (O::DivUnsigned32AndSignExtend,     _, C(0)) => C(-1),
4403
4404            // 0 / x = 0
4405            (O::Div32,                          C(0), _) => C(0),
4406            (O::Div64,                          C(0), _) => C(0),
4407            (O::DivUnsigned32,                  C(0), _) => C(0),
4408            (O::DivUnsigned64,                  C(0), _) => C(0),
4409            (O::Div32AndSignExtend,             C(0), _) => C(0),
4410            (O::DivUnsigned32AndSignExtend,     C(0), _) => C(0),
4411
4412            // (x & ~0) = x
4413            (O::AndInverted,              lhs, C(0)) => lhs,
4414            // (0 & ~x) = 0
4415            (O::AndInverted,              C(0), _) => C(0),
4416
4417            // (x | ~0) = -1
4418            (O::OrInverted,               _, C(0)) => C(-1),
4419
4420            // unsigned_max(0, x) = x and unsigned_max(x, 0) = x
4421            (O::MaximumUnsigned,          C(0), rhs) => rhs,
4422            (O::MaximumUnsigned,          lhs, C(0)) => lhs,
4423
4424            // unsigned_min(0, x) = 0 and unsigned_min(x, 0) = 0
4425            (O::MinimumUnsigned,          C(0), _) => C(0),
4426            (O::MinimumUnsigned,          _, C(0)) => C(0),
4427
4428            // x <<r 0 = x, and 0 <<r x = 0
4429            (O::RotateLeft32,             lhs, C(0)) => lhs,
4430            (O::RotateLeft32,             C(0), _) => C(0),
4431            (O::RotateLeft64,             lhs, C(0)) => lhs,
4432            (O::RotateLeft64,             C(0), _) => C(0),
4433
4434            // x >>r 0 = x, and 0 >>r x = 0
4435            (O::RotateRight32,            lhs, C(0)) => lhs,
4436            (O::RotateRight32,            C(0), _) => C(0),
4437            (O::RotateRight64,            lhs, C(0)) => lhs,
4438            (O::RotateRight64,            C(0), _) => C(0),
4439
4440            // 0 <<r x = 0 and 0 >>r x = 0 (rotating zero by any amount is still zero)
4441            (O::RotateLeft32AndSignExtend,  C(0), _) => C(0),
4442            (O::RotateRight32AndSignExtend, C(0), _) => C(0),
4443
4444            _ => return None,
4445        };
4446
4447        Some(value)
4448    }
4449}
4450
4451#[derive(Copy, Clone, PartialEq, Eq, Debug)]
4452enum RegValue {
4453    InputReg {
4454        reg: Reg,
4455        source_block: BlockTarget,
4456        bits_used: u64,
4457    },
4458    CodeAddress(BlockTarget),
4459    DataAddress(SectionTarget),
4460    Constant(i64),
4461    OutputReg {
4462        reg: Reg,
4463        source_block: BlockTarget,
4464        bits_used: u64,
4465    },
4466    Unknown {
4467        unique: u64,
4468        bits_used: u64,
4469    },
4470}
4471
4472impl RegValue {
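    // Rematerializes this value as a single instruction writing to `dst`,
    // where possible: addresses become `LoadAddress` and constants become
    // `LoadImmediate` (or `LoadImmediate64` on 64-bit targets). Symbolic
    // values cannot be rematerialized and yield `None`.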
4473    fn to_instruction(self, dst: Reg, is_rv64: bool) -> Option<BasicInst<AnyTarget>> {
4474        match self {
4475            RegValue::CodeAddress(target) => Some(BasicInst::LoadAddress {
4476                dst,
4477                target: AnyTarget::Code(target),
4478            }),
4479            RegValue::DataAddress(target) => Some(BasicInst::LoadAddress {
4480                dst,
4481                target: AnyTarget::Data(target),
4482            }),
4483            RegValue::Constant(imm) => {
4484                if let Ok(imm) = i32::try_from(imm) {
4485                    Some(BasicInst::LoadImmediate { dst, imm })
4486                } else {
4487                    assert!(is_rv64, "64-bit register value on 32-bit target");
4488                    Some(BasicInst::LoadImmediate64 { dst, imm })
4489                }
4490            }
4491            _ => None,
4492        }
4493    }
4494
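    // A conservative mask of the bits that may be set in this value. For a
    // constant the mask is the constant itself (exactly its set bits, e.g.
    // `Constant(5)` uses bits `0b101`); addresses get a full 32-bit mask;
    // symbolic values carry the mask that was tracked for them.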
4495    fn bits_used(self) -> u64 {
4496        match self {
4497            RegValue::CodeAddress(..) | RegValue::DataAddress(..) => u64::from(u32::MAX),
4498            RegValue::Constant(value) => value as u64,
4499            RegValue::Unknown { bits_used, .. } | RegValue::InputReg { bits_used, .. } | RegValue::OutputReg { bits_used, .. } => bits_used,
4500        }
4501    }
4502}
4503
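// The abstract state of every register at a given point in a basic block,
// used by constant propagation. Each register holds a `RegValue`: a known
// constant, a code or data address, or a symbolic input/output/unknown value.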
4504#[derive(Clone, PartialEq, Eq)]
4505struct BlockRegs {
4506    bitness: Bitness,
4507    regs: [RegValue; Reg::ALL.len()],
4508}
4509
4510#[deny(clippy::as_conversions)]
4511impl BlockRegs {
4512    fn new_input(bitness: Bitness, source_block: BlockTarget) -> Self {
4513        BlockRegs {
4514            bitness,
4515            regs: Reg::ALL.map(|reg| RegValue::InputReg {
4516                reg,
4517                source_block,
4518                bits_used: bitness.bits_used_mask(),
4519            }),
4520        }
4521    }
4522
4523    fn new_output(bitness: Bitness, source_block: BlockTarget) -> Self {
4524        BlockRegs {
4525            bitness,
4526            regs: Reg::ALL.map(|reg| RegValue::OutputReg {
4527                reg,
4528                source_block,
4529                bits_used: bitness.bits_used_mask(),
4530            }),
4531        }
4532    }
4533
4534    fn get_reg(&self, reg: impl Into<RegImm>) -> RegValue {
4535        match reg.into() {
4536            RegImm::Imm(imm) => RegValue::Constant(cast(imm).to_i64_sign_extend()),
4537            RegImm::Reg(reg) => self.regs[reg.to_usize()],
4538        }
4539    }
4540
4541    fn set_reg(&mut self, reg: Reg, value: RegValue) {
4542        self.regs[reg.to_usize()] = value;
4543    }
4544
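    // Simplifies a block terminator using the current register state:
    // indirect jumps and calls through a register holding a known code
    // address are devirtualized into direct jumps, branches whose outcome is
    // statically known collapse into unconditional jumps, and constant branch
    // operands are folded into immediates. Returns the new terminator plus an
    // optional instruction to append first (the return-address load when a
    // call is devirtualized), or `None` if nothing applies.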
4545    fn simplify_control_instruction<H>(
4546        &self,
4547        elf: &Elf<H>,
4548        instruction: ControlInst<BlockTarget>,
4549    ) -> Option<(Option<BasicInst<AnyTarget>>, ControlInst<BlockTarget>)>
4550    where
4551        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4552    {
4553        match instruction {
4554            ControlInst::JumpIndirect { base, offset: 0 } => {
4555                if let RegValue::CodeAddress(target) = self.get_reg(base) {
4556                    return Some((None, ControlInst::Jump { target }));
4557                }
4558            }
4559            ControlInst::Branch {
4560                kind,
4561                src1,
4562                src2,
4563                target_true,
4564                target_false,
4565            } => {
4566                if target_true == target_false {
4567                    return Some((None, ControlInst::Jump { target: target_true }));
4568                }
4569
4570                let src1_value = self.get_reg(src1);
4571                let src2_value = self.get_reg(src2);
4572                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4573                    match value {
4574                        RegValue::Constant(0) => {
4575                            return Some((None, ControlInst::Jump { target: target_false }));
4576                        }
4577                        RegValue::Constant(1) => {
4578                            return Some((None, ControlInst::Jump { target: target_true }));
4579                        }
4580                        _ => unreachable!("internal error: constant evaluation of branch operands returned a non-boolean value"),
4581                    }
4582                }
4583
4584                if let RegImm::Reg(_) = src1 {
4585                    if let RegValue::Constant(src1_value) = src1_value {
4586                        if let Ok(src1_value) = src1_value.try_into() {
4587                            return Some((
4588                                None,
4589                                ControlInst::Branch {
4590                                    kind,
4591                                    src1: RegImm::Imm(src1_value),
4592                                    src2,
4593                                    target_true,
4594                                    target_false,
4595                                },
4596                            ));
4597                        }
4598                    }
4599                }
4600
4601                if let RegImm::Reg(_) = src2 {
4602                    if let RegValue::Constant(src2_value) = src2_value {
4603                        if let Ok(src2_value) = src2_value.try_into() {
4604                            return Some((
4605                                None,
4606                                ControlInst::Branch {
4607                                    kind,
4608                                    src1,
4609                                    src2: RegImm::Imm(src2_value),
4610                                    target_true,
4611                                    target_false,
4612                                },
4613                            ));
4614                        }
4615                    }
4616                }
4617            }
4618            ControlInst::CallIndirect {
4619                ra,
4620                base,
4621                offset: 0,
4622                target_return,
4623            } => {
4624                if let RegValue::CodeAddress(target) = self.get_reg(base) {
4625                    let instruction_1 = BasicInst::LoadAddress {
4626                        dst: ra,
4627                        target: AnyTarget::Code(target_return),
4628                    };
4629                    let instruction_2 = ControlInst::Jump { target };
4630                    return Some((Some(instruction_1), instruction_2));
4631                }
4632            }
4633            _ => {}
4634        }
4635
4636        None
4637    }
4638
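    // Rewrites one instruction into a simpler equivalent under the current
    // register state: folds operations whose operands are known, narrows
    // register operands to immediates, turns indirect loads and stores
    // through a known data address into absolute ones, and reduces
    // `x + 0`-style additions to plain register moves (or, when the operands'
    // known bit masks don't overlap, rewrites `add` as `or`). Returns `None`
    // when no simplification applies.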
4639    fn simplify_instruction<H>(&self, elf: &Elf<H>, instruction: BasicInst<AnyTarget>) -> Option<BasicInst<AnyTarget>>
4640    where
4641        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4642    {
4643        let is_rv64 = self.bitness == Bitness::B64;
4644
4645        match instruction {
4646            BasicInst::RegReg { kind, dst, src1, src2 } => {
4647                let src1_value = self.get_reg(src1);
4648                let src2_value = self.get_reg(src2);
4649                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4650                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
4651                        if new_instruction != instruction {
4652                            return Some(new_instruction);
4653                        }
4654                    }
4655                }
4656            }
4657            BasicInst::AnyAny { kind, dst, src1, src2 } => {
4658                let src1_value = self.get_reg(src1);
4659                let src2_value = self.get_reg(src2);
4660                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4661                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
4662                        if new_instruction != instruction {
4663                            return Some(new_instruction);
4664                        }
4665                    }
4666                }
4667
4668                if let RegImm::Reg(_) = src1 {
4669                    if let RegValue::Constant(src1_value) = src1_value {
4670                        if let Ok(src1_value) = src1_value.try_into() {
4671                            return Some(BasicInst::AnyAny {
4672                                kind,
4673                                dst,
4674                                src1: RegImm::Imm(src1_value),
4675                                src2,
4676                            });
4677                        }
4678                    }
4679                }
4680
4681                if let RegImm::Reg(_) = src2 {
4682                    if let RegValue::Constant(src2_value) = src2_value {
4683                        if let Ok(src2_value) = src2_value.try_into() {
4684                            return Some(BasicInst::AnyAny {
4685                                kind,
4686                                dst,
4687                                src1,
4688                                src2: RegImm::Imm(src2_value),
4689                            });
4690                        }
4691                    }
4692                }
4693
4694                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) {
4695                    if src1_value == RegValue::Constant(0) {
4696                        if let RegImm::Reg(src) = src2 {
4697                            return Some(BasicInst::MoveReg { dst, src });
4698                        }
4699                    } else if src2_value == RegValue::Constant(0) {
4700                        if let RegImm::Reg(src) = src1 {
4701                            return Some(BasicInst::MoveReg { dst, src });
4702                        }
4703                    }
4704                }
4705
4706                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64)
4707                    && src1_value != RegValue::Constant(0)
4708                    && src2_value != RegValue::Constant(0)
4709                    && (src1_value.bits_used() & src2_value.bits_used()) == 0
4710                {
4711                    // Replace an `add` with an `or` if it's safe to do so.
4712                    //
4713                    // Curiously LLVM's RISC-V backend doesn't do this even though its AMD64 backend does.
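                    // E.g. if `src1` is known to only use bits 0xffff0000 and
                    // `src2` only bits 0x0000ffff, no carry can occur, so
                    // `src1 + src2 == src1 | src2`.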
4714                    return Some(BasicInst::AnyAny {
4715                        kind: match kind {
4716                            AnyAnyKind::Add32 => AnyAnyKind::Or32,
4717                            AnyAnyKind::Add64 => AnyAnyKind::Or64,
4718                            _ => unreachable!(),
4719                        },
4720                        dst,
4721                        src1,
4722                        src2,
4723                    });
4724                }
4725            }
4726            BasicInst::Cmov {
4727                kind,
4728                dst,
4729                src: RegImm::Reg(src),
4730                cond,
4731            } => {
4732                if let RegValue::Constant(src_value) = self.get_reg(src) {
4733                    if let Ok(src_value) = src_value.try_into() {
4734                        return Some(BasicInst::Cmov {
4735                            kind,
4736                            dst,
4737                            src: RegImm::Imm(src_value),
4738                            cond,
4739                        });
4740                    }
4741                }
4742            }
4743            BasicInst::LoadIndirect { kind, dst, base, offset } => {
4744                if let RegValue::DataAddress(base) = self.get_reg(base) {
4745                    return Some(BasicInst::LoadAbsolute {
4746                        kind,
4747                        dst,
4748                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
4749                    });
4750                }
4751            }
4752            BasicInst::LoadAddressIndirect { dst, target } => {
4753                return Some(BasicInst::LoadAddress { dst, target });
4754            }
4755            BasicInst::StoreIndirect { kind, src, base, offset } => {
4756                if let RegValue::DataAddress(base) = self.get_reg(base) {
4757                    return Some(BasicInst::StoreAbsolute {
4758                        kind,
4759                        src,
4760                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
4761                    });
4762                }
4763
4764                if let RegImm::Reg(src) = src {
4765                    if let RegValue::Constant(src_value) = self.get_reg(src) {
4766                        if let Ok(src_value) = src_value.try_into() {
4767                            return Some(BasicInst::StoreIndirect {
4768                                kind,
4769                                src: RegImm::Imm(src_value),
4770                                base,
4771                                offset,
4772                            });
4773                        }
4774                    }
4775                }
4776            }
4777            BasicInst::StoreAbsolute {
4778                kind,
4779                src: RegImm::Reg(src),
4780                target,
4781            } => {
4782                if let RegValue::Constant(src_value) = self.get_reg(src) {
4783                    if let Ok(src_value) = src_value.try_into() {
4784                        return Some(BasicInst::StoreAbsolute {
4785                            kind,
4786                            src: RegImm::Imm(src_value),
4787                            target,
4788                        });
4789                    }
4790                }
4791            }
4792            BasicInst::MoveReg { dst, src } => {
4793                if dst == src {
4794                    return Some(BasicInst::Nop);
4795                }
4796            }
4797            _ => {}
4798        }
4799
4800        None
4801    }
4802
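    // Marks `dst` as holding an unknown value while remembering which bits
    // may be set. If the mask turns out to be empty the value is provably
    // zero, so a constant zero is stored instead. Each unknown gets a unique
    // ID so that distinct unknowns never compare equal.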
4803    fn set_reg_unknown(&mut self, dst: Reg, unknown_counter: &mut u64, bits_used: u64) {
4804        let bits_used_masked = bits_used & self.bitness.bits_used_mask();
4805        if bits_used_masked == 0 {
4806            self.set_reg(dst, RegValue::Constant(0));
4807            return;
4808        }
4809
4810        self.set_reg(
4811            dst,
4812            RegValue::Unknown {
4813                unique: *unknown_counter,
4814                bits_used: bits_used_masked,
4815            },
4816        );
4817        *unknown_counter += 1;
4818    }
4819
4820    fn set_reg_from_control_instruction(&mut self, imports: &[Import], unknown_counter: &mut u64, instruction: ControlInst<BlockTarget>) {
4821        #[allow(clippy::single_match)]
4822        match instruction {
4823            ControlInst::CallIndirect { ra, target_return, .. } => {
4824                let implicit_instruction = BasicInst::LoadAddress {
4825                    dst: ra,
4826                    target: AnyTarget::Code(target_return),
4827                };
4828                self.set_reg_from_instruction(imports, unknown_counter, implicit_instruction);
4829            }
4830            _ => {}
4831        }
4832    }
4833
4834    fn set_reg_from_instruction(&mut self, imports: &[Import], unknown_counter: &mut u64, instruction: BasicInst<AnyTarget>) {
4835        match instruction {
4836            BasicInst::LoadImmediate { dst, imm } => {
4837                self.set_reg(dst, RegValue::Constant(cast(imm).to_i64_sign_extend()));
4838            }
4839            BasicInst::LoadImmediate64 { dst, imm } => {
4840                self.set_reg(dst, RegValue::Constant(imm));
4841            }
4842            BasicInst::LoadAddress {
4843                dst,
4844                target: AnyTarget::Code(target),
4845            }
4846            | BasicInst::LoadAddressIndirect {
4847                dst,
4848                target: AnyTarget::Code(target),
4849            } => {
4850                self.set_reg(dst, RegValue::CodeAddress(target));
4851            }
4852            BasicInst::LoadAddress {
4853                dst,
4854                target: AnyTarget::Data(target),
4855            }
4856            | BasicInst::LoadAddressIndirect {
4857                dst,
4858                target: AnyTarget::Data(target),
4859            } => {
4860                self.set_reg(dst, RegValue::DataAddress(target));
4861            }
4862            BasicInst::MoveReg { dst, src } => {
4863                self.set_reg(dst, self.get_reg(src));
4864            }
4865            BasicInst::AnyAny {
4866                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
4867                dst,
4868                src1,
4869                src2: RegImm::Imm(0),
4870            } => {
4871                self.set_reg(dst, self.get_reg(src1));
4872            }
4873            BasicInst::AnyAny {
4874                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
4875                dst,
4876                src1: RegImm::Imm(0),
4877                src2,
4878            } => {
4879                self.set_reg(dst, self.get_reg(src2));
4880            }
4881            BasicInst::AnyAny {
4882                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64,
4883                dst,
4884                src1,
4885                src2,
4886            } => {
4887                let src1_value = self.get_reg(src1);
4888                let src2_value = self.get_reg(src2);
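                // Besides the operands' own bits, addition can set the carry
                // out of any set bit; shifting each operand's mask left by
                // one conservatively accounts for those carries.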
4889                let bits_used =
4890                    src1_value.bits_used() | src2_value.bits_used() | (src1_value.bits_used() << 1) | (src2_value.bits_used() << 1);
4891
4892                self.set_reg_unknown(dst, unknown_counter, bits_used);
4893            }
4894            BasicInst::AnyAny {
4895                kind: AnyAnyKind::And32 | AnyAnyKind::And64,
4896                dst,
4897                src1,
4898                src2,
4899            } => {
4900                let src1_value = self.get_reg(src1);
4901                let src2_value = self.get_reg(src2);
4902                let bits_used = src1_value.bits_used() & src2_value.bits_used();
4903                self.set_reg_unknown(dst, unknown_counter, bits_used);
4904            }
4905            BasicInst::AnyAny {
4906                kind: AnyAnyKind::Or32 | AnyAnyKind::Or64,
4907                dst,
4908                src1,
4909                src2,
4910            } => {
4911                let src1_value = self.get_reg(src1);
4912                let src2_value = self.get_reg(src2);
4913                let bits_used = src1_value.bits_used() | src2_value.bits_used();
4914                self.set_reg_unknown(dst, unknown_counter, bits_used);
4915            }
4916            BasicInst::AnyAny {
4917                kind: AnyAnyKind::ShiftLogicalRight32,
4918                dst,
4919                src1,
4920                src2: RegImm::Imm(src2),
4921            } => {
4922                let src1_value = self.get_reg(src1);
4923                let bits_used = src1_value.bits_used() >> src2;
4924                self.set_reg_unknown(dst, unknown_counter, bits_used);
4925            }
4926            BasicInst::AnyAny {
4927                kind: AnyAnyKind::ShiftLogicalLeft32,
4928                dst,
4929                src1,
4930                src2: RegImm::Imm(src2),
4931            } => {
4932                let src1_value = self.get_reg(src1);
4933                let bits_used = src1_value.bits_used() << src2;
4934                self.set_reg_unknown(dst, unknown_counter, bits_used);
4935            }
4936            BasicInst::AnyAny {
4937                kind:
4938                    AnyAnyKind::SetLessThanSigned32
4939                    | AnyAnyKind::SetLessThanSigned64
4940                    | AnyAnyKind::SetLessThanUnsigned32
4941                    | AnyAnyKind::SetLessThanUnsigned64,
4942                dst,
4943                ..
4944            } => {
4945                self.set_reg_unknown(dst, unknown_counter, 1);
4946            }
4947            BasicInst::LoadAbsolute {
4948                kind: LoadKind::U8, dst, ..
4949            }
4950            | BasicInst::LoadIndirect {
4951                kind: LoadKind::U8, dst, ..
4952            } => {
4953                self.set_reg_unknown(dst, unknown_counter, u64::from(u8::MAX));
4954            }
4955            BasicInst::LoadAbsolute {
4956                kind: LoadKind::U16, dst, ..
4957            }
4958            | BasicInst::LoadIndirect {
4959                kind: LoadKind::U16, dst, ..
4960            } => {
4961                self.set_reg_unknown(dst, unknown_counter, u64::from(u16::MAX));
4962            }
4963            BasicInst::LoadAbsolute {
4964                kind: LoadKind::U32, dst, ..
4965            }
4966            | BasicInst::LoadIndirect {
4967                kind: LoadKind::U32, dst, ..
4968            } => {
4969                self.set_reg_unknown(dst, unknown_counter, u64::from(u32::MAX));
4970            }
4971            _ => {
4972                for dst in instruction.dst_mask(imports) {
4973                    self.set_reg_unknown(dst, unknown_counter, self.bitness.bits_used_mask());
4974                }
4975            }
4976        }
4977    }
4978}
4979
4980#[allow(clippy::too_many_arguments)]
4981fn perform_constant_propagation<H>(
4982    imports: &[Import],
4983    elf: &Elf<H>,
4984    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
4985    input_regs_for_block: &mut [BlockRegs],
4986    output_regs_for_block: &mut [BlockRegs],
4987    unknown_counter: &mut u64,
4988    reachability_graph: &mut ReachabilityGraph,
4989    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
4990    current: BlockTarget,
4991) -> bool
4992where
4993    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4994{
4995    let is_rv64 = elf.is_64();
4996
4997    let Some(reachability) = reachability_graph.for_code.get(&current) else {
4998        return false;
4999    };
5000
5001    if reachability.is_unreachable() {
5002        return false;
5003    }
5004
5005    let mut modified = false;
5006    if !reachability.is_dynamically_reachable()
5007        && !reachability.always_reachable_or_exported()
5008        && !reachability.reachable_from.is_empty()
5009        && reachability.reachable_from.len() < 64
5010    {
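        // If every predecessor leaves a register holding the same known
        // value, that value can be assumed on entry to this block. This is
        // skipped for blocks that are dynamically reachable (e.g. through
        // indirect jumps) or that have too many predecessors to scan cheaply.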
5011        for reg in Reg::ALL {
5012            let mut common_value_opt = None;
5013            for &source in &reachability.reachable_from {
5014                let value = output_regs_for_block[source.index()].get_reg(reg);
5015                if let Some(common_value) = common_value_opt {
5016                    if common_value == value {
5017                        continue;
5018                    }
5019
5020                    common_value_opt = None;
5021                    break;
5022                } else {
5023                    common_value_opt = Some(value);
5024                }
5025            }
5026
5027            if let Some(value) = common_value_opt {
5028                let old_value = input_regs_for_block[current.index()].get_reg(reg);
5029                if value != old_value {
5030                    input_regs_for_block[current.index()].set_reg(reg, value);
5031                    modified = true;
5032                }
5033            }
5034        }
5035    }
5036
5037    let mut regs = input_regs_for_block[current.index()].clone();
5038    let mut references = BTreeSet::new();
5039    let mut modified_this_block = false;
5040    for nth_instruction in 0..all_blocks[current.index()].ops.len() {
5041        let mut instruction = all_blocks[current.index()].ops[nth_instruction].1;
5042        if instruction.is_nop() {
5043            continue;
5044        }
5045
5046        while let Some(new_instruction) = regs.simplify_instruction(elf, instruction) {
5047            log::trace!("Simplifying instruction in {}", all_blocks[current.index()].ops[nth_instruction].0);
5048            for reg in instruction.src_mask(imports) {
5049                log::trace!("  {reg:?} = {:?}", regs.get_reg(reg));
5050            }
5051            log::trace!("     {instruction:?}");
5052            log::trace!("  -> {new_instruction:?}");
5053
5054            if !modified_this_block {
5055                references = gather_references(&all_blocks[current.index()]);
5056                modified_this_block = true;
5057                modified = true;
5058            }
5059
5060            instruction = new_instruction;
5061            all_blocks[current.index()].ops[nth_instruction].1 = new_instruction;
5062        }
5063
5064        if let BasicInst::LoadAbsolute { kind, dst, target } = instruction {
5065            let section = elf.section_by_index(target.section_index);
5066            if section.is_allocated() && !section.is_writable() {
5067                let value = match kind {
5068                    LoadKind::U64 => section
5069                        .data()
5070                        .get(target.offset as usize..target.offset as usize + 8)
5071                        .map(|xs| u64::from_le_bytes([xs[0], xs[1], xs[2], xs[3], xs[4], xs[5], xs[6], xs[7]]))
5072                        .map(|x| cast(x).to_signed()),
5073                    LoadKind::U32 => section
5074                        .data()
5075                        .get(target.offset as usize..target.offset as usize + 4)
5076                        .map(|xs| u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
5077                        .map(|x| cast(x).to_u64())
5078                        .map(|x| cast(x).to_signed()),
5079                    LoadKind::I32 => section
5080                        .data()
5081                        .get(target.offset as usize..target.offset as usize + 4)
5082                        .map(|xs| i32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
5083                        .map(|x| cast(x).to_i64_sign_extend()),
5084                    LoadKind::U16 => section
5085                        .data()
5086                        .get(target.offset as usize..target.offset as usize + 2)
5087                        .map(|xs| u16::from_le_bytes([xs[0], xs[1]]))
5088                        .map(|x| cast(x).to_u64())
5089                        .map(|x| cast(x).to_signed()),
5090                    LoadKind::I16 => section
5091                        .data()
5092                        .get(target.offset as usize..target.offset as usize + 2)
5093                        .map(|xs| i16::from_le_bytes([xs[0], xs[1]]))
5094                        .map(|x| cast(x).to_i64_sign_extend()),
5095                    LoadKind::I8 => section
5096                        .data()
5097                        .get(target.offset as usize)
5098                        .map(|&x| cast(x).to_signed())
5099                        .map(|x| cast(x).to_i64_sign_extend()),
5100                    LoadKind::U8 => section
5101                        .data()
5102                        .get(target.offset as usize)
5103                        .copied()
5104                        .map(|x| cast(x).to_u64())
5105                        .map(|x| cast(x).to_signed()),
5106                };
5107
5108                if let Some(imm) = value {
5109                    if !modified_this_block {
5110                        references = gather_references(&all_blocks[current.index()]);
5111                        modified_this_block = true;
5112                        modified = true;
5113                    }
5114
5115                    if let Ok(imm) = i32::try_from(imm) {
5116                        instruction = BasicInst::LoadImmediate { dst, imm };
5117                    } else if is_rv64 {
5118                        instruction = BasicInst::LoadImmediate64 { dst, imm };
5119                    } else {
5120                        unreachable!("64-bit load immediate on a 32-bit target");
5121                    }
5122
5123                    all_blocks[current.index()].ops[nth_instruction].1 = instruction;
5124                }
5125            }
5126        }
5127
5128        regs.set_reg_from_instruction(imports, unknown_counter, instruction);
5129    }
5130
5131    if let Some((extra_instruction, new_instruction)) = regs.simplify_control_instruction(elf, all_blocks[current.index()].next.instruction)
5132    {
5133        log::trace!("Simplifying end of {current:?}");
5134        log::trace!("     {:?}", all_blocks[current.index()].next.instruction);
5135        if let Some(ref extra_instruction) = extra_instruction {
5136            log::trace!("  -> {extra_instruction:?}");
5137        }
5138        log::trace!("  -> {new_instruction:?}");
5139
5140        if !modified_this_block {
5141            references = gather_references(&all_blocks[current.index()]);
5142            modified_this_block = true;
5143            modified = true;
5144        }
5145
5146        if let Some(extra_instruction) = extra_instruction {
5147            regs.set_reg_from_instruction(imports, unknown_counter, extra_instruction);
5148
5149            all_blocks[current.index()]
5150                .ops
5151                .push((all_blocks[current.index()].next.source.clone(), extra_instruction));
5152        }
5153        all_blocks[current.index()].next.instruction = new_instruction;
5154    }
5155
5156    regs.set_reg_from_control_instruction(imports, unknown_counter, all_blocks[current.index()].next.instruction);
5157
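    // Any values still unknown at the end of the block escape it, so pin them
    // as this block's named outputs; successor blocks can then detect when
    // all of their predecessors agree on a register's value.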
5158    for reg in Reg::ALL {
5159        if let RegValue::Unknown { bits_used, .. } = regs.get_reg(reg) {
5160            regs.set_reg(
5161                reg,
5162                RegValue::OutputReg {
5163                    reg,
5164                    source_block: current,
5165                    bits_used,
5166                },
5167            )
5168        }
5169    }
5170
5171    let output_regs_modified = output_regs_for_block[current.index()] != regs;
5172    if output_regs_modified {
5173        output_regs_for_block[current.index()] = regs.clone();
5174        modified = true;
5175    }
5176
5177    if modified_this_block {
5178        update_references(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), current, references);
5179        if reachability_graph.is_code_reachable(current) {
5180            if let Some(ref mut optimize_queue) = optimize_queue {
5181                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, current);
5182            }
5183        }
5184    }
5185
5186    if let Some(ref mut optimize_queue) = optimize_queue {
5187        if output_regs_modified {
5188            match all_blocks[current.index()].next.instruction {
5189                ControlInst::Jump { target } => add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target),
5190                ControlInst::Branch {
5191                    target_true, target_false, ..
5192                } => {
5193                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_true);
5194                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_false);
5195                }
5196                ControlInst::Call { .. } => unreachable!("calls are lowered to jumps before optimization"),
5197                _ => {}
5198            }
5199        }
5200    }
5201
5202    modified
5203}
5204
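// Fuses a trailing `LoadAddress` of the next block's address with the block's
// terminating jump, turning the pair back into a `Call`/`CallIndirect`. This
// is the inverse of the lowering done at the start of `optimize_program`, and
// only fires when the loaded return address is the block that physically
// follows.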
5205fn perform_load_address_and_jump_fusion(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) {
5206    let used_blocks: Vec<_> = (0..all_blocks.len())
5207        .map(BlockTarget::from_raw)
5208        .filter(|&block_target| reachability_graph.is_code_reachable(block_target))
5209        .collect();
5210
5211    for window in used_blocks.windows(2) {
5212        let (current, next) = (window[0], window[1]);
5213        let Some(&(
5214            _,
5215            BasicInst::LoadAddress {
5216                dst,
5217                target: AnyTarget::Code(target_return),
5218            },
5219        )) = all_blocks[current.index()].ops.last()
5220        else {
5221            continue;
5222        };
5223
5224        if target_return != next {
5225            continue;
5226        }
5227
5228        all_blocks[current.index()].next.instruction = match all_blocks[current.index()].next.instruction {
5229            ControlInst::Jump { target } => ControlInst::Call {
5230                target,
5231                target_return,
5232                ra: dst,
5233            },
5234            ControlInst::JumpIndirect { base, offset } if dst != base => ControlInst::CallIndirect {
5235                base,
5236                offset,
5237                target_return,
5238                ra: dst,
5239            },
5240            _ => {
5241                continue;
5242            }
5243        };
5244
5245        all_blocks[current.index()].ops.pop();
5246    }
5247}
5248
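// Runs the optimization pipeline: first lowers calls into plain jumps (plus an
// explicit return-address load) so the passes see ordinary control flow, then
// drives inlining, dead code elimination and constant propagation over a
// worklist until a fixed point is reached, and finally re-fuses address loads
// with jumps back into calls.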
5249#[deny(clippy::as_conversions)]
5250fn optimize_program<H>(
5251    config: &Config,
5252    elf: &Elf<H>,
5253    imports: &[Import],
5254    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
5255    reachability_graph: &mut ReachabilityGraph,
5256    exports: &mut [Export],
5257) where
5258    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
5259{
5260    let bitness = if elf.is_64() { Bitness::B64 } else { Bitness::B32 };
5261
5262    let mut optimize_queue = VecSet::new();
5263    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5264        if !reachability_graph.is_code_reachable(current) {
5265            all_blocks[current.index()].ops.clear();
5266            all_blocks[current.index()].next.instruction = ControlInst::Unimplemented;
5267            continue;
5268        }
5269
5270        perform_nop_elimination(all_blocks, current);
5271
5272        let block = &mut all_blocks[current.index()];
5273        block.next.instruction = match block.next.instruction {
5274            ControlInst::Call { ra, target, target_return } => {
5275                block.ops.push((
5276                    block.next.source.clone(),
5277                    BasicInst::LoadAddress {
5278                        dst: ra,
5279                        target: AnyTarget::Code(target_return),
5280                    },
5281                ));
5282                ControlInst::Jump { target }
5283            }
5284            ControlInst::CallIndirect {
5285                ra,
5286                target_return,
5287                base,
5288                offset,
5289            } if ra != base => {
5290                block.ops.push((
5291                    block.next.source.clone(),
5292                    BasicInst::LoadAddress {
5293                        dst: ra,
5294                        target: AnyTarget::Code(target_return),
5295                    },
5296                ));
5297                ControlInst::JumpIndirect { base, offset }
5298            }
5299            instruction => instruction,
5300        };
5301
5302        optimize_queue.push(current);
5303    }
5304
5305    let mut unknown_counter = 0;
5306    let mut input_regs_for_block = Vec::with_capacity(all_blocks.len());
5307    let mut output_regs_for_block = Vec::with_capacity(all_blocks.len());
5308    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5309        input_regs_for_block.push(BlockRegs::new_input(bitness, current));
5310        output_regs_for_block.push(BlockRegs::new_output(bitness, current));
5311    }
5312
5313    let mut registers_needed_for_block = Vec::with_capacity(all_blocks.len());
5314    for _ in 0..all_blocks.len() {
5315        registers_needed_for_block.push(RegMask::all());
5316    }
5317
5318    let mut count_inline = 0;
5319    let mut count_dce = 0;
5320    let mut count_cp = 0;
5321
5322    let mut inline_history: HashSet<(BlockTarget, BlockTarget)> = HashSet::new(); // Necessary to prevent infinite loops.
5323    macro_rules! run_optimizations {
5324        ($current:expr, $optimize_queue:expr) => {{
5325            let mut modified = false;
5326            if reachability_graph.is_code_reachable($current) {
5327                perform_nop_elimination(all_blocks, $current);
5328
5329                if perform_inlining(
5330                    all_blocks,
5331                    reachability_graph,
5332                    exports,
5333                    $optimize_queue,
5334                    &mut inline_history,
5335                    config.inline_threshold,
5336                    $current,
5337                ) {
5338                    count_inline += 1;
5339                    modified |= true;
5340                }
5341
5342                if perform_dead_code_elimination(
5343                    config,
5344                    imports,
5345                    all_blocks,
5346                    &mut registers_needed_for_block,
5347                    reachability_graph,
5348                    $optimize_queue,
5349                    $current,
5350                ) {
5351                    count_dce += 1;
5352                    modified |= true;
5353                }
5354
5355                if perform_constant_propagation(
5356                    imports,
5357                    elf,
5358                    all_blocks,
5359                    &mut input_regs_for_block,
5360                    &mut output_regs_for_block,
5361                    &mut unknown_counter,
5362                    reachability_graph,
5363                    $optimize_queue,
5364                    $current,
5365                ) {
5366                    count_cp += 1;
5367                    modified |= true;
5368                }
5369            }
5370
5371            modified
5372        }};
5373    }
5374
5375    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5376        run_optimizations!(current, None);
5377    }
5378
5379    garbage_collect_reachability(all_blocks, reachability_graph);
5380
5381    let timestamp = std::time::Instant::now();
5382    let mut opt_iteration_count = 0;
5383    while let Some(current) = optimize_queue.pop_non_unique() {
5384        loop {
5385            if !run_optimizations!(current, Some(&mut optimize_queue)) {
5386                break;
5387            }
5388        }
5389        opt_iteration_count += 1;
5390    }
5391
5392    log::debug!(
5393        "Optimizing the program took {opt_iteration_count} iteration(s) and {}ms",
5394        timestamp.elapsed().as_millis()
5395    );
5396    log::debug!("             Inlinining: {count_inline}");
5397    log::debug!("  Dead code elimination: {count_dce}");
5398    log::debug!("   Constant propagation: {count_cp}");
5399    garbage_collect_reachability(all_blocks, reachability_graph);
5400
5401    inline_history.clear();
5402    count_inline = 0;
5403    count_dce = 0;
5404    count_cp = 0;
5405
5406    let timestamp = std::time::Instant::now();
5407    let mut opt_brute_force_iterations = 0;
5408    let mut modified = true;
5409    while modified {
5410        opt_brute_force_iterations += 1;
5411        modified = false;
5412        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5413            modified |= run_optimizations!(current, Some(&mut optimize_queue));
5414        }
5415
5416        while let Some(current) = optimize_queue.pop_non_unique() {
5417            loop {
5418                if !run_optimizations!(current, Some(&mut optimize_queue)) {
5419                    break;
5420                }
5421            }
5422        }
5423
5424        if modified {
5425            garbage_collect_reachability(all_blocks, reachability_graph);
5426        }
5427    }
5428
5429    perform_load_address_and_jump_fusion(all_blocks, reachability_graph);
5430
5431    log::debug!(
5432        "Optimizing the program took {} brute force iteration(s) and {} ms",
5433        opt_brute_force_iterations - 1,
5434        timestamp.elapsed().as_millis()
5435    );
5436    log::debug!("             Inlinining: {count_inline}");
5437    log::debug!("  Dead code elimination: {count_dce}");
5438    log::debug!("   Constant propagation: {count_cp}");
5439}
5440
5441#[cfg(test)]
5442mod test {
5443    use super::*;
5444    use polkavm::Reg;
5445
5446    struct ProgramBuilder {
5447        data_section: SectionIndex,
5448        current_section: SectionIndex,
5449        next_free_section: SectionIndex,
5450        next_offset_for_section: HashMap<SectionIndex, u64>,
5451        instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
5452        exports: Vec<Export>,
5453    }
5454
5455    struct TestProgram {
5456        disassembly: String,
5457        instance: polkavm::RawInstance,
5458    }
5459
5460    impl ProgramBuilder {
5461        fn new() -> Self {
5462            ProgramBuilder {
5463                data_section: SectionIndex::new(0),
5464                current_section: SectionIndex::new(1),
5465                next_free_section: SectionIndex::new(1),
5466                next_offset_for_section: HashMap::default(),
5467                instructions: Vec::new(),
5468                exports: Vec::new(),
5469            }
5470        }
5471
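        // Convenience used by the tests below: assemble a whole program and
        // import it into the builder. A minimal usage sketch (the assembly
        // text and `Config::default()` are illustrative assumptions, not
        // taken from an actual test):
        //
        //     let b = ProgramBuilder::from_assembly("/* polkavm assembly */");
        //     let program = b.build(Config::default());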
5472        fn from_assembly(assembly: &str) -> Self {
5473            let mut b = Self::new();
5474            b.append_assembly(assembly);
5475            b
5476        }
5477
5478        fn add_export(&mut self, name: impl AsRef<[u8]>, input_regs: u8, output_regs: u8, location: SectionTarget) {
5479            self.exports.push(Export {
5480                location,
5481                metadata: ExternMetadata {
5482                    index: None,
5483                    symbol: name.as_ref().to_owned(),
5484                    input_regs,
5485                    output_regs,
5486                },
5487            })
5488        }
5489
5490        fn add_section(&mut self) -> SectionTarget {
5491            let index = self.next_free_section;
5492            self.next_offset_for_section.insert(index, 0);
5493            self.next_free_section = SectionIndex::new(index.raw() + 1);
5494            SectionTarget {
5495                section_index: index,
5496                offset: 0,
5497            }
5498        }
5499
5500        fn switch_section(&mut self, section_index: impl Into<SectionIndex>) {
5501            self.current_section = section_index.into();
5502        }
5503
5504        fn current_source(&self) -> Source {
5505            let next_offset = self.next_offset_for_section.get(&self.current_section).copied().unwrap_or(0);
5506            Source {
5507                section_index: self.current_section,
5508                offset_range: (next_offset..next_offset + 4).into(),
5509            }
5510        }
5511
5512        fn push(&mut self, inst: impl Into<InstExt<SectionTarget, SectionTarget>>) -> SectionTarget {
5513            let source = self.current_source();
5514            *self.next_offset_for_section.get_mut(&self.current_section).unwrap() += 4;
5515            self.instructions.push((source, inst.into()));
5516            source.begin()
5517        }
5518
5519        fn append_assembly(&mut self, assembly: &str) {
5520            let raw_blob = polkavm_common::assembler::assemble(assembly).unwrap();
5521            let blob = ProgramBlob::parse(raw_blob.into()).unwrap();
5522            let mut program_counter_to_section_target = HashMap::new();
5523            let mut program_counter_to_instruction_index = HashMap::new();
5524            let mut in_new_block = true;
5525            for instruction in blob.instructions(Bitness::B32) {
5526                if in_new_block {
5527                    let block = self.add_section();
5528                    self.switch_section(block);
5529                    program_counter_to_section_target.insert(instruction.offset, block);
5530                    in_new_block = false;
5531                }
5532
5533                program_counter_to_instruction_index.insert(instruction.offset, self.instructions.len());
5534                self.push(BasicInst::Nop);
5535
5536                if instruction.kind.starts_new_basic_block() {
5537                    in_new_block = true;
5538                }
5539            }
5540
5541            for instruction in blob.instructions(Bitness::B32) {
5542                let out = &mut self.instructions[*program_counter_to_instruction_index.get(&instruction.offset).unwrap()].1;
5543                match instruction.kind {
5544                    Instruction::fallthrough => {
5545                        let target = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
5546                        *out = ControlInst::Jump { target }.into();
5547                    }
5548                    Instruction::jump(target) => {
5549                        let target = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
5550                        *out = ControlInst::Jump { target }.into();
5551                    }
5552                    Instruction::load_imm(dst, imm) => {
5553                        *out = BasicInst::LoadImmediate {
5554                            dst: dst.into(),
5555                            imm: cast(imm).to_signed(),
5556                        }
5557                        .into();
5558                    }
5559                    Instruction::add_imm_32(dst, src, imm) => {
5560                        *out = BasicInst::AnyAny {
5561                            kind: AnyAnyKind::Add32,
5562                            dst: dst.into(),
5563                            src1: src.into(),
5564                            src2: cast(imm).to_signed().into(),
5565                        }
5566                        .into();
5567                    }
5568                    Instruction::add_32(dst, src1, src2) => {
5569                        *out = BasicInst::AnyAny {
5570                            kind: AnyAnyKind::Add32,
5571                            dst: dst.into(),
5572                            src1: src1.into(),
5573                            src2: src2.into(),
5574                        }
5575                        .into();
5576                    }
5577                    Instruction::branch_less_unsigned_imm(src1, src2, target) | Instruction::branch_eq_imm(src1, src2, target) => {
5578                        let target_true = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
5579                        let target_false = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
5580                        *out = ControlInst::Branch {
5581                            kind: match instruction.kind {
5582                                Instruction::branch_less_unsigned_imm(..) => BranchKind::LessUnsigned32,
5583                                Instruction::branch_eq_imm(..) => BranchKind::Eq32,
5584                                _ => unreachable!(),
5585                            },
5586                            src1: src1.into(),
5587                            src2: cast(src2).to_signed().into(),
5588                            target_true,
5589                            target_false,
5590                        }
5591                        .into();
5592                    }
5593                    Instruction::jump_indirect(base, 0) => {
5594                        *out = ControlInst::JumpIndirect {
5595                            base: base.into(),
5596                            offset: 0,
5597                        }
5598                        .into();
5599                    }
5600                    Instruction::trap => {
5601                        *out = ControlInst::Unimplemented.into();
5602                    }
5603                    Instruction::store_u32(src, address) => {
5604                        *out = BasicInst::StoreAbsolute {
5605                            kind: StoreKind::U32,
5606                            src: src.into(),
5607                            target: SectionTarget {
5608                                section_index: self.data_section,
5609                                offset: u64::from(address),
5610                            },
5611                        }
5612                        .into();
5613                    }
5614                    Instruction::store_indirect_u32(src, base, offset) => {
5615                        *out = BasicInst::StoreIndirect {
5616                            kind: StoreKind::U32,
5617                            src: src.into(),
5618                            base: base.into(),
5619                            offset: cast(offset).to_signed(),
5620                        }
5621                        .into();
5622                    }
5623                    _ => unimplemented!("{instruction:?}"),
5624                }
5625            }
5626
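            // Re-register the blob's exports, pointing them at the sections created
            // for their entry points.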
5627            for export in blob.exports() {
5628                let input_regs = 1;
5629                let output_regs = 1;
5630                let target = program_counter_to_section_target.get(&export.program_counter()).unwrap();
5631                self.add_export(export.symbol().as_bytes(), input_regs, output_regs, *target);
5632            }
5633        }
5634
5635        fn build(&self, config: Config) -> TestProgram {
5636            let elf: Elf<object::elf::FileHeader32<object::endian::LittleEndian>> = Elf::default();
5637            let data_sections_set: HashSet<_> = core::iter::once(self.data_section).collect();
5638            let code_sections_set: HashSet<_> = self.next_offset_for_section.keys().copied().collect();
5639            let relocations = BTreeMap::default();
5640            let imports = [];
5641            let mut exports = self.exports.clone();
5642
5643            // TODO: Refactor the main code so that we don't have to copy-paste this here.
5644            let all_jump_targets = harvest_all_jump_targets(
5645                &elf,
5646                &data_sections_set,
5647                &code_sections_set,
5648                &self.instructions,
5649                &relocations,
5650                &exports,
5651            )
5652            .unwrap();
5653
5654            let all_blocks = split_code_into_basic_blocks(&elf, &Default::default(), &all_jump_targets, self.instructions.clone()).unwrap();
5655            let mut section_to_block = build_section_to_block_map(&all_blocks).unwrap();
5656            let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks).unwrap();
5657            let mut reachability_graph =
5658                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
5659            if matches!(config.opt_level, OptLevel::O2) {
5660                optimize_program(&config, &elf, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
5661            }
5662            let mut used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);
5663
5664            if matches!(config.opt_level, OptLevel::O2) {
5665                used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
5666                merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
5667                replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
5668            }
5669
5670            let expected_reachability_graph =
5671                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
            assert_eq!(reachability_graph, expected_reachability_graph);
5673
5674            let used_imports = HashSet::new();
5675            let mut base_address_for_section = HashMap::new();
5676            base_address_for_section.insert(self.data_section, 0);
5677            let section_got = self.next_free_section;
5678            let target_to_got_offset = HashMap::new();
5679
5680            let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
5681            let code = emit_code(
5682                &Default::default(),
5683                &imports,
5684                &base_address_for_section,
5685                section_got,
5686                &target_to_got_offset,
5687                &all_blocks,
5688                &used_blocks,
5689                &used_imports,
5690                &jump_target_for_block,
5691                true,
5692                false,
5693            )
5694            .unwrap();
5695
5696            let mut builder = ProgramBlobBuilder::new();
5697
5698            let mut export_count = 0;
5699            for current in used_blocks {
5700                for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
5701                    let export = &exports[export_index];
5702                    let jump_target = jump_target_for_block[current.index()]
5703                        .expect("internal error: export metadata points to a block without a jump target assigned");
5704
5705                    builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
5706                    export_count += 1;
5707                }
5708            }
5709            assert_eq!(export_count, exports.len());
5710
5711            let mut raw_code = Vec::with_capacity(code.len());
5712            for (_, inst) in code {
5713                raw_code.push(inst);
5714            }
5715
5716            builder.set_code(&raw_code, &jump_table);
5717            builder.set_rw_data_size(1);
5718
5719            let blob = ProgramBlob::parse(builder.to_vec().into()).unwrap();
5720            let mut disassembler = polkavm_disassembler::Disassembler::new(&blob, polkavm_disassembler::DisassemblyFormat::Guest).unwrap();
5721            disassembler.emit_header(false);
5722            disassembler.show_offsets(false);
5723            let mut buf = Vec::new();
5724            disassembler.disassemble_into(&mut buf).unwrap();
5725            let disassembly = String::from_utf8(buf).unwrap();
5726
5727            let mut config = polkavm::Config::from_env().unwrap();
5728            config.set_backend(Some(polkavm::BackendKind::Interpreter));
5729            let engine = polkavm::Engine::new(&config).unwrap();
5730            let mut module_config = polkavm::ModuleConfig::default();
5731            module_config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
5732            let module = polkavm::Module::from_blob(&engine, &module_config, blob).unwrap();
5733            let mut instance = module.instantiate().unwrap();
5734            instance.set_gas(10000);
5735            instance.set_reg(polkavm::Reg::RA, polkavm::RETURN_TO_HOST);
5736            let pc = module.exports().find(|export| export.symbol() == "main").unwrap().program_counter();
5737            instance.set_next_program_counter(pc);
5738
5739            TestProgram { disassembly, instance }
5740        }
5741
5742        fn test_optimize(
5743            &self,
5744            mut run: impl FnMut(&mut polkavm::RawInstance),
5745            mut check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
5746            expected_disassembly: &str,
5747        ) {
5748            let mut unopt = self.build(Config {
5749                opt_level: OptLevel::O0,
5750                ..Config::default()
5751            });
5752            let mut opt = self.build(Config {
5753                opt_level: OptLevel::O2,
5754                ..Config::default()
5755            });
5756
5757            log::info!("Unoptimized disassembly:\n{}", unopt.disassembly);
5758            log::info!("Optimized disassembly:\n{}", opt.disassembly);
5759
5760            run(&mut unopt.instance);
5761            run(&mut opt.instance);
5762
5763            check(&mut opt.instance, &mut unopt.instance);
5764
5765            fn normalize(s: &str) -> String {
5766                let mut out = String::new();
5767                for line in s.trim().lines() {
5768                    if !line.trim().starts_with('@') {
5769                        out.push_str("    ");
5770                    }
5771                    out.push_str(line.trim());
5772                    out.push('\n');
5773                }
5774                out
5775            }
5776
5777            let is_todo = expected_disassembly.trim() == "TODO";
5778            let actual_normalized = normalize(&opt.disassembly);
5779            let expected_normalized = normalize(expected_disassembly);
5780            if actual_normalized != expected_normalized && !is_todo {
5781                use core::fmt::Write;
5782                let mut output_actual = String::new();
5783                let mut output_expected = String::new();
5784                for diff in diff::lines(&actual_normalized, &expected_normalized) {
5785                    match diff {
5786                        diff::Result::Left(line) => {
5787                            writeln!(&mut output_actual, "{}", yansi::Paint::red(line)).unwrap();
5788                        }
5789                        diff::Result::Both(line, _) => {
5790                            writeln!(&mut output_actual, "{}", line).unwrap();
5791                            writeln!(&mut output_expected, "{}", line).unwrap();
5792                        }
5793                        diff::Result::Right(line) => {
5794                            writeln!(&mut output_expected, "{}", line).unwrap();
5795                        }
5796                    }
5797                }
5798
5799                {
5800                    use std::io::Write;
5801                    let stderr = std::io::stderr();
5802                    let mut stderr = stderr.lock();
5803
5804                    writeln!(&mut stderr, "Optimization test failed!\n").unwrap();
5805                    writeln!(&mut stderr, "Expected optimized:").unwrap();
5806                    writeln!(&mut stderr, "{output_expected}").unwrap();
5807                    writeln!(&mut stderr, "Actual optimized:").unwrap();
5808                    writeln!(&mut stderr, "{output_actual}").unwrap();
5809                }
5810
                panic!("optimized program is not what we expected")
5812            }
5813
5814            if is_todo {
5815                todo!();
5816            }
5817        }
5818
5819        fn test_optimize_oneshot(
5820            assembly: &str,
5821            expected_disassembly: &str,
5822            run: impl FnMut(&mut polkavm::RawInstance),
5823            check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
5824        ) {
5825            let _ = env_logger::try_init();
5826            let b = ProgramBuilder::from_assembly(assembly);
5827            b.test_optimize(run, check, expected_disassembly);
5828        }
5829    }
5830
5831    fn expect_finished(i: &mut polkavm::RawInstance) {
5832        assert!(matches!(i.run().unwrap(), polkavm::InterruptKind::Finished));
5833    }
5834
5835    fn expect_regs(regs: impl IntoIterator<Item = (Reg, u64)> + Clone) -> impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance) {
5836        move |a: &mut polkavm::RawInstance, b: &mut polkavm::RawInstance| {
5837            for (reg, value) in regs.clone() {
5838                assert_eq!(b.reg(reg), value);
5839                assert_eq!(a.reg(reg), b.reg(reg));
5840            }
5841        }
5842    }
5843
5844    #[test]
5845    fn test_optimize_01_empty_block_elimination() {
5846        ProgramBuilder::test_optimize_oneshot(
5847            "
5848            pub @main:
5849                jump @loop
5850            @before_loop:
5851                jump @loop
5852            @loop:
5853                i32 a0 = a0 + 0x1
5854                jump @before_loop if a0 <u 10
5855                ret
5856            ",
5857            "
5858            @0 [export #0: 'main']
5859                a0 = a0 + 0x1
5860                jump @0 if a0 <u 10
5861            @1
5862                ret
5863            ",
5864            expect_finished,
5865            expect_regs([(Reg::A0, 10)]),
5866        )
5867    }
5868
5869    #[test]
5870    fn test_optimize_02_simple_constant_propagation() {
5871        ProgramBuilder::test_optimize_oneshot(
5872            "
5873            pub @main:
5874                a1 = 0
5875                i32 a1 = a1 + 1
5876            @loop:
5877                i32 a0 = a0 + a1
5878                jump @loop if a0 <u 10
5879                ret
5880            ",
5881            "
5882            @0 [export #0: 'main']
5883                a1 = 0x1
5884                fallthrough
5885            @1
5886                a0 = a0 + a1
5887                jump @1 if a0 <u 10
5888            @2
5889                ret
5890            ",
5891            expect_finished,
5892            expect_regs([(Reg::A0, 10), (Reg::A1, 1)]),
5893        )
5894    }
5895
5896    #[test]
5897    fn test_optimize_03_simple_dead_code_elimination() {
5898        ProgramBuilder::test_optimize_oneshot(
5899            "
5900            pub @main:
5901                i32 a1 = a1 + 100
5902                a1 = 8
5903                i32 a2 = a2 + 0
5904                i32 a0 = a0 + 1
5905                jump @main if a0 <u 10
5906                ret
5907            ",
5908            "
5909            @0 [export #0: 'main']
5910                a1 = 0x8
5911                a0 = a0 + 0x1
5912                jump @0 if a0 <u 10
5913            @1
5914                ret
5915            ",
5916            expect_finished,
5917            expect_regs([(Reg::A0, 10), (Reg::A1, 8)]),
5918        )
5919    }
5920}
5921
5922fn collect_used_blocks(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) -> Vec<BlockTarget> {
5923    let mut used_blocks = Vec::new();
5924    for block in all_blocks {
5925        if !reachability_graph.is_code_reachable(block.target) {
5926            continue;
5927        }
5928
5929        used_blocks.push(block.target);
5930    }
5931
5932    used_blocks
5933}
5934
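// Where a used block ends with a fallthrough whose target is not the next block in
// emission order, appends a new block which explicitly jumps to that target (or, if
// the target is empty, inlines its terminator), and redirects the fallthrough to it.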
5935fn add_missing_fallthrough_blocks(
5936    all_blocks: &mut Vec<BasicBlock<AnyTarget, BlockTarget>>,
5937    reachability_graph: &mut ReachabilityGraph,
5938    used_blocks: Vec<BlockTarget>,
5939) -> Vec<BlockTarget> {
5940    let mut new_used_blocks = Vec::new();
5941    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, &used_blocks);
5942    for current in used_blocks {
5943        new_used_blocks.push(current);
5944        if can_fallthrough_to_next_block.contains(&current) {
5945            continue;
5946        }
5947
5948        let Some(target) = all_blocks[current.index()].next.instruction.fallthrough_target_mut().copied() else {
5949            continue;
5950        };
5951
5952        let inline_target = target != current
5953            && all_blocks[target.index()].ops.is_empty()
5954            && all_blocks[target.index()].next.instruction.fallthrough_target_mut().is_none();
5955
5956        let new_block_index = BlockTarget::from_raw(all_blocks.len());
5957        all_blocks.push(BasicBlock {
5958            target: new_block_index,
5959            source: all_blocks[current.index()].source,
5960            ops: Default::default(),
5961            next: if inline_target {
5962                all_blocks[target.index()].next.clone()
5963            } else {
5964                EndOfBlock {
5965                    source: all_blocks[current.index()].next.source.clone(),
5966                    instruction: ControlInst::Jump { target },
5967                }
5968            },
5969        });
5970
5971        new_used_blocks.push(new_block_index);
5972
        reachability_graph
            .for_code
            .entry(new_block_index)
            .or_default()
            .always_reachable = true;
5978        update_references(all_blocks, reachability_graph, None, new_block_index, Default::default());
5979        reachability_graph.for_code.get_mut(&new_block_index).unwrap().always_reachable = false;
5980
5981        let references = gather_references(&all_blocks[current.index()]);
5982        *all_blocks[current.index()].next.instruction.fallthrough_target_mut().unwrap() = new_block_index;
5983        update_references(all_blocks, reachability_graph, None, current, references);
5984    }
5985
5986    new_used_blocks
5987}
5988
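// Finds used blocks which are empty and merely jump to the block emitted immediately
// after them, redirects every code and data reference to that next block, and then
// removes the now-unreachable blocks.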
5989fn merge_consecutive_fallthrough_blocks(
5990    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
5991    reachability_graph: &mut ReachabilityGraph,
5992    section_to_block: &mut HashMap<SectionTarget, BlockTarget>,
5993    used_blocks: &mut Vec<BlockTarget>,
5994) {
5995    if used_blocks.len() < 2 {
5996        return;
5997    }
5998
5999    let mut removed = HashSet::new();
6000    for nth_block in 0..used_blocks.len() - 1 {
6001        let current = used_blocks[nth_block];
6002        let next = used_blocks[nth_block + 1];
6003
6004        // Find blocks which are empty...
6005        if !all_blocks[current.index()].ops.is_empty() {
6006            continue;
6007        }
6008
6009        // ...and which immediately jump somewhere else.
6010        {
6011            let ControlInst::Jump { target } = all_blocks[current.index()].next.instruction else {
6012                continue;
6013            };
6014            if target != next {
6015                continue;
6016            }
6017        }
6018
6019        let current_reachability = reachability_graph.for_code.get_mut(&current).unwrap();
6020        if current_reachability.always_reachable_or_exported() {
6021            continue;
6022        }
6023
6024        removed.insert(current);
6025
6026        // Gather all other basic blocks which reference this block.
6027        let referenced_by_code: BTreeSet<BlockTarget> = current_reachability
6028            .reachable_from
6029            .iter()
6030            .copied()
6031            .chain(current_reachability.address_taken_in.iter().copied())
6032            .collect();
6033
6034        // Replace code references to this block.
6035        for dep in referenced_by_code {
6036            let references = gather_references(&all_blocks[dep.index()]);
6037            for (_, op) in &mut all_blocks[dep.index()].ops {
6038                *op = op
6039                    .map_target(|target| {
6040                        Ok::<_, ()>(if target == AnyTarget::Code(current) {
6041                            AnyTarget::Code(next)
6042                        } else {
6043                            target
6044                        })
6045                    })
6046                    .unwrap();
6047            }
6048
6049            all_blocks[dep.index()].next.instruction = all_blocks[dep.index()]
6050                .next
6051                .instruction
6052                .map_target(|target| Ok::<_, ()>(if target == current { next } else { target }))
6053                .unwrap();
6054
6055            update_references(all_blocks, reachability_graph, None, dep, references);
6056        }
6057
6058        // Remove it from the graph if it's globally unreachable now.
6059        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);
6060
6061        let Some(current_reachability) = reachability_graph.for_code.get_mut(&current) else {
6062            continue;
6063        };
6064
6065        if !current_reachability.referenced_by_data.is_empty() {
6066            // Find all section targets which correspond to this block...
6067            let section_targets: Vec<SectionTarget> = section_to_block
6068                .iter()
6069                .filter(|&(_, block_target)| *block_target == current)
6070                .map(|(section_target, _)| *section_target)
6071                .collect();
6072
            // ...then make them point to the next block.
6074            for section_target in section_targets {
6075                section_to_block.insert(section_target, next);
6076            }
6077
6078            // Grab all of the data sections which reference the current block.
6079            let referenced_by_data = core::mem::take(&mut current_reachability.referenced_by_data);
6080
6081            // Mark the next block as referenced by all of the data sections which reference the current block.
6082            reachability_graph
6083                .for_code
6084                .get_mut(&next)
6085                .unwrap()
6086                .referenced_by_data
6087                .extend(referenced_by_data.iter().copied());
6088
6089            // Mark the data sections as NOT referencing the current block, and make them reference the next block.
6090            for section_index in &referenced_by_data {
6091                if let Some(list) = reachability_graph.code_references_in_data_section.get_mut(section_index) {
6092                    list.retain(|&target| target != current);
6093                    list.push(next);
6094                    list.sort_unstable();
6095                    list.dedup();
6096                }
6097            }
6098        }
6099
6100        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);
6101    }
6102
6103    for &current in &removed {
6104        assert!(
6105            !reachability_graph.is_code_reachable(current),
6106            "block {current:?} still reachable: {:#?}",
6107            reachability_graph.for_code.get(&current)
6108        );
6109    }
6110
6111    used_blocks.retain(|current| !removed.contains(current));
6112}
6113
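// Rewrites every block which still uses "fake" registers (E0..E3) so that it only
// uses registers the VM actually has, by running `regalloc2` over the affected
// instruction range and spilling through a dedicated data section when needed.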
6114fn spill_fake_registers(
6115    section_regspill: SectionIndex,
6116    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
6117    reachability_graph: &mut ReachabilityGraph,
6118    imports: &[Import],
6119    used_blocks: &[BlockTarget],
6120    regspill_size: &mut usize,
6121    is_rv64: bool,
6122) {
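    // An adapter which presents the instruction range being rewritten to `regalloc2`
    // as a function consisting of a single straight-line basic block.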
6123    struct RegAllocBlock<'a> {
6124        instructions: &'a [Vec<regalloc2::Operand>],
6125        num_vregs: usize,
6126    }
6127
6128    impl<'a> regalloc2::Function for RegAllocBlock<'a> {
6129        fn num_insts(&self) -> usize {
6130            self.instructions.len()
6131        }
6132
6133        fn num_blocks(&self) -> usize {
6134            1
6135        }
6136
6137        fn entry_block(&self) -> regalloc2::Block {
6138            regalloc2::Block(0)
6139        }
6140
6141        fn block_insns(&self, _block: regalloc2::Block) -> regalloc2::InstRange {
6142            regalloc2::InstRange::forward(regalloc2::Inst(0), regalloc2::Inst(self.instructions.len() as u32))
6143        }
6144
6145        fn block_succs(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
6146            &[]
6147        }
6148
6149        fn block_preds(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
6150            &[]
6151        }
6152
6153        fn block_params(&self, _block: regalloc2::Block) -> &[regalloc2::VReg] {
6154            &[]
6155        }
6156
6157        fn is_ret(&self, insn: regalloc2::Inst) -> bool {
6158            insn.0 as usize + 1 == self.instructions.len()
6159        }
6160
6161        fn is_branch(&self, _insn: regalloc2::Inst) -> bool {
6162            false
6163        }
6164
6165        fn branch_blockparams(&self, _block: regalloc2::Block, _insn: regalloc2::Inst, _succ_idx: usize) -> &[regalloc2::VReg] {
6166            unimplemented!();
6167        }
6168
6169        fn inst_operands(&self, insn: regalloc2::Inst) -> &[regalloc2::Operand] {
6170            &self.instructions[insn.0 as usize]
6171        }
6172
6173        fn inst_clobbers(&self, _insn: regalloc2::Inst) -> regalloc2::PRegSet {
6174            regalloc2::PRegSet::empty()
6175        }
6176
6177        fn num_vregs(&self) -> usize {
6178            self.num_vregs
6179        }
6180
6181        fn spillslot_size(&self, _regclass: regalloc2::RegClass) -> usize {
6182            1
6183        }
6184    }
6185
6186    let fake_mask = RegMask::fake();
6187    for current in used_blocks {
6188        let block = &mut all_blocks[current.index()];
6189        let Some(start_at) = block
6190            .ops
6191            .iter()
6192            .position(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
6193        else {
6194            continue;
6195        };
6196
6197        let end_at = {
6198            let mut end_at = start_at + 1;
6199            for index in start_at..block.ops.len() {
6200                let instruction = block.ops[index].1;
6201                if !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty() {
6202                    end_at = index + 1;
6203                }
6204            }
6205            end_at
6206        };
6207
        // This block uses one or more "fake" registers which are not supported by the VM.
        //
        // So we have to spill those registers into memory and modify the block in such a way
        // that it only uses "real" registers natively supported by the VM.
        //
        // This is not going to be particularly pretty nor very fast at run time, but it is done only as a last resort.
6214
6215        let mut counter = 0;
6216        let mut reg_to_value_index: [usize; Reg::ALL.len()] = Default::default();
6217        let mut instructions = Vec::new();
6218
6219        let mut prologue = Vec::new();
6220        for reg in RegMask::all() {
6221            let value_index = counter;
6222            counter += 1;
6223            reg_to_value_index[reg as usize] = value_index;
6224            prologue.push(regalloc2::Operand::new(
6225                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6226                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
6227                regalloc2::OperandKind::Def,
6228                regalloc2::OperandPos::Late,
6229            ));
6230        }
6231
6232        instructions.push(prologue);
6233
6234        for nth_instruction in start_at..end_at {
6235            let (_, instruction) = &block.ops[nth_instruction];
6236            let mut operands = Vec::new();
6237
6238            for (reg, kind) in instruction.operands(imports) {
6239                match kind {
6240                    OpKind::Write => {
6241                        let value_index = counter;
6242                        counter += 1;
6243                        reg_to_value_index[reg as usize] = value_index;
6244                        operands.push(regalloc2::Operand::new(
6245                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6246                            if reg.fake_register_index().is_none() {
6247                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6248                            } else {
6249                                regalloc2::OperandConstraint::Reg
6250                            },
6251                            regalloc2::OperandKind::Def,
6252                            regalloc2::OperandPos::Late,
6253                        ));
6254                    }
6255                    OpKind::Read => {
6256                        let value_index = reg_to_value_index[reg as usize];
6257                        operands.push(regalloc2::Operand::new(
6258                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6259                            if reg.fake_register_index().is_none() {
6260                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6261                            } else {
6262                                regalloc2::OperandConstraint::Reg
6263                            },
6264                            regalloc2::OperandKind::Use,
6265                            regalloc2::OperandPos::Early,
6266                        ));
6267                    }
6268                    OpKind::ReadWrite => {
6269                        let value_index_read = reg_to_value_index[reg as usize];
6270                        operands.push(regalloc2::Operand::new(
6271                            regalloc2::VReg::new(value_index_read, regalloc2::RegClass::Int),
6272                            if reg.fake_register_index().is_none() {
6273                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6274                            } else {
6275                                regalloc2::OperandConstraint::Reg
6276                            },
6277                            regalloc2::OperandKind::Use,
6278                            regalloc2::OperandPos::Early,
6279                        ));
6280
6281                        let value_index_write = counter;
6282                        counter += 1;
6283
6284                        reg_to_value_index[reg as usize] = value_index_write;
6285                        operands.push(regalloc2::Operand::new(
6286                            regalloc2::VReg::new(value_index_write, regalloc2::RegClass::Int),
6287                            regalloc2::OperandConstraint::Reuse(operands.len() - 1),
6288                            regalloc2::OperandKind::Def,
6289                            regalloc2::OperandPos::Late,
6290                        ));
6291                    }
6292                }
6293            }
6294
6295            instructions.push(operands);
6296        }
6297
6298        let mut epilogue = Vec::new();
6299        for reg in RegMask::all() & !RegMask::fake() {
6300            let value_index = reg_to_value_index[reg as usize];
6301            epilogue.push(regalloc2::Operand::new(
6302                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6303                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
6304                regalloc2::OperandKind::Use,
6305                regalloc2::OperandPos::Early,
6306            ));
6307        }
6308
6309        instructions.push(epilogue);
6310
6311        let alloc_block = RegAllocBlock {
6312            instructions: &instructions,
6313            num_vregs: counter,
6314        };
6315
6316        let env = regalloc2::MachineEnv {
6317            preferred_regs_by_class: [
6318                [Reg::T0, Reg::T1, Reg::T2]
6319                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6320                    .into(),
6321                vec![],
6322                vec![],
6323            ],
6324            non_preferred_regs_by_class: [
6325                [Reg::S0, Reg::S1]
6326                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6327                    .into(),
6328                vec![],
6329                vec![],
6330            ],
6331            scratch_by_class: [None, None, None],
6332            fixed_stack_slots: vec![],
6333        };
6334
6335        let opts = regalloc2::RegallocOptions {
6336            validate_ssa: true,
6337            ..regalloc2::RegallocOptions::default()
6338        };
6339
6340        let output = match regalloc2::run(&alloc_block, &env, &opts) {
6341            Ok(output) => output,
6342            Err(regalloc2::RegAllocError::SSA(vreg, inst)) => {
6343                let nth_instruction: isize = inst.index() as isize - 1 + start_at as isize;
6344                let instruction = block.ops.get(nth_instruction as usize).map(|(_, instruction)| instruction);
6345                panic!("internal error: register allocation failed because of invalid SSA for {vreg} for instruction {instruction:?}");
6346            }
6347            Err(error) => {
6348                panic!("internal error: register allocation failed: {error}")
6349            }
6350        };
6351
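        // Re-emit the affected range, interleaving the move/spill edits produced by
        // `regalloc2` with the original instructions rewritten to use the allocated registers.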
6352        let mut buffer = Vec::new();
6353        let mut edits = output.edits.into_iter().peekable();
6354        for nth_instruction in start_at..=end_at {
6355            while let Some((next_edit_at, edit)) = edits.peek() {
6356                let target_nth_instruction: isize = next_edit_at.inst().index() as isize - 1 + start_at as isize;
6357                if target_nth_instruction < 0
6358                    || target_nth_instruction > nth_instruction as isize
6359                    || (target_nth_instruction == nth_instruction as isize && next_edit_at.pos() == regalloc2::InstPosition::After)
6360                {
6361                    break;
6362                }
6363
6364                let target_nth_instruction = target_nth_instruction as usize;
6365                let regalloc2::Edit::Move { from: src, to: dst } = edit.clone();
6366
6367                // Advance the iterator so that we can use `continue` later.
6368                edits.next();
6369
6370                let reg_size = if is_rv64 { 8 } else { 4 };
6371                let src_reg = src.as_reg();
6372                let dst_reg = dst.as_reg();
6373                let new_instruction = match (dst_reg, src_reg) {
6374                    (Some(dst_reg), None) => {
6375                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
6376                        let src_slot = src.as_stack().unwrap();
6377                        let offset = src_slot.index() * reg_size;
6378                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
6379                        BasicInst::LoadAbsolute {
6380                            kind: if is_rv64 { LoadKind::U64 } else { LoadKind::I32 },
6381                            dst: dst_reg,
6382                            target: SectionTarget {
6383                                section_index: section_regspill,
6384                                offset: cast(offset).to_u64(),
6385                            },
6386                        }
6387                    }
6388                    (None, Some(src_reg)) => {
6389                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
6390                        let dst_slot = dst.as_stack().unwrap();
6391                        let offset = dst_slot.index() * reg_size;
6392                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
6393                        BasicInst::StoreAbsolute {
6394                            kind: if is_rv64 { StoreKind::U64 } else { StoreKind::U32 },
6395                            src: src_reg.into(),
6396                            target: SectionTarget {
6397                                section_index: section_regspill,
6398                                offset: cast(offset).to_u64(),
6399                            },
6400                        }
6401                    }
6402                    (Some(dst_reg), Some(src_reg)) => {
6403                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
6404                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
6405                        if src_reg == dst_reg {
6406                            continue;
6407                        }
6408
6409                        BasicInst::MoveReg {
6410                            dst: dst_reg,
6411                            src: src_reg,
6412                        }
6413                    }
6414                    // Won't be emitted according to `regalloc2` docs.
6415                    (None, None) => unreachable!(),
6416                };
6417
6418                log::trace!("Injected:\n     {new_instruction:?}");
6419
6420                let source = block.ops.get(target_nth_instruction).or(block.ops.last()).unwrap().0.clone();
6421                buffer.push((source, new_instruction));
6422            }
6423
6424            if nth_instruction == end_at {
6425                assert!(edits.next().is_none());
6426                break;
6427            }
6428
6429            let (source, instruction) = &block.ops[nth_instruction];
6430            let mut alloc_index = output.inst_alloc_offsets[nth_instruction - start_at + 1];
6431            let new_instruction = instruction
6432                .map_register(|reg, _| {
6433                    let alloc = &output.allocs[alloc_index as usize];
6434                    alloc_index += 1;
6435
6436                    assert_eq!(alloc.kind(), regalloc2::AllocationKind::Reg);
6437                    let allocated_reg = Reg::from_usize(alloc.as_reg().unwrap().hw_enc() as usize).unwrap();
6438                    if reg.fake_register_index().is_none() {
6439                        assert_eq!(reg, allocated_reg);
6440                    } else {
6441                        assert_ne!(reg, allocated_reg);
6442                        assert!(allocated_reg.fake_register_index().is_none());
6443                    }
6444
6445                    allocated_reg
6446                })
6447                .unwrap_or(*instruction);
6448
6449            if *instruction == new_instruction {
6450                log::trace!("Unmodified:\n     {instruction:?}");
6451            } else {
6452                log::trace!("Replaced:\n     {instruction:?}\n  -> {new_instruction:?}");
6453            }
6454
6455            buffer.push((source.clone(), new_instruction));
6456        }
6457
6458        assert!(edits.next().is_none());
6459
6460        reachability_graph
6461            .for_data
6462            .entry(section_regspill)
6463            .or_default()
6464            .address_taken_in
6465            .insert(*current);
6466
6467        block.ops.splice(start_at..end_at, buffer);
6468    }
6469
6470    for current in used_blocks {
6471        if all_blocks[current.index()]
6472            .ops
6473            .iter()
6474            .any(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
6475        {
6476            panic!("internal error: not all fake registers were removed")
6477        }
6478    }
6479}
6480
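// Within each basic block, tracks which registers currently hold which immediate
// values and rewrites immediate operands to reuse those registers instead of
// materializing the same constant again; shift and rotate amounts are deliberately
// left as immediates.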
6481#[deny(clippy::as_conversions)]
6482fn replace_immediates_with_registers(
6483    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
6484    imports: &[Import],
6485    used_blocks: &[BlockTarget],
6486) {
6487    let mut imm_to_reg: HashMap<i64, RegMask> = HashMap::new();
6488    for block_target in used_blocks {
6489        let mut reg_to_imm: [Option<i64>; Reg::ALL.len()] = [None; Reg::ALL.len()];
6490        imm_to_reg.clear();
6491
6492        // If there already exists a register which contains a given immediate value
6493        // then there's no point in duplicating it here again; just use that register.
6494        macro_rules! replace {
6495            ($src:ident) => {
6496                if let RegImm::Imm(imm) = $src {
6497                    let imm = cast(*imm).to_i64_sign_extend();
6498                    if imm != 0 {
6499                        let mask = imm_to_reg.get(&imm).copied().unwrap_or(RegMask::empty());
6500                        if let Some(reg) = mask.into_iter().next() {
6501                            *$src = RegImm::Reg(reg);
6502                        }
6503                    }
6504                }
6505            };
6506        }
6507
6508        for (_, op) in &mut all_blocks[block_target.index()].ops {
6509            match op {
6510                BasicInst::LoadImmediate { dst, imm } => {
6511                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
6512                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
6513                    }
6514
6515                    let imm = cast(*imm).to_i64_sign_extend();
6516                    imm_to_reg.entry(imm).or_insert(RegMask::empty()).insert(*dst);
6517                    reg_to_imm[dst.to_usize()] = Some(imm);
6518                    continue;
6519                }
6520                BasicInst::LoadImmediate64 { dst, imm } => {
6521                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
6522                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
6523                    }
6524
6525                    imm_to_reg.entry(*imm).or_insert(RegMask::empty()).insert(*dst);
6526                    reg_to_imm[dst.to_usize()] = Some(*imm);
6527                    continue;
6528                }
6529                BasicInst::AnyAny {
6530                    kind,
6531                    ref mut src1,
6532                    ref mut src2,
6533                    ..
6534                } => {
6535                    replace!(src1);
6536                    if !matches!(
6537                        kind,
6538                        AnyAnyKind::ShiftLogicalLeft32
6539                            | AnyAnyKind::ShiftLogicalRight32
6540                            | AnyAnyKind::ShiftArithmeticRight32
6541                            | AnyAnyKind::ShiftLogicalLeft64
6542                            | AnyAnyKind::ShiftLogicalRight64
6543                            | AnyAnyKind::ShiftArithmeticRight64
6544                            | AnyAnyKind::ShiftLogicalLeft32AndSignExtend
6545                            | AnyAnyKind::ShiftLogicalRight32AndSignExtend
6546                            | AnyAnyKind::ShiftArithmeticRight32AndSignExtend
6547                            | AnyAnyKind::RotateRight32
6548                            | AnyAnyKind::RotateRight32AndSignExtend
6549                            | AnyAnyKind::RotateRight64
6550                    ) {
6551                        replace!(src2);
6552                    }
6553                }
6554                BasicInst::StoreAbsolute { src, .. } => {
6555                    replace!(src);
6556                }
6557                BasicInst::StoreIndirect { src, .. } => {
6558                    replace!(src);
6559                }
6560                BasicInst::Cmov { src, .. } => {
6561                    replace!(src);
6562                }
6563                _ => {}
6564            }
6565
6566            for reg in op.dst_mask(imports) {
6567                if let Some(imm) = reg_to_imm[reg.to_usize()].take() {
6568                    imm_to_reg.get_mut(&imm).unwrap().remove(reg);
6569                }
6570            }
6571        }
6572
6573        if let ControlInst::Branch {
6574            ref mut src1,
6575            ref mut src2,
6576            ..
6577        } = all_blocks[block_target.index()].next.instruction
6578        {
6579            replace!(src1);
6580            replace!(src2);
6581        }
6582    }
6583}
6584
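// Collects every section target which can be jumped to: targets referenced by code,
// by relocations inside data sections, and by exports.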
6585fn harvest_all_jump_targets<H>(
6586    elf: &Elf<H>,
6587    data_sections_set: &HashSet<SectionIndex>,
6588    code_sections_set: &HashSet<SectionIndex>,
6589    instructions: &[(Source, InstExt<SectionTarget, SectionTarget>)],
6590    relocations: &BTreeMap<SectionTarget, RelocationKind>,
6591    exports: &[Export],
6592) -> Result<HashSet<SectionTarget>, ProgramFromElfError>
6593where
6594    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
6595{
6596    let mut all_jump_targets = HashSet::new();
6597    for (_, instruction) in instructions {
6598        match instruction {
6599            InstExt::Basic(instruction) => {
6600                let (data_target, code_or_data_target) = instruction.target();
6601                if let Some(target) = data_target {
6602                    if !data_sections_set.contains(&target.section_index) {
6603                        return Err(ProgramFromElfError::other(
6604                            "found basic instruction which refers to a non-data section",
6605                        ));
6606                    }
6607                }
6608
6609                if let Some(target) = code_or_data_target {
6610                    if code_sections_set.contains(&target.section_index) {
6611                        if all_jump_targets.insert(target) {
6612                            log::trace!("Adding jump target: {target} (referenced indirectly by code)");
6613                        }
6614                    } else if !data_sections_set.contains(&target.section_index) {
6615                        return Err(ProgramFromElfError::other(
6616                            "found basic instruction which refers to neither a data nor a text section",
6617                        ));
6618                    }
6619                }
6620            }
6621            InstExt::Control(instruction) => {
6622                for target in instruction.targets().into_iter().flatten() {
6623                    if !code_sections_set.contains(&target.section_index) {
6624                        return Err(ProgramFromElfError::other(
6625                            "found control instruction which refers to a non-text section",
6626                        ));
6627                    }
6628
6629                    if all_jump_targets.insert(*target) {
6630                        log::trace!("Adding jump target: {target} (referenced by a control instruction)");
6631                    }
6632                }
6633            }
6634        }
6635    }
6636
6637    for (source_location, relocation) in relocations {
6638        if !data_sections_set.contains(&source_location.section_index) {
6639            continue;
6640        }
6641
6642        for target in relocation.targets().into_iter().flatten() {
6643            #[allow(clippy::collapsible_if)]
6644            if code_sections_set.contains(&target.section_index) {
6645                if all_jump_targets.insert(target) {
6646                    log::trace!(
6647                        "Adding jump target: {target} (referenced by relocation from {source_location} in '{}')",
6648                        elf.section_by_index(source_location.section_index).name()
6649                    );
6650                }
6651            }
6652        }
6653    }
6654
6655    for export in exports {
6656        let target = export.location;
6657        if !code_sections_set.contains(&target.section_index) {
6658            return Err(ProgramFromElfError::other("export points to a non-code section"));
6659        }
6660
6661        if all_jump_targets.insert(target) {
6662            log::trace!("Adding jump target: {target} (referenced by export)");
6663        }
6664    }
6665
6666    Ok(all_jump_targets)
6667}
6668
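// A de-duplicating FIFO work queue. `pop_unique` leaves popped values in the set so
// that they can never be queued again; `pop_non_unique` also removes them from the
// set so that they can be re-queued later.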
6669struct VecSet<T> {
6670    vec: VecDeque<T>,
6671    set: HashSet<T>,
6672}
6673
6674impl<T> VecSet<T> {
6675    fn new() -> Self {
6676        Self {
6677            vec: VecDeque::new(),
6678            set: HashSet::new(),
6679        }
6680    }
6681
6682    fn pop_unique(&mut self) -> Option<T> {
6683        self.vec.pop_front()
6684    }
6685
6686    fn pop_non_unique(&mut self) -> Option<T>
6687    where
6688        T: core::hash::Hash + Eq,
6689    {
6690        // Popping from the front instead of the back cuts down on the time
        // the optimizer takes for the Westend runtime from ~53s down to ~2.6s.
6692        let value = self.vec.pop_front()?;
6693        self.set.remove(&value);
6694        Some(value)
6695    }
6696
6697    fn push(&mut self, value: T)
6698    where
6699        T: core::hash::Hash + Eq + Clone,
6700    {
6701        if self.set.insert(value.clone()) {
6702            self.vec.push_back(value);
6703        }
6704    }
6705
6706    fn is_empty(&self) -> bool {
6707        self.vec.is_empty()
6708    }
6709
6710    fn into_set(self) -> HashSet<T> {
6711        self.set
6712    }
6713}
6714
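// Tracks why each basic block and data section is considered reachable: which blocks
// jump to it or take its address, and which data sections reference it.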
6715#[derive(PartialEq, Eq, Debug, Default)]
6716struct ReachabilityGraph {
6717    for_code: BTreeMap<BlockTarget, Reachability>,
6718    for_data: BTreeMap<SectionIndex, Reachability>,
6719    code_references_in_data_section: BTreeMap<SectionIndex, Vec<BlockTarget>>,
6720    data_references_in_data_section: BTreeMap<SectionIndex, Vec<SectionIndex>>,
6721}
6722
6723impl ReachabilityGraph {
6724    fn is_code_reachable(&self, block_target: BlockTarget) -> bool {
6725        if let Some(reachability) = self.for_code.get(&block_target) {
6726            assert!(
6727                !reachability.is_unreachable(),
6728                "Block {block_target:?} is unreachable and yet it wasn't removed from the graph!"
6729            );
6730            true
6731        } else {
6732            false
6733        }
6734    }
6735
6736    fn is_data_section_reachable(&self, section_index: SectionIndex) -> bool {
6737        if let Some(reachability) = self.for_data.get(&section_index) {
6738            assert!(!reachability.is_unreachable());
6739            true
6740        } else {
6741            false
6742        }
6743    }
6744
6745    fn mark_data_section_reachable(&mut self, section_index: SectionIndex) {
6746        self.for_data.entry(section_index).or_default().always_reachable = true;
6747    }
6748}
6749
6750#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
6751struct Reachability {
6752    reachable_from: BTreeSet<BlockTarget>,
6753    address_taken_in: BTreeSet<BlockTarget>,
6754    referenced_by_data: BTreeSet<SectionIndex>,
6755    always_reachable: bool,
6756    always_dynamically_reachable: bool,
6757    exports: Vec<usize>,
6758}
6759
6760impl Reachability {
6761    fn is_only_reachable_from(&self, block_target: BlockTarget) -> bool {
6762        !self.always_reachable
6763            && !self.always_dynamically_reachable
6764            && self.referenced_by_data.is_empty()
6765            && self.address_taken_in.is_empty()
6766            && self.reachable_from.len() == 1
6767            && self.reachable_from.contains(&block_target)
6768            && self.exports.is_empty()
6769    }
6770
6771    fn is_unreachable(&self) -> bool {
6772        self.reachable_from.is_empty()
6773            && self.address_taken_in.is_empty()
6774            && self.referenced_by_data.is_empty()
6775            && !self.always_reachable
6776            && !self.always_dynamically_reachable
6777            && self.exports.is_empty()
6778    }
6779
6780    fn is_dynamically_reachable(&self) -> bool {
6781        !self.address_taken_in.is_empty() || !self.referenced_by_data.is_empty() || self.always_dynamically_reachable
6782    }
6783
6784    fn always_reachable_or_exported(&self) -> bool {
6785        self.always_reachable || !self.exports.is_empty()
6786    }
6787}
6788
6789#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
6790enum ExtRef {
6791    Address(BlockTarget),
6792    Jump(BlockTarget),
6793    DataAddress(SectionIndex),
6794}
6795
6796fn each_reference_for_basic_instruction(instruction: &BasicInst<AnyTarget>, mut cb: impl FnMut(ExtRef)) {
6797    let (data_target, code_or_data_target) = instruction.target();
6798    if let Some(target) = data_target {
6799        cb(ExtRef::DataAddress(target.section_index));
6800    }
6801
6802    if let Some(target) = code_or_data_target {
6803        match target {
6804            AnyTarget::Code(target) => {
6805                cb(ExtRef::Address(target));
6806            }
6807            AnyTarget::Data(target) => {
6808                cb(ExtRef::DataAddress(target.section_index));
6809            }
6810        }
6811    }
6812}
6813
6814fn each_reference_for_control_instruction(instruction: &ControlInst<BlockTarget>, mut cb: impl FnMut(ExtRef)) {
6815    match *instruction {
6816        ControlInst::Jump { target } => {
6817            cb(ExtRef::Jump(target));
6818        }
6819        ControlInst::Call { target, target_return, .. } => {
6820            cb(ExtRef::Jump(target));
6821            cb(ExtRef::Address(target_return));
6822        }
6823        ControlInst::CallIndirect { target_return, .. } => {
6824            cb(ExtRef::Address(target_return));
6825        }
6826        ControlInst::Branch {
6827            target_true, target_false, ..
6828        } => {
6829            cb(ExtRef::Jump(target_true));
6830            cb(ExtRef::Jump(target_false));
6831        }
6832        ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
6833    }
6834}
6835
6836fn each_reference(block: &BasicBlock<AnyTarget, BlockTarget>, mut cb: impl FnMut(ExtRef)) {
6837    for (_, instruction) in &block.ops {
6838        each_reference_for_basic_instruction(instruction, &mut cb);
6839    }
6840
6841    each_reference_for_control_instruction(&block.next.instruction, cb);
6842}
6843
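// Computes the reachability graph using a worklist algorithm: the queues are seeded
// with the exported blocks, then the block and section queues are alternately drained,
// following jumps, taken addresses and relocations until a fixed point is reached.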
6844fn calculate_reachability(
6845    section_to_block: &HashMap<SectionTarget, BlockTarget>,
6846    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
6847    data_sections_set: &HashSet<SectionIndex>,
6848    exports: &[Export],
6849    relocations: &BTreeMap<SectionTarget, RelocationKind>,
6850) -> Result<ReachabilityGraph, ProgramFromElfError> {
6851    let mut graph = ReachabilityGraph::default();
6852    let mut data_queue: VecSet<SectionTarget> = VecSet::new();
6853    let mut block_queue: VecSet<BlockTarget> = VecSet::new();
6854    let mut section_queue: VecSet<SectionIndex> = VecSet::new();
6855    let mut relocations_per_section: HashMap<SectionIndex, Vec<&RelocationKind>> = HashMap::new();
6856    for (relocation_location, relocation) in relocations.iter() {
        relocations_per_section
            .entry(relocation_location.section_index)
            .or_default()
            .push(relocation);
6861    }
6862
6863    for (export_index, export) in exports.iter().enumerate() {
6864        let Some(&block_target) = section_to_block.get(&export.location) else {
6865            return Err(ProgramFromElfError::other("export points to a non-block"));
6866        };
6867
6868        graph.for_code.entry(block_target).or_default().exports.push(export_index);
6869        block_queue.push(block_target);
6870    }
6871
6872    while !block_queue.is_empty() || !data_queue.is_empty() {
6873        while let Some(current_block) = block_queue.pop_unique() {
6874            each_reference(&all_blocks[current_block.index()], |ext| match ext {
6875                ExtRef::Jump(target) => {
6876                    graph.for_code.entry(target).or_default().reachable_from.insert(current_block);
6877                    block_queue.push(target);
6878                }
6879                ExtRef::Address(target) => {
6880                    graph.for_code.entry(target).or_default().address_taken_in.insert(current_block);
6881                    block_queue.push(target)
6882                }
6883                ExtRef::DataAddress(target) => {
6884                    graph.for_data.entry(target).or_default().address_taken_in.insert(current_block);
6885                    section_queue.push(target)
6886                }
6887            });
6888        }
6889
6890        while let Some(target) = data_queue.pop_unique() {
6891            assert!(!section_to_block.contains_key(&target));
6892            assert!(data_sections_set.contains(&target.section_index));
6893            section_queue.push(target.section_index);
6894        }
6895
6896        while let Some(section_index) = section_queue.pop_unique() {
6897            let Some(local_relocations) = relocations_per_section.get(&section_index) else {
6898                continue;
6899            };
6900            for relocation in local_relocations {
6901                for relocation_target in relocation.targets().into_iter().flatten() {
6902                    if let Some(&block_target) = section_to_block.get(&relocation_target) {
6903                        graph
6904                            .code_references_in_data_section
6905                            .entry(section_index)
6906                            .or_default()
6907                            .push(block_target);
6908
6909                        graph
6910                            .for_code
6911                            .entry(block_target)
6912                            .or_default()
6913                            .referenced_by_data
6914                            .insert(section_index);
6915
6916                        block_queue.push(block_target);
6917                    } else {
6918                        graph
6919                            .data_references_in_data_section
6920                            .entry(section_index)
6921                            .or_default()
6922                            .push(relocation_target.section_index);
6923
6924                        graph
6925                            .for_data
6926                            .entry(relocation_target.section_index)
6927                            .or_default()
6928                            .referenced_by_data
6929                            .insert(section_index);
6930
6931                        data_queue.push(relocation_target);
6932                    }
6933                }
6934            }
6935        }
6936    }
6937
6938    for list in graph.code_references_in_data_section.values_mut() {
6939        list.sort_unstable();
6940        list.dedup();
6941    }
6942
6943    for list in graph.data_references_in_data_section.values_mut() {
6944        list.sort_unstable();
6945        list.dedup();
6946    }
6947
6948    for reachability in graph.for_code.values() {
6949        assert!(!reachability.is_unreachable());
6950    }
6951
6952    for reachability in graph.for_data.values() {
6953        assert!(!reachability.is_unreachable());
6954    }
6955
6956    assert_eq!(block_queue.set.len(), graph.for_code.len());
6957    Ok(graph)
6958}
6959
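/// A set of registers represented as a bitmask, one bit per register, in
/// `Reg::ALL` order (this includes the fake `E0`..`E3` registers).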
6960#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Default)]
6961struct RegMask(u32);
6962
6963impl core::fmt::Debug for RegMask {
6964    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
6965        fmt.write_str("(")?;
6966        let mut is_first = true;
6967        for (nth, reg) in Reg::ALL.iter().enumerate() {
6968            if self.0 & (1 << nth) != 0 {
6969                if is_first {
6970                    is_first = false;
6971                } else {
6972                    fmt.write_str("|")?;
6973                }
6974                fmt.write_str(reg.name())?;
6975            }
6976        }
6977        fmt.write_str(")")?;
6978        Ok(())
6979    }
6980}
6981
6982struct RegMaskIter {
6983    mask: u32,
6984    remaining: &'static [Reg],
6985}
6986
6987impl Iterator for RegMaskIter {
6988    type Item = Reg;
6989    fn next(&mut self) -> Option<Self::Item> {
6990        loop {
6991            let reg = *self.remaining.first()?;
6992            let is_set = (self.mask & 1) != 0;
6993            self.remaining = &self.remaining[1..];
6994            self.mask >>= 1;
6995
6996            if is_set {
6997                return Some(reg);
6998            }
6999        }
7000    }
7001}
7002
7003impl IntoIterator for RegMask {
7004    type Item = Reg;
7005    type IntoIter = RegMaskIter;
7006
7007    fn into_iter(self) -> Self::IntoIter {
7008        RegMaskIter {
7009            mask: self.0,
7010            remaining: &Reg::ALL,
7011        }
7012    }
7013}
7014
7015impl RegMask {
7016    fn all() -> Self {
7017        RegMask((1 << Reg::ALL.len()) - 1)
7018    }
7019
7020    fn fake() -> Self {
7021        let mut mask = RegMask(0);
7022        for reg in Reg::FAKE {
7023            mask.insert(reg);
7024        }
7025        mask
7026    }
7027
7028    fn empty() -> Self {
7029        RegMask(0)
7030    }
7031
7032    fn is_empty(self) -> bool {
7033        self == Self::empty()
7034    }
7035
7036    fn remove(&mut self, mask: impl Into<RegMask>) {
7037        *self &= !mask.into();
7038    }
7039
7040    fn insert(&mut self, mask: impl Into<RegMask>) {
7041        *self |= mask.into();
7042    }
7043}
7044
7045impl From<Reg> for RegMask {
7046    fn from(reg: Reg) -> Self {
7047        RegMask(1 << (reg as usize))
7048    }
7049}
7050
7051impl From<RegImm> for RegMask {
7052    fn from(rm: RegImm) -> Self {
7053        match rm {
7054            RegImm::Reg(reg) => reg.into(),
7055            RegImm::Imm(_) => Self::empty(),
7056        }
7057    }
7058}
7059
7060impl core::ops::Not for RegMask {
7061    type Output = Self;
7062    fn not(self) -> Self {
7063        RegMask(!self.0)
7064    }
7065}
7066
7067impl core::ops::BitAnd for RegMask {
7068    type Output = Self;
7069    fn bitand(self, rhs: RegMask) -> Self {
7070        RegMask(self.0 & rhs.0)
7071    }
7072}
7073
7074impl core::ops::BitAnd<Reg> for RegMask {
7075    type Output = Self;
7076    fn bitand(self, rhs: Reg) -> Self {
7077        self & RegMask::from(rhs)
7078    }
7079}
7080
7081impl core::ops::BitAndAssign for RegMask {
7082    fn bitand_assign(&mut self, rhs: RegMask) {
7083        self.0 &= rhs.0;
7084    }
7085}
7086
7087impl core::ops::BitAndAssign<Reg> for RegMask {
7088    fn bitand_assign(&mut self, rhs: Reg) {
7089        self.bitand_assign(RegMask::from(rhs));
7090    }
7091}
7092
7093impl core::ops::BitOr for RegMask {
7094    type Output = Self;
7095    fn bitor(self, rhs: RegMask) -> Self {
7096        RegMask(self.0 | rhs.0)
7097    }
7098}
7099
7100impl core::ops::BitOr<Reg> for RegMask {
7101    type Output = Self;
7102    fn bitor(self, rhs: Reg) -> Self {
7103        self | RegMask::from(rhs)
7104    }
7105}
7106
7107impl core::ops::BitOrAssign for RegMask {
7108    fn bitor_assign(&mut self, rhs: RegMask) {
7109        self.0 |= rhs.0;
7110    }
7111}
7112
7113impl core::ops::BitOrAssign<Reg> for RegMask {
7114    fn bitor_assign(&mut self, rhs: Reg) {
7115        self.bitor_assign(RegMask::from(rhs));
7116    }
7117}
7118
7119#[test]
7120fn test_all_regs_indexes() {
7121    for (index, reg) in Reg::ALL.iter().enumerate() {
7122        assert_eq!(index, *reg as usize);
7123    }
7124}
7125
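// A small sanity check for the `RegMask` set operations defined above; like
// `test_all_regs_indexes`, it only exercises code from this file.
#[test]
fn test_reg_mask_set_operations() {
    let mut mask = RegMask::empty();
    assert!(mask.is_empty());

    mask.insert(Reg::SP);
    mask.insert(Reg::A0);
    // Iteration yields registers in `Reg::ALL` order.
    assert_eq!(mask.into_iter().collect::<Vec<_>>(), vec![Reg::SP, Reg::A0]);

    mask.remove(Reg::SP);
    assert_eq!(mask, RegMask::from(Reg::A0));

    // None of the fake registers overlap with a real one.
    assert!((RegMask::fake() & Reg::A0).is_empty());
}
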
7126#[derive(Copy, Clone)]
7127struct JumpTarget {
7128    static_target: u32,          // Index in `used_blocks`; the target of direct jumps.
7129    dynamic_target: Option<u32>, // 1-based jump table slot, if the block's address is taken.
7130}
7131
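// Builds the jump table. Every used block gets a static target (its index in
// `used_blocks`); blocks which are dynamically reachable additionally get a
// 1-based slot in the returned jump table.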
7132fn build_jump_table(
7133    total_block_count: usize,
7134    used_blocks: &[BlockTarget],
7135    reachability_graph: &ReachabilityGraph,
7136) -> (Vec<u32>, Vec<Option<JumpTarget>>) {
7137    let mut jump_target_for_block: Vec<Option<JumpTarget>> = Vec::new();
7138    jump_target_for_block.resize(total_block_count, None);
7139
7140    let mut jump_table = Vec::new();
7141    for (static_target, current) in used_blocks.iter().enumerate() {
7142        let reachability = reachability_graph.for_code.get(current).unwrap();
7143        assert!(!reachability.is_unreachable());
7144
7145        let dynamic_target = if reachability.is_dynamically_reachable() {
7146            let dynamic_target: u32 = (jump_table.len() + 1).try_into().expect("jump table index overflow");
7147            jump_table.push(static_target.try_into().expect("jump table index overflow"));
7148            Some(dynamic_target)
7149        } else {
7150            None
7151        };
7152
7153        jump_target_for_block[current.index()] = Some(JumpTarget {
7154            static_target: static_target.try_into().expect("jump table index overflow"),
7155            dynamic_target,
7156        });
7157    }
7158
7159    (jump_table, jump_target_for_block)
7160}
7161
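// Determines which blocks can fall through to the block emitted directly after
// them, that is, blocks whose natural continuation (a jump target, a branch's
// false target, or a call's return target) is the next block in `used_blocks`.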
7162fn calculate_whether_can_fallthrough(
7163    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
7164    used_blocks: &[BlockTarget],
7165) -> HashSet<BlockTarget> {
7166    let mut can_fallthrough_to_next_block: HashSet<BlockTarget> = HashSet::new();
7167    for window in used_blocks.windows(2) {
7168        match all_blocks[window[0].index()].next.instruction {
7169            ControlInst::Jump { target }
7170            | ControlInst::Branch { target_false: target, .. }
7171            | ControlInst::Call { target_return: target, .. }
7172            | ControlInst::CallIndirect { target_return: target, .. } => {
7173                if target == window[1] {
7174                    can_fallthrough_to_next_block.insert(window[0]);
7175                }
7176            }
7177
7178            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
7179        }
7180    }
7181
7182    can_fallthrough_to_next_block
7183}
7184
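// Lowers the basic blocks into the final instruction stream, resolving data
// addresses, jump table slots and import indices along the way.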
7185#[allow(clippy::too_many_arguments)]
7186fn emit_code(
7187    section_to_function_name: &BTreeMap<SectionTarget, &str>,
7188    imports: &[Import],
7189    base_address_for_section: &HashMap<SectionIndex, u64>,
7190    section_got: SectionIndex,
7191    target_to_got_offset: &HashMap<AnyTarget, u64>,
7192    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
7193    used_blocks: &[BlockTarget],
7194    used_imports: &HashSet<usize>,
7195    jump_target_for_block: &[Option<JumpTarget>],
7196    is_optimized: bool,
7197    is_rv64: bool,
7198) -> Result<Vec<(SourceStack, Instruction)>, ProgramFromElfError> {
7199    use polkavm_common::program::Reg as PReg;
7200    fn conv_reg(reg: Reg) -> polkavm_common::program::RawReg {
7201        match reg {
7202            Reg::RA => PReg::RA,
7203            Reg::SP => PReg::SP,
7204            Reg::T0 => PReg::T0,
7205            Reg::T1 => PReg::T1,
7206            Reg::T2 => PReg::T2,
7207            Reg::S0 => PReg::S0,
7208            Reg::S1 => PReg::S1,
7209            Reg::A0 => PReg::A0,
7210            Reg::A1 => PReg::A1,
7211            Reg::A2 => PReg::A2,
7212            Reg::A3 => PReg::A3,
7213            Reg::A4 => PReg::A4,
7214            Reg::A5 => PReg::A5,
7215            Reg::E0 | Reg::E1 | Reg::E2 | Reg::E3 => {
7216                unreachable!("internal error: temporary register was not spilled into memory");
7217            }
7218        }
7219        .into()
7220    }
7221
7222    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, used_blocks);
7223    let get_data_address = |source: &SourceStack, target: SectionTarget| -> Result<u32, ProgramFromElfError> {
7224        if let Some(&base_address) = base_address_for_section.get(&target.section_index) {
7225            let Some(address) = base_address.checked_add(target.offset) else {
7226                return Err(ProgramFromElfError::other(format!(
7227                    "address overflow when relocating instruction in {}",
7228                    source.display(section_to_function_name)
7229                )));
7230            };
7231
7232            let Ok(address) = address.try_into() else {
7233                return Err(ProgramFromElfError::other("address overflow when casting"));
7234            };
7235
7236            Ok(address)
7237        } else {
7238            Err(ProgramFromElfError::other("internal error: section with no base address"))
7239        }
7240    };
7241
7242    let get_jump_target = |target: BlockTarget| -> Result<JumpTarget, ProgramFromElfError> {
7243        let Some(jump_target) = jump_target_for_block[target.index()] else {
7244            return Err(ProgramFromElfError::other("out of range jump target"));
7245        };
7246
7247        Ok(jump_target)
7248    };
7249
7250    let mut basic_block_delimited = true;
7251    let mut code: Vec<(SourceStack, Instruction)> = Vec::new();
7252    for block_target in used_blocks {
7253        let block = &all_blocks[block_target.index()];
7254
7255        if !basic_block_delimited {
7256            basic_block_delimited = true;
7257            code.push((
7258                Source {
7259                    section_index: block.source.section_index,
7260                    offset_range: (block.source.offset_range.start..block.source.offset_range.start + 4).into(),
7261                }
7262                .into(),
7263                Instruction::fallthrough,
7264            ));
7265        }
7266
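        // A helper macro which maps each `kind` variant to the corresponding
        // `Instruction` constructor, passing the same `args` tuple to all of them.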
7267        macro_rules! codegen {
7268            (
7269                args = $args:tt,
7270                kind = $kind:expr,
7271
7272                {
7273                    $($p:pat => $inst:ident,)+
7274                }
7275            ) => {
7276                match $kind {
7277                    $(
7278                        $p => Instruction::$inst $args
7279                    ),+
7280                }
7281            }
7282        }
7283
7284        for (source, op) in &block.ops {
7285            let op = match *op {
7286                BasicInst::LoadImmediate { dst, imm } => Instruction::load_imm(conv_reg(dst), cast(imm).to_unsigned()),
7287                BasicInst::LoadImmediate64 { dst, imm } => {
7288                    if is_rv64 {
7289                        Instruction::load_imm64(conv_reg(dst), cast(imm).to_unsigned())
7290                    } else {
7291                        unreachable!("internal error: load_imm64 found when processing 32-bit binary")
7292                    }
7293                }
7294                BasicInst::LoadAbsolute { kind, dst, target } => {
7295                    codegen! {
7296                        args = (conv_reg(dst), get_data_address(source, target)?),
7297                        kind = kind,
7298                        {
7299                            LoadKind::I8 => load_i8,
7300                            LoadKind::I16 => load_i16,
7301                            LoadKind::I32 => load_i32,
7302                            LoadKind::U8 => load_u8,
7303                            LoadKind::U16 => load_u16,
7304                            LoadKind::U32 => load_u32,
7305                            LoadKind::U64 => load_u64,
7306                        }
7307                    }
7308                }
7309                BasicInst::StoreAbsolute { kind, src, target } => {
7310                    let target = get_data_address(source, target)?;
7311                    match src {
7312                        RegImm::Reg(src) => {
7313                            codegen! {
7314                                args = (conv_reg(src), target),
7315                                kind = kind,
7316                                {
7317                                    StoreKind::U64 => store_u64,
7318                                    StoreKind::U32 => store_u32,
7319                                    StoreKind::U16 => store_u16,
7320                                    StoreKind::U8 => store_u8,
7321                                }
7322                            }
7323                        }
7324                        RegImm::Imm(value) => {
7325                            codegen! {
7326                                args = (target, cast(value).to_unsigned()),
7327                                kind = kind,
7328                                {
7329                                    StoreKind::U64 => store_imm_u64,
7330                                    StoreKind::U32 => store_imm_u32,
7331                                    StoreKind::U16 => store_imm_u16,
7332                                    StoreKind::U8 => store_imm_u8,
7333                                }
7334                            }
7335                        }
7336                    }
7337                }
7338                BasicInst::LoadIndirect { kind, dst, base, offset } => {
7339                    codegen! {
7340                        args = (conv_reg(dst), conv_reg(base), cast(offset).to_unsigned()),
7341                        kind = kind,
7342                        {
7343                            LoadKind::I8 => load_indirect_i8,
7344                            LoadKind::I16 => load_indirect_i16,
7345                            LoadKind::I32 => load_indirect_i32,
7346                            LoadKind::U8 => load_indirect_u8,
7347                            LoadKind::U16 => load_indirect_u16,
7348                            LoadKind::U32 => load_indirect_u32,
7349                            LoadKind::U64 => load_indirect_u64,
7350                        }
7351                    }
7352                }
7353                BasicInst::StoreIndirect { kind, src, base, offset } => match src {
7354                    RegImm::Reg(src) => {
7355                        codegen! {
7356                            args = (conv_reg(src), conv_reg(base), cast(offset).to_unsigned()),
7357                            kind = kind,
7358                            {
7359                                StoreKind::U64 => store_indirect_u64,
7360                                StoreKind::U32 => store_indirect_u32,
7361                                StoreKind::U16 => store_indirect_u16,
7362                                StoreKind::U8 => store_indirect_u8,
7363                            }
7364                        }
7365                    }
7366                    RegImm::Imm(value) => {
7367                        codegen! {
7368                            args = (conv_reg(base), cast(offset).to_unsigned(), cast(value).to_unsigned()),
7369                            kind = kind,
7370                            {
7371                                StoreKind::U64 => store_imm_indirect_u64,
7372                                StoreKind::U32 => store_imm_indirect_u32,
7373                                StoreKind::U16 => store_imm_indirect_u16,
7374                                StoreKind::U8 => store_imm_indirect_u8,
7375                            }
7376                        }
7377                    }
7378                },
7379                BasicInst::LoadAddress { dst, target } => {
7380                    let value = match target {
7381                        AnyTarget::Code(target) => {
7382                            let value = get_jump_target(target)?.dynamic_target.expect("missing jump target for address");
7383                            let Some(value) = value.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7384                                return Err(ProgramFromElfError::other("overflow when emitting an address load"));
7385                            };
7386                            value
7387                        }
7388                        AnyTarget::Data(target) => get_data_address(source, target)?,
7389                    };
7390
7391                    Instruction::load_imm(conv_reg(dst), value)
7392                }
7393                BasicInst::LoadAddressIndirect { dst, target } => {
7394                    let Some(&offset) = target_to_got_offset.get(&target) else {
7395                        return Err(ProgramFromElfError::other(
7396                            "indirect address load without a corresponding GOT entry",
7397                        ));
7398                    };
7399
7400                    let target = SectionTarget {
7401                        section_index: section_got,
7402                        offset,
7403                    };
7404
7405                    let value = get_data_address(source, target)?;
7406                    if is_rv64 {
7407                        Instruction::load_u64(conv_reg(dst), value)
7408                    } else {
7409                        Instruction::load_i32(conv_reg(dst), value)
7410                    }
7411                }
7412                BasicInst::Reg { kind, dst, src } => {
7413                    codegen! {
7414                        args = (conv_reg(dst), conv_reg(src)),
7415                        kind = kind,
7416                        {
7417                            RegKind::CountLeadingZeroBits32 => count_leading_zero_bits_32,
7418                            RegKind::CountLeadingZeroBits64 => count_leading_zero_bits_64,
7419                            RegKind::CountSetBits32 => count_set_bits_32,
7420                            RegKind::CountSetBits64 => count_set_bits_64,
7421                            RegKind::CountTrailingZeroBits32 => count_trailing_zero_bits_32,
7422                            RegKind::CountTrailingZeroBits64 => count_trailing_zero_bits_64,
7423                            RegKind::ReverseByte => reverse_byte,
7424                            RegKind::SignExtend8 => sign_extend_8,
7425                            RegKind::SignExtend16 => sign_extend_16,
7426                            RegKind::ZeroExtend16 => zero_extend_16,
7427                        }
7428                    }
7429                }
7430                BasicInst::RegReg { kind, dst, src1, src2 } => {
7431                    use RegRegKind as K;
7432                    codegen! {
7433                        args = (conv_reg(dst), conv_reg(src1), conv_reg(src2)),
7434                        kind = kind,
7435                        {
7436                            K::MulUpperSignedSigned32 => mul_upper_signed_signed,
7437                            K::MulUpperSignedSigned64 => mul_upper_signed_signed,
7438                            K::MulUpperUnsignedUnsigned32 => mul_upper_unsigned_unsigned,
7439                            K::MulUpperUnsignedUnsigned64 => mul_upper_unsigned_unsigned,
7440                            K::MulUpperSignedUnsigned32 => mul_upper_signed_unsigned,
7441                            K::MulUpperSignedUnsigned64 => mul_upper_signed_unsigned,
7442                            K::Div32 => div_signed_32,
7443                            K::Div32AndSignExtend => div_signed_32,
7444                            K::Div64 => div_signed_64,
7445                            K::DivUnsigned32 => div_unsigned_32,
7446                            K::DivUnsigned32AndSignExtend => div_unsigned_32,
7447                            K::DivUnsigned64 => div_unsigned_64,
7448                            K::Rem32 => rem_signed_32,
7449                            K::Rem32AndSignExtend => rem_signed_32,
7450                            K::Rem64 => rem_signed_64,
7451                            K::RemUnsigned32 => rem_unsigned_32,
7452                            K::RemUnsigned32AndSignExtend => rem_unsigned_32,
7453                            K::RemUnsigned64 => rem_unsigned_64,
7454                            K::AndInverted => and_inverted,
7455                            K::OrInverted => or_inverted,
7456                            K::Xnor => xnor,
7457                            K::Maximum => maximum,
7458                            K::MaximumUnsigned => maximum_unsigned,
7459                            K::Minimum => minimum,
7460                            K::MinimumUnsigned => minimum_unsigned,
7461                            K::RotateLeft32 => rotate_left_32,
7462                            K::RotateLeft32AndSignExtend => rotate_left_32,
7463                            K::RotateLeft64 => rotate_left_64,
7464                        }
7465                    }
7466                }
7467                BasicInst::MoveReg { dst, src } => Instruction::move_reg(conv_reg(dst), conv_reg(src)),
7468                BasicInst::AnyAny { kind, dst, src1, src2 } => {
7469                    use AnyAnyKind as K;
7470                    use Instruction as I;
7471                    let dst = conv_reg(dst);
7472                    match (src1, src2) {
7473                        (RegImm::Reg(src1), RegImm::Reg(src2)) => {
7474                            codegen! {
7475                                args = (dst, conv_reg(src1), conv_reg(src2)),
7476                                kind = kind,
7477                                {
7478                                    K::Add32 => add_32,
7479                                    K::Add32AndSignExtend => add_32,
7480                                    K::Add64 => add_64,
7481                                    K::Sub32 => sub_32,
7482                                    K::Sub32AndSignExtend => sub_32,
7483                                    K::Sub64 => sub_64,
7484                                    K::ShiftLogicalLeft32 => shift_logical_left_32,
7485                                    K::ShiftLogicalLeft32AndSignExtend => shift_logical_left_32,
7486                                    K::ShiftLogicalLeft64 => shift_logical_left_64,
7487                                    K::SetLessThanSigned32 => set_less_than_signed,
7488                                    K::SetLessThanSigned64 => set_less_than_signed,
7489                                    K::SetLessThanUnsigned32 => set_less_than_unsigned,
7490                                    K::SetLessThanUnsigned64 => set_less_than_unsigned,
7491                                    K::Xor32 => xor,
7492                                    K::Xor64 => xor,
7493                                    K::ShiftLogicalRight32 => shift_logical_right_32,
7494                                    K::ShiftLogicalRight32AndSignExtend => shift_logical_right_32,
7495                                    K::ShiftLogicalRight64 => shift_logical_right_64,
7496                                    K::ShiftArithmeticRight32 => shift_arithmetic_right_32,
7497                                    K::ShiftArithmeticRight32AndSignExtend => shift_arithmetic_right_32,
7498                                    K::ShiftArithmeticRight64 => shift_arithmetic_right_64,
7499                                    K::Or32 => or,
7500                                    K::Or64 => or,
7501                                    K::And32 => and,
7502                                    K::And64 => and,
7503                                    K::Mul32 => mul_32,
7504                                    K::Mul32AndSignExtend => mul_32,
7505                                    K::Mul64 => mul_64,
7506                                    K::RotateRight32 => rotate_right_32,
7507                                    K::RotateRight32AndSignExtend => rotate_right_32,
7508                                    K::RotateRight64 => rotate_right_64,
7509                                }
7510                            }
7511                        }
7512                        (RegImm::Reg(src1), RegImm::Imm(src2)) => {
7513                            let src1 = conv_reg(src1);
7514                            let src2 = cast(src2).to_unsigned();
7515                            match kind {
7516                                K::Add32 => I::add_imm_32(dst, src1, src2),
7517                                K::Add32AndSignExtend => I::add_imm_32(dst, src1, src2),
7518                                K::Add64 => I::add_imm_64(dst, src1, src2),
7519                                K::Sub32 => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7520                                K::Sub32AndSignExtend => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7521                                K::Sub64 => I::add_imm_64(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7522                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_32(dst, src1, src2),
7523                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_32(dst, src1, src2),
7524                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_64(dst, src1, src2),
7525                                K::SetLessThanSigned32 => I::set_less_than_signed_imm(dst, src1, src2),
7526                                K::SetLessThanSigned64 => I::set_less_than_signed_imm(dst, src1, src2),
7527                                K::SetLessThanUnsigned32 => I::set_less_than_unsigned_imm(dst, src1, src2),
7528                                K::SetLessThanUnsigned64 => I::set_less_than_unsigned_imm(dst, src1, src2),
7529                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src1, src2),
7530                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_32(dst, src1, src2),
7531                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_32(dst, src1, src2),
7532                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_64(dst, src1, src2),
7533                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_32(dst, src1, src2),
7534                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_32(dst, src1, src2),
7535                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_64(dst, src1, src2),
7536                                K::Or32 | K::Or64 => I::or_imm(dst, src1, src2),
7537                                K::And32 | K::And64 => I::and_imm(dst, src1, src2),
7538                                K::Mul32 => I::mul_imm_32(dst, src1, src2),
7539                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src1, src2),
7540                                K::Mul64 => I::mul_imm_64(dst, src1, src2),
7541                                K::RotateRight32 => I::rotate_right_imm_32(dst, src1, src2),
7542                                K::RotateRight32AndSignExtend => I::rotate_right_imm_32(dst, src1, src2),
7543                                K::RotateRight64 => I::rotate_right_imm_64(dst, src1, src2),
7544                            }
7545                        }
7546                        (RegImm::Imm(src1), RegImm::Reg(src2)) => {
7547                            let src1 = cast(src1).to_unsigned();
7548                            let src2 = conv_reg(src2);
7549                            match kind {
7550                                K::Add32 => I::add_imm_32(dst, src2, src1),
7551                                K::Add32AndSignExtend => I::add_imm_32(dst, src2, src1),
7552                                K::Add64 => I::add_imm_64(dst, src2, src1),
7553                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src2, src1),
7554                                K::Or32 | K::Or64 => I::or_imm(dst, src2, src1),
7555                                K::And32 | K::And64 => I::and_imm(dst, src2, src1),
7556                                K::Mul32 => I::mul_imm_32(dst, src2, src1),
7557                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src2, src1),
7558                                K::Mul64 => I::mul_imm_64(dst, src2, src1),
7559
7560                                K::Sub32 => I::negate_and_add_imm_32(dst, src2, src1),
7561                                K::Sub32AndSignExtend => I::negate_and_add_imm_32(dst, src2, src1),
7562                                K::Sub64 => I::negate_and_add_imm_64(dst, src2, src1),
7563                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_alt_32(dst, src2, src1),
7564                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_alt_32(dst, src2, src1),
7565                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_alt_64(dst, src2, src1),
7566                                K::SetLessThanSigned32 => I::set_greater_than_signed_imm(dst, src2, src1),
7567                                K::SetLessThanSigned64 => I::set_greater_than_signed_imm(dst, src2, src1),
7568                                K::SetLessThanUnsigned32 => I::set_greater_than_unsigned_imm(dst, src2, src1),
7569                                K::SetLessThanUnsigned64 => I::set_greater_than_unsigned_imm(dst, src2, src1),
7570                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_alt_32(dst, src2, src1),
7571                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_alt_32(dst, src2, src1),
7572                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_alt_64(dst, src2, src1),
7573                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
7574                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
7575                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_alt_64(dst, src2, src1),
7576
7577                                K::RotateRight32 => I::rotate_right_imm_alt_32(dst, src2, src1),
7578                                K::RotateRight32AndSignExtend => I::rotate_right_imm_alt_32(dst, src2, src1),
7579                                K::RotateRight64 => I::rotate_right_imm_alt_64(dst, src2, src1),
7580                            }
7581                        }
7582                        (RegImm::Imm(src1), RegImm::Imm(src2)) => {
7583                            if is_optimized {
7584                                unreachable!("internal error: instruction with only constant operands: {op:?}")
7585                            } else {
7586                                let imm: u32 = OperationKind::from(kind)
7587                                    .apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend())
7588                                    .try_into()
7589                                    .expect("load immediate overflow");
7590                                I::load_imm(dst, imm)
7591                            }
7592                        }
7593                    }
7594                }
7595                BasicInst::Cmov { kind, dst, src, cond } => match src {
7596                    RegImm::Reg(src) => {
7597                        codegen! {
7598                            args = (conv_reg(dst), conv_reg(src), conv_reg(cond)),
7599                            kind = kind,
7600                            {
7601                                CmovKind::EqZero => cmov_if_zero,
7602                                CmovKind::NotEqZero => cmov_if_not_zero,
7603                            }
7604                        }
7605                    }
7606                    RegImm::Imm(imm) => {
7607                        codegen! {
7608                            args = (conv_reg(dst), conv_reg(cond), cast(imm).to_unsigned()),
7609                            kind = kind,
7610                            {
7611                                CmovKind::EqZero => cmov_if_zero_imm,
7612                                CmovKind::NotEqZero => cmov_if_not_zero_imm,
7613                            }
7614                        }
7615                    }
7616                },
7617                BasicInst::Ecalli { nth_import } => {
7618                    assert!(used_imports.contains(&nth_import));
7619                    let import = &imports[nth_import];
7620                    Instruction::ecalli(import.metadata.index.expect("internal error: no index was assigned to an ecall"))
7621                }
7622                BasicInst::Sbrk { dst, size } => Instruction::sbrk(conv_reg(dst), conv_reg(size)),
7623                BasicInst::Memset => Instruction::memset,
7624                BasicInst::Nop => unreachable!("internal error: a nop instruction was not removed"),
7625            };
7626
7627            code.push((source.clone(), op));
7628        }
7629
7630        fn unconditional_jump(target: JumpTarget) -> Instruction {
7631            Instruction::jump(target.static_target)
7632        }
7633
7634        match block.next.instruction {
7635            ControlInst::Jump { target } => {
7636                let target = get_jump_target(target)?;
7637                if can_fallthrough_to_next_block.contains(block_target) {
7638                    assert!(basic_block_delimited);
7639                    basic_block_delimited = false;
7640                } else {
7641                    code.push((block.next.source.clone(), unconditional_jump(target)));
7642                }
7643            }
7644            ControlInst::Call { ra, target, target_return } => {
7645                assert!(can_fallthrough_to_next_block.contains(block_target));
7646
7647                let target = get_jump_target(target)?;
7648                let target_return = get_jump_target(target_return)?
7649                    .dynamic_target
7650                    .expect("missing jump target for address");
7651                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7652                    return Err(ProgramFromElfError::other("overflow when emitting a call's return address"));
7653                };
7654
7655                code.push((
7656                    block.next.source.clone(),
7657                    Instruction::load_imm_and_jump(conv_reg(ra), target_return, target.static_target),
7658                ));
7659            }
7660            ControlInst::JumpIndirect { base, offset } => {
7661                if offset != 0 {
7662                    return Err(ProgramFromElfError::other(
7663                        "found an indirect jump with a non-zero offset - this would most likely result in a broken program; aborting",
7664                    ));
7665                }
7666
7667                let Ok(offset) = offset.try_into() else {
7668                    unreachable!("internal error: indirect jump with an out-of-range offset");
7669                };
7670
7671                code.push((block.next.source.clone(), Instruction::jump_indirect(conv_reg(base), offset)));
7672            }
7673            ControlInst::CallIndirect {
7674                ra,
7675                base,
7676                offset,
7677                target_return,
7678            } => {
7679                if offset != 0 {
7680                    return Err(ProgramFromElfError::other(
7681                        "found an indirect call with a non-zero offset - this would most likely result in a broken program; aborting",
7682                    ));
7683                }
7684
7685                assert!(can_fallthrough_to_next_block.contains(block_target));
7686
7687                let target_return = get_jump_target(target_return)?
7688                    .dynamic_target
7689                    .expect("missing jump target for address");
7690                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7691                    return Err(ProgramFromElfError::other("overflow when emitting an indirect call"));
7692                };
7693
7694                let Ok(offset) = offset.try_into() else {
7695                    unreachable!("internal error: indirect call with an out-of-range offset");
7696                };
7697
7698                code.push((
7699                    block.next.source.clone(),
7700                    Instruction::load_imm_and_jump_indirect(conv_reg(ra), conv_reg(base), target_return, offset),
7701                ));
7702            }
7703            ControlInst::Branch {
7704                kind,
7705                src1,
7706                src2,
7707                target_true,
7708                target_false,
7709            } => {
7710                assert!(can_fallthrough_to_next_block.contains(block_target));
7711
7712                let target_true = get_jump_target(target_true)?;
7713                get_jump_target(target_false)?; // Only validate it; the false edge falls through to the next block.
7714
7715                let instruction = match (src1, src2) {
7716                    (RegImm::Reg(src1), RegImm::Reg(src2)) => {
7717                        codegen! {
7718                            args = (conv_reg(src1), conv_reg(src2), target_true.static_target),
7719                            kind = kind,
7720                            {
7721                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq,
7722                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq,
7723                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned,
7724                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed,
7725                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed,
7726                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned,
7727                            }
7728                        }
7729                    }
7730                    (RegImm::Imm(src1), RegImm::Reg(src2)) => {
7731                        codegen! {
7732                            args = (conv_reg(src2), cast(src1).to_unsigned(), target_true.static_target),
7733                            kind = kind,
7734                            {
7735                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
7736                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
7737                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_less_or_equal_unsigned_imm,
7738                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_less_or_equal_signed_imm,
7739                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_greater_signed_imm,
7740                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_greater_unsigned_imm,
7741                            }
7742                        }
7743                    }
7744                    (RegImm::Reg(src1), RegImm::Imm(src2)) => {
7745                        codegen! {
7746                            args = (conv_reg(src1), cast(src2).to_unsigned(), target_true.static_target),
7747                            kind = kind,
7748                            {
7749                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
7750                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
7751                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed_imm,
7752                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned_imm,
7753                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed_imm,
7754                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned_imm,
7755                            }
7756                        }
7757                    }
7758                    (RegImm::Imm(src1), RegImm::Imm(src2)) => {
7759                        if is_optimized {
7760                            unreachable!("internal error: branch with only constant operands")
7761                        } else {
7762                            match OperationKind::from(kind).apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend()) {
7763                                1 => unconditional_jump(target_true),
7764                                0 => {
7765                                    assert!(can_fallthrough_to_next_block.contains(block_target));
7766                                    Instruction::fallthrough
7767                                }
7768                                _ => unreachable!(),
7769                            }
7770                        }
7771                    }
7772                };
7773
7774                code.push((block.next.source.clone(), instruction));
7775            }
7776            ControlInst::Unimplemented => {
7777                code.push((block.next.source.clone(), Instruction::trap));
7778            }
7779        }
7780    }
7781
7782    Ok(code)
7783}
7784
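/// The bitness of the program being linked. It selects the instruction set used
/// when decoding and the size of pointer-sized values and relocations.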
7785#[derive(Copy, Clone, PartialEq, Eq, Debug)]
7786enum Bitness {
7787    B32,
7788    B64,
7789}
7790
7791impl Bitness {
7792    fn bits_used_mask(self) -> u64 {
7793        match self {
7794            Bitness::B32 => u64::from(u32::MAX),
7795            Bitness::B64 => u64::MAX,
7796        }
7797    }
7798}
7799
7800impl InstructionSet for Bitness {
7801    fn opcode_from_u8(self, byte: u8) -> Option<Opcode> {
7802        match self {
7803            Bitness::B32 => polkavm_common::program::ISA32_V1.opcode_from_u8(byte),
7804            Bitness::B64 => polkavm_common::program::ISA64_V1.opcode_from_u8(byte),
7805        }
7806    }
7807}
7808
7809impl From<Bitness> for u64 {
7810    fn from(value: Bitness) -> Self {
7811        match value {
7812            Bitness::B32 => 4,
7813            Bitness::B64 => 8,
7814        }
7815    }
7816}
7817
7818impl From<Bitness> for RelocationSize {
7819    fn from(value: Bitness) -> Self {
7820        match value {
7821            Bitness::B32 => RelocationSize::U32,
7822            Bitness::B64 => RelocationSize::U64,
7823        }
7824    }
7825}
7826
7827#[derive(Copy, Clone, PartialEq, Eq, Debug)]
7828pub(crate) enum RelocationSize {
7829    U8,
7830    U16,
7831    U32,
7832    U64,
7833}
7834
7835#[derive(Copy, Clone, Debug)]
7836pub(crate) enum SizeRelocationSize {
7837    SixBits,
7838    Uleb128,
7839    Generic(RelocationSize),
7840}
7841
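/// A higher-level description of how a relocated value is computed: an absolute
/// address of a target, a jump table entry (the address of a code target minus
/// a base address), or the distance between two targets.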
7842#[derive(Copy, Clone, Debug)]
7843pub(crate) enum RelocationKind {
7844    Abs {
7845        target: SectionTarget,
7846        size: RelocationSize,
7847    },
7848    JumpTable {
7849        target_code: SectionTarget,
7850        target_base: SectionTarget,
7851    },
7852    Offset {
7853        origin: SectionTarget,
7854        target: SectionTarget,
7855        size: SizeRelocationSize,
7856    },
7857}
7858
7859impl RelocationKind {
7860    fn targets(&self) -> [Option<SectionTarget>; 2] {
7861        match self {
7862            RelocationKind::Abs { target, .. } => [Some(*target), None],
7863            RelocationKind::Offset { origin, target, .. } => [Some(*origin), Some(*target)],
7864            RelocationKind::JumpTable { target_code, target_base } => [Some(*target_code), Some(*target_base)],
7865        }
7866    }
7867}
7868
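// Harvests the relocations of a single data section into `relocations`, pairing
// up the low-level ADD/SUB/SET style ELF relocations into the higher-level
// `RelocationKind` entries (absolute addresses, jump table entries and offsets).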
7869fn harvest_data_relocations<H>(
7870    elf: &Elf<H>,
7871    code_sections_set: &HashSet<SectionIndex>,
7872    section: &Section,
7873    relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
7874) -> Result<(), ProgramFromElfError>
7875where
7876    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
7877{
7878    #[derive(Debug)]
7879    enum MutOp {
7880        Add,
7881        Sub,
7882    }
7883
7884    #[derive(Debug)]
7885    enum Kind {
7886        Set(RelocationKind),
7887        Mut(MutOp, RelocationSize, SectionTarget),
7888
7889        Set6 { target: SectionTarget },
7890        Sub6 { target: SectionTarget },
7891
7892        SetUleb128 { target: SectionTarget },
7893        SubUleb128 { target: SectionTarget },
7894    }
7895
7896    if section.relocations().next().is_none() {
7897        return Ok(());
7898    }
7899
7900    let section_name = section.name();
7901    log::trace!("Harvesting data relocations from section: {}", section_name);
7902
7903    let mut for_address = BTreeMap::new();
7904    for (absolute_address, relocation) in section.relocations() {
7905        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
7906            return Err(ProgramFromElfError::other("invalid relocation offset"));
7907        };
7908
7909        if relocation.has_implicit_addend() {
7910            // AFAIK these should never be emitted for RISC-V.
7911            return Err(ProgramFromElfError::other(format!("unsupported relocation: {:?}", relocation)));
7912        }
7913
7914        let Some(target) = get_relocation_target(elf, &relocation)? else {
7915            continue;
7916        };
7917
7918        let (relocation_name, kind) = match (relocation.kind(), relocation.flags()) {
7919            (object::RelocationKind::Absolute, _)
7920                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
7921            {
7922                (
7923                    "R_RISCV_32",
7924                    Kind::Set(RelocationKind::Abs {
7925                        target,
7926                        size: RelocationSize::U32,
7927                    }),
7928                )
7929            }
7930            (object::RelocationKind::Absolute, _)
7931                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
7932            {
7933                (
7934                    "R_RISCV_64",
7935                    Kind::Set(RelocationKind::Abs {
7936                        target,
7937                        size: RelocationSize::U64,
7938                    }),
7939                )
7940            }
7941
7942            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => match reloc_kind {
7943                object::elf::R_RISCV_SET6 => ("R_RISCV_SET6", Kind::Set6 { target }),
7944                object::elf::R_RISCV_SUB6 => ("R_RISCV_SUB6", Kind::Sub6 { target }),
7945                object::elf::R_RISCV_SET8 => (
7946                    "R_RISCV_SET8",
7947                    Kind::Set(RelocationKind::Abs {
7948                        target,
7949                        size: RelocationSize::U8,
7950                    }),
7951                ),
7952                object::elf::R_RISCV_SET16 => (
7953                    "R_RISCV_SET16",
7954                    Kind::Set(RelocationKind::Abs {
7955                        target,
7956                        size: RelocationSize::U16,
7957                    }),
7958                ),
7959                object::elf::R_RISCV_ADD8 => ("R_RISCV_ADD8", Kind::Mut(MutOp::Add, RelocationSize::U8, target)),
7960                object::elf::R_RISCV_SUB8 => ("R_RISCV_SUB8", Kind::Mut(MutOp::Sub, RelocationSize::U8, target)),
7961                object::elf::R_RISCV_ADD16 => ("R_RISCV_ADD16", Kind::Mut(MutOp::Add, RelocationSize::U16, target)),
7962                object::elf::R_RISCV_SUB16 => ("R_RISCV_SUB16", Kind::Mut(MutOp::Sub, RelocationSize::U16, target)),
7963                object::elf::R_RISCV_ADD32 => ("R_RISCV_ADD32", Kind::Mut(MutOp::Add, RelocationSize::U32, target)),
7964                object::elf::R_RISCV_ADD64 => ("R_RISCV_ADD64", Kind::Mut(MutOp::Add, RelocationSize::U64, target)),
7965                object::elf::R_RISCV_SUB32 => ("R_RISCV_SUB32", Kind::Mut(MutOp::Sub, RelocationSize::U32, target)),
7966                object::elf::R_RISCV_SUB64 => ("R_RISCV_SUB64", Kind::Mut(MutOp::Sub, RelocationSize::U64, target)),
7967                object::elf::R_RISCV_SET_ULEB128 => ("R_RISCV_SET_ULEB128", Kind::SetUleb128 { target }),
7968                object::elf::R_RISCV_SUB_ULEB128 => ("R_RISCV_SUB_ULEB128", Kind::SubUleb128 { target }),
7969
7970                _ => {
7971                    return Err(ProgramFromElfError::other(format!(
7972                        "unsupported relocation in data section '{section_name}': {relocation:?}"
7973                    )))
7974                }
7975            },
7976            _ => {
7977                return Err(ProgramFromElfError::other(format!(
7978                    "unsupported relocation in data section '{section_name}': {relocation:?}"
7979                )))
7980            }
7981        };
7982
7983        log::trace!("  {relocation_name}: {section_name}[0x{relative_address:x}] (0x{absolute_address:x}): -> {target}");
7984        for_address
7985            .entry(relative_address)
7986            .or_insert_with(Vec::new)
7987            .push((relocation_name, kind));
7988    }
7989
7990    for (relative_address, list) in for_address {
7991        let current_location = SectionTarget {
7992            section_index: section.index(),
7993            offset: relative_address,
7994        };
7995
7996        struct ErrorToken; // To make sure we don't forget a `continue` anywhere.
7997        let _: ErrorToken = match &*list {
7998            [(_, Kind::Set(kind))] => {
7999                relocations.insert(current_location, *kind);
8000                continue;
8001            }
8002            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
8003                if size_1 == size_2
8004                    && matches!(*size_1, RelocationSize::U32 | RelocationSize::U64)
8005                    && code_sections_set.contains(&target_1.section_index)
8006                    && !code_sections_set.contains(&target_2.section_index) =>
8007            {
8008                if *size_1 == RelocationSize::U64 {
8009                    // We could support this, but I'm not sure if anything ever emits this,
8010                    // so let's return an error for now until somebody complains.
8011                    return Err(ProgramFromElfError::other(
8012                        "internal error: found 64-bit jump table relocation; please report this",
8013                    ));
8014                }
8015
8016                relocations.insert(
8017                    current_location,
8018                    RelocationKind::JumpTable {
8019                        target_code: *target_1,
8020                        target_base: *target_2,
8021                    },
8022                );
8023                continue;
8024            }
8025            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))] if size_1 == size_2 => {
8026                relocations.insert(
8027                    current_location,
8028                    RelocationKind::Offset {
8029                        origin: *target_2,
8030                        target: *target_1,
8031                        size: SizeRelocationSize::Generic(*size_1),
8032                    },
8033                );
8034                continue;
8035            }
8036            [(
8037                _,
8038                Kind::Set(RelocationKind::Abs {
8039                    target: target_1,
8040                    size: size_1,
8041                }),
8042            ), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
8043                if size_1 == size_2 =>
8044            {
8045                relocations.insert(
8046                    current_location,
8047                    RelocationKind::Offset {
8048                        origin: *target_2,
8049                        target: *target_1,
8050                        size: SizeRelocationSize::Generic(*size_1),
8051                    },
8052                );
8053                continue;
8054            }
8055            [(_, Kind::Set6 { target: target_1 }), (_, Kind::Sub6 { target: target_2 })] => {
8056                relocations.insert(
8057                    current_location,
8058                    RelocationKind::Offset {
8059                        origin: *target_2,
8060                        target: *target_1,
8061                        size: SizeRelocationSize::SixBits,
8062                    },
8063                );
8064                continue;
8065            }
8066            [(_, Kind::SetUleb128 { target: target_1 }), (_, Kind::SubUleb128 { target: target_2 })] => {
8067                relocations.insert(
8068                    current_location,
8069                    RelocationKind::Offset {
8070                        origin: *target_2,
8071                        target: *target_1,
8072                        size: SizeRelocationSize::Uleb128,
8073                    },
8074                );
8075                continue;
8076            }
8077            _ => ErrorToken,
8078        };
8079
8080        return Err(ProgramFromElfError::other(format!(
8081            "unsupported relocations for '{section_name}'[{relative_address:x}] (0x{absolute_address:08x}): {list}",
8082            absolute_address = section.original_address() + relative_address,
8083            list = SectionTarget::make_human_readable_in_debug_string(elf, &format!("{list:?}")),
8084        )));
8085    }
8086
8087    Ok(())
8088}
8089
8090fn read_u32(data: &[u8], relative_address: u64) -> Result<u32, ProgramFromElfError> {
8091    let target_range = relative_address as usize..relative_address as usize + 4;
8092    let value = data
8093        .get(target_range)
8094        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
8095    Ok(u32::from_le_bytes([value[0], value[1], value[2], value[3]]))
8096}
8097
8098fn read_u16(data: &[u8], relative_address: u64) -> Result<u16, ProgramFromElfError> {
8099    let target_range = relative_address as usize..relative_address as usize + 2;
8100    let value = data
8101        .get(target_range)
8102        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
8103    Ok(u16::from_le_bytes([value[0], value[1]]))
8104}
8105
8106fn read_u8(data: &[u8], relative_address: u64) -> Result<u8, ProgramFromElfError> {
8107    data.get(relative_address as usize)
8108        .ok_or(ProgramFromElfError::other("out of range relocation"))
8109        .copied()
8110}
8111
8112/// ULEB128 encode `value` and overwrite the existing value at `data_offset`, keeping the length.
8113///
8114/// See the [ELF ABI spec] and [LLD implementation] for reference.
8115///
8116/// [ELF ABI spec]: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/fbf3cbbac00ef1860ae60302a9afedb98fd31109/riscv-elf.adoc#uleb128-note
8117/// [LLD implementation]: https://github.com/llvm/llvm-project/blob/release/18.x/lld/ELF/Target.h#L310
8118fn overwrite_uleb128(data: &mut [u8], mut data_offset: usize, mut value: u64) -> Result<(), ProgramFromElfError> {
8119    loop {
8120        let Some(byte) = data.get_mut(data_offset) else {
8121            return Err(ProgramFromElfError::other("ULEB128 relocation target offset out of bounds"));
8122        };
8123        data_offset += 1;
8124
8125        if *byte & 0x80 != 0 {
8126            *byte = 0x80 | (value as u8 & 0x7f);
8127            value >>= 7;
8128        } else {
8129            *byte = value as u8;
8130            return if value > 0x7f {
8131                Err(ProgramFromElfError::other("ULEB128 relocation overflow"))
8132            } else {
8133                Ok(())
8134            };
8135        }
8136    }
8137}
8138
8139#[test]
8140fn test_overwrite_uleb128() {
8141    let value = 624485;
8142    let encoded_value = vec![0xE5u8, 0x8E, 0x26];
8143    let mut data = vec![0x80, 0x80, 0x00];
8144
8145    overwrite_uleb128(&mut data, 0, value).unwrap();
8146
8147    assert_eq!(data, encoded_value);
8148}
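
// A complementary sketch of the failure path (an illustrative addition): if the existing
// encoding is too short to hold the new value, `overwrite_uleb128` should report an overflow
// rather than silently truncate.
#[test]
fn test_overwrite_uleb128_overflow() {
    // Only a single byte (with no continuation bit) is available, but 624485 needs three bytes.
    let mut data = vec![0x00];
    assert!(overwrite_uleb128(&mut data, 0, 624485).is_err());
}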
8149
8150fn write_u64(data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
8151    let Some(xs) = data.get_mut(relative_address as usize..relative_address as usize + 8) else {
8152        return Err(ProgramFromElfError::other("out of range relocation"));
8153    };
8154    xs.copy_from_slice(&value.to_le_bytes());
8155    Ok(())
8156}
8157
8158fn write_u32(data: &mut [u8], relative_address: u64, value: u32) -> Result<(), ProgramFromElfError> {
8159    let Some(xs) = data.get_mut(relative_address as usize..relative_address as usize + 4) else {
8160        return Err(ProgramFromElfError::other("out of range relocation"));
8161    };
8162    xs.copy_from_slice(&value.to_le_bytes());
8163    Ok(())
8164}
8165
8166fn write_u16(data: &mut [u8], relative_address: u64, value: u16) -> Result<(), ProgramFromElfError> {
8167    let Some(xs) = data.get_mut(relative_address as usize..relative_address as usize + 2) else {
8168        return Err(ProgramFromElfError::other("out of range relocation"));
8169    };
8170    xs.copy_from_slice(&value.to_le_bytes());
8171    Ok(())
8172}
8178
8179fn harvest_code_relocations<H>(
8180    elf: &Elf<H>,
8181    section: &Section,
8182    decoder_config: &DecoderConfig,
8183    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
8184    data_relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
8185) -> Result<(), ProgramFromElfError>
8186where
8187    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8188{
8189    fn jump_or_call<T>(ra: RReg, target: T, target_return: T) -> Result<ControlInst<T>, ProgramFromElfError> {
8190        if let Some(ra) = cast_reg_non_zero(ra)? {
8191            Ok(ControlInst::Call { ra, target, target_return })
8192        } else {
8193            Ok(ControlInst::Jump { target })
8194        }
8195    }
8196
8197    #[derive(Copy, Clone)]
8198    enum HiRelocKind {
8199        PcRel,
8200        Got,
8201    }
8202
8203    impl core::fmt::Display for HiRelocKind {
8204        fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
8205            match self {
8206                HiRelocKind::PcRel => fmt.write_str("R_RISCV_PCREL_HI20"),
8207                HiRelocKind::Got => fmt.write_str("R_RISCV_GOT_HI20"),
8208            }
8209        }
8210    }
8211
8212    #[derive(Default)]
8213    struct RelocPairs {
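        // Keyed by the relative offset of each HI (AUIPC) instruction; the value is the kind of HI relocation plus its target.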
8214        reloc_pcrel_hi20: BTreeMap<u64, (HiRelocKind, SectionTarget)>,
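        // Keyed by the relative offset of each LO instruction; the value is the relocation's name plus the relative offset of the paired HI instruction.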
8215        reloc_pcrel_lo12: BTreeMap<u64, (&'static str, u64)>,
8216    }
8217
8218    if section.relocations().next().is_none() {
8219        return Ok(());
8220    }
8221
8222    let mut pcrel_relocations = RelocPairs::default();
8223
8224    let section_name = section.name();
8225    log::trace!("Harvesting code relocations from section: {}", section_name);
8226
8227    let section_data = section.data();
8228    for (absolute_address, relocation) in section.relocations() {
8229        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
8230            return Err(ProgramFromElfError::other("invalid relocation offset"));
8231        };
8232
8233        if relocation.has_implicit_addend() {
8234            // AFAIK these should never be emitted for RISC-V.
8235            return Err(ProgramFromElfError::other(format!(
8236                "unsupported relocation in section '{section_name}': {relocation:?}"
8237            )));
8238        }
8239
8240        let current_location = SectionTarget {
8241            section_index: section.index(),
8242            offset: relative_address,
8243        };
8244
8245        let relative_address = current_location.offset;
8246        let Some(target) = get_relocation_target(elf, &relocation)? else {
8247            continue;
8248        };
8249
8250        match (relocation.kind(), relocation.flags()) {
8251            (object::RelocationKind::Absolute, _)
8252                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
8253            {
8254                data_relocations.insert(
8255                    current_location,
8256                    RelocationKind::Abs {
8257                        target,
8258                        size: RelocationSize::U32,
8259                    },
8260                );
8261            }
8262            (object::RelocationKind::Absolute, _)
8263                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
8264            {
8265                data_relocations.insert(
8266                    current_location,
8267                    RelocationKind::Abs {
8268                        target,
8269                        size: RelocationSize::U64,
8270                    },
8271                );
8272            }
8273            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => {
8274                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases
8275                match reloc_kind {
8276                    object::elf::R_RISCV_CALL_PLT => {
8277                        // This relocation is for a pair of instructions, namely AUIPC + JALR, where we're allowed to delete the AUIPC if it's unnecessary.
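                        // A typical pair (as emitted for a `call function` pseudo-instruction) looks roughly like:
                        //   auipc ra, 0      # upper 20 bits of the offset; turned into a NOP below
                        //   jalr  ra, 0(ra)  # lower 12 bits; becomes the actual call/jump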
8278                        let Some(xs) = section_data.get(current_location.offset as usize..current_location.offset as usize + 8) else {
8279                            return Err(ProgramFromElfError::other("invalid R_RISCV_CALL_PLT relocation"));
8280                        };
8281
8282                        let hi_inst_raw = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
8283                        let Some(hi_inst) = Inst::decode(decoder_config, hi_inst_raw) else {
8284                            return Err(ProgramFromElfError::other(format!(
8285                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08x}"
8286                            )));
8287                        };
8288
8289                        let lo_inst_raw = u32::from_le_bytes([xs[4], xs[5], xs[6], xs[7]]);
8290                        let Some(lo_inst) = Inst::decode(decoder_config, lo_inst_raw) else {
8291                            return Err(ProgramFromElfError::other(format!(
8292                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08x}"
8293                            )));
8294                        };
8295
8296                        let Inst::AddUpperImmediateToPc { dst: hi_reg, value: _ } = hi_inst else {
8297                            return Err(ProgramFromElfError::other(format!(
8298                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08x} ({hi_inst:?})"
8299                            )));
8300                        };
8301
8302                        let Inst::JumpAndLinkRegister {
8303                            dst: lo_dst,
8304                            base: lo_reg,
8305                            value: _,
8306                        } = lo_inst
8307                        else {
8308                            return Err(ProgramFromElfError::other(format!(
8309                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08x} ({lo_inst:?})"
8310                            )));
8311                        };
8312
8313                        if hi_reg != lo_reg {
8314                            return Err(ProgramFromElfError::other(
8315                                "R_RISCV_CALL_PLT for a pair of instructions with different destination registers",
8316                            ));
8317                        }
8318
8319                        let target_return = current_location.add(8);
8320                        instruction_overrides.insert(current_location, InstExt::nop());
8321                        instruction_overrides.insert(
8322                            current_location.add(4),
8323                            InstExt::Control(jump_or_call(lo_dst, target, target_return)?),
8324                        );
8325
8326                        log::trace!(
8327                            "  R_RISCV_CALL_PLT: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8328                            section.name(),
8329                            target
8330                        );
8331                    }
8332                    object::elf::R_RISCV_PCREL_HI20 => {
8333                        // This relocation is for an AUIPC.
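                        // The paired R_RISCV_PCREL_LO12_* relocations point back at this AUIPC;
                        // the HI/LO pairs are matched up in a separate pass after this loop.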
8334                        pcrel_relocations
8335                            .reloc_pcrel_hi20
8336                            .insert(relative_address, (HiRelocKind::PcRel, target));
8337                        log::trace!(
8338                            "  R_RISCV_PCREL_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8339                            section.name(),
8340                            target
8341                        );
8342                    }
8343                    object::elf::R_RISCV_GOT_HI20 => {
8344                        pcrel_relocations
8345                            .reloc_pcrel_hi20
8346                            .insert(relative_address, (HiRelocKind::Got, target));
8347                        log::trace!(
8348                            "  R_RISCV_GOT_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8349                            section.name(),
8350                            target
8351                        );
8352                    }
8353                    object::elf::R_RISCV_PCREL_LO12_I => {
8354                        if target.section_index != section.index() {
8355                            return Err(ProgramFromElfError::other(
8356                                "R_RISCV_PCREL_LO12_I relocation points to a different section",
8357                            ));
8358                        }
8359
8360                        pcrel_relocations
8361                            .reloc_pcrel_lo12
8362                            .insert(relative_address, ("R_RISCV_PCREL_LO12_I", target.offset));
8363                        log::trace!(
8364                            "  R_RISCV_PCREL_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8365                            section.name(),
8366                            target
8367                        );
8368                    }
8369                    object::elf::R_RISCV_PCREL_LO12_S => {
8370                        if target.section_index != section.index() {
8371                            return Err(ProgramFromElfError::other(
8372                                "R_RISCV_PCREL_LO12_S relocation points to a different section",
8373                            ));
8374                        }
8375
8376                        pcrel_relocations
8377                            .reloc_pcrel_lo12
8378                            .insert(relative_address, ("R_RISCV_PCREL_LO12_S", target.offset));
8379                        log::trace!(
8380                            "  R_RISCV_PCREL_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8381                            section.name(),
8382                            target
8383                        );
8384                    }
8385                    object::elf::R_RISCV_JAL => {
8386                        let inst_raw = read_u32(section_data, relative_address)?;
8387                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8388                            return Err(ProgramFromElfError::other(format!(
8389                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08x}"
8390                            )));
8391                        };
8392
8393                        let Inst::JumpAndLink { dst, .. } = inst else {
8394                            return Err(ProgramFromElfError::other(format!(
8395                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08x} ({inst:?})"
8396                            )));
8397                        };
8398
8399                        let target_return = current_location.add(4);
8400                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
8401
8402                        log::trace!(
8403                            "  R_RISCV_JAL: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8404                            section.name(),
8405                            target
8406                        );
8407                    }
8408                    object::elf::R_RISCV_BRANCH => {
8409                        let inst_raw = read_u32(section_data, relative_address)?;
8410                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8411                            return Err(ProgramFromElfError::other(format!(
8412                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08x}"
8413                            )));
8414                        };
8415
8416                        let Inst::Branch { kind, src1, src2, .. } = inst else {
8417                            return Err(ProgramFromElfError::other(format!(
8418                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08x} ({inst:?})"
8419                            )));
8420                        };
8421
8422                        let target_false = current_location.add(4);
8423                        instruction_overrides.insert(
8424                            current_location,
8425                            InstExt::Control(ControlInst::Branch {
8426                                kind,
8427                                src1: cast_reg_any(src1)?,
8428                                src2: cast_reg_any(src2)?,
8429                                target_true: target,
8430                                target_false,
8431                            }),
8432                        );
8433
8434                        log::trace!(
8435                            "  R_RISCV_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8436                            section.name(),
8437                            target
8438                        );
8439                    }
8440                    object::elf::R_RISCV_HI20 => {
8441                        // This relocation is for a LUI.
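                        // e.g. `lui dst, %hi(symbol)`; since we support full-width immediates it's
                        // replaced below with a plain LoadAddress.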
8442                        let inst_raw = read_u32(section_data, relative_address)?;
8443                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8444                            return Err(ProgramFromElfError::other(format!(
8445                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08x}"
8446                            )));
8447                        };
8448
8449                        let Inst::LoadUpperImmediate { dst, value: _ } = inst else {
8450                            return Err(ProgramFromElfError::other(format!(
8451                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08x} ({inst:?})"
8452                            )));
8453                        };
8454
8455                        let Some(dst) = cast_reg_non_zero(dst)? else {
8456                            return Err(ProgramFromElfError::other("R_RISCV_HI20 with a zero destination register"));
8457                        };
8458
8459                        instruction_overrides.insert(current_location, InstExt::Basic(BasicInst::LoadAddress { dst, target }));
8460
8461                        log::trace!(
8462                            "  R_RISCV_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8463                            section.name(),
8464                            target
8465                        );
8466
8467                        continue;
8468                    }
8469                    object::elf::R_RISCV_LO12_I => {
8470                        let inst_raw = read_u32(section_data, relative_address)?;
8471                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8472                            return Err(ProgramFromElfError::other(format!(
8473                                "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08x}"
8474                            )));
8475                        };
8476
8477                        let new_instruction = match inst {
8478                            Inst::RegImm {
8479                                kind: RegImmKind::Add32,
8480                                dst,
8481                                src: _,
8482                                imm: _,
8483                            } => {
8484                                let Some(dst) = cast_reg_non_zero(dst)? else {
8485                                    return Err(ProgramFromElfError::other("R_RISCV_LO12_I with a zero destination register"));
8486                                };
8487
8488                                InstExt::Basic(BasicInst::LoadAddress { dst, target })
8489                            }
8490                            Inst::Load {
8491                                kind,
8492                                dst,
8493                                base: _,
8494                                offset: _,
8495                            } => {
8496                                let Some(dst) = cast_reg_non_zero(dst)? else {
8497                                    return Err(ProgramFromElfError::other("R_RISCV_LO12_I with a zero destination register"));
8498                                };
8499
8500                                InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target })
8501                            }
8502                            _ => {
8503                                return Err(ProgramFromElfError::other(format!(
8504                                    "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08x} ({inst:?}) (at {loc})",
8505                                    loc = current_location.fmt_human_readable(elf),
8506                                )));
8507                            }
8508                        };
8509
8510                        instruction_overrides.insert(current_location, new_instruction);
8511
8512                        log::trace!(
8513                            "  R_RISCV_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8514                            section.name(),
8515                            target
8516                        );
8517                    }
8518                    object::elf::R_RISCV_LO12_S => {
8519                        let inst_raw = read_u32(section_data, relative_address)?;
8520                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8521                            return Err(ProgramFromElfError::other(format!(
8522                                "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08x}"
8523                            )));
8524                        };
8525
8526                        let new_instruction = match inst {
8527                            Inst::Store {
8528                                kind,
8529                                src,
8530                                base: _,
8531                                offset: _,
8532                            } => InstExt::Basic(BasicInst::StoreAbsolute {
8533                                kind,
8534                                src: cast_reg_any(src)?,
8535                                target,
8536                            }),
8537                            _ => {
8538                                return Err(ProgramFromElfError::other(format!(
8539                                    "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08x} ({inst:?}) (at {loc})",
8540                                    loc = current_location.fmt_human_readable(elf),
8541                                )));
8542                            }
8543                        };
8544
8545                        instruction_overrides.insert(current_location, new_instruction);
8546
8547                        log::trace!(
8548                            "  R_RISCV_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8549                            section.name(),
8550                            target
8551                        );
8552                    }
8553                    object::elf::R_RISCV_RVC_JUMP => {
8554                        let inst_raw = read_u16(section_data, relative_address)?;
8555                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
8556                            return Err(ProgramFromElfError::other(format!(
8557                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04x}"
8558                            )));
8559                        };
8560
8561                        let (Inst::JumpAndLink { dst, .. } | Inst::JumpAndLinkRegister { dst, .. }) = inst else {
8562                            return Err(ProgramFromElfError::other(format!(
8563                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04x} ({inst:?})"
8564                            )));
8565                        };
8566
8567                        let target_return = current_location.add(2);
8568                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
8569
8570                        log::trace!(
8571                            "  R_RISCV_RVC_JUMP: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8572                            section.name(),
8573                            target
8574                        );
8575                    }
8576                    object::elf::R_RISCV_RVC_BRANCH => {
8577                        let inst_raw = read_u16(section_data, relative_address)?;
8578                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
8579                            return Err(ProgramFromElfError::other(format!(
8580                                "R_RISCV_RVC_BRANCH for an unsupported instruction: 0x{inst_raw:04x}"
8581                            )));
8582                        };
8583
8584                        let Inst::Branch { kind, src1, src2, .. } = inst else {
8585                            return Err(ProgramFromElfError::other(format!(
8586                                "R_RISCV_RVC_BRANCH for an unsupported instruction: 0x{inst_raw:04x} ({inst:?})"
8587                            )));
8588                        };
8589
8590                        let target_false = current_location.add(2);
8591                        instruction_overrides.insert(
8592                            current_location,
8593                            InstExt::Control(ControlInst::Branch {
8594                                kind,
8595                                src1: cast_reg_any(src1)?,
8596                                src2: cast_reg_any(src2)?,
8597                                target_true: target,
8598                                target_false,
8599                            }),
8600                        );
8601
8602                        log::trace!(
8603                            "  R_RISCV_RVC_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8604                            section.name(),
8605                            target
8606                        );
8607                    }
8608                    object::elf::R_RISCV_RELAX => {}
8609                    _ => {
8610                        return Err(ProgramFromElfError::other(format!(
8611                            "unsupported relocation type in section '{}': 0x{:08x}",
8612                            section.name(),
8613                            reloc_kind
8614                        )));
8615                    }
8616                }
8617            }
8618            _ => {
8619                return Err(ProgramFromElfError::other(format!(
8620                    "unsupported relocation in code section '{}': {:?}",
8621                    section.name(),
8622                    relocation
8623                )))
8624            }
8625        }
8626    }
8627
8628    for (relative_lo, (lo_rel_name, relative_hi)) in pcrel_relocations.reloc_pcrel_lo12 {
8629        let lo_inst_raw = &section_data[relative_lo as usize..][..4];
8630        let lo_inst_raw = u32::from_le_bytes([lo_inst_raw[0], lo_inst_raw[1], lo_inst_raw[2], lo_inst_raw[3]]);
8631        let lo_inst = Inst::decode(decoder_config, lo_inst_raw);
8632        let hi_inst_raw = &section_data[relative_hi as usize..][..4];
8633        let hi_inst_raw = u32::from_le_bytes([hi_inst_raw[0], hi_inst_raw[1], hi_inst_raw[2], hi_inst_raw[3]]);
8634        let hi_inst = Inst::decode(decoder_config, hi_inst_raw);
8635
8636        let Some((hi_kind, target)) = pcrel_relocations.reloc_pcrel_hi20.get(&relative_hi).copied() else {
8637            return Err(ProgramFromElfError::other(format!("{lo_rel_name} relocation at '{section_name}'[0x{relative_lo:x}] targets '{section_name}'[0x{relative_hi:x}] which doesn't have an R_RISCV_PCREL_HI20 or R_RISCV_GOT_HI20 relocation")));
8638        };
8639
8640        let Some(hi_inst) = hi_inst else {
8641            return Err(ProgramFromElfError::other(format!(
8642                "{hi_kind} relocation for an unsupported instruction at '{section_name}'[0x{relative_hi:x}]: 0x{hi_inst_raw:08x}"
8643            )));
8644        };
8645
8646        let Inst::AddUpperImmediateToPc { dst: hi_reg, .. } = hi_inst else {
8647            return Err(ProgramFromElfError::other(format!(
8648                "{hi_kind} relocation for an unsupported instruction at '{section_name}'[0x{relative_hi:x}]: {hi_inst:?}"
8649            )));
8650        };
8651
8652        let Some(lo_inst) = lo_inst else {
8653            return Err(ProgramFromElfError::other(format!(
8654                "{lo_rel_name} relocation for an unsupported instruction: 0x{lo_inst_raw:08x}"
8655            )));
8656        };
8657
8658        let (lo_reg, new_instruction) = if matches!(hi_kind, HiRelocKind::Got) {
8659            // For these relocations the target address points to the symbol that the code wants to reference,
8660            // but the address actually encoded in the code shouldn't point to the symbol directly; it should
8661            // point to the place where the symbol's address can be found, i.e. the GOT entry.
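            //
            // For example (roughly):
            //   auipc a0, %got_pcrel_hi(symbol)
            //   ld    a0, %pcrel_lo(...)(a0)   # loads the address of `symbol` from the GOT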
8662
8663            match lo_inst {
8664                Inst::Load {
8665                    kind: LoadKind::U64,
8666                    base,
8667                    dst,
8668                    ..
8669                } if elf.is_64() => {
8670                    let Some(dst) = cast_reg_non_zero(dst)? else {
8671                        return Err(ProgramFromElfError::other(format!(
8672                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8673                        )));
8674                    };
8675
8676                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
8677                }
8678                Inst::Load {
8679                    kind: LoadKind::I32,
8680                    base,
8681                    dst,
8682                    ..
8683                } => {
8684                    let Some(dst) = cast_reg_non_zero(dst)? else {
8685                        return Err(ProgramFromElfError::other(format!(
8686                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8687                        )));
8688                    };
8689
8690                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
8691                }
8692                _ => {
8693                    return Err(ProgramFromElfError::other(format!(
8694                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
8695                    )));
8696                }
8697            }
8698        } else {
8699            match lo_inst {
8700                Inst::RegImm {
8701                    kind: RegImmKind::Add32,
8702                    src,
8703                    dst,
8704                    ..
8705                } if !elf.is_64() => {
8706                    let Some(dst) = cast_reg_non_zero(dst)? else {
8707                        return Err(ProgramFromElfError::other(format!(
8708                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8709                        )));
8710                    };
8711
8712                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
8713                }
8714                Inst::RegImm {
8715                    kind: RegImmKind::Add64,
8716                    src,
8717                    dst,
8718                    ..
8719                } if elf.is_64() => {
8720                    let Some(dst) = cast_reg_non_zero(dst)? else {
8721                        return Err(ProgramFromElfError::other(format!(
8722                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8723                        )));
8724                    };
8725
8726                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
8727                }
8728                Inst::Load { kind, base, dst, .. } => {
8729                    if let Some(dst) = cast_reg_non_zero(dst)? {
8730                        (base, InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target }))
8731                    } else {
8732                        (base, InstExt::nop())
8733                    }
8734                }
8735                Inst::Store { kind, base, src, .. } => (
8736                    base,
8737                    InstExt::Basic(BasicInst::StoreAbsolute {
8738                        kind,
8739                        src: cast_reg_any(src)?,
8740                        target,
8741                    }),
8742                ),
8743                _ => {
8744                    return Err(ProgramFromElfError::other(format!(
8745                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
8746                    )));
8747                }
8748            }
8749        };
8750
8751        if lo_reg != hi_reg {
8752            // NOTE: These *can* apparently sometimes be different, so it's not an error if this happens.
8753            //
8754            // I've seen a case where the whole thing looked roughly like this:
8755            //
8756            //   auipc   a1,0x2057        # HI
8757            //   sw      a1,4(sp)         # Stash the HI part on the stack
8758            //   lw      a1,-460(a1)      # LO (1)
8759            //   ... a bunch of code ...
8760            //   lw      a2,4(sp)         # Reload the HI part from the stack (note the different register)
8761            //   sw      a0,-460(a2)      # LO (2)
8762            log::trace!(
8763                "{lo_rel_name} + {hi_kind} relocation pair in '{section_name}' [+0x{relative_lo:x}, +0x{relative_hi:x}] uses different destination registers ({lo_reg:?} and {hi_reg:?})",
8764            );
8765        }
8766
8767        let location_hi = SectionTarget {
8768            section_index: section.index(),
8769            offset: relative_hi,
8770        };
8771        let location_lo = SectionTarget {
8772            section_index: section.index(),
8773            offset: relative_lo,
8774        };
8775
8776        // Since we support full-length immediates, just turn the upper instruction into a NOP.
8777        instruction_overrides.insert(location_hi, InstExt::nop());
8778        instruction_overrides.insert(location_lo, new_instruction);
8779    }
8780
8781    Ok(())
8782}
8783
8784fn parse_function_symbols<H>(elf: &Elf<H>) -> Result<Vec<(Source, String)>, ProgramFromElfError>
8785where
8786    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8787{
8788    let mut functions = Vec::new();
8789    for sym in elf.symbols() {
8790        match sym.kind() {
8791            object::elf::STT_FUNC => {
8792                let (section, offset) = sym.section_and_offset()?;
8793                let Some(name) = sym.name() else { continue };
8794
8795                if name.is_empty() {
8796                    continue;
8797                }
8798
8799                let source = Source {
8800                    section_index: section.index(),
8801                    offset_range: (offset..offset + sym.size()).into(),
8802                };
8803
8804                functions.push((source, name.to_owned()));
8805            }
8806            object::elf::STT_NOTYPE | object::elf::STT_OBJECT | object::elf::STT_SECTION | object::elf::STT_FILE => {}
8807            kind => return Err(ProgramFromElfError::other(format!("unsupported symbol type: {}", kind))),
8808        }
8809    }
8810
8811    functions.sort_unstable_by_key(|(source, _)| *source);
8812    functions.dedup_by_key(|(source, _)| *source);
8813
8814    Ok(functions)
8815}
8816
8817#[derive(Copy, Clone, PartialEq, Eq, Debug)]
8818pub enum OptLevel {
8819    O0,
8820    O1,
8821    O2,
8822}
8823
8824pub struct Config {
8825    strip: bool,
8826    opt_level: OptLevel,
8827    inline_threshold: usize,
8828    elide_unnecessary_loads: bool,
8829    dispatch_table: Vec<Vec<u8>>,
8830    min_stack_size: u32,
8831}
8832
8833impl Default for Config {
8834    fn default() -> Self {
8835        Config {
8836            strip: false,
8837            opt_level: OptLevel::O2,
8838            inline_threshold: 2,
8839            elide_unnecessary_loads: true,
8840            dispatch_table: Vec::new(),
8841            min_stack_size: VM_MIN_PAGE_SIZE * 2,
8842        }
8843    }
8844}
8845
8846impl Config {
8847    pub fn set_strip(&mut self, value: bool) -> &mut Self {
8848        self.strip = value;
8849        self
8850    }
8851
8852    pub fn set_optimize(&mut self, value: bool) -> &mut Self {
8853        self.opt_level = if value { OptLevel::O2 } else { OptLevel::O0 };
8854        self
8855    }
8856
8857    pub fn set_opt_level(&mut self, value: OptLevel) -> &mut Self {
8858        self.opt_level = value;
8859        self
8860    }
8861
8862    pub fn set_inline_threshold(&mut self, value: usize) -> &mut Self {
8863        self.inline_threshold = value;
8864        self
8865    }
8866
8867    pub fn set_elide_unnecessary_loads(&mut self, value: bool) -> &mut Self {
8868        self.elide_unnecessary_loads = value;
8869        self
8870    }
8871
8872    pub fn set_dispatch_table(&mut self, dispatch_table: Vec<Vec<u8>>) -> &mut Self {
8873        self.dispatch_table = dispatch_table;
8874        self
8875    }
8876
8877    pub fn set_min_stack_size(&mut self, value: u32) -> &mut Self {
8878        self.min_stack_size = value;
8879        self
8880    }
8881}
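
// A minimal usage sketch (illustrative only; `elf_bytes` stands in for the raw bytes of a RISC-V ELF file):
//
//     let mut config = Config::default();
//     config.set_opt_level(OptLevel::O1);
//     config.set_min_stack_size(VM_MIN_PAGE_SIZE * 4);
//     let blob_bytes = program_from_elf(config, &elf_bytes)?;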
8882
8883pub fn program_from_elf(config: Config, data: &[u8]) -> Result<Vec<u8>, ProgramFromElfError> {
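    // Try to parse the input as a 32-bit ELF first; if the header says it's a different
    // class the parse fails with "Unsupported ELF header" and we retry as 64-bit.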
8884    match Elf::<object::elf::FileHeader32<object::endian::LittleEndian>>::parse(data) {
8885        Ok(elf) => program_from_elf_internal(config, elf),
8886        Err(ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(e))) if e.to_string() == "Unsupported ELF header" => {
8887            let elf = Elf::<object::elf::FileHeader64<object::endian::LittleEndian>>::parse(data)?;
8888            program_from_elf_internal(config, elf)
8889        }
8890        Err(e) => Err(e),
8891    }
8892}
8893
8894fn program_from_elf_internal<H>(config: Config, mut elf: Elf<H>) -> Result<Vec<u8>, ProgramFromElfError>
8895where
8896    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8897{
8898    let is_rv64 = elf.is_64();
8899    let bitness = if is_rv64 { Bitness::B64 } else { Bitness::B32 };
8900
8901    if elf.section_by_name(".got").next().is_none() {
8902        elf.add_empty_data_section(".got");
8903    }
8904
8905    let mut decoder_config = DecoderConfig::new_32bit();
8906    decoder_config.set_rv64(elf.is_64());
8907
8908    let mut sections_ro_data = Vec::new();
8909    let mut sections_rw_data = Vec::new();
8910    let mut sections_bss = Vec::new();
8911    let mut sections_code = Vec::new();
8912    let mut sections_metadata = Vec::new();
8913    let mut sections_exports = Vec::new();
8914    let mut sections_min_stack_size = Vec::new();
8915    let mut sections_other = Vec::new();
8916
8917    let mut section_map = RangeMap::new();
8918
8919    log::trace!("ELF sections:");
8920    for section in elf.sections() {
8921        let name = section.name();
8922        let is_writable = section.is_writable();
8923        log::trace!(
8924            " {}: 0x{:08x}..0x{:08x}: {} ({} bytes)",
8925            section.index(),
8926            section.original_address(),
8927            section.original_address() + section.size(),
8928            name,
8929            section.size()
8930        );
8931
8932        if section.is_allocated() && section.original_address() != 0 {
8933            section_map.insert(
8934                section.original_address()..section.original_address() + section.size(),
8935                section.index(),
8936            );
8937        }
8938
8939        if name == ".rodata"
8940            || name.starts_with(".rodata.")
8941            || name.starts_with(".srodata.")
8942            || name == ".data.rel.ro"
8943            || name.starts_with(".data.rel.ro.")
8944            || name == ".got"
8945            || name == ".got.plt"
8946            || name == ".relro_padding"
8947        {
8948            if name == ".rodata" && is_writable {
8949                return Err(ProgramFromElfError::other(format!(
8950                    "expected section '{name}' to be read-only, yet it is writable"
8951                )));
8952            }
8953
8954            sections_ro_data.push(section.index());
8955        } else if name == ".data" || name.starts_with(".data.") || name == ".sdata" || name.starts_with(".sdata.") {
8956            if !is_writable {
8957                return Err(ProgramFromElfError::other(format!(
8958                    "expected section '{name}' to be writable, yet it is read-only"
8959                )));
8960            }
8961
8962            sections_rw_data.push(section.index());
8963        } else if name == ".bss" || name.starts_with(".bss.") || name == ".sbss" || name.starts_with(".sbss.") {
8964            if !is_writable {
8965                return Err(ProgramFromElfError::other(format!(
8966                    "expected section '{name}' to be writable, yet it is read-only"
8967                )));
8968            }
8969
8970            sections_bss.push(section.index());
8971        } else if name == ".text" || name.starts_with(".text.") {
8972            if is_writable {
8973                return Err(ProgramFromElfError::other(format!(
8974                    "expected section '{name}' to be read-only, yet it is writable"
8975                )));
8976            }
8977
8978            sections_code.push(section.index());
8979        } else if name == ".polkavm_metadata" {
8980            sections_metadata.push(section.index());
8981        } else if name == ".polkavm_exports" {
8982            sections_exports.push(section.index());
8983        } else if name == ".polkavm_min_stack_size" {
8984            sections_min_stack_size.push(section.index());
8985        } else if name == ".eh_frame" || name == ".got" {
8986            continue;
8987        } else if section.is_allocated() {
8988            // We're supposed to load this section into memory at runtime, but we don't know what it is.
8989            return Err(ProgramFromElfErrorKind::UnsupportedSection(name.to_owned()).into());
8990        } else {
8991            sections_other.push(section.index());
8992        }
8993    }
8994
8995    if sections_code.is_empty() {
8996        return Err(ProgramFromElfError::other(
8997            "the program contains no code (linking empty programs is not supported!)",
8998        ));
8999    }
9000
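    // This empty section will back register spills (see the `spill_fake_registers` calls
    // below); it's grown to the final `regspill_size` once spilling has run.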
9001    let section_regspill = elf.add_empty_data_section(".regspill");
9002    sections_rw_data.insert(0, section_regspill);
9003
9004    let code_sections_set: HashSet<SectionIndex> = sections_code.iter().copied().collect();
9005    let data_sections = sections_ro_data
9006        .iter()
9007        .chain(sections_rw_data.iter())
9008        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
9009        .chain(sections_other.iter())
9010        .chain(sections_metadata.iter())
9011        .chain(sections_exports.iter())
9012        .copied();
9013
9014    let mut relocations = BTreeMap::new();
9015    for section_index in data_sections {
9016        let section = elf.section_by_index(section_index);
9017        harvest_data_relocations(&elf, &code_sections_set, section, &mut relocations)?;
9018    }
9019
9020    let mut instruction_overrides = HashMap::new();
9021    for &section_index in &sections_code {
9022        let section = elf.section_by_index(section_index);
9023        harvest_code_relocations(&elf, section, &decoder_config, &mut instruction_overrides, &mut relocations)?;
9024    }
9025
9026    let exports = sections_exports
9027        .iter()
9028        .map(|&section_index| {
9029            let section = elf.section_by_index(section_index);
9030            extract_exports(&elf, &relocations, section)
9031        })
9032        .collect::<Result<Vec<_>, _>>()?;
9033    let mut exports: Vec<_> = exports.into_iter().flatten().collect();
9034
9035    let mut instructions = Vec::new();
9036    let mut imports = Vec::new();
9037    let mut metadata_to_nth_import = HashMap::new();
9038
9039    for &section_index in &sections_code {
9040        let section = elf.section_by_index(section_index);
9041        let initial_instruction_count = instructions.len();
9042        parse_code_section(
9043            &elf,
9044            section,
9045            &decoder_config,
9046            &relocations,
9047            &mut imports,
9048            &mut metadata_to_nth_import,
9049            &mut instruction_overrides,
9050            &mut instructions,
9051        )?;
9052
9053        if instructions.len() > initial_instruction_count {
9054            // Sometimes a section ends with a `call`, which (considering sections can be reordered) would put
9055            // the return address out of bounds of the section, so let's inject an `unimp` here to make sure this doesn't happen.
9056            //
9057            // If it ends up being unnecessary the optimizer will remove it anyway.
9058            let last_source = instructions.last().unwrap().0;
9059            let source = Source {
9060                section_index: last_source.section_index,
9061                offset_range: (last_source.offset_range.end..last_source.offset_range.end + 4).into(),
9062            };
9063            instructions.push((source, InstExt::Control(ControlInst::Unimplemented)));
9064        }
9065    }
9066
9067    if !instruction_overrides.is_empty() {
9068        return Err(ProgramFromElfError::other("internal error: instruction overrides map is not empty"));
9069    }
9070
9071    core::mem::drop(instruction_overrides);
9072
9073    assert!(instructions
9074        .iter()
9075        .all(|(source, _)| source.offset_range.start < source.offset_range.end));
9076
9077    {
9078        let strip_relocations_for_sections: HashSet<_> =
9079            sections_metadata.iter().copied().chain(sections_exports.iter().copied()).collect();
9080
9081        relocations.retain(|relocation_target, _| !strip_relocations_for_sections.contains(&relocation_target.section_index));
9082    }
9083
9084    let data_sections_set: HashSet<SectionIndex> = sections_ro_data
9085        .iter()
9086        .chain(sections_rw_data.iter())
9087        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
9088        .copied()
9089        .collect();
9090
9091    let section_to_function_name: BTreeMap<_, _> = elf
9092        .symbols()
9093        .filter_map(|symbol| {
9094            if symbol.kind() != object::elf::STT_FUNC {
9095                return None;
9096            }
9097
9098            let name = symbol.name()?;
9099            let (section, offset) = symbol.section_and_offset().ok()?;
9100            let target = SectionTarget {
9101                section_index: section.index(),
9102                offset,
9103            };
9104            Some((target, name))
9105        })
9106        .collect();
9107
9108    let all_jump_targets = harvest_all_jump_targets(&elf, &data_sections_set, &code_sections_set, &instructions, &relocations, &exports)?;
9109    let all_blocks = split_code_into_basic_blocks(&elf, &section_to_function_name, &all_jump_targets, instructions)?;
9110    for block in &all_blocks {
9111        for source in block.next.source.as_slice() {
9112            assert!(source.offset_range.start < source.offset_range.end);
9113        }
9114    }
9115
9116    let mut section_to_block = build_section_to_block_map(&all_blocks)?;
9117    let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks)?;
9118    let mut reachability_graph;
9119    let mut used_blocks;
9120
9121    let mut regspill_size = 0;
9122    if matches!(config.opt_level, OptLevel::O1 | OptLevel::O2) {
9123        reachability_graph = calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
9124        if matches!(config.opt_level, OptLevel::O2) {
9125            optimize_program(&config, &elf, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
9126        } else {
9127            for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9128                perform_nop_elimination(&mut all_blocks, current);
9129            }
9130        }
9131        used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);
9132        spill_fake_registers(
9133            section_regspill,
9134            &mut all_blocks,
9135            &mut reachability_graph,
9136            &imports,
9137            &used_blocks,
9138            &mut regspill_size,
9139            is_rv64,
9140        );
9141        used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
9142        merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
9143        if matches!(config.opt_level, OptLevel::O2) {
9144            replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
9145        }
9146
9147        let expected_reachability_graph =
9148            calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
9149        if reachability_graph != expected_reachability_graph {
9150            if std::env::var("POLKAVM_LINKER_DUMP_REACHABILITY_GRAPH")
9151                .map(|value| value == "1")
9152                .unwrap_or(false)
9153            {
9154                let _ = std::fs::write("/tmp/reachability_graph_actual.txt", format!("{reachability_graph:#?}"));
9155                let _ = std::fs::write("/tmp/reachability_graph_expected.txt", format!("{expected_reachability_graph:#?}"));
9156            }
9157            panic!("internal error: inconsistent reachability after optimization; this is a bug, please report it!");
9158        }
9159    } else {
9160        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9161            perform_nop_elimination(&mut all_blocks, current);
9162        }
9163
9164        reachability_graph = ReachabilityGraph::default();
9165        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9166            let reachability = reachability_graph.for_code.entry(current).or_default();
9167
9168            reachability.always_reachable = true;
9169            reachability.always_dynamically_reachable = true;
9170        }
9171
9172        for &section_index in sections_ro_data.iter().chain(sections_rw_data.iter()) {
9173            let reachability = reachability_graph.for_data.entry(section_index).or_default();
9174
9175            reachability.always_reachable = true;
9176            reachability.always_dynamically_reachable = true;
9177        }
9178
9179        for (export_index, export) in exports.iter().enumerate() {
9180            let Some(&block_target) = section_to_block.get(&export.location) else {
9181                return Err(ProgramFromElfError::other("export points to a non-block"));
9182            };
9183
9184            reachability_graph
9185                .for_code
9186                .entry(block_target)
9187                .or_default()
9188                .exports
9189                .push(export_index);
9190        }
9191
9192        used_blocks = (0..all_blocks.len()).map(BlockTarget::from_raw).collect();
9193        spill_fake_registers(
9194            section_regspill,
9195            &mut all_blocks,
9196            &mut reachability_graph,
9197            &imports,
9198            &used_blocks,
9199            &mut regspill_size,
9200            is_rv64,
9201        );
9202    }
9203
9204    elf.extend_section_to_at_least(section_regspill, regspill_size);
9205
9206    for &section_index in &sections_other {
9207        if reachability_graph.is_data_section_reachable(section_index) {
9208            return Err(ProgramFromElfError::other(format!(
9209                "unsupported section used in program graph: '{name}'",
9210                name = elf.section_by_index(section_index).name(),
9211            )));
9212        }
9213    }
9214
9215    log::debug!("Exports found: {}", exports.len());
9216
9217    {
9218        let mut count_dynamic = 0;
9219        for reachability in reachability_graph.for_code.values() {
9220            if reachability.is_dynamically_reachable() {
9221                count_dynamic += 1;
9222            }
9223        }
9224        log::debug!(
9225            "Blocks used: {}/{} ({} dynamically reachable, {} statically reachable)",
9226            reachability_graph.for_code.len(),
9227            all_blocks.len(),
9228            count_dynamic,
9229            reachability_graph.for_code.len() - count_dynamic
9230        );
9231    }
9232
9233    let section_got = elf.add_empty_data_section(".got");
9234    sections_ro_data.push(section_got);
9235    reachability_graph.mark_data_section_reachable(section_got);
9236
9237    let mut target_to_got_offset: HashMap<AnyTarget, u64> = HashMap::new();
9238    let mut got_size = 0;
9239
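    // Walk every reachable block: collect the imports that are actually used, and assign
    // a GOT slot (plus an absolute relocation filling it) to every indirectly-loaded address.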
9240    let mut used_imports = HashSet::new();
9241    for block in &all_blocks {
9242        if !reachability_graph.is_code_reachable(block.target) {
9243            continue;
9244        }
9245
9246        for (_, instruction) in &block.ops {
9247            match instruction {
9248                BasicInst::LoadAddressIndirect { target, .. } => {
9249                    if target_to_got_offset.contains_key(target) {
9250                        continue;
9251                    }
9252
9253                    let offset = target_to_got_offset.len() as u64 * u64::from(bitness);
9254                    target_to_got_offset.insert(*target, offset);
9255                    got_size = offset + u64::from(bitness);
9256
9257                    let target = match target {
9258                        AnyTarget::Data(target) => *target,
9259                        AnyTarget::Code(target) => all_blocks[target.index()].source.begin(),
9260                    };
9261
9262                    relocations.insert(
9263                        SectionTarget {
9264                            section_index: section_got,
9265                            offset,
9266                        },
9267                        RelocationKind::Abs {
9268                            target,
9269                            size: bitness.into(),
9270                        },
9271                    );
9272                }
9273                BasicInst::Ecalli { nth_import } => {
9274                    used_imports.insert(*nth_import);
9275                }
9276                _ => {}
9277            }
9278        }
9279    }
9280
9281    elf.extend_section_to_at_least(section_got, got_size.try_into().expect("overflow"));
9282    check_imports_and_assign_indexes(&mut imports, &used_imports)?;
9283
9284    let mut base_address_for_section = HashMap::new();
9285    let sections_ro_data: Vec<_> = sections_ro_data
9286        .into_iter()
9287        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
9288        .collect();
9289
9290    let sections_rw_data: Vec<_> = sections_rw_data
9291        .into_iter()
9292        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
9293        .collect();
9294
9295    let memory_config = extract_memory_config(
9296        &elf,
9297        &sections_ro_data,
9298        &sections_rw_data,
9299        &sections_bss,
9300        &sections_min_stack_size,
9301        &mut base_address_for_section,
9302        config.min_stack_size,
9303    )?;
9304
9305    log::trace!("Memory configuration: {:#?}", memory_config);
9306
9307    let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
9308    let code = emit_code(
9309        &section_to_function_name,
9310        &imports,
9311        &base_address_for_section,
9312        section_got,
9313        &target_to_got_offset,
9314        &all_blocks,
9315        &used_blocks,
9316        &used_imports,
9317        &jump_target_for_block,
9318        matches!(config.opt_level, OptLevel::O2),
9319        is_rv64,
9320    )?;
9321
9322    {
9323        // Assign dummy base addresses to all other sections.
9324        //
9325        // This is mostly used for debug info.
9326        for &section_index in &sections_other {
9327            let address = elf.section_by_index(section_index).original_address();
9328            assert!(!reachability_graph.is_data_section_reachable(section_index));
9329            assert!(base_address_for_section.insert(section_index, address).is_none());
9330        }
9331    }

    for (&relocation_target, &relocation) in &relocations {
        let section = elf.section_by_index(relocation_target.section_index);
        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
            continue;
        }

        log::trace!(
            "Applying relocation to '{}'[0x{:x}] {relocation_target}: {:?}",
            section.name(),
            relocation_target.offset,
            relocation
        );

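        // Writes `value` into `data` at `relative_address` using the width given
        // by `size`, erroring out if the value doesn't fit in the narrower width.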
        fn write_generic(size: RelocationSize, data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
            match size {
                RelocationSize::U64 => write_u64(data, relative_address, value),
                RelocationSize::U32 => {
                    let Ok(value) = u32::try_from(value) else {
                        return Err(ProgramFromElfError::other(
                            "overflow when applying relocations: value doesn't fit in a u32",
                        ));
                    };

                    write_u32(data, relative_address, value)
                }
                RelocationSize::U16 => {
                    let Ok(value) = u16::try_from(value) else {
                        return Err(ProgramFromElfError::other(
                            "overflow when applying relocations: value doesn't fit in a u16",
                        ));
                    };

                    write_u16(data, relative_address, value)
                }
                RelocationSize::U8 => {
                    let Ok(value) = u8::try_from(value) else {
                        return Err(ProgramFromElfError::other(
                            "overflow when applying relocations: value doesn't fit in a u8",
                        ));
                    };

                    data[relative_address as usize] = value;
                    Ok(())
                }
            }
        }

        match relocation {
            RelocationKind::Offset { origin, target, size } => {
                // These relocations should only be used in debug info sections and RO data sections.
                if reachability_graph.is_data_section_reachable(section.index()) && !matches!(size, SizeRelocationSize::Generic(..)) {
                    return Err(ProgramFromElfError::other(format!(
                        "relocation was not expected in section '{name}': {relocation:?}",
                        name = section.name(),
                    )));
                }

                let Some(&origin_section_address) = base_address_for_section.get(&origin.section_index) else {
                    return Err(ProgramFromElfError::other(format!(
                        "internal error: relocation in '{name}' ({relocation_target}) refers to an origin section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
                        name = section.name(),
                        origin_name = elf.section_by_index(origin.section_index).name(),
                        target_name = elf.section_by_index(target.section_index).name(),
                    )));
                };

                let Some(&target_section_address) = base_address_for_section.get(&target.section_index) else {
                    return Err(ProgramFromElfError::other(format!(
                        "internal error: relocation in '{name}' ({relocation_target}) refers to a target section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
                        name = section.name(),
                        origin_name = elf.section_by_index(origin.section_index).name(),
                        target_name = elf.section_by_index(target.section_index).name(),
                    )));
                };

                let range = origin_section_address.wrapping_add(origin.offset)..target_section_address.wrapping_add(target.offset);
                let data = elf.section_data_mut(relocation_target.section_index);
                let mut value = range.end.wrapping_sub(range.start);
                match size {
                    SizeRelocationSize::Uleb128 => {
                        overwrite_uleb128(data, relocation_target.offset as usize, value)?;
                    }
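                    // Patches only the low six bits of a single byte; presumably
                    // emitted for R_RISCV_SET6/R_RISCV_SUB6-style relocations in debug info.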
                    SizeRelocationSize::SixBits => {
                        let mask = 0b00111111;
                        if value > mask {
                            return Err(ProgramFromElfError::other("six bit relocation overflow"));
                        }

                        let output = (u64::from(read_u8(data, relocation_target.offset)?) & (!mask)) | (value & mask);
                        data[relocation_target.offset as usize] = output as u8;
                    }
                    SizeRelocationSize::Generic(size) => {
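                        // A backwards range yields a negative delta. If the delta fits
                        // the target width as a signed value, mask it down so the unsigned
                        // bounds check in `write_generic` doesn't reject it.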
                        if range.end < range.start {
                            match size {
                                RelocationSize::U8 => {
                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
                                        let new_value: i8 = new_value;
                                        value = cast(cast(new_value).to_unsigned()).to_u64();
                                    }
                                }
                                RelocationSize::U16 => {
                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
                                        let new_value: i16 = new_value;
                                        value = cast(cast(new_value).to_unsigned()).to_u64();
                                    }
                                }
                                RelocationSize::U32 => {
                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
                                        let new_value: i32 = new_value;
                                        value = cast(cast(new_value).to_unsigned()).to_u64();
                                    }
                                }
                                RelocationSize::U64 => {}
                            }
                        }

                        write_generic(size, data, relocation_target.offset, value)?;
                    }
                }
            }
            RelocationKind::Abs { target, size } => {
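                // Code targets are resolved through the jump table; data targets resolve
                // to the section's relocated base address plus the original offset.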
                if let Some(&block_target) = section_to_block.get(&target) {
                    let Some(jump_target) = jump_target_for_block[block_target.index()] else {
                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
                            // Most likely debug info for something that was stripped out.
                            let data = elf.section_data_mut(relocation_target.section_index);
                            write_generic(size, data, relocation_target.offset, 0)?;
                            continue;
                        }

                        return Err(ProgramFromElfError::other(format!(
                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no associated basic block",
                            location_name = elf.section_by_index(relocation_target.section_index).name(),
                            target_name = elf.section_by_index(target.section_index).name(),
                            target_offset = target.offset,
                        )));
                    };

                    let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
                    let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
                        return Err(ProgramFromElfError::other("overflow when applying a jump target relocation"));
                    };

                    let data = elf.section_data_mut(relocation_target.section_index);
                    write_generic(size, data, relocation_target.offset, jump_target.into())?;
                } else {
                    let Some(section_base) = base_address_for_section.get(&target.section_index) else {
                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
                            let data = elf.section_data_mut(relocation_target.section_index);
                            write_generic(size, data, relocation_target.offset, 0)?;
                            continue;
                        }

                        return Err(ProgramFromElfError::other(format!(
                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no relocated base address assigned",
                            location_name = elf.section_by_index(relocation_target.section_index).name(),
                            target_name = elf.section_by_index(target.section_index).name(),
                            target_offset = target.offset,
                        )));
                    };

                    let Some(value) = section_base.checked_add(target.offset) else {
                        return Err(ProgramFromElfError::other("overflow when applying an absolute relocation"));
                    };

                    let data = elf.section_data_mut(relocation_target.section_index);
                    write_generic(size, data, relocation_target.offset, value)?;
                }
            }
            RelocationKind::JumpTable { target_code, target_base } => {
                let Some(&block_target) = section_to_block.get(&target_code) else {
                    return Err(ProgramFromElfError::other(
                        "jump table relocation doesn't refer to the start of a basic block",
                    ));
                };

                let Some(jump_target) = jump_target_for_block[block_target.index()] else {
                    return Err(ProgramFromElfError::other(
                        "no jump target for block was found when applying a jump table relocation",
                    ));
                };

                let Some(section_base) = base_address_for_section.get(&target_base.section_index) else {
                    return Err(ProgramFromElfError::other(
                        "no base address for section when applying a jump table relocation",
                    ));
                };

                let Some(base_address) = section_base.checked_add(target_base.offset) else {
                    return Err(ProgramFromElfError::other(
                        "overflow when applying a jump table relocation: section base and offset cannot be added together",
                    ));
                };

                let Ok(base_address) = u32::try_from(base_address) else {
                    return Err(ProgramFromElfError::other(
                        "overflow when applying a jump table relocation: base address doesn't fit in a u32",
                    ));
                };

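                // Jump table entries are stored relative to the base address,
                // so write the delta rather than the absolute target.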
                let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
                let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
                    return Err(ProgramFromElfError::other(
                        "overflow when applying a jump table relocation: jump target is too big",
                    ));
                };

                let value = jump_target.wrapping_sub(base_address);
                let data = elf.section_data_mut(relocation_target.section_index);
                write_u32(data, relocation_target.offset, value)?;
            }
        }
    }

    let mut location_map: HashMap<SectionTarget, Arc<[Location]>> = HashMap::new();
    if !config.strip {
        let mut string_cache = crate::utils::StringCache::default();
        let dwarf_info = crate::dwarf::load_dwarf(&mut string_cache, &elf, &relocations, &section_map)?;
        location_map = dwarf_info.location_map;

        // If no DWARF info is present, fall back to the symbol table.
        for (source, name) in parse_function_symbols(&elf)? {
            if location_map.contains_key(&source.begin()) {
                continue;
            }

            let (namespace, function_name) = split_function_name(&name);
            let namespace = if namespace.is_empty() {
                None
            } else {
                Some(string_cache.dedup(&namespace))
            };

            let location = Location {
                kind: FrameKind::Enter,
                namespace,
                function_name: Some(string_cache.dedup(&function_name)),
                source_code_location: None,
            };

            let location_stack: Arc<[Location]> = vec![location].into();
            for target in source.iter() {
                location_map.insert(target, Arc::clone(&location_stack));
            }
        }
    }

    log::trace!("Instruction count: {}", code.len());

    let mut builder = if elf.is_64() {
        ProgramBlobBuilder::new_64bit()
    } else {
        ProgramBlobBuilder::new()
    };

    builder.set_ro_data_size(memory_config.ro_data_size);
    builder.set_rw_data_size(memory_config.rw_data_size);
    builder.set_stack_size(memory_config.min_stack_size);

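    // Flatten each memory region's list of section slices and zero-padding runs
    // into a single contiguous blob.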
    let [ro_data, rw_data] = {
        [memory_config.ro_data, memory_config.rw_data].map(|ranges| {
            let mut buffer = Vec::new();
            for range in ranges {
                match range {
                    DataRef::Section { section_index, range } => {
                        let slice = &elf.section_by_index(section_index).data()[range];
                        buffer.extend_from_slice(slice);
                    }
                    DataRef::Padding(bytes) => {
                        let new_size = buffer.len() + bytes;
                        buffer.resize(new_size, 0);
                    }
                }
            }
            buffer
        })
    };

    builder.set_ro_data(ro_data);
    builder.set_rw_data(rw_data);

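    // Emit the imports ordered by their explicitly assigned indexes; the assert
    // below checks that the indexes are dense and start at zero.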
    {
        let mut sorted_imports = imports.clone();
        sorted_imports.sort_by(|a, b| {
            a.metadata
                .index
                .cmp(&b.metadata.index)
                .then_with(|| a.metadata.symbol.cmp(&b.metadata.symbol))
        });

        let mut next_index = 0;
        for import in sorted_imports {
            let Some(index) = import.index else {
                continue;
            };

            assert_eq!(index, next_index);
            next_index += 1;

            builder.add_import(&import.metadata.symbol);
        }
    }

    let mut export_count = 0;
    for current in used_blocks {
        for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
            let export = &exports[export_index];
            let jump_target = jump_target_for_block[current.index()]
                .expect("internal error: export metadata points to a block without a jump target assigned");

            builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
            export_count += 1;
        }
    }
    assert_eq!(export_count, exports.len());

    let mut locations_for_instruction: Vec<Option<Arc<[Location]>>> = Vec::with_capacity(code.len());
    let mut raw_code = Vec::with_capacity(code.len());

    for (nth_inst, (source_stack, inst)) in code.into_iter().enumerate() {
        raw_code.push(inst);

        let mut function_name = None;
        if !config.strip {
            // Two or more addresses can point to the same instruction (e.g. in case of macro op fusion).
            // Two or more instructions can also have the same address (e.g. in case of jump targets).

            // TODO: Use a smallvec.
            let mut list = Vec::new();
            for source in source_stack.as_slice() {
                for offset in (source.offset_range.start..source.offset_range.end).step_by(2) {
                    let target = SectionTarget {
                        section_index: source.section_index,
                        offset,
                    };

                    if let Some(locations) = location_map.get(&target) {
                        if let Some(last) = list.last() {
                            if locations == last {
                                // If we inlined a basic block from the same function, do not repeat the same location.
                                break;
                            }
                        } else {
                            function_name = locations[0].function_name.as_deref();
                        }

                        list.push(Arc::clone(locations));
                        break;
                    }
                }

                if list.is_empty() {
                    // If the toplevel source doesn't have a location, don't try the lower ones.
                    break;
                }
            }

            if list.is_empty() {
                locations_for_instruction.push(None);
            } else if list.len() == 1 {
                locations_for_instruction.push(list.into_iter().next());
            } else {
                let mut new_list = Vec::new();
                for sublist in list {
                    new_list.extend(sublist.iter().cloned());
                }

                locations_for_instruction.push(Some(new_list.into()));
            }
        }

        log::trace!(
            "Code: 0x{source_address:x} [{function_name}] -> {source_stack} -> #{nth_inst}: {inst}",
            source_address = {
                elf.section_by_index(source_stack.top().section_index)
                    .original_address()
                    .wrapping_add(source_stack.top().offset_range.start)
            },
            function_name = function_name.unwrap_or("")
        );
    }

    for symbol in config.dispatch_table {
        builder.add_dispatch_table_entry(symbol);
    }

    builder.set_code(&raw_code, &jump_table);

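    // Parse the blob built so far to learn the byte offset of every instruction;
    // the debug info emitted below is keyed on these offsets.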
    let mut offsets = Vec::new();
    if !config.strip {
        let blob = ProgramBlob::parse(builder.to_vec().into())?;
        offsets = blob
            .instructions(bitness)
            .map(|instruction| (instruction.offset, instruction.next_offset))
            .collect();
        assert_eq!(offsets.len(), locations_for_instruction.len());

        emit_debug_info(&mut builder, &locations_for_instruction, &offsets);
    }

    let raw_blob = builder.to_vec();

    log::debug!("Built a program of {} bytes", raw_blob.len());
    let blob = ProgramBlob::parse(raw_blob[..].into())?;

    // Sanity check that our debug info was properly emitted and can be parsed.
    if cfg!(debug_assertions) && !config.strip {
        'outer: for (nth_instruction, locations) in locations_for_instruction.iter().enumerate() {
            let (program_counter, _) = offsets[nth_instruction];
            let line_program = blob.get_debug_line_program_at(program_counter).unwrap();
            let Some(locations) = locations else {
                assert!(line_program.is_none());
                continue;
            };

            let mut line_program = line_program.unwrap();
            while let Some(region_info) = line_program.run().unwrap() {
                if !region_info.instruction_range().contains(&program_counter) {
                    continue;
                }

                assert!(region_info.frames().len() <= locations.len());
                for (actual, expected) in region_info.frames().zip(locations.iter()) {
                    assert_eq!(actual.kind(), expected.kind);
                    assert_eq!(actual.namespace().unwrap(), expected.namespace.as_deref());
                    assert_eq!(actual.function_name_without_namespace().unwrap(), expected.function_name.as_deref());
                    assert_eq!(
                        actual.path().unwrap().map(Cow::Borrowed),
                        expected
                            .source_code_location
                            .as_ref()
                            .map(|location| simplify_path(location.path()))
                    );
                    assert_eq!(
                        actual.line(),
                        expected
                            .source_code_location
                            .as_ref()
                            .and_then(|location| location.line())
                            .and_then(|line| if line != 0 { Some(line) } else { None })
                    );
                    assert_eq!(
                        actual.column(),
                        expected
                            .source_code_location
                            .as_ref()
                            .and_then(|location| location.column())
                            .and_then(|column| if column != 0 { Some(column) } else { None })
                    );
                }

                continue 'outer;
            }

            panic!("internal error: region not found for instruction");
        }
    }

    Ok(raw_blob)
}

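/// Replaces a `/home/<user>` prefix with `~` for display purposes, so that e.g.
/// `/home/alice/repo/src/lib.rs` comes out as `~/repo/src/lib.rs`; any other
/// path is returned unchanged.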
fn simplify_path(path: &str) -> Cow<str> {
    // TODO: Sanitize macOS and Windows paths.
    if let Some(p) = path.strip_prefix("/home/") {
        if let Some(index) = p.bytes().position(|byte| byte == b'/') {
            return format!("~{}", &p[index..]).into();
        }
    }

    path.into()
}

fn emit_debug_info(
    builder: &mut ProgramBlobBuilder,
    locations_for_instruction: &[Option<Arc<[Location]>>],
    offsets: &[(ProgramCounter, ProgramCounter)],
) {
    #[derive(Default)]
    struct DebugStringsBuilder<'a> {
        map: HashMap<Cow<'a, str>, u32>,
        section: Vec<u8>,
        write_protected: bool,
    }

    impl<'a> DebugStringsBuilder<'a> {
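        // Interns a string in the debug strings section, encoding it as a varint
        // length followed by the raw bytes, and returns the entry's byte offset.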
        fn dedup_cow(&mut self, s: Cow<'a, str>) -> u32 {
            if let Some(offset) = self.map.get(&s) {
                return *offset;
            }

            assert!(!self.write_protected);

            let offset = self.section.len();
            let mut buffer = [0xff_u8; varint::MAX_VARINT_LENGTH];
            let length = varint::write_varint(s.len().try_into().expect("debug string length overflow"), &mut buffer);
            self.section.extend_from_slice(&buffer[..length]);
            self.section.extend_from_slice(s.as_bytes());
            let offset: u32 = offset.try_into().expect("debug string section length overflow");
            self.map.insert(s, offset);
            offset
        }

        fn dedup(&mut self, s: &'a str) -> u32 {
            self.dedup_cow(s.into())
        }
    }

    let mut dbg_strings = DebugStringsBuilder::default();
    let empty_string_id = dbg_strings.dedup("");

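    // Consecutive instructions sharing the same namespace, function name and path
    // are merged into a single group, and each group is encoded as one line program.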
    struct Group<'a> {
        namespace: Option<Arc<str>>,
        function_name: Option<Arc<str>>,
        path: Option<Cow<'a, str>>,
        instruction_position: usize,
        instruction_count: usize,
        program_counter_start: ProgramCounter,
        program_counter_end: ProgramCounter,
    }

    impl<'a> Group<'a> {
        fn key(&self) -> (Option<&str>, Option<&str>, Option<&str>) {
            (self.namespace.as_deref(), self.function_name.as_deref(), self.path.as_deref())
        }
    }

    let mut groups: Vec<Group> = Vec::new();
    for (instruction_position, locations) in locations_for_instruction.iter().enumerate() {
        let group = if let Some(locations) = locations {
            for location in locations.iter() {
                if let Some(ref namespace) = location.namespace {
                    dbg_strings.dedup(namespace);
                }

                if let Some(ref name) = location.function_name {
                    dbg_strings.dedup(name);
                }

                if let Some(ref location) = location.source_code_location {
                    dbg_strings.dedup_cow(simplify_path(location.path()));
                }
            }

            let location = &locations[0];
            Group {
                namespace: location.namespace.clone(),
                function_name: location.function_name.clone(),
                path: location.source_code_location.as_ref().map(|target| simplify_path(target.path())),
                instruction_position,
                instruction_count: 1,
                program_counter_start: offsets[instruction_position].0,
                program_counter_end: offsets[instruction_position].1,
            }
        } else {
            Group {
                namespace: None,
                function_name: None,
                path: None,
                instruction_position,
                instruction_count: 1,
                program_counter_start: offsets[instruction_position].0,
                program_counter_end: offsets[instruction_position].1,
            }
        };

        if let Some(last_group) = groups.last_mut() {
            if last_group.key() == group.key() {
                assert_eq!(last_group.instruction_position + last_group.instruction_count, instruction_position);
                last_group.instruction_count += 1;
                last_group.program_counter_end = group.program_counter_end;
                continue;
            }
        }

        groups.push(group);
    }

    groups.retain(|group| group.function_name.is_some() || group.path.is_some());

    log::trace!("Location groups: {}", groups.len());
    dbg_strings.write_protected = true;

    let mut section_line_programs = Vec::new();
    let mut info_offsets = Vec::with_capacity(groups.len());
    {
        let mut writer = Writer::new(&mut section_line_programs);
        let writer = &mut writer;

        let offset_base = writer.len();
        writer.push_byte(program::VERSION_DEBUG_LINE_PROGRAM_V1);
        for group in &groups {
            let info_offset: u32 = (writer.len() - offset_base).try_into().expect("function info offset overflow");
            info_offsets.push(info_offset);

            #[derive(Default)]
            struct LineProgramFrame {
                kind: Option<FrameKind>,
                namespace: Option<Arc<str>>,
                function_name: Option<Arc<str>>,
                path: Option<Arc<str>>,
                line: Option<u32>,
                column: Option<u32>,
            }

            #[derive(Default)]
            struct LineProgramState {
                stack: Vec<LineProgramFrame>,
                stack_depth: usize,
                mutation_depth: usize,

                queued_count: u32,
            }

            impl LineProgramState {
                fn flush_if_any_are_queued(&mut self, writer: &mut Writer) {
                    if self.queued_count == 0 {
                        return;
                    }

                    if self.queued_count == 1 {
                        writer.push_byte(LineProgramOp::FinishInstruction as u8);
                    } else {
                        writer.push_byte(LineProgramOp::FinishMultipleInstructions as u8);
                        writer.push_varint(self.queued_count);
                    }

                    self.queued_count = 0;
                }

                fn set_mutation_depth(&mut self, writer: &mut Writer, depth: usize) {
                    self.flush_if_any_are_queued(writer);

                    if depth == self.mutation_depth {
                        return;
                    }

                    writer.push_byte(LineProgramOp::SetMutationDepth as u8);
                    writer.push_varint(depth as u32);
                    self.mutation_depth = depth;
                }

                fn set_stack_depth(&mut self, writer: &mut Writer, depth: usize) {
                    if self.stack_depth == depth {
                        return;
                    }

                    while depth > self.stack.len() {
                        self.stack.push(LineProgramFrame::default());
                    }

                    self.flush_if_any_are_queued(writer);

                    writer.push_byte(LineProgramOp::SetStackDepth as u8);
                    writer.push_varint(depth as u32);
                    self.stack_depth = depth;
                }

                fn finish_instruction(&mut self, writer: &mut Writer, next_depth: usize, instruction_length: u32) {
                    self.queued_count += instruction_length;

                    enum Direction {
                        GoDown,
                        GoUp,
                    }

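                    // Only a depth change of exactly one has a fused finish-and-adjust
                    // opcode; any other transition stays queued until the next
                    // `set_stack_depth` call flushes it.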
                    let dir = if next_depth == self.stack_depth + 1 {
                        Direction::GoDown
                    } else if next_depth + 1 == self.stack_depth {
                        Direction::GoUp
                    } else {
                        return;
                    };

                    while next_depth > self.stack.len() {
                        self.stack.push(LineProgramFrame::default());
                    }

                    match (self.queued_count == 1, dir) {
                        (true, Direction::GoDown) => {
                            writer.push_byte(LineProgramOp::FinishInstructionAndIncrementStackDepth as u8);
                        }
                        (false, Direction::GoDown) => {
                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndIncrementStackDepth as u8);
                            writer.push_varint(self.queued_count);
                        }
                        (true, Direction::GoUp) => {
                            writer.push_byte(LineProgramOp::FinishInstructionAndDecrementStackDepth as u8);
                        }
                        (false, Direction::GoUp) => {
                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndDecrementStackDepth as u8);
                            writer.push_varint(self.queued_count);
                        }
                    }

                    self.stack_depth = next_depth;
                    self.queued_count = 0;
                }
            }

            let mut state = LineProgramState::default();
            for nth_instruction in group.instruction_position..group.instruction_position + group.instruction_count {
                let locations = locations_for_instruction[nth_instruction].as_ref().unwrap();
                state.set_stack_depth(writer, locations.len());

                for (depth, location) in locations.iter().enumerate() {
                    let new_path = location
                        .source_code_location
                        .as_ref()
                        .map(|location| simplify_path(location.path()));
                    let new_line = location.source_code_location.as_ref().and_then(|location| location.line());
                    let new_column = location.source_code_location.as_ref().and_then(|location| location.column());

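                    // Emit mutation opcodes only for the fields that actually
                    // differ from the frame's current state.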
                    let changed_kind = state.stack[depth].kind != Some(location.kind);
                    let changed_namespace = state.stack[depth].namespace != location.namespace;
                    let changed_function_name = state.stack[depth].function_name != location.function_name;
                    let changed_path = state.stack[depth].path.as_deref().map(Cow::Borrowed) != new_path;
                    let changed_line = state.stack[depth].line != new_line;
                    let changed_column = state.stack[depth].column != new_column;

                    if changed_kind {
                        state.set_mutation_depth(writer, depth);
                        state.stack[depth].kind = Some(location.kind);
                        let kind = match location.kind {
                            FrameKind::Enter => LineProgramOp::SetKindEnter,
                            FrameKind::Call => LineProgramOp::SetKindCall,
                            FrameKind::Line => LineProgramOp::SetKindLine,
                        };
                        writer.push_byte(kind as u8);
                    }

                    if changed_namespace {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetNamespace as u8);
                        state.stack[depth].namespace = location.namespace.clone();

                        let namespace_offset = location
                            .namespace
                            .as_ref()
                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
                        writer.push_varint(namespace_offset);
                    }

                    if changed_function_name {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetFunctionName as u8);
                        state.stack[depth].function_name = location.function_name.clone();

                        let function_name_offset = location
                            .function_name
                            .as_ref()
                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
                        writer.push_varint(function_name_offset);
                    }

                    if changed_path {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetPath as u8);
                        state.stack[depth].path =
                            location
                                .source_code_location
                                .as_ref()
                                .map(|location| match simplify_path(location.path()) {
                                    Cow::Borrowed(_) => Arc::clone(location.path()),
                                    Cow::Owned(path) => path.into(),
                                });

                        let path_offset = location
                            .source_code_location
                            .as_ref()
                            .map_or(empty_string_id, |location| dbg_strings.dedup_cow(simplify_path(location.path())));
                        writer.push_varint(path_offset);
                    }

                    if changed_line {
                        state.set_mutation_depth(writer, depth);
                        match (state.stack[depth].line, new_line) {
                            (Some(old_value), Some(new_value)) if old_value + 1 == new_value => {
                                writer.push_byte(LineProgramOp::IncrementLine as u8);
                            }
                            (Some(old_value), Some(new_value)) if new_value > old_value => {
                                writer.push_byte(LineProgramOp::AddLine as u8);
                                writer.push_varint(new_value - old_value);
                            }
                            (Some(old_value), Some(new_value)) if new_value < old_value => {
                                writer.push_byte(LineProgramOp::SubLine as u8);
                                writer.push_varint(old_value - new_value);
                            }
                            _ => {
                                writer.push_byte(LineProgramOp::SetLine as u8);
                                writer.push_varint(new_line.unwrap_or(0));
                            }
                        }
                        state.stack[depth].line = new_line;
                    }

                    if changed_column {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetColumn as u8);
                        state.stack[depth].column = new_column;
                        writer.push_varint(new_column.unwrap_or(0));
                    }
                }

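                // The stack depth the next instruction needs; zero when there is
                // no next instruction or it has no locations attached.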
                let next_depth = locations_for_instruction
                    .get(nth_instruction + 1)
                    .and_then(|next_locations| next_locations.as_ref().map(|xs| xs.len()))
                    .unwrap_or(0);
                state.finish_instruction(writer, next_depth, (offsets[nth_instruction].1).0 - (offsets[nth_instruction].0).0);
            }

            state.flush_if_any_are_queued(writer);
            writer.push_byte(LineProgramOp::FinishProgram as u8);
        }
    }

    assert_eq!(info_offsets.len(), groups.len());

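    // One record per group: the group's start and end program counters followed
    // by the offset of its line program.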
    let mut section_line_program_ranges = Vec::new();
    {
        let mut writer = Writer::new(&mut section_line_program_ranges);
        for (group, info_offset) in groups.iter().zip(info_offsets.into_iter()) {
            writer.push_u32(group.program_counter_start.0);
            writer.push_u32(group.program_counter_end.0);
            writer.push_u32(info_offset);
        }
    }

    builder.add_custom_section(program::SECTION_OPT_DEBUG_STRINGS, dbg_strings.section);
    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAMS, section_line_programs);
    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAM_RANGES, section_line_program_ranges);
}