wasmtime_environ/compile/
module_artifacts.rs

1//! Definitions of runtime structures and metadata which are serialized into ELF
2//! with `postcard` as part of a module's compilation process.
3
4use crate::prelude::*;
5use crate::{
6    obj, CompiledFunctionInfo, CompiledModuleInfo, DebugInfoData, DefinedFuncIndex, FunctionLoc,
7    FunctionName, MemoryInitialization, Metadata, ModuleInternedTypeIndex, ModuleTranslation,
8    PrimaryMap, Tunables,
9};
10use anyhow::{bail, Result};
11use object::write::{Object, SectionId, StandardSegment, WritableBuffer};
12use object::SectionKind;
13use std::ops::Range;
14
15/// Helper structure to create an ELF file as a compilation artifact.
16///
17/// This structure exposes the process which Wasmtime will encode a core wasm
18/// module into an ELF file, notably managing data sections and all that good
19/// business going into the final file.
20pub struct ObjectBuilder<'a> {
21    /// The `object`-crate-defined ELF file write we're using.
22    obj: Object<'a>,
23
24    /// General compilation configuration.
25    tunables: &'a Tunables,
26
27    /// The section identifier for "rodata" which is where wasm data segments
28    /// will go.
29    data: SectionId,
30
31    /// The section identifier for function name information, or otherwise where
32    /// the `name` custom section of wasm is copied into.
33    ///
34    /// This is optional and lazily created on demand.
35    names: Option<SectionId>,
36
37    /// The section identifier for dwarf information copied from the original
38    /// wasm files.
39    ///
40    /// This is optional and lazily created on demand.
41    dwarf: Option<SectionId>,
42}
43
44impl<'a> ObjectBuilder<'a> {
45    /// Creates a new builder for the `obj` specified.
46    pub fn new(mut obj: Object<'a>, tunables: &'a Tunables) -> ObjectBuilder<'a> {
47        let data = obj.add_section(
48            obj.segment_name(StandardSegment::Data).to_vec(),
49            obj::ELF_WASM_DATA.as_bytes().to_vec(),
50            SectionKind::ReadOnlyData,
51        );
52        ObjectBuilder {
53            obj,
54            tunables,
55            data,
56            names: None,
57            dwarf: None,
58        }
59    }
60
61    /// Insert the wasm raw wasm-based debuginfo into the output.
62    /// Note that this is distinct from the native debuginfo
63    /// possibly generated by the native compiler, hence these sections
64    /// getting wasm-specific names.
65    pub fn push_debuginfo(
66        &mut self,
67        dwarf: &mut Vec<(u8, Range<u64>)>,
68        debuginfo: &DebugInfoData<'_>,
69    ) {
70        self.push_debug(dwarf, &debuginfo.dwarf.debug_abbrev);
71        self.push_debug(dwarf, &debuginfo.dwarf.debug_addr);
72        self.push_debug(dwarf, &debuginfo.dwarf.debug_aranges);
73        self.push_debug(dwarf, &debuginfo.dwarf.debug_info);
74        self.push_debug(dwarf, &debuginfo.dwarf.debug_line);
75        self.push_debug(dwarf, &debuginfo.dwarf.debug_line_str);
76        self.push_debug(dwarf, &debuginfo.dwarf.debug_str);
77        self.push_debug(dwarf, &debuginfo.dwarf.debug_str_offsets);
78        self.push_debug(dwarf, &debuginfo.debug_ranges);
79        self.push_debug(dwarf, &debuginfo.debug_rnglists);
80        self.push_debug(dwarf, &debuginfo.debug_cu_index);
81
82        // Sort this for binary-search-lookup later in `symbolize_context`.
83        dwarf.sort_by_key(|(id, _)| *id);
84    }
85
86    /// Completes compilation of the `translation` specified, inserting
87    /// everything necessary into the `Object` being built.
88    ///
89    /// This function will consume the final results of compiling a wasm module
90    /// and finish the ELF image in-progress as part of `self.obj` by appending
91    /// any compiler-agnostic sections.
92    ///
93    /// The auxiliary `CompiledModuleInfo` structure returned here has also been
94    /// serialized into the object returned, but if the caller will quickly
95    /// turn-around and invoke `CompiledModule::from_artifacts` after this then
96    /// the information can be passed to that method to avoid extra
97    /// deserialization. This is done to avoid a serialize-then-deserialize for
98    /// API calls like `Module::new` where the compiled module is immediately
99    /// going to be used.
100    ///
101    /// The various arguments here are:
102    ///
103    /// * `translation` - the core wasm translation that's being completed.
104    ///
105    /// * `funcs` - compilation metadata about functions within the translation
106    ///   as well as where the functions are located in the text section and any
107    ///   associated trampolines.
108    ///
109    /// * `wasm_to_array_trampolines` - list of all trampolines necessary for
110    ///   Wasm callers calling array callees (e.g. `Func::wrap`). One for each
111    ///   function signature in the module. Must be sorted by `SignatureIndex`.
112    ///
113    /// Returns the `CompiledModuleInfo` corresponding to this core Wasm module
114    /// as a result of this append operation. This is then serialized into the
115    /// final artifact by the caller.
116    pub fn append(
117        &mut self,
118        translation: ModuleTranslation<'_>,
119        funcs: PrimaryMap<DefinedFuncIndex, CompiledFunctionInfo>,
120        wasm_to_array_trampolines: Vec<(ModuleInternedTypeIndex, FunctionLoc)>,
121    ) -> Result<CompiledModuleInfo> {
122        let ModuleTranslation {
123            mut module,
124            debuginfo,
125            has_unparsed_debuginfo,
126            data,
127            data_align,
128            passive_data,
129            ..
130        } = translation;
131
132        // Place all data from the wasm module into a section which will the
133        // source of the data later at runtime. This additionally keeps track of
134        // the offset of
135        let mut total_data_len = 0;
136        let data_offset = self
137            .obj
138            .append_section_data(self.data, &[], data_align.unwrap_or(1));
139        for (i, data) in data.iter().enumerate() {
140            // The first data segment has its alignment specified as the alignment
141            // for the entire section, but everything afterwards is adjacent so it
142            // has alignment of 1.
143            let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
144            self.obj.append_section_data(self.data, data, align);
145            total_data_len += data.len();
146        }
147        for data in passive_data.iter() {
148            self.obj.append_section_data(self.data, data, 1);
149        }
150
151        // If any names are present in the module then the `ELF_NAME_DATA` section
152        // is create and appended.
153        let mut func_names = Vec::new();
154        if debuginfo.name_section.func_names.len() > 0 {
155            let name_id = *self.names.get_or_insert_with(|| {
156                self.obj.add_section(
157                    self.obj.segment_name(StandardSegment::Data).to_vec(),
158                    obj::ELF_NAME_DATA.as_bytes().to_vec(),
159                    SectionKind::ReadOnlyData,
160                )
161            });
162            let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
163            sorted_names.sort_by_key(|(idx, _name)| *idx);
164            for (idx, name) in sorted_names {
165                let offset = self.obj.append_section_data(name_id, name.as_bytes(), 1);
166                let offset = match u32::try_from(offset) {
167                    Ok(offset) => offset,
168                    Err(_) => bail!("name section too large (> 4gb)"),
169                };
170                let len = u32::try_from(name.len()).unwrap();
171                func_names.push(FunctionName {
172                    idx: *idx,
173                    offset,
174                    len,
175                });
176            }
177        }
178
179        // Data offsets in `MemoryInitialization` are offsets within the
180        // `translation.data` list concatenated which is now present in the data
181        // segment that's appended to the object. Increase the offsets by
182        // `self.data_size` to account for any previously added module.
183        let data_offset = u32::try_from(data_offset).unwrap();
184        match &mut module.memory_initialization {
185            MemoryInitialization::Segmented(list) => {
186                for segment in list {
187                    segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
188                    segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
189                }
190            }
191            MemoryInitialization::Static { map } => {
192                for (_, segment) in map {
193                    if let Some(segment) = segment {
194                        segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
195                        segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
196                    }
197                }
198            }
199        }
200
201        // Data offsets for passive data are relative to the start of
202        // `translation.passive_data` which was appended to the data segment
203        // of this object, after active data in `translation.data`. Update the
204        // offsets to account prior modules added in addition to active data.
205        let data_offset = data_offset + u32::try_from(total_data_len).unwrap();
206        for (_, range) in module.passive_data_map.iter_mut() {
207            range.start = range.start.checked_add(data_offset).unwrap();
208            range.end = range.end.checked_add(data_offset).unwrap();
209        }
210
211        // Insert the wasm raw wasm-based debuginfo into the output, if
212        // requested. Note that this is distinct from the native debuginfo
213        // possibly generated by the native compiler, hence these sections
214        // getting wasm-specific names.
215        let mut dwarf = Vec::new();
216        if self.tunables.parse_wasm_debuginfo {
217            self.push_debuginfo(&mut dwarf, &debuginfo);
218        }
219
220        Ok(CompiledModuleInfo {
221            module,
222            funcs,
223            wasm_to_array_trampolines,
224            func_names,
225            meta: Metadata {
226                has_unparsed_debuginfo,
227                code_section_offset: debuginfo.wasm_file.code_section_offset,
228                has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
229                dwarf,
230            },
231        })
232    }
233
234    fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
235    where
236        T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
237    {
238        let data = section.reader().slice();
239        if data.is_empty() {
240            return;
241        }
242        let section_id = *self.dwarf.get_or_insert_with(|| {
243            self.obj.add_section(
244                self.obj.segment_name(StandardSegment::Debug).to_vec(),
245                obj::ELF_WASMTIME_DWARF.as_bytes().to_vec(),
246                SectionKind::Debug,
247            )
248        });
249        let offset = self.obj.append_section_data(section_id, data, 1);
250        dwarf.push((T::id() as u8, offset..offset + data.len() as u64));
251    }
252
253    /// Creates the `ELF_WASMTIME_INFO` section from the given serializable data
254    /// structure.
255    pub fn serialize_info<T>(&mut self, info: &T)
256    where
257        T: serde::Serialize,
258    {
259        let section = self.obj.add_section(
260            self.obj.segment_name(StandardSegment::Data).to_vec(),
261            obj::ELF_WASMTIME_INFO.as_bytes().to_vec(),
262            SectionKind::ReadOnlyData,
263        );
264        let data = postcard::to_allocvec(info).unwrap();
265        self.obj.set_section_data(section, data, 1);
266    }
267
268    /// Serializes `self` into a buffer. This can be used for execution as well
269    /// as serialization.
270    pub fn finish<T: WritableBuffer>(self, t: &mut T) -> Result<()> {
271        self.obj.emit(t).map_err(|e| e.into())
272    }
273}
274
275/// A type which can be the result of serializing an object.
276pub trait FinishedObject: Sized {
277    /// State required for `finish_object`, if any.
278    type State;
279
280    /// Emit the object as `Self`.
281    fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result<Self>;
282}
283
284impl FinishedObject for Vec<u8> {
285    type State = ();
286    fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result<Self> {
287        let mut result = ObjectVec::default();
288        obj.finish(&mut result)?;
289        return Ok(result.0);
290
291        #[derive(Default)]
292        struct ObjectVec(Vec<u8>);
293
294        impl WritableBuffer for ObjectVec {
295            fn len(&self) -> usize {
296                self.0.len()
297            }
298
299            fn reserve(&mut self, additional: usize) -> Result<(), ()> {
300                assert_eq!(self.0.len(), 0, "cannot reserve twice");
301                self.0 = Vec::with_capacity(additional);
302                Ok(())
303            }
304
305            fn resize(&mut self, new_len: usize) {
306                if new_len <= self.0.len() {
307                    self.0.truncate(new_len)
308                } else {
309                    self.0.extend(vec![0; new_len - self.0.len()])
310                }
311            }
312
313            fn write_bytes(&mut self, val: &[u8]) {
314                self.0.extend(val);
315            }
316        }
317    }
318}