apple_codesign/
macho_builder.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Mach-O writing.
6//!
7//! Initially authored to facilitate testing.
8
9use {
10    crate::{macho::MachoTarget, AppleCodesignError},
11    object::{
12        endian::{BigEndian, U32, U64},
13        macho::*,
14        pod::bytes_of,
15        AddressSize, Architecture, Endian, Endianness,
16    },
17};
18
/// A Mach-O segment.
///
/// Only the static attributes of the segment live here. Addresses, file offsets
/// and sizes are supplied by the caller when the load command is serialized.
#[derive(Debug)]
pub struct Segment {
    /// Name of the segment. Max of 16 bytes.
    name: String,
    /// Segment flags.
    ///
    /// Written verbatim into the `flags` field of the segment load command.
    flags: u32,
}
27
28impl Segment {
29    /// Obtain the segment name as bytes.
30    fn name_bytes(&self) -> Result<[u8; 16], AppleCodesignError> {
31        let mut v = [0; 16];
32
33        v.get_mut(..self.name.len())
34            .ok_or_else(|| {
35                AppleCodesignError::MachOWrite(format!("segment name too long: {}", self.name))
36            })?
37            .copy_from_slice(self.name.as_bytes());
38
39        Ok(v)
40    }
41
42    /// Obtain the bytes for the load command data.
43    ///
44    /// Just the segment load command. Does not include section header data.
45    #[allow(clippy::too_many_arguments)]
46    pub fn to_load_command_data(
47        &self,
48        address_size: AddressSize,
49        endian: Endianness,
50        section_count: usize,
51        vm_start: u64,
52        vm_length: u64,
53        file_offset: usize,
54        file_length: usize,
55    ) -> Result<Vec<u8>, AppleCodesignError> {
56        if address_size == AddressSize::U64 {
57            let segment = SegmentCommand64 {
58                cmd: U32::new(endian, LC_SEGMENT_64),
59                cmdsize: U32::new(
60                    endian,
61                    (std::mem::size_of::<SegmentCommand64<Endianness>>()
62                        + section_count * std::mem::size_of::<Section64<Endianness>>())
63                        as u32,
64                ),
65                segname: self.name_bytes()?,
66                vmaddr: U64::new(endian, vm_start),
67                vmsize: U64::new(endian, vm_length as _),
68                fileoff: U64::new(endian, file_offset as _),
69                filesize: U64::new(endian, file_length as _),
70                maxprot: U32::new(endian, 0),
71                initprot: U32::new(endian, 0),
72                nsects: U32::new(endian, section_count as _),
73                flags: U32::new(endian, self.flags),
74            };
75
76            Ok(bytes_of(&segment).to_vec())
77        } else {
78            let segment = SegmentCommand32 {
79                cmd: U32::new(endian, LC_SEGMENT),
80                cmdsize: U32::new(
81                    endian,
82                    (std::mem::size_of::<SegmentCommand32<Endianness>>()
83                        + section_count * std::mem::size_of::<Section32<Endianness>>())
84                        as u32,
85                ),
86                segname: self.name_bytes()?,
87                vmaddr: U32::new(endian, vm_start as _),
88                vmsize: U32::new(endian, vm_length as _),
89                fileoff: U32::new(endian, file_offset as _),
90                filesize: U32::new(endian, file_length as _),
91                maxprot: U32::new(endian, 0),
92                initprot: U32::new(endian, 0),
93                nsects: U32::new(endian, section_count as _),
94                flags: U32::new(endian, self.flags),
95            };
96
97            Ok(bytes_of(&segment).to_vec())
98        }
99    }
100}
101
/// A Mach-O section and its content.
#[derive(Debug)]
pub struct Section {
    /// Name of the segment this section belongs to. Max of 16 bytes.
    segment: String,
    /// Name of the section. Max of 16 bytes.
    name: String,
    /// Alignment of the section start, in bytes.
    ///
    /// Used to round up both the file offset and the address of the section.
    align: usize,
    /// Raw content of the section.
    data: Vec<u8>,
    /// Section flags, written verbatim into the section header.
    flags: u32,
}
110
111impl Section {
112    /// Obtain the segment name as bytes.
113    pub fn segment_name_bytes(&self) -> Result<[u8; 16], AppleCodesignError> {
114        let mut v = [0; 16];
115
116        v.get_mut(..self.segment.len())
117            .ok_or_else(|| {
118                AppleCodesignError::MachOWrite(format!("segment name too long: {}", self.segment))
119            })?
120            .copy_from_slice(self.segment.as_bytes());
121
122        Ok(v)
123    }
124
125    /// Obtain the section name as bytes.
126    pub fn section_name_bytes(&self) -> Result<[u8; 16], AppleCodesignError> {
127        let mut v = [0; 16];
128
129        v.get_mut(..self.name.len())
130            .ok_or_else(|| {
131                AppleCodesignError::MachOWrite(format!("section name too long: {}", self.name))
132            })?
133            .copy_from_slice(self.name.as_bytes());
134
135        Ok(v)
136    }
137
138    pub fn to_section_header_data(
139        &self,
140        address_size: AddressSize,
141        endian: Endianness,
142        address: u64,
143        size: usize,
144        offset: usize,
145        alignment: usize,
146    ) -> Result<Vec<u8>, AppleCodesignError> {
147        if address_size == AddressSize::U64 {
148            let header = Section64 {
149                sectname: self.section_name_bytes()?,
150                segname: self.segment_name_bytes()?,
151                addr: U64::new(endian, address),
152                size: U64::new(endian, size as _),
153                offset: U32::new(endian, offset as _),
154                align: U32::new(endian, alignment as _),
155                reloff: U32::new(endian, 0),
156                nreloc: U32::new(endian, 0),
157                flags: U32::new(endian, self.flags),
158                reserved1: U32::new(endian, 0),
159                reserved2: U32::new(endian, 0),
160                reserved3: U32::new(endian, 0),
161            };
162
163            Ok(bytes_of(&header).to_vec())
164        } else {
165            let header = Section32 {
166                sectname: self.section_name_bytes()?,
167                segname: self.segment_name_bytes()?,
168                addr: U32::new(endian, address as _),
169                size: U32::new(endian, size as _),
170                offset: U32::new(endian, offset as _),
171                align: U32::new(endian, alignment as _),
172                reloff: U32::new(endian, 0),
173                nreloc: U32::new(endian, 0),
174                flags: U32::new(endian, self.flags),
175                reserved1: U32::new(endian, 0),
176                reserved2: U32::new(endian, 0),
177            };
178
179            Ok(bytes_of(&header).to_vec())
180        }
181    }
182}
183
/// Describes a Mach-O segment in the context of a larger file.
///
/// Values are resolved while laying out the file and are later written into
/// the segment load command.
#[derive(Clone, Copy, Debug, Default)]
struct SegmentMetadata {
    /// File offset at which the segment starts.
    file_offset: usize,
    /// Number of bytes of the file covered by the segment.
    file_size: usize,
    /// Virtual memory address at which the segment starts.
    vm_address: u64,
    /// Size of the segment in virtual memory.
    vm_size: u64,
}
191
/// Describes a Mach-O section in the context of a larger file.
///
/// Values are resolved while laying out the file and are later written into
/// the section header.
#[derive(Clone, Copy, Debug, Default)]
struct SectionMetadata {
    /// File offset of the start of the section data.
    offset: usize,
    /// Virtual memory address of the start of the section.
    address: u64,
}
200
/// Round `offset` up to the next multiple of `size`.
///
/// `size` must be a non-zero power of two; the result is meaningless otherwise.
fn align_u64(offset: u64, size: u64) -> u64 {
    let mask = size - 1;

    (offset + mask) & !mask
}
204
/// Round `offset` up to the next multiple of `size`.
///
/// `size` must be a non-zero power of two; the result is meaningless otherwise.
fn align_usize(offset: usize, size: usize) -> usize {
    let mask = size - 1;

    (offset + mask) & !mask
}
208
/// Constructor of Mach-O binaries.
///
/// Originally written to facilitate testing so we can generate Mach-O binaries
/// for tests. Not intended to be a fully-functional linker! Use at your own
/// risk.
pub struct MachOBuilder {
    /// Target CPU architecture. Determines the CPU type written to the header.
    architecture: Architecture,
    /// Byte order of the emitted file.
    endian: Endianness,
    /// Address size. Selects between the 32-bit and 64-bit Mach-O structures.
    address_size: AddressSize,
    /// Page size in bytes. Used to align the start of the __LINKEDIT segment.
    page_size: usize,
    /// Value of the `filetype` field in the Mach-O header.
    file_type: u32,
    /// Value of the `flags` field in the Mach-O header.
    macho_flags: u32,
    /// Start offset for __TEXT segment.
    text_segment_start_offset: usize,
    /// Segments within the Mach-O, in the order their load commands are emitted.
    segments: Vec<Segment>,
    /// Sections within the Mach-O.
    ///
    /// Sections are grouped by segment and each group is ordered by segment file order.
    sections: Vec<Section>,

    // Optional load commands.
    /// Mach-O targeting.
    ///
    /// Turned into an LC_BUILD_VERSION load command.
    macho_target: Option<MachoTarget>,
}
235
236impl MachOBuilder {
237    /// Create a new instance having the specified architecture and endianness.
238    pub fn new(architecture: Architecture, endianness: Endianness, file_type: u32) -> Self {
239        let page_size = match architecture {
240            Architecture::Aarch64 => 16384,
241            Architecture::X86_64 => 4096,
242            _ => 4096,
243        };
244
245        let segments = vec![
246            Segment {
247                name: "__PAGEZERO".to_string(),
248                flags: 0,
249            },
250            Segment {
251                name: "__TEXT".to_string(),
252                flags: 0,
253            },
254            Segment {
255                name: "__DATA_CONST".to_string(),
256                flags: 0,
257            },
258            Segment {
259                name: "__DATA".to_string(),
260                flags: 0,
261            },
262            Segment {
263                name: "__LINKEDIT".to_string(),
264                flags: 0,
265            },
266        ];
267
268        let sections = vec![
269            Section {
270                segment: "__TEXT".to_string(),
271                name: "__text".to_string(),
272                align: page_size,
273                data: vec![],
274                flags: 0,
275            },
276            Section {
277                segment: "__TEXT".to_string(),
278                name: "__const".to_string(),
279                align: page_size,
280                data: vec![],
281                flags: 0,
282            },
283            Section {
284                segment: "__DATA_CONST".to_string(),
285                name: "__const".to_string(),
286                align: page_size,
287                data: vec![],
288                flags: 0,
289            },
290            Section {
291                segment: "__DATA".to_string(),
292                name: "__data".to_string(),
293                align: page_size,
294                data: vec![],
295                flags: 0,
296            },
297        ];
298
299        Self {
300            architecture,
301            endian: endianness,
302            address_size: architecture
303                .address_size()
304                .expect("address size should be known"),
305            file_type,
306            page_size,
307            macho_flags: 0,
308            text_segment_start_offset: 0,
309            segments,
310            sections,
311            macho_target: None,
312        }
313    }
314
315    /// Create a new instance for x86-64.
316    pub fn new_x86_64(file_type: u32) -> Self {
317        Self::new(Architecture::X86_64, Endianness::Little, file_type)
318    }
319
320    /// Create a new instance for aarch64.
321    pub fn new_aarch64(file_type: u32) -> Self {
322        Self::new(Architecture::Aarch64, Endianness::Little, file_type)
323    }
324
325    /// Set the Mach-O targeting info for the binary.
326    ///
327    /// Will result in a LC_BUILD_VERSION load command being emitted.
328    pub fn macho_target(mut self, target: MachoTarget) -> Self {
329        self.macho_target = Some(target);
330        self
331    }
332
333    /// Set the start offset for the __TEXT segment.
334    ///
335    /// Normally the __TEXT segment starts at 0x0.
336    ///
337    /// Very little validation is performed on the value. It may be possible
338    /// to write corrupted Mach-O by feeding this a sufficiently large number.
339    pub fn text_segment_start_offset(mut self, offset: usize) -> Self {
340        self.text_segment_start_offset = offset;
341        self
342    }
343
344    fn mach_header(
345        &self,
346        number_commands: u32,
347        size_of_commands: u32,
348    ) -> Result<Vec<u8>, AppleCodesignError> {
349        let endian = self.endian;
350
351        let (cpu_type, cpu_sub_type) = match self.architecture {
352            Architecture::Arm => (CPU_TYPE_ARM, CPU_SUBTYPE_ARM_ALL),
353            Architecture::Aarch64 => (CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL),
354            Architecture::Aarch64_Ilp32 => (CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8),
355            Architecture::I386 => (CPU_TYPE_X86, CPU_SUBTYPE_I386_ALL),
356            Architecture::X86_64 => (CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL),
357            Architecture::PowerPc => (CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL),
358            Architecture::PowerPc64 => (CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_ALL),
359            _ => {
360                return Err(AppleCodesignError::MachOWrite(format!(
361                    "unhandled architecture: {:?}",
362                    self.architecture
363                )));
364            }
365        };
366
367        if self.address_size == AddressSize::U64 {
368            let magic = if endian.is_big_endian() {
369                MH_MAGIC_64
370            } else {
371                MH_CIGAM_64
372            };
373            let header = MachHeader64 {
374                magic: U32::new(BigEndian, magic),
375                cputype: U32::new(endian, cpu_type),
376                cpusubtype: U32::new(endian, cpu_sub_type),
377                filetype: U32::new(endian, self.file_type),
378                ncmds: U32::new(endian, number_commands),
379                sizeofcmds: U32::new(endian, size_of_commands),
380                flags: U32::new(endian, self.macho_flags),
381                reserved: U32::default(),
382            };
383
384            Ok(bytes_of(&header).to_vec())
385        } else {
386            let magic = if endian.is_big_endian() {
387                MH_MAGIC
388            } else {
389                MH_CIGAM
390            };
391            let header = MachHeader32 {
392                magic: U32::new(BigEndian, magic),
393                cputype: U32::new(endian, cpu_type),
394                cpusubtype: U32::new(endian, cpu_sub_type),
395                filetype: U32::new(endian, self.file_type),
396                ncmds: U32::new(endian, number_commands),
397                sizeofcmds: U32::new(endian, size_of_commands),
398                flags: U32::new(endian, self.macho_flags),
399            };
400
401            Ok(bytes_of(&header).to_vec())
402        }
403    }
404
405    /// Length of segment load command header.
406    fn segment_header_size(&self) -> usize {
407        if self.address_size == AddressSize::U64 {
408            std::mem::size_of::<SegmentCommand64<Endianness>>()
409        } else {
410            std::mem::size_of::<SegmentCommand32<Endianness>>()
411        }
412    }
413
414    /// Length of section header.
415    fn section_header_size(&self) -> usize {
416        if self.address_size == AddressSize::U64 {
417            std::mem::size_of::<Section64<Endianness>>()
418        } else {
419            std::mem::size_of::<Section32<Endianness>>()
420        }
421    }
422
423    /// Get the sections in a named segment.
424    fn sections_in_segment<'a>(
425        &'a self,
426        segment_name: &'a str,
427    ) -> impl Iterator<Item = &'a Section> + 'a {
428        self.sections
429            .iter()
430            .filter(move |x| x.segment.as_str() == segment_name)
431    }
432
433    /// Write Mach-O data to a memory buffer.
434    pub fn write_macho(&self) -> Result<Vec<u8>, AppleCodesignError> {
435        let endian = self.endian;
436
437        // Before writing anything we do a pass to resolve metadata (lengths, file-level
438        // offsets, etc) for segments, sections, and other important data structures, as
439        // these all need to be expressed in the file header and load commands.
440
441        let mut current_file_offset = 0;
442        let mut number_commands = 0;
443
444        // Header is constant sized. So generate one with placeholder data.
445        current_file_offset += self.mach_header(0, 0)?.len();
446
447        let load_commands_offset = current_file_offset;
448
449        // The segment load commands come first. Each has a fixed size header followed by
450        // section headers describing the sections within the segment.
451        for segment in &self.segments {
452            number_commands += 1;
453            current_file_offset += self.segment_header_size()
454                + self.sections_in_segment(&segment.name).count() * self.section_header_size();
455        }
456
457        // The next set of load commands describe data in the __LINKEDIT segment.
458
459        // Symbol table.
460        number_commands += 1;
461        current_file_offset += std::mem::size_of::<SymtabCommand<Endianness>>();
462
463        // Now extra load commands.
464        if let Some(target) = &self.macho_target {
465            number_commands += 1;
466            current_file_offset += target.to_build_version_command_vec(endian).len();
467        }
468
469        // TODO support additional load commands. Build version, source version, minimum
470        // version, Uuid. Main, CodeSignature, etc.
471
472        let load_command_size = current_file_offset - load_commands_offset;
473
474        // After the load commands is the segment / section data.
475
476        let start_address = if self.address_size == AddressSize::U64 {
477            0x1_0000_0000
478        } else {
479            0x4000_0000
480        };
481
482        let mut current_address = start_address;
483
484        // Iterate through all the sections and collect metadata for them.
485        let mut section_metadata = vec![SectionMetadata::default(); self.sections.len()];
486
487        for (index, section) in self.sections.iter().enumerate() {
488            current_file_offset = align_usize(current_file_offset, section.align);
489            current_address = align_u64(current_address, section.align as _);
490
491            section_metadata[index].offset = current_file_offset;
492            section_metadata[index].address = current_address;
493
494            current_file_offset += section.data.len();
495            current_address += section.data.len() as u64;
496        }
497
498        // After the section data is the __LINKEDIT segment and all its special data.
499        current_file_offset = align_usize(current_file_offset, self.page_size);
500        current_address = align_u64(current_address, self.page_size as _);
501
502        let linkedit_start_file_offset = current_file_offset;
503        let linkedit_start_address = current_address;
504
505        let symbol_table_offset = current_file_offset;
506        let symbol_table_data = vec![0];
507        current_file_offset += symbol_table_data.len();
508
509        let string_table_offset = current_file_offset;
510        // Need to write a null name for Mach-O.
511        let string_table_data = vec![0];
512        current_file_offset += string_table_data.len();
513
514        // We're at the end of the file!
515
516        // Derive segment metadata from section metadata and special rules.
517        let mut segment_metadata = vec![SegmentMetadata::default(); self.segments.len()];
518
519        for (segment_index, segment) in self.segments.iter().enumerate() {
520            let metadata = &mut segment_metadata[segment_index];
521
522            match segment.name.as_str() {
523                "__PAGEZERO" => {
524                    // __PAGEZERO is empty in the file but is mapped to an empty virtual address
525                    // outside the used memory address range in order to trigger a fault.
526                    metadata.file_offset = 0;
527                    metadata.file_size = 0;
528                    metadata.vm_address = 0;
529                    // A constant value is obviously incorrect for binaries larger than 4 GB.
530                    metadata.vm_size = start_address;
531                }
532                "__LINKEDIT" => {
533                    metadata.file_offset = linkedit_start_file_offset;
534                    metadata.file_size = current_file_offset - linkedit_start_file_offset;
535                    metadata.vm_address = linkedit_start_address;
536                    metadata.vm_size = (current_file_offset - linkedit_start_file_offset) as _;
537                }
538                segment_name => {
539                    // All the other segments are derived from section metadata.
540                    let first_section_index = self
541                        .sections
542                        .iter()
543                        .enumerate()
544                        .find_map(|(index, section)| {
545                            if section.segment == segment_name {
546                                Some(index)
547                            } else {
548                                None
549                            }
550                        })
551                        .ok_or_else(|| {
552                            AppleCodesignError::MachOWrite(format!(
553                                "unable to find section in segment {}",
554                                segment.name
555                            ))
556                        })?;
557                    let last_section_index = self
558                        .sections
559                        .iter()
560                        .enumerate()
561                        .rfind(|(_, section)| section.segment == segment_name)
562                        .map(|(index, _)| index)
563                        .ok_or_else(|| {
564                            AppleCodesignError::MachOWrite(format!(
565                                "unable to find section in segment {}",
566                                segment.name
567                            ))
568                        })?;
569
570                    let start_file_offset = section_metadata[first_section_index].offset;
571                    let start_address = section_metadata[first_section_index].address;
572                    let end_address = section_metadata[last_section_index].address
573                        + self.sections[last_section_index].data.len() as u64;
574
575                    metadata.file_offset = start_file_offset;
576                    metadata.vm_address = start_address;
577                    metadata.vm_size = (end_address - start_address) as _;
578
579                    // End offset is next section start or start of __LINKEDIT.
580                    metadata.file_size =
581                        if let Some(next_section) = section_metadata.get(last_section_index + 1) {
582                            next_section.offset - start_file_offset
583                        } else {
584                            linkedit_start_file_offset - start_file_offset
585                        };
586
587                    // But there's a special case for __TEXT, which starts at the beginning of the
588                    // file and encompasses the header and load commands.
589                    if segment_name == "__TEXT" {
590                        metadata.file_offset = self.text_segment_start_offset;
591
592                        metadata.file_size = if let Some(next_section) =
593                            section_metadata.get(last_section_index + 1)
594                        {
595                            next_section.offset
596                        } else {
597                            current_file_offset
598                        } - self.text_segment_start_offset;
599                    }
600                }
601            }
602        }
603
604        // Now proceed with writing data.
605
606        let mut buffer = Vec::with_capacity(current_file_offset);
607
608        buffer.extend_from_slice(
609            self.mach_header(number_commands, load_command_size as _)?
610                .as_slice(),
611        );
612
613        for (index, segment) in self.segments.iter().enumerate() {
614            let metadata = &segment_metadata[index];
615
616            let segment_command_data = segment.to_load_command_data(
617                self.address_size,
618                endian,
619                self.sections_in_segment(&segment.name).count(),
620                metadata.vm_address,
621                metadata.vm_size,
622                metadata.file_offset,
623                metadata.file_size,
624            )?;
625
626            buffer.extend_from_slice(segment_command_data.as_slice());
627
628            for (index, section) in self
629                .sections
630                .iter()
631                .enumerate()
632                .filter(|(_, x)| x.segment == segment.name)
633            {
634                let metadata = &section_metadata[index];
635
636                let section_header_data = section.to_section_header_data(
637                    self.address_size,
638                    endian,
639                    metadata.address,
640                    section.data.len(),
641                    metadata.offset,
642                    section.align,
643                )?;
644
645                buffer.extend_from_slice(section_header_data.as_slice());
646            }
647        }
648
649        let symtab_command = SymtabCommand {
650            cmd: U32::new(endian, LC_SYMTAB),
651            cmdsize: U32::new(
652                endian,
653                std::mem::size_of::<SymtabCommand<Endianness>>() as u32,
654            ),
655            symoff: U32::new(endian, symbol_table_offset as _),
656            nsyms: U32::new(endian, 0),
657            stroff: U32::new(endian, string_table_offset as _),
658            strsize: U32::new(endian, string_table_data.len() as _),
659        };
660        buffer.extend_from_slice(bytes_of(&symtab_command));
661
662        if let Some(target) = &self.macho_target {
663            buffer.extend_from_slice(&target.to_build_version_command_vec(endian));
664        }
665
666        // Done with load commands. Start writing section data.
667
668        for (index, section) in self.sections.iter().enumerate() {
669            let metadata = &section_metadata[index];
670
671            // Pad zeroes until section start.
672            if metadata.offset > buffer.len() {
673                buffer.resize(metadata.offset, 0);
674            }
675
676            if !section.data.is_empty() {
677                buffer.extend_from_slice(&section.data);
678            }
679        }
680
681        buffer.resize(linkedit_start_file_offset, 0);
682
683        buffer.extend_from_slice(&symbol_table_data);
684        buffer.extend_from_slice(&string_table_data);
685
686        Ok(buffer)
687    }
688}