apple_xar/
reader.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use {
6    crate::{
7        format::{XarChecksum, XarHeader},
8        table_of_contents::{ChecksumType, File, FileType, SignatureStyle, TableOfContents},
9        Error, XarResult,
10    },
11    scroll::IOread,
12    std::{
13        cmp::min,
14        fmt::Debug,
15        io::{Cursor, Read, Seek, SeekFrom, Write},
16        path::Path,
17    },
18    x509_certificate::CapturedX509Certificate,
19};
20
21#[cfg(feature = "signing")]
22use cryptographic_message_syntax::SignedData;
23
24/// Read-only interface to a single XAR archive.
25#[derive(Debug)]
26pub struct XarReader<R: Read + Seek + Sized + Debug> {
27    /// Reader of raw XAR archive content.
28    reader: R,
29
30    /// Parsed file header.
31    header: XarHeader,
32
33    /// Parsed table of contents.
34    toc: TableOfContents,
35
36    /// Absolute offset within the reader where the heap begins.
37    heap_start_offset: u64,
38}
39
40impl<R: Read + Seek + Sized + Debug> XarReader<R> {
41    /// Construct a new XAR reader from a stream reader.
42    pub fn new(mut reader: R) -> XarResult<Self> {
43        let header = reader.ioread_with::<XarHeader>(scroll::BE)?;
44
45        let mut header_extra = vec![0u8; header.size as usize - 28];
46        reader.read_exact(&mut header_extra)?;
47
48        // Following the header is a zlib compressed table of contents.
49        // Unfortunately, serde_xml_rs takes ownership of the reader and doesn't
50        // allow returning it. So we have to buffer decompressed data before feeding
51        // it to the XML parser.
52        let toc_reader = reader.take(header.toc_length_compressed);
53        let mut toc_reader = flate2::read::ZlibDecoder::new(toc_reader);
54
55        let mut toc_data = Vec::with_capacity(header.toc_length_uncompressed as _);
56        toc_reader.read_to_end(&mut toc_data)?;
57
58        let mut reader = toc_reader.into_inner().into_inner();
59        let heap_start_offset = reader.stream_position()?;
60
61        let toc = TableOfContents::from_reader(std::io::Cursor::new(toc_data))?;
62
63        Ok(Self {
64            reader,
65            header,
66            toc,
67            heap_start_offset,
68        })
69    }
70
71    /// Obtain the inner reader.
72    pub fn into_inner(self) -> R {
73        self.reader
74    }
75
76    /// Obtain the parsed [XarHeader] file header.
77    pub fn header(&self) -> &XarHeader {
78        &self.header
79    }
80
81    /// The start offset of the heap.
82    pub fn heap_start_offset(&self) -> u64 {
83        self.heap_start_offset
84    }
85
86    /// Obtain the table of contents for this archive.
87    pub fn table_of_contents(&self) -> &TableOfContents {
88        &self.toc
89    }
90
91    /// Obtain the decoded content of the table of contents.
92    pub fn table_of_contents_decoded_data(&mut self) -> XarResult<Vec<u8>> {
93        let mut writer = flate2::write::ZlibDecoder::new(vec![]);
94        self.write_file_slice(
95            self.header.size as _,
96            self.header.toc_length_compressed as _,
97            &mut writer,
98        )?;
99
100        Ok(writer.finish()?)
101    }
102
103    /// Obtain the raw bytes holding the checksum.
104    pub fn checksum_data(&mut self) -> XarResult<Vec<u8>> {
105        let mut buf = Vec::with_capacity(self.toc.checksum.size as _);
106
107        self.write_heap_slice(
108            self.toc.checksum.offset,
109            self.toc.checksum.size as _,
110            &mut buf,
111        )?;
112
113        Ok(buf)
114    }
115
116    /// Digest the table of contents content with the specified algorithm.
117    pub fn digest_table_of_contents_with(&mut self, checksum: ChecksumType) -> XarResult<Vec<u8>> {
118        let mut writer = Cursor::new(vec![]);
119        self.write_file_slice(
120            self.header.size as _,
121            self.header.toc_length_compressed as _,
122            &mut writer,
123        )?;
124
125        checksum.digest_data(&writer.into_inner())
126    }
127
128    /// Obtain the file entries in this archive.
129    pub fn files(&self) -> XarResult<Vec<(String, File)>> {
130        self.toc.files()
131    }
132
133    /// Attempt to find the [File] entry for a given path in the archive.
134    pub fn find_file(&self, filename: &str) -> XarResult<Option<File>> {
135        Ok(self
136            .toc
137            .files()?
138            .into_iter()
139            .find_map(|(path, file)| if path == filename { Some(file) } else { None }))
140    }
141
142    /// Write a slice of the file to a writer.
143    ///
144    /// Offsets are relative from beginning of the file.
145    fn write_file_slice(
146        &mut self,
147        offset: u64,
148        size: usize,
149        writer: &mut impl Write,
150    ) -> XarResult<()> {
151        self.reader.seek(SeekFrom::Start(offset))?;
152
153        let mut remaining = size;
154        let mut buffer = Vec::with_capacity(32768);
155        buffer.resize(min(remaining, buffer.capacity()), 0);
156
157        while remaining > 0 {
158            self.reader.read_exact(&mut buffer)?;
159            remaining -= buffer.len();
160            writer.write_all(&buffer)?;
161
162            unsafe {
163                buffer.set_len(min(remaining, buffer.capacity()));
164            }
165        }
166
167        Ok(())
168    }
169
170    /// Write a slice of the heap to a writer.
171    fn write_heap_slice(
172        &mut self,
173        offset: u64,
174        size: usize,
175        writer: &mut impl Write,
176    ) -> XarResult<()> {
177        self.write_file_slice(self.heap_start_offset + offset, size, writer)
178    }
179
180    /// Write heap file data for a given file record to a writer.
181    ///
182    /// This will write the raw data backing a file as stored in the heap.
183    /// There's a good chance the raw data is encoded/compressed.
184    ///
185    /// Returns the number of bytes written.
186    pub fn write_file_data_heap_from_file(
187        &mut self,
188        file: &File,
189        writer: &mut impl Write,
190    ) -> XarResult<usize> {
191        let data = file.data.as_ref().ok_or(Error::FileNoData)?;
192
193        self.write_heap_slice(data.offset, data.length as _, writer)?;
194
195        Ok(data.length as _)
196    }
197
198    /// Write heap file data for a given file ID to a writer.
199    ///
200    /// This is a wrapper around [Self::write_file_data_heap_from_file] that
201    /// resolves the [File] given a file ID.
202    pub fn write_file_data_heap_from_id(
203        &mut self,
204        id: u64,
205        writer: &mut impl Write,
206    ) -> XarResult<usize> {
207        let file = self
208            .toc
209            .files()?
210            .into_iter()
211            .find(|(_, f)| f.id == id)
212            .ok_or(Error::InvalidFileId)?
213            .1;
214
215        self.write_file_data_heap_from_file(&file, writer)
216    }
217
218    /// Write decoded file data for a given file record to a writer.
219    ///
220    /// This will call [Self::write_file_data_heap_from_file] and will decode
221    /// that data stream, if the file data is encoded.
222    pub fn write_file_data_decoded_from_file(
223        &mut self,
224        file: &File,
225        writer: &mut impl Write,
226    ) -> XarResult<usize> {
227        let data = file.data.as_ref().ok_or(Error::FileNoData)?;
228
229        let mut writer = match data.encoding.style.as_str() {
230            "application/octet-stream" => Box::new(writer) as Box<dyn Write>,
231            "application/x-bzip2" => {
232                Box::new(bzip2::write::BzDecoder::new(writer)) as Box<dyn Write>
233            }
234            // The media type is arguably wrong, as there is no gzip header.
235            "application/x-gzip" => {
236                Box::new(flate2::write::ZlibDecoder::new(writer)) as Box<dyn Write>
237            }
238            "application/x-lzma" => Box::new(xz2::write::XzDecoder::new(writer)) as Box<dyn Write>,
239            encoding => {
240                return Err(Error::UnimplementedFileEncoding(encoding.to_string()));
241            }
242        };
243
244        self.write_file_data_heap_from_file(file, &mut writer)
245    }
246
247    /// Write decoded file data for a given file ID to a writer.
248    ///
249    /// This is a wrapper for [Self::write_file_data_decoded_from_file] that locates
250    /// the [File] entry given a file ID.
251    pub fn write_file_data_decoded_from_id(
252        &mut self,
253        id: u64,
254        writer: &mut impl Write,
255    ) -> XarResult<usize> {
256        let file = self
257            .toc
258            .files()?
259            .into_iter()
260            .find(|(_, f)| f.id == id)
261            .ok_or(Error::InvalidFileId)?
262            .1;
263
264        self.write_file_data_decoded_from_file(&file, writer)
265    }
266
267    /// Resolve data for a given path.
268    pub fn get_file_data_from_path(&mut self, path: &str) -> XarResult<Option<Vec<u8>>> {
269        if let Some(file) = self.find_file(path)? {
270            let mut buffer = Vec::<u8>::with_capacity(file.size.unwrap_or(0) as _);
271            self.write_file_data_decoded_from_file(&file, &mut buffer)?;
272
273            Ok(Some(buffer))
274        } else {
275            Ok(None)
276        }
277    }
278
279    /// Unpack the contents of the XAR archive to a given directory.
280    pub fn unpack(&mut self, dest_dir: impl AsRef<Path>) -> XarResult<()> {
281        let dest_dir = dest_dir.as_ref();
282
283        for (path, file) in self.toc.files()? {
284            let dest_path = dest_dir.join(path);
285
286            match file.file_type {
287                FileType::Directory => {
288                    std::fs::create_dir(&dest_path)?;
289                }
290                FileType::File => {
291                    let mut fh = std::fs::File::create(&dest_path)?;
292                    self.write_file_data_decoded_from_file(&file, &mut fh)?;
293                }
294                FileType::HardLink => return Err(Error::Unsupported("writing hard links")),
295                FileType::Link => return Err(Error::Unsupported("writing symlinks")),
296            }
297        }
298
299        Ok(())
300    }
301
302    /// Obtain the archive checksum.
303    ///
304    /// The checksum consists of a digest format and a raw digest.
305    pub fn checksum(&mut self) -> XarResult<(ChecksumType, Vec<u8>)> {
306        let mut data = Vec::<u8>::with_capacity(self.toc.checksum.size as _);
307        self.write_heap_slice(
308            self.toc.checksum.offset,
309            self.toc.checksum.size as _,
310            &mut data,
311        )?;
312
313        Ok((self.toc.checksum.style, data))
314    }
315
316    /// Validate the recorded checksum of the table of contents matches actual file state.
317    ///
318    /// Will `Err` if an error occurs obtaining or computing the checksums. Returns Ok
319    /// with a bool indicating if the checksums matched.
320    pub fn verify_table_of_contents_checksum(&mut self) -> XarResult<bool> {
321        let format = ChecksumType::try_from(XarChecksum::from(self.header.checksum_algorithm_id))?;
322        let actual_digest = self.digest_table_of_contents_with(format)?;
323        let recorded_digest = self.checksum()?.1;
324
325        Ok(actual_digest == recorded_digest)
326    }
327
328    /// Obtain RSA signature data from this archive.
329    ///
330    /// The returned tuple contains the raw signature data and the embedded X.509 certificates.
331    pub fn rsa_signature(&mut self) -> XarResult<Option<(Vec<u8>, Vec<CapturedX509Certificate>)>> {
332        if let Some(sig) = self.toc.find_signature(SignatureStyle::Rsa).cloned() {
333            let mut data = Vec::<u8>::with_capacity(sig.size as _);
334            self.write_heap_slice(sig.offset, sig.size as _, &mut data)?;
335
336            let certs = sig.x509_certificates()?;
337
338            Ok(Some((data, certs)))
339        } else {
340            Ok(None)
341        }
342    }
343
344    /// Verifies the RSA signature in the archive.
345    ///
346    /// This verifies that the RSA signature in the archive, if present, is a valid signature
347    /// for the archive's checksum data.
348    ///
349    /// The boolean return value indicates if signature validation was performed.
350    pub fn verify_rsa_checksum_signature(&mut self) -> XarResult<bool> {
351        let signed_data = self.checksum()?.1;
352
353        if let Some((signature, certificates)) = self.rsa_signature()? {
354            // The first certificate is the signing certificate.
355            if let Some(cert) = certificates.get(0) {
356                cert.verify_signed_data(signed_data, signature)?;
357                Ok(true)
358            } else {
359                Ok(false)
360            }
361        } else {
362            Ok(false)
363        }
364    }
365
366    /// Attempt to resolve a cryptographic message syntax (CMS) signature.
367    ///
368    /// The data signed by the CMS signature is the raw data returned by [Self::checksum].
369    #[cfg(feature = "signing")]
370    pub fn cms_signature(&mut self) -> XarResult<Option<SignedData>> {
371        if let Some(sig) = self.toc.find_signature(SignatureStyle::Cms).cloned() {
372            let mut data = Vec::<u8>::with_capacity(sig.size as _);
373            self.write_heap_slice(sig.offset, sig.size as _, &mut data)?;
374
375            Ok(Some(SignedData::parse_ber(&data)?))
376        } else {
377            Ok(None)
378        }
379    }
380
381    /// Verifies the cryptographic message syntax (CMS) signature, if present.
382    #[cfg(feature = "signing")]
383    pub fn verify_cms_signature(&mut self) -> XarResult<bool> {
384        let checksum = self.checksum()?.1;
385        let mut checked = false;
386
387        if let Some(signed_data) = self.cms_signature()? {
388            for signer in signed_data.signers() {
389                signer.verify_signature_with_signed_data(&signed_data)?;
390                signer.verify_message_digest_with_content(&checksum)?;
391                checked = true;
392            }
393        }
394
395        Ok(checked)
396    }
397}