1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use {
    crate::{
        format::{XarChecksum, XarHeader},
        table_of_contents::{ChecksumType, File, FileType, SignatureStyle, TableOfContents},
        Error, XarResult,
    },
    scroll::IOread,
    std::{
        cmp::min,
        fmt::Debug,
        io::{Cursor, Read, Seek, SeekFrom, Write},
        path::Path,
    },
    x509_certificate::CapturedX509Certificate,
};

#[cfg(feature = "signing")]
use cryptographic_message_syntax::SignedData;

/// Read-only interface to a single XAR archive.
///
/// Construction via [Self::new] parses the file header and the table of
/// contents up front. The other methods seek within the wrapped reader on
/// demand to pull data out of the archive.
#[derive(Debug)]
pub struct XarReader<R: Read + Seek + Sized + Debug> {
    /// Reader of raw XAR archive content.
    reader: R,

    /// Parsed file header.
    header: XarHeader,

    /// Parsed table of contents.
    toc: TableOfContents,

    /// Absolute offset within the reader where the heap begins.
    ///
    /// This is the stream position observed right after the compressed table
    /// of contents has been consumed.
    heap_start_offset: u64,
}

impl<R: Read + Seek + Sized + Debug> XarReader<R> {
    /// Construct a new XAR reader from a stream reader.
    ///
    /// Reads the big-endian file header, skips any header bytes beyond the
    /// fixed fields (as declared by `header.size`), then inflates and parses
    /// the zlib compressed table of contents. The stream position after the
    /// compressed table of contents is recorded as the start of the heap.
    ///
    /// # Errors
    ///
    /// Fails if the header cannot be read, if the header declares a size
    /// smaller than its fixed fields, if reading or inflating the table of
    /// contents fails, or if the table of contents cannot be parsed.
    pub fn new(mut reader: R) -> XarResult<Self> {
        // Bytes taken up by the fixed header fields that were just parsed.
        const FIXED_HEADER_SIZE: usize = 28;

        // Ceiling for the up-front allocation of the decoded table of contents.
        // The buffer still grows past this if the real data is larger.
        const MAX_TOC_PREALLOCATION: usize = 16 * 1024 * 1024;

        let header = reader.ioread_with::<XarHeader>(scroll::BE)?;

        // `header.size` comes straight from the file. A plain subtraction would
        // underflow on a corrupt value, which panics in debug builds and asks for
        // an enormous allocation in release builds.
        let extra_len = (header.size as usize)
            .checked_sub(FIXED_HEADER_SIZE)
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "XAR header size is smaller than the fixed header length",
                )
            })?;

        // The extra header bytes are consumed only to advance the stream.
        let mut header_extra = vec![0u8; extra_len];
        reader.read_exact(&mut header_extra)?;

        // Following the header is a zlib compressed table of contents.
        // Unfortunately, serde_xml_rs takes ownership of the reader and doesn't
        // allow returning it. So we have to buffer decompressed data before feeding
        // it to the XML parser.
        let toc_reader = reader.take(header.toc_length_compressed);
        let mut toc_reader = flate2::read::ZlibDecoder::new(toc_reader);

        // The declared uncompressed length is only a hint and is untrusted, so it
        // is capped rather than handed directly to the allocator.
        let mut toc_data = Vec::with_capacity(min(
            header.toc_length_uncompressed as usize,
            MAX_TOC_PREALLOCATION,
        ));
        toc_reader.read_to_end(&mut toc_data)?;

        // Unwrap the decoder and the `Take` adapter to get the original reader back.
        let mut reader = toc_reader.into_inner().into_inner();
        let heap_start_offset = reader.stream_position()?;

        let toc = TableOfContents::from_reader(std::io::Cursor::new(toc_data))?;

        Ok(Self {
            reader,
            header,
            toc,
            heap_start_offset,
        })
    }

    /// Obtain the inner reader.
    pub fn into_inner(self) -> R {
        self.reader
    }

    /// Obtain the parsed [XarHeader] file header.
    pub fn header(&self) -> &XarHeader {
        &self.header
    }

    /// The start offset of the heap.
    pub fn heap_start_offset(&self) -> u64 {
        self.heap_start_offset
    }

    /// Obtain the table of contents for this archive.
    pub fn table_of_contents(&self) -> &TableOfContents {
        &self.toc
    }

    /// Re-read the compressed table of contents and return it zlib-decoded.
    ///
    /// The compressed bytes start at file offset `header.size` and span
    /// `header.toc_length_compressed` bytes.
    pub fn table_of_contents_decoded_data(&mut self) -> XarResult<Vec<u8>> {
        let toc_start = self.header.size as u64;
        let toc_len = self.header.toc_length_compressed as usize;

        // The decoder inflates whatever is written into it and collects the
        // result in the wrapped vector.
        let mut inflater = flate2::write::ZlibDecoder::new(Vec::new());
        self.write_file_slice(toc_start, toc_len, &mut inflater)?;

        let decoded = inflater.finish()?;
        Ok(decoded)
    }

    /// Read the raw checksum bytes from the heap.
    ///
    /// The location and length come from the checksum record in the table of
    /// contents.
    pub fn checksum_data(&mut self) -> XarResult<Vec<u8>> {
        let offset = self.toc.checksum.offset;
        let length = self.toc.checksum.size as usize;

        let mut raw = Vec::with_capacity(length);
        self.write_heap_slice(offset, length, &mut raw)?;

        Ok(raw)
    }

    /// Compute a digest over the table of contents using the given algorithm.
    ///
    /// The digest covers the bytes as stored in the file (still compressed),
    /// i.e. `header.toc_length_compressed` bytes starting at `header.size`.
    pub fn digest_table_of_contents_with(&mut self, checksum: ChecksumType) -> XarResult<Vec<u8>> {
        let toc_start = self.header.size as u64;
        let toc_len = self.header.toc_length_compressed as usize;

        let mut compressed = Vec::<u8>::new();
        self.write_file_slice(toc_start, toc_len, &mut compressed)?;

        checksum.digest_data(&compressed)
    }

    /// List the file entries recorded in the table of contents.
    ///
    /// Each element pairs the entry's path with its [File] record.
    pub fn files(&self) -> XarResult<Vec<(String, File)>> {
        self.table_of_contents().files()
    }

    /// Look up the [File] entry whose archive path equals `filename`.
    ///
    /// Returns `Ok(None)` when no entry has that exact path. The first match
    /// in table-of-contents order wins.
    pub fn find_file(&self, filename: &str) -> XarResult<Option<File>> {
        let entries = self.toc.files()?;

        let hit = entries
            .into_iter()
            .find(|(path, _)| path.as_str() == filename)
            .map(|(_, file)| file);

        Ok(hit)
    }

    /// Write a slice of the file to a writer.
    ///
    /// Offsets are relative from beginning of the file. Exactly `size` bytes
    /// starting at `offset` are copied to `writer` in bounded chunks.
    ///
    /// # Errors
    ///
    /// Fails if seeking fails, if the reader ends before `size` bytes have
    /// been read, or if the writer rejects a chunk.
    fn write_file_slice(
        &mut self,
        offset: u64,
        size: usize,
        writer: &mut impl Write,
    ) -> XarResult<()> {
        // Largest number of bytes moved per read/write round trip.
        const CHUNK_SIZE: usize = 32768;

        self.reader.seek(SeekFrom::Start(offset))?;

        // The scratch buffer is allocated and zeroed once. Each pass borrows only
        // the prefix it needs, so no `unsafe` length adjustment is required.
        let mut buffer = vec![0u8; min(size, CHUNK_SIZE)];
        let mut remaining = size;

        while remaining > 0 {
            // `remaining <= size`, so this prefix never exceeds the buffer length.
            let chunk = &mut buffer[..min(remaining, CHUNK_SIZE)];
            self.reader.read_exact(chunk)?;
            writer.write_all(chunk)?;
            remaining -= chunk.len();
        }

        Ok(())
    }

    /// Write a slice of the heap to a writer.
    ///
    /// `offset` is relative to the start of the heap; it is translated to an
    /// absolute file offset before delegating to [Self::write_file_slice].
    ///
    /// # Errors
    ///
    /// Fails if the absolute offset does not fit in a `u64`, or if the
    /// underlying file slice cannot be copied.
    fn write_heap_slice(
        &mut self,
        offset: u64,
        size: usize,
        writer: &mut impl Write,
    ) -> XarResult<()> {
        // Heap offsets originate from the archive's table of contents, so guard
        // against a value that would overflow rather than panic or wrap around.
        let absolute = self.heap_start_offset.checked_add(offset).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "heap offset overflows the addressable file range",
            )
        })?;

        self.write_file_slice(absolute, size, writer)
    }

    /// Write heap file data for a given file record to a writer.
    ///
    /// This will write the raw data backing a file as stored in the heap.
    /// There's a good chance the raw data is encoded/compressed.
    ///
    /// Returns the number of bytes written.
    pub fn write_file_data_heap_from_file(
        &mut self,
        file: &File,
        writer: &mut impl Write,
    ) -> XarResult<usize> {
        let data = file.data.as_ref().ok_or(Error::FileNoData)?;

        self.write_heap_slice(data.offset, data.length as _, writer)?;

        Ok(data.length as _)
    }

    /// Write heap file data for a given file ID to a writer.
    ///
    /// This is a wrapper around [Self::write_file_data_heap_from_file] that
    /// resolves the [File] given a file ID.
    pub fn write_file_data_heap_from_id(
        &mut self,
        id: u64,
        writer: &mut impl Write,
    ) -> XarResult<usize> {
        let file = self
            .toc
            .files()?
            .into_iter()
            .find(|(_, f)| f.id == id)
            .ok_or(Error::InvalidFileId)?
            .1;

        self.write_file_data_heap_from_file(&file, writer)
    }

    /// Write decoded file data for a given file record to a writer.
    ///
    /// This will call [Self::write_file_data_heap_from_file] and will decode
    /// that data stream, if the file data is encoded.
    pub fn write_file_data_decoded_from_file(
        &mut self,
        file: &File,
        writer: &mut impl Write,
    ) -> XarResult<usize> {
        let data = file.data.as_ref().ok_or(Error::FileNoData)?;

        let mut writer = match data.encoding.style.as_str() {
            "application/octet-stream" => Box::new(writer) as Box<dyn Write>,
            "application/x-bzip2" => {
                Box::new(bzip2::write::BzDecoder::new(writer)) as Box<dyn Write>
            }
            // The media type is arguably wrong, as there is no gzip header.
            "application/x-gzip" => {
                Box::new(flate2::write::ZlibDecoder::new(writer)) as Box<dyn Write>
            }
            "application/x-lzma" => Box::new(xz2::write::XzDecoder::new(writer)) as Box<dyn Write>,
            encoding => {
                return Err(Error::UnimplementedFileEncoding(encoding.to_string()));
            }
        };

        self.write_file_data_heap_from_file(file, &mut writer)
    }

    /// Write decoded file data for a given file ID to a writer.
    ///
    /// This is a wrapper for [Self::write_file_data_decoded_from_file] that locates
    /// the [File] entry given a file ID.
    pub fn write_file_data_decoded_from_id(
        &mut self,
        id: u64,
        writer: &mut impl Write,
    ) -> XarResult<usize> {
        let file = self
            .toc
            .files()?
            .into_iter()
            .find(|(_, f)| f.id == id)
            .ok_or(Error::InvalidFileId)?
            .1;

        self.write_file_data_decoded_from_file(&file, writer)
    }

    /// Fetch the decoded content of the entry stored at `path`.
    ///
    /// Returns `Ok(None)` when the archive has no entry with that path.
    pub fn get_file_data_from_path(&mut self, path: &str) -> XarResult<Option<Vec<u8>>> {
        let file = match self.find_file(path)? {
            Some(file) => file,
            None => return Ok(None),
        };

        // Size the buffer from the recorded file size when one is present.
        let mut decoded = Vec::<u8>::with_capacity(file.size.unwrap_or(0) as usize);
        self.write_file_data_decoded_from_file(&file, &mut decoded)?;

        Ok(Some(decoded))
    }

    /// Unpack the contents of the XAR archive to a given directory.
    ///
    /// Entries are materialized in table-of-contents order: directories are
    /// created and regular files are written with their decoded content.
    ///
    /// # Errors
    ///
    /// Fails if an entry's path is absolute or contains `..` (which would
    /// place it outside `dest_dir`), if the archive contains hard links or
    /// symlinks (unsupported), or on any filesystem / decoding error.
    pub fn unpack(&mut self, dest_dir: impl AsRef<Path>) -> XarResult<()> {
        /// Whether `path` is relative and free of components that could climb
        /// out of the directory it is joined onto.
        fn stays_within_base(path: &Path) -> bool {
            path.components().all(|component| {
                matches!(
                    component,
                    std::path::Component::Normal(_) | std::path::Component::CurDir
                )
            })
        }

        let dest_dir = dest_dir.as_ref();

        for (path, file) in self.toc.files()? {
            // Paths come from the archive and must not be trusted: joining an
            // absolute path replaces the base entirely, and `..` walks upward.
            if !stays_within_base(Path::new(&path)) {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!(
                        "archive entry path escapes destination directory: {}",
                        path
                    ),
                )
                .into());
            }

            let dest_path = dest_dir.join(path);

            match file.file_type {
                FileType::Directory => {
                    std::fs::create_dir(&dest_path)?;
                }
                FileType::File => {
                    let mut fh = std::fs::File::create(&dest_path)?;
                    self.write_file_data_decoded_from_file(&file, &mut fh)?;
                }
                FileType::HardLink => return Err(Error::Unsupported("writing hard links")),
                FileType::Link => return Err(Error::Unsupported("writing symlinks")),
            }
        }

        Ok(())
    }

    /// Obtain the archive checksum.
    ///
    /// Yields the checksum style recorded in the table of contents together
    /// with the raw digest bytes read from the heap.
    pub fn checksum(&mut self) -> XarResult<(ChecksumType, Vec<u8>)> {
        let style = self.toc.checksum.style;
        let digest = self.checksum_data()?;

        Ok((style, digest))
    }

    /// Check that the stored checksum matches a freshly computed digest of the
    /// table of contents.
    ///
    /// The digest algorithm is taken from the header's checksum algorithm ID.
    /// Returns `Err` if either digest cannot be obtained or computed; otherwise
    /// `Ok` with a bool telling whether the two digests are equal.
    pub fn verify_table_of_contents_checksum(&mut self) -> XarResult<bool> {
        let algorithm = XarChecksum::from(self.header.checksum_algorithm_id);
        let format = ChecksumType::try_from(algorithm)?;

        let computed = self.digest_table_of_contents_with(format)?;
        let (_, recorded) = self.checksum()?;

        Ok(computed == recorded)
    }

    /// Extract the RSA signature stored in this archive, if any.
    ///
    /// On success the tuple holds the raw signature bytes read from the heap
    /// and the X.509 certificates embedded in the signature record. Returns
    /// `Ok(None)` when the table of contents has no RSA signature.
    pub fn rsa_signature(&mut self) -> XarResult<Option<(Vec<u8>, Vec<CapturedX509Certificate>)>> {
        // Clone the record so the borrow of the table of contents ends before
        // the reader is used mutably below.
        let sig = match self.toc.find_signature(SignatureStyle::Rsa).cloned() {
            Some(sig) => sig,
            None => return Ok(None),
        };

        let length = sig.size as usize;
        let mut raw = Vec::<u8>::with_capacity(length);
        self.write_heap_slice(sig.offset, length, &mut raw)?;

        let certificates = sig.x509_certificates()?;

        Ok(Some((raw, certificates)))
    }

    /// Verify the archive's RSA signature against its checksum data.
    ///
    /// When an RSA signature with at least one certificate is present, the
    /// first certificate is used to verify the signature over the raw checksum
    /// bytes; a failed verification surfaces as `Err`.
    ///
    /// The boolean return value indicates if signature validation was
    /// performed: `false` means there was no signature or no certificate.
    pub fn verify_rsa_checksum_signature(&mut self) -> XarResult<bool> {
        let signed_data = self.checksum()?.1;

        let (signature, certificates) = match self.rsa_signature()? {
            Some(parts) => parts,
            None => return Ok(false),
        };

        // The first certificate is the signing certificate.
        match certificates.first() {
            Some(cert) => {
                cert.verify_signed_data(signed_data, signature)?;
                Ok(true)
            }
            None => Ok(false),
        }
    }

    /// Load and parse the cryptographic message syntax (CMS) signature, if any.
    ///
    /// The data signed by the CMS signature is the raw data returned by [Self::checksum].
    /// Returns `Ok(None)` when the table of contents has no CMS signature.
    #[cfg(feature = "signing")]
    pub fn cms_signature(&mut self) -> XarResult<Option<SignedData>> {
        match self.toc.find_signature(SignatureStyle::Cms).cloned() {
            None => Ok(None),
            Some(sig) => {
                let length = sig.size as usize;
                let mut ber = Vec::<u8>::with_capacity(length);
                self.write_heap_slice(sig.offset, length, &mut ber)?;

                let parsed = SignedData::parse_ber(&ber)?;
                Ok(Some(parsed))
            }
        }
    }

    /// Verify the cryptographic message syntax (CMS) signature, if present.
    ///
    /// Every signer has both its signature and its message digest (over the
    /// raw checksum bytes) checked; the first failure is returned as `Err`.
    /// Returns `Ok(true)` when at least one signer was checked, `Ok(false)`
    /// when there is no CMS signature or it lists no signers.
    #[cfg(feature = "signing")]
    pub fn verify_cms_signature(&mut self) -> XarResult<bool> {
        let digest = self.checksum()?.1;

        let signed_data = match self.cms_signature()? {
            Some(signed_data) => signed_data,
            None => return Ok(false),
        };

        let mut verified_any = false;
        for signer in signed_data.signers() {
            signer.verify_signature_with_signed_data(&signed_data)?;
            signer.verify_message_digest_with_content(&digest)?;
            verified_any = true;
        }

        Ok(verified_any)
    }
}