cpio_archive/
odc.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Portable ASCII format / old character / odc archive support.
6//!
7//! This module implements support for the *Portable ASCII format* as
8//! standardized in version 2 of the Single UNIX Specification (SUSv2).
9//! It is also commonly referred to as *old character* or *odc*.
10
11use {
12    crate::{CpioHeader, CpioReader, CpioResult, Error},
13    chrono::{DateTime, Utc},
14    is_executable::IsExecutable,
15    simple_file_manifest::{
16        FileManifest, S_IFDIR, S_IRGRP, S_IROTH, S_IRUSR, S_IWUSR, S_IXGRP, S_IXOTH, S_IXUSR,
17    },
18    std::{
19        collections::HashSet,
20        ffi::CStr,
21        io::{Read, Take, Write},
22        path::Path,
23    },
24};
25
/// Six ASCII bytes that open every odc entry header.
pub const MAGIC: &[u8] = b"070707";

/// Entry name that marks the end of the archive.
const TRAILER: &str = "TRAILER!!!";
30
31fn u32_from_octal(data: &[u8]) -> CpioResult<u32> {
32    let s = std::str::from_utf8(data).map_err(|_| Error::BadHeaderString)?;
33    u32::from_str_radix(s, 8).map_err(|_| Error::BadHeaderHex(s.to_string()))
34}
35
36fn u64_from_octal(data: &[u8]) -> CpioResult<u64> {
37    let s = std::str::from_utf8(data).map_err(|_| Error::BadHeaderString)?;
38    u64::from_str_radix(s, 8).map_err(|_| Error::BadHeaderHex(s.to_string()))
39}
40
41fn read_octal_u32(reader: &mut impl Read, count: usize) -> CpioResult<u32> {
42    let mut buffer = vec![0u8; count];
43    reader.read_exact(&mut buffer)?;
44
45    u32_from_octal(&buffer)
46}
47
48fn read_octal_u64(reader: &mut impl Read, count: usize) -> CpioResult<u64> {
49    let mut buffer = vec![0u8; count];
50    reader.read_exact(&mut buffer)?;
51
52    u64_from_octal(&buffer)
53}
54
55fn write_octal(value: u64, writer: &mut impl Write, size: usize) -> CpioResult<()> {
56    let max_value = 8u64.pow(size as _);
57
58    if value > max_value {
59        return Err(Error::ValueTooLarge);
60    }
61
62    let s = format!("{value:o}");
63
64    for _ in 0..size - s.len() {
65        writer.write_all(b"0")?;
66    }
67
68    writer.write_all(s.as_bytes())?;
69
70    Ok(())
71}
72
/// A single entry header in the portable ASCII (odc) cpio format.
///
/// Numeric fields are stored decoded; on the wire each one is a fixed-width
/// run of ASCII octal digits.
#[derive(Debug, Clone)]
pub struct OdcHeader {
    /// Device number.
    pub dev: u32,
    /// Inode number.
    pub inode: u32,
    /// File mode bits.
    pub mode: u32,
    /// Owner user ID.
    pub uid: u32,
    /// Owner group ID.
    pub gid: u32,
    /// Number of links.
    pub nlink: u32,
    /// Device number for special files.
    pub rdev: u32,
    /// Modification time.
    pub mtime: u32,
    /// Size in bytes of the entry data that follows the header.
    pub file_size: u64,
    /// Entry path, stored without the NUL terminator used on the wire.
    pub name: String,
}
87
88impl OdcHeader {
89    /// Parse a header from a reader.
90    pub fn from_reader(reader: &mut impl Read) -> CpioResult<Self> {
91        let dev = read_octal_u32(reader, 6)?;
92        let inode = read_octal_u32(reader, 6)?;
93        let mode = read_octal_u32(reader, 6)?;
94        let uid = read_octal_u32(reader, 6)?;
95        let gid = read_octal_u32(reader, 6)?;
96        let nlink = read_octal_u32(reader, 6)?;
97        let rdev = read_octal_u32(reader, 6)?;
98        let mtime = read_octal_u32(reader, 11)?;
99        let name_length = read_octal_u32(reader, 6)?;
100        let file_size = read_octal_u64(reader, 11)?;
101
102        let mut name_data = vec![0u8; name_length as usize];
103        reader.read_exact(&mut name_data)?;
104
105        let name = CStr::from_bytes_with_nul(&name_data)
106            .map_err(|_| Error::FilenameDecode)?
107            .to_string_lossy()
108            .to_string();
109
110        Ok(Self {
111            dev,
112            inode,
113            mode,
114            uid,
115            gid,
116            nlink,
117            rdev,
118            mtime,
119            file_size,
120            name,
121        })
122    }
123
124    /// Write the binary header content to a writer.
125    pub fn write(&self, writer: &mut impl Write) -> CpioResult<u64> {
126        writer.write_all(MAGIC)?;
127        write_octal(self.dev as _, writer, 6)?;
128        write_octal(self.inode as _, writer, 6)?;
129        write_octal(self.mode as _, writer, 6)?;
130        write_octal(self.uid as _, writer, 6)?;
131        write_octal(self.gid as _, writer, 6)?;
132        write_octal(self.nlink as _, writer, 6)?;
133        write_octal(self.rdev as _, writer, 6)?;
134        write_octal(self.mtime as _, writer, 11)?;
135        write_octal(self.name.len() as u64 + 1u64, writer, 6)?;
136        write_octal(self.file_size, writer, 11)?;
137
138        writer.write_all(self.name.as_bytes())?;
139        writer.write_all(b"\0")?;
140
141        Ok(9 * 6 + 11 * 2 + self.name.len() as u64 + 1)
142    }
143}
144
145impl CpioHeader for OdcHeader {
146    fn device(&self) -> u32 {
147        self.dev
148    }
149
150    fn inode(&self) -> u32 {
151        self.inode
152    }
153
154    fn mode(&self) -> u32 {
155        self.mode
156    }
157
158    fn uid(&self) -> u32 {
159        self.uid
160    }
161
162    fn gid(&self) -> u32 {
163        self.gid
164    }
165
166    fn nlink(&self) -> u32 {
167        self.nlink
168    }
169
170    fn rdev(&self) -> u32 {
171        self.rdev
172    }
173
174    fn mtime(&self) -> u32 {
175        self.mtime
176    }
177
178    fn file_size(&self) -> u64 {
179        self.file_size
180    }
181
182    fn name(&self) -> &str {
183        &self.name
184    }
185}
186
/// Streaming reader for cpio archives in the *Portable ASCII format*.
pub struct OdcReader<T: Read> {
    /// The underlying reader while no entry is being read.
    archive_reader: Option<T>,
    /// The underlying reader, limited to the current entry's data, while an
    /// entry is being read.
    entry_reader: Option<Take<T>>,
    /// Set once the trailer entry has been encountered.
    seen_trailer: bool,
}
193
194impl<T: Read + Sized> CpioReader<T> for OdcReader<T> {
195    fn new(reader: T) -> Self {
196        Self {
197            archive_reader: Some(reader),
198            entry_reader: None,
199            seen_trailer: false,
200        }
201    }
202
203    fn read_next(&mut self) -> CpioResult<Option<Box<dyn CpioHeader>>> {
204        self.finish()?;
205
206        if let Some(mut reader) = self.archive_reader.take() {
207            let mut magic = [0u8; 6];
208
209            match reader.read_exact(&mut magic) {
210                Ok(_) => {}
211                Err(ref e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
212                    return Ok(None);
213                }
214                Err(e) => {
215                    return Err(e.into());
216                }
217            }
218
219            if magic != MAGIC {
220                return Err(Error::BadMagic);
221            }
222
223            let header = OdcHeader::from_reader(&mut reader)?;
224
225            if header.name == TRAILER {
226                self.seen_trailer = true;
227                Ok(None)
228            } else {
229                self.entry_reader = Some(reader.take(header.file_size as _));
230                Ok(Some(Box::new(header)))
231            }
232        } else {
233            Ok(None)
234        }
235    }
236
237    fn finish(&mut self) -> CpioResult<()> {
238        if let Some(mut reader) = self.entry_reader.take() {
239            let mut buffer = vec![0u8; 32768];
240            loop {
241                if reader.read(&mut buffer)? == 0 {
242                    break;
243                }
244            }
245
246            // Only restore the archive reader if we haven't seen the trailer,
247            // as the trailer indicates end of archive.
248            if !self.seen_trailer {
249                self.archive_reader = Some(reader.into_inner());
250            }
251        }
252
253        Ok(())
254    }
255}
256
257impl<T: Read + Sized> Iterator for OdcReader<T> {
258    type Item = CpioResult<Box<dyn CpioHeader>>;
259
260    fn next(&mut self) -> Option<Self::Item> {
261        match self.read_next() {
262            Ok(Some(r)) => Some(Ok(r)),
263            Ok(None) => None,
264            Err(e) => Some(Err(e)),
265        }
266    }
267}
268
269impl<T: Read + Sized> Read for OdcReader<T> {
270    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
271        if let Some(reader) = &mut self.entry_reader {
272            reader.read(buf)
273        } else {
274            Err(std::io::Error::new(
275                std::io::ErrorKind::Other,
276                "no current archive entry to read from",
277            ))
278        }
279    }
280}
281
282/// Iteratively create a cpio archive using the *Portable ASCII format*.
283///
284/// cpio archives logically consist of 2-tuples of (file header, data), so
285/// data can be streamed by iteratively feeding new entries to write.
286///
287/// cpio archives contain a special file header denoting the end of the
288/// archive. This is emitted by calling [Self::finish]. So consumers should
289/// always call this method when done writing new files.
290///
291/// By default, missing parent directories are automatically emitted when
292/// writing files. Instances track which directories have been emitted. Upon
293/// encountering a file path in a directory that has not yet been emitted,
294/// a directory entry will be emitted. This behavior can be disabled by
295/// calling [Self::auto_write_dirs].
296pub struct OdcBuilder<W: Write + Sized> {
297    writer: W,
298    default_uid: u32,
299    default_gid: u32,
300    default_mtime: DateTime<Utc>,
301    default_mode_file: u32,
302    default_mode_dir: u32,
303    auto_write_dirs: bool,
304    seen_dirs: HashSet<String>,
305    entry_count: u32,
306    finished: bool,
307}
308
309impl<W: Write + Sized> OdcBuilder<W> {
310    /// Construct a new instance which will write data to a writer.
311    pub fn new(writer: W) -> Self {
312        Self {
313            writer,
314            default_uid: 0,
315            default_gid: 0,
316            default_mtime: Utc::now(),
317            default_mode_file: S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH,
318            default_mode_dir: S_IFDIR
319                | S_IRUSR
320                | S_IWUSR
321                | S_IXUSR
322                | S_IRGRP
323                | S_IXGRP
324                | S_IROTH
325                | S_IXOTH,
326            auto_write_dirs: true,
327            seen_dirs: HashSet::new(),
328            entry_count: 0,
329            finished: false,
330        }
331    }
332
333    /// Set the default file mode to use for files.
334    pub fn default_mode_file(&mut self, mode: u32) {
335        self.default_mode_file = mode;
336    }
337
338    /// Set the default file mode to use for directories.
339    pub fn default_mode_directory(&mut self, mode: u32) {
340        self.default_mode_dir = mode;
341    }
342
343    /// Set the default user ID (UID).
344    pub fn default_user_id(&mut self, uid: u32) {
345        self.default_uid = uid;
346    }
347
348    /// Set the default group ID (GID).
349    pub fn default_group_id(&mut self, gid: u32) {
350        self.default_gid = gid;
351    }
352
353    /// Set the default modified time.
354    pub fn default_mtime(&mut self, mtime: DateTime<Utc>) {
355        self.default_mtime = mtime;
356    }
357
358    /// Set the behavior for auto writing directory entries.
359    pub fn auto_write_dirs(&mut self, value: bool) {
360        self.auto_write_dirs = value;
361    }
362
363    /// Obtain a header record representing the next header in the archive.
364    ///
365    /// The header has fields set to default values. Callers should likely
366    /// update at least the name and possibly the file size and mode.
367    ///
368    /// This will increment the inode sequence number when called.
369    pub fn next_header(&mut self) -> OdcHeader {
370        let inode = self.entry_count;
371        self.entry_count += 1;
372
373        OdcHeader {
374            dev: 0,
375            inode,
376            mode: self.default_mode_file,
377            uid: self.default_uid,
378            gid: self.default_gid,
379            nlink: 0,
380            rdev: 0,
381            mtime: self.default_mtime.timestamp() as _,
382            file_size: 0,
383            name: "".to_string(),
384        }
385    }
386
387    fn normalize_archive_path(&self, path: &str) -> String {
388        if path.starts_with("./") {
389            path.to_string()
390        } else {
391            format!("./{path}")
392        }
393    }
394
395    /// Write missing parent directory entries for a given file path.
396    fn emit_parent_directories(&mut self, file_path: &str) -> CpioResult<u64> {
397        let parts = file_path.split('/').collect::<Vec<_>>();
398
399        let mut bytes_written = 0;
400
401        for idx in 1..parts.len() {
402            let dir = parts
403                .clone()
404                .into_iter()
405                .take(idx)
406                .collect::<Vec<&str>>()
407                .join("/");
408
409            if !self.seen_dirs.contains(&dir) {
410                let mut header = self.next_header();
411                header.mode = self.default_mode_dir;
412                header.name = dir.clone();
413
414                bytes_written += header.write(&mut self.writer)?;
415                self.seen_dirs.insert(dir);
416            }
417        }
418
419        Ok(bytes_written)
420    }
421
422    /// Append a raw header and corresponding file data to the writer.
423    ///
424    /// The writer and data are written as-is.
425    ///
426    /// Only simple validation that the data length matches the length advertised
427    /// in the header is performed.
428    ///
429    /// Automatic directory emission is not processed in this mode.
430    pub fn append_header_with_data(
431        &mut self,
432        header: OdcHeader,
433        data: impl AsRef<[u8]>,
434    ) -> CpioResult<u64> {
435        let data = data.as_ref();
436
437        if header.file_size as usize != data.len() {
438            return Err(Error::SizeMismatch);
439        }
440
441        let written = header.write(&mut self.writer)?;
442        self.writer.write_all(data)?;
443
444        Ok(written + data.len() as u64)
445    }
446
447    /// Append a raw header and corresponding data from a reader to the writer.
448    ///
449    /// The header's file size must match the length of data available in the reader
450    /// or errors could occur. This method will copy all data available in the reader
451    /// to the output stream. If the number of bytes written does not match what is
452    /// reported by the header, the cpio archive stream is effectively corrupted
453    /// and an error is returned.
454    pub fn append_header_with_reader(
455        &mut self,
456        header: OdcHeader,
457        reader: &mut impl Read,
458    ) -> CpioResult<u64> {
459        let written = header.write(&mut self.writer)?;
460        let copied = std::io::copy(reader, &mut self.writer)?;
461
462        if copied != header.file_size {
463            Err(Error::SizeMismatch)
464        } else {
465            Ok(written + copied)
466        }
467    }
468
469    /// Write a regular file to the cpio archive with provided file data and file mode.
470    pub fn append_file_from_data(
471        &mut self,
472        archive_path: impl ToString,
473        data: impl AsRef<[u8]>,
474        mode: u32,
475    ) -> CpioResult<u64> {
476        let archive_path = self.normalize_archive_path(&archive_path.to_string());
477        let data = data.as_ref();
478
479        let mut bytes_written = self.emit_parent_directories(&archive_path)?;
480
481        let mut header = self.next_header();
482        header.name = archive_path;
483        header.file_size = data.len() as _;
484        header.mode = mode;
485
486        bytes_written += header.write(&mut self.writer)?;
487        self.writer.write_all(data)?;
488        bytes_written += data.len() as u64;
489
490        Ok(bytes_written)
491    }
492
493    /// Write a regular file to the cpio archive.
494    ///
495    /// This takes the relative path in the archive and the filesystem path of
496    /// the file to write. It resolves header metadata automatically given filesystem
497    /// attributes. However, the UID, GID, and mtime defaults specified on this
498    /// builder are used so archive construction is more deterministic.
499    pub fn append_file_from_path(
500        &mut self,
501        archive_path: impl ToString,
502        path: impl AsRef<Path>,
503    ) -> CpioResult<u64> {
504        let archive_path = self.normalize_archive_path(&archive_path.to_string());
505        let path = path.as_ref();
506
507        let mut fh = std::fs::File::open(path)?;
508        let metadata = fh.metadata()?;
509
510        if !metadata.is_file() {
511            return Err(Error::NotAFile(path.to_path_buf()));
512        }
513
514        // Emit parent directories first, so inode number is sequential.
515        let mut bytes_written = self.emit_parent_directories(&archive_path)?;
516
517        let mut header = self.next_header();
518        header.name = archive_path;
519        header.file_size = metadata.len();
520
521        if path.is_executable() {
522            header.mode |= S_IXUSR | S_IXGRP | S_IXOTH;
523        }
524
525        bytes_written += header.write(&mut self.writer)?;
526        bytes_written += std::io::copy(&mut fh, &mut self.writer)?;
527
528        Ok(bytes_written)
529    }
530
531    /// Append a [FileManifest] to the archive.
532    pub fn append_file_manifest(&mut self, manifest: &FileManifest) -> CpioResult<u64> {
533        let mut bytes_written = 0;
534
535        for (path, entry) in manifest.iter_entries() {
536            let mode = if entry.is_executable() { 0o755 } else { 0o644 };
537            let data = entry.resolve_content()?;
538
539            bytes_written += self.append_file_from_data(path.display().to_string(), data, mode)?;
540        }
541
542        Ok(bytes_written)
543    }
544
545    /// Finish writing the archive.
546    ///
547    /// This will emit a special header denoting the end of archive.
548    ///
549    /// Failure to call this method will result in a malformed cpio archive.
550    /// Readers may or may not handle the missing trailer correctly.
551    pub fn finish(&mut self) -> CpioResult<u64> {
552        if !self.finished {
553            let mut header = self.next_header();
554            header.name = TRAILER.to_string();
555            let count = header.write(&mut self.writer)?;
556            self.finished = true;
557
558            Ok(count)
559        } else {
560            Ok(0)
561        }
562    }
563
564    /// Consume self and return the original writer this instance was constructed from.
565    ///
566    /// This will automatically finish the archive if needed.
567    pub fn into_inner(mut self) -> CpioResult<W> {
568        self.finish()?;
569
570        Ok(self.writer)
571    }
572}
573
#[cfg(test)]
mod tests {
    use {super::*, std::io::Cursor};

    /// Round-trip a single file through the builder and reader, checking the
    /// automatically emitted parent directories and the file content.
    #[test]
    fn write_single_file() {
        let mut builder = OdcBuilder::new(Cursor::new(Vec::<u8>::new()));

        let current_exe = std::env::current_exe().unwrap();
        let current_exe_data = std::fs::read(&current_exe).unwrap();
        builder
            .append_file_from_path("child/grandchild/exe", current_exe)
            .unwrap();

        let mut reader = builder.into_inner().unwrap();
        reader.set_position(0);

        let mut reader = OdcReader::new(reader);

        let mut i = 0;
        while let Some(header) = reader.read_next().unwrap() {
            let mut file_data = Vec::<u8>::with_capacity(header.file_size() as _);
            reader.read_to_end(&mut file_data).unwrap();

            let wanted_filename = match i {
                0 => ".",
                1 => "./child",
                2 => "./child/grandchild",
                3 => "./child/grandchild/exe",
                _ => panic!("unexpected entry in archive: {header:?}"),
            };

            assert_eq!(header.name(), wanted_filename);

            if (0..=2).contains(&i) {
                assert_eq!(header.file_size(), 0);
                assert_ne!(header.mode() & S_IFDIR, 0);
            }

            if i == 3 {
                assert_eq!(&file_data, &current_exe_data);
            }

            i += 1;
        }

        // Without this an empty or truncated archive would pass unnoticed.
        assert_eq!(i, 4);
    }
}