// anyreader_walker/entry.rs
use crate::stack::AnyWalker;
use crate::walkers::{ArchiveVisitor, FileWalker, TarWalker, ZipWalker};
use anyreader::AnyFormat;
use anyreader::FormatKind;
use bytes::buf::Reader;
use bytes::{Buf, Bytes};
use std::fmt::{Debug, Display, Formatter};
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::{Path, PathBuf};

/// Path and size metadata describing a single [`FileEntry`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct EntryDetails {
    /// Path of the entry, stored as supplied to [`EntryDetails::new`].
    pub path: PathBuf,
    /// Size of the entry; rendered as a byte count by the `Display` impl.
    pub size: u64,
}

impl EntryDetails {
    /// Builds the details from anything convertible into a [`PathBuf`] plus a size.
    pub fn new(path: impl Into<PathBuf>, size: u64) -> Self {
        let path = path.into();
        Self { path, size }
    }
}

impl Display for EntryDetails {
    /// Renders the details as `<path> (<size> bytes)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { path, size } = self;
        write!(f, "{} ({} bytes)", path.display(), size)
    }
}

/// A readable entry paired with its [`EntryDetails`] and an [`AnyFormat`] reader that
/// carries the entry's [`FormatKind`].
///
/// The entry's contents can be consumed through its [`Read`] implementation, and an
/// entry can be handed to an [`AnyWalker`] to visit what it contains.
///
/// # Example
/// Walking a tar archive that contains further nested archives:
/// ```
/// # use std::io::Read;
/// use std::path::PathBuf;
/// # use anyreader::test::{tar_archive, zip_archive};
/// use anyreader_walker::{FileEntry, AnyWalker, EntryDetails, FormatKind, ArchiveStack};
/// // Build a tar archive holding a nested tar archive, which in turn holds a zip archive.
/// let tar_archive = tar_archive([
///     ("test", b"Hello, world!".to_vec()),
///     ("nested.tar", tar_archive([
///         ("nested", b"Hello, nested!".to_vec()),
///         ("nested2", b"Hello, nested2!".to_vec()),
///         ("nested_zip", zip_archive([("nested3", "Hello, nested zip!")]))
///     ])),
/// ]);
/// let entry = FileEntry::from_bytes("archive.tar.gz", tar_archive).unwrap();
///
/// #[derive(Default)]
/// struct Visitor {
///     names: Vec<PathBuf>,
///     stack: ArchiveStack
/// }
///
/// impl AnyWalker for Visitor {
///     fn visit_file_entry(&mut self, entry: &mut FileEntry<impl Read>) -> std::io::Result<()> {
///         self.names.push(self.stack.nested_path().join(&entry.path()));
///         Ok(())
///     }
///
///     fn begin_visit_archive(&mut self, details: &EntryDetails, format: FormatKind) -> std::io::Result<bool> {
///         self.stack.push_archive(&details.path);
///         Ok(true)
///     }
///
///     fn end_visit_archive(&mut self, details: EntryDetails, format: FormatKind) -> std::io::Result<()> {
///         self.stack.pop_archive();
///         Ok(())
///     }
/// }
///
/// let mut visitor = Visitor::default();
/// visitor.walk(entry).unwrap();
///
/// let names = visitor.names.iter().map(|p| p.to_str().unwrap()).collect::<Vec<_>>();
/// assert_eq!(names, [
///     "archive.tar.gz/test",
///     "archive.tar.gz/nested.tar/nested",
///     "archive.tar.gz/nested.tar/nested2",
///     "archive.tar.gz/nested.tar/nested_zip/nested3"
/// ]);
/// ```
pub struct FileEntry<T>
where
    T: Read,
{
    // Path and size recorded for this entry.
    details: EntryDetails,
    // Underlying reader together with its format kind.
    inner: AnyFormat<T>,
}

impl FileEntry<BufReader<File>> {
    /// Opens the file at `path` and wraps it in a buffered [`FileEntry`].
    ///
    /// The entry's size is taken from the opened file's metadata, and its path is a
    /// copy of `path`.
    ///
    /// # Errors
    /// Returns any error raised while opening the file, reading its metadata, or by
    /// [`AnyFormat::from_reader`].
    pub fn from_path(path: impl AsRef<Path>) -> std::io::Result<Self> {
        let location = path.as_ref();
        let handle = File::open(location)?;
        let byte_len = handle.metadata()?.len();
        let buffered = BufReader::new(handle);
        let format = AnyFormat::from_reader(buffered)?;
        Ok(Self::new(location.to_path_buf(), byte_len, format))
    }
}

impl FileEntry<Reader<Bytes>> {
    /// Builds an entry over an in-memory buffer.
    ///
    /// `data` is converted into [`Bytes`]; its length becomes the entry's size and
    /// `path` is recorded as the entry's path.
    ///
    /// # Errors
    /// Propagates any error returned by [`AnyFormat::from_reader`].
    pub fn from_bytes(
        path: impl AsRef<Path>,
        data: impl Into<Bytes>,
    ) -> std::io::Result<FileEntry<Reader<Bytes>>> {
        let payload: Bytes = data.into();
        // Capture the length first: `reader()` consumes the buffer.
        let byte_len = payload.len() as u64;
        let format = AnyFormat::from_reader(payload.reader())?;
        let details = EntryDetails::new(path.as_ref(), byte_len);
        Ok(FileEntry {
            details,
            inner: format,
        })
    }
}

impl<T: Read> FileEntry<T> {
    pub fn new(path: PathBuf, size: u64, format: AnyFormat<T>) -> Self {
        Self {
            details: EntryDetails::new(path, size),
            inner: format,
        }
    }

    pub fn from_reader(path: PathBuf, size: u64, reader: T) -> std::io::Result<FileEntry<T>> {
        let inner = AnyFormat::from_reader(reader)?;
        Ok(FileEntry {
            details: EntryDetails::new(path, size),
            inner,
        })
    }

    pub fn into_components(self) -> (EntryDetails, AnyFormat<T>) {
        (self.details, self.inner)
    }

    pub fn details(&self) -> &EntryDetails {
        &self.details
    }

    pub fn path(&self) -> &Path {
        &self.details.path
    }

    pub fn size(&self) -> u64 {
        self.details.size
    }

    pub fn supports_recursion(&self) -> bool {
        matches!(self.inner.kind, FormatKind::Tar | FormatKind::Zip)
    }

    pub fn format(&self) -> FormatKind {
        self.inner.kind
    }

    pub fn get_ref(&self) -> &T {
        self.inner.get_ref()
    }
}

/// Lets a [`FileEntry`] be walked by an [`AnyWalker`], choosing the concrete walker
/// from the entry's [`FormatKind`].
impl<'a, T: Read + 'a> ArchiveVisitor<'a> for FileEntry<T> {
    type Item = T;

    /// Consumes the entry and hands it to the walker matching its format.
    ///
    /// `Tar` and `Zip` entries are passed to [`TarWalker`] / [`ZipWalker`] as a
    /// `&mut dyn Read` borrow of this entry; every other format is moved into a
    /// [`FileWalker`]. The result of the chosen walker's `visit` is returned as-is.
    #[inline(always)]
    fn visit<V: AnyWalker>(mut self, visitor: &mut V) -> std::io::Result<()> {
        match self.format() {
            // Archive formats: the walker receives the entry only as a type-erased
            // reader, borrowed for the duration of the walk.
            FormatKind::Tar => TarWalker::new(&mut self as &mut dyn Read).visit(visitor),
            FormatKind::Zip => ZipWalker::new(&mut self as &mut dyn Read).visit(visitor),
            // Anything else: ownership of the whole entry moves into the file walker.
            _ => FileWalker::new(self).visit(visitor),
        }
    }
}

impl<T: Read> Debug for FileEntry<T> {
    /// Formats the entry as a struct with `path`, `size` and `format` fields.
    ///
    /// The struct label is the type's own name, `FileEntry`, so Debug output points
    /// at a type that actually exists (it previously read `ArchiveEntry`).
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FileEntry")
            .field("path", &self.details.path)
            .field("size", &self.details.size)
            // The `format` field shows the Debug rendering of the underlying reader.
            .field("format", &self.inner)
            .finish()
    }
}

impl<T: Read> Read for FileEntry<T> {
    /// Reads into `buf` by delegating to the underlying [`AnyFormat`] reader.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let source = &mut self.inner;
        source.read(buf)
    }
}