gix_archive/
write.rs

1use gix_worktree_stream::{Entry, Stream};
2
3use crate::{Error, Format, Options};
4
5/// Write all stream entries in `stream` as provided by `next_entry(stream)` to `out` configured according to `opts` which
6/// also includes the streaming format.
7///
8/// ### Performance
9///
10/// * The caller should be sure `out` is fast enough. If in doubt, wrap in [`std::io::BufWriter`].
11/// * Further, big files aren't suitable for archival into `tar` archives as they require the size of the stream to be known
12///   prior to writing the header of each entry.
13#[cfg_attr(not(feature = "tar"), allow(unused_mut, unused_variables))]
14pub fn write_stream<NextFn>(
15    stream: &mut Stream,
16    mut next_entry: NextFn,
17    out: impl std::io::Write,
18    opts: Options,
19) -> Result<(), Error>
20where
21    NextFn: FnMut(&mut Stream) -> Result<Option<Entry<'_>>, gix_worktree_stream::entry::Error>,
22{
23    if opts.format == Format::InternalTransientNonPersistable {
24        return Err(Error::InternalFormatMustNotPersist);
25    }
26    #[cfg(any(feature = "tar", feature = "tar_gz"))]
27    {
28        enum State<W: std::io::Write> {
29            #[cfg(feature = "tar")]
30            Tar((tar::Builder<W>, Vec<u8>)),
31            #[cfg(feature = "tar_gz")]
32            TarGz((tar::Builder<flate2::write::GzEncoder<W>>, Vec<u8>)),
33        }
34
35        impl<W: std::io::Write> State<W> {
36            pub fn new(format: Format, mtime: gix_date::SecondsSinceUnixEpoch, out: W) -> Result<Self, Error> {
37                Ok(match format {
38                    Format::InternalTransientNonPersistable => unreachable!("handled earlier"),
39                    Format::Zip { .. } => return Err(Error::ZipWithoutSeek),
40                    #[cfg(feature = "tar")]
41                    Format::Tar => {
42                        #[cfg(feature = "tar")]
43                        {
44                            State::Tar((
45                                {
46                                    let mut ar = tar::Builder::new(out);
47                                    ar.mode(tar::HeaderMode::Deterministic);
48                                    ar
49                                },
50                                Vec::with_capacity(64 * 1024),
51                            ))
52                        }
53                        #[cfg(not(feature = "tar"))]
54                        {
55                            Err(Error::SupportNotCompiledIn { wanted: Format::Tar })
56                        }
57                    }
58                    Format::TarGz { compression_level } => {
59                        #[cfg(feature = "tar_gz")]
60                        {
61                            State::TarGz((
62                                {
63                                    let gz = flate2::GzBuilder::new().mtime(mtime as u32).write(
64                                        out,
65                                        match compression_level {
66                                            None => flate2::Compression::default(),
67                                            Some(level) => flate2::Compression::new(u32::from(level)),
68                                        },
69                                    );
70                                    let mut ar = tar::Builder::new(gz);
71                                    ar.mode(tar::HeaderMode::Deterministic);
72                                    ar
73                                },
74                                Vec::with_capacity(64 * 1024),
75                            ))
76                        }
77                        #[cfg(not(feature = "tar_gz"))]
78                        {
79                            Err(Error::SupportNotCompiledIn { wanted: Format::TarGz })
80                        }
81                    }
82                })
83            }
84        }
85
86        let mut state = State::new(opts.format, opts.modification_time, out)?;
87        while let Some(entry) = next_entry(stream)? {
88            match &mut state {
89                #[cfg(feature = "tar")]
90                State::Tar((ar, buf)) => {
91                    append_tar_entry(ar, buf, entry, opts.modification_time, &opts)?;
92                }
93                #[cfg(feature = "tar_gz")]
94                State::TarGz((ar, buf)) => {
95                    append_tar_entry(ar, buf, entry, opts.modification_time, &opts)?;
96                }
97            }
98        }
99
100        match state {
101            #[cfg(feature = "tar")]
102            State::Tar((mut ar, _)) => {
103                ar.finish()?;
104            }
105            #[cfg(feature = "tar_gz")]
106            State::TarGz((ar, _)) => {
107                ar.into_inner()?.finish()?;
108            }
109        }
110    }
111    Ok(())
112}
113
114/// Like [`write_stream()`], but requires [`std::io::Seek`] for `out`.
115///
116/// Note that `zip` is able to stream big files, which our `tar` implementation is not able to do, which makes it the
117/// only suitable container to support huge files from `git-lfs` without consuming excessive amounts of memory.
118#[cfg_attr(not(feature = "zip"), allow(unused_mut, unused_variables))]
119pub fn write_stream_seek<NextFn>(
120    stream: &mut Stream,
121    mut next_entry: NextFn,
122    out: impl std::io::Write + std::io::Seek,
123    opts: Options,
124) -> Result<(), Error>
125where
126    NextFn: FnMut(&mut Stream) -> Result<Option<Entry<'_>>, gix_worktree_stream::entry::Error>,
127{
128    let compression_level = match opts.format {
129        Format::Zip { compression_level } => compression_level.map(i64::from),
130        _other => return write_stream(stream, next_entry, out, opts),
131    };
132
133    #[cfg(feature = "zip")]
134    {
135        let mut ar = zip::write::ZipWriter::new(out);
136        let mut buf = Vec::new();
137        let zdt = jiff::Timestamp::from_second(opts.modification_time)
138            .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?
139            .to_zoned(jiff::tz::TimeZone::UTC);
140        let mtime = zip::DateTime::from_date_and_time(
141            zdt.year()
142                .try_into()
143                .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?,
144            // These are all OK because month, day, hour, minute and second
145            // are always positive.
146            zdt.month().try_into().expect("non-negative"),
147            zdt.day().try_into().expect("non-negative"),
148            zdt.hour().try_into().expect("non-negative"),
149            zdt.minute().try_into().expect("non-negative"),
150            zdt.second().try_into().expect("non-negative"),
151        )
152        .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?;
153        while let Some(entry) = next_entry(stream)? {
154            append_zip_entry(
155                &mut ar,
156                entry,
157                &mut buf,
158                mtime,
159                compression_level,
160                opts.tree_prefix.as_ref(),
161            )?;
162        }
163        ar.finish()
164            .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
165    }
166
167    Ok(())
168}
169
/// Add `entry` to the zip archive `ar`, with its path prefixed by `tree_prefix` if set.
///
/// * Blobs are streamed into the archive without buffering them.
/// * Trees and commits are added as directories.
/// * Links are added as symbolic links, with `buf` serving as scratch space to hold their target.
///
/// `mtime` and `compression_level` are applied to the entry as is. Entries are marked executable based on `entry.mode`.
#[cfg(feature = "zip")]
fn append_zip_entry<W: std::io::Write + std::io::Seek>(
    ar: &mut zip::write::ZipWriter<W>,
    mut entry: gix_worktree_stream::Entry<'_>,
    buf: &mut Vec<u8>,
    mtime: zip::DateTime,
    compression_level: Option<i64>,
    tree_prefix: Option<&bstr::BString>,
) -> Result<(), Error> {
    let file_opts = zip::write::FileOptions::<'_, ()>::default()
        .compression_method(zip::CompressionMethod::Deflated)
        .compression_level(compression_level)
        // Entries of unknown size are treated as large to be on the safe side.
        .large_file(entry.bytes_remaining().map_or(true, |len| len > u32::MAX as usize))
        .last_modified_time(mtime)
        .unix_permissions(if entry.mode.is_executable() { 0o755 } else { 0o644 });
    // The path must be owned as `entry` is borrowed mutably when reading its content.
    let path = add_prefix(entry.relative_path(), tree_prefix).into_owned();
    match entry.mode.kind() {
        gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable => {
            ar.start_file(path.to_string(), file_opts)
                .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
            std::io::copy(&mut entry, ar)?;
        }
        gix_object::tree::EntryKind::Tree | gix_object::tree::EntryKind::Commit => {
            ar.add_directory(path.to_string(), file_opts)
                .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
        }
        gix_object::tree::EntryKind::Link => {
            use bstr::ByteSlice;
            // `buf` is reused across entries and writes to it append, so without clearing it
            // the target of this link would be prefixed by the targets of all previous links.
            buf.clear();
            std::io::copy(&mut entry, buf)?;
            ar.add_symlink(path.to_string(), buf.as_bstr().to_string(), file_opts)
                .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
        }
    }
    Ok(())
}
205
/// Append `entry` to the tar archive `ar`, with its path prefixed by `opts.tree_prefix` if set.
///
/// The content of `entry` is read into `buf` in full, which is cleared beforehand, as its size has to be
/// part of the header. For links, `buf` holds the link target and the entry itself is written without content.
/// `mtime_seconds_since_epoch` is the modification time to store in the header.
#[cfg(any(feature = "tar", feature = "tar_gz"))]
fn append_tar_entry<W: std::io::Write>(
    ar: &mut tar::Builder<W>,
    buf: &mut Vec<u8>,
    mut entry: gix_worktree_stream::Entry<'_>,
    mtime_seconds_since_epoch: i64,
    opts: &Options,
) -> Result<(), Error> {
    let permissions = if entry.mode.is_executable() { 0o755 } else { 0o644 };

    let mut header = tar::Header::new_gnu();
    header.set_mtime(mtime_seconds_since_epoch as u64);
    header.set_entry_type(tar_entry_type(entry.mode));
    header.set_mode(permissions);

    // Read everything the entry has to offer before its path is borrowed from it.
    buf.clear();
    std::io::copy(&mut entry, buf)?;
    let archive_path = gix_path::from_bstr(add_prefix(entry.relative_path(), opts.tree_prefix.as_ref()));

    if entry.mode.is_link() {
        use bstr::ByteSlice;
        // What was read is the link target, the link itself has no content.
        header.set_entry_type(tar::EntryType::Symlink);
        header.set_size(0);
        let link_target = gix_path::from_bstr(buf.as_bstr());
        ar.append_link(&mut header, archive_path, link_target)?;
    } else {
        header.set_size(buf.len() as u64);
        ar.append_data(&mut header, archive_path, buf.as_slice())?;
    }
    Ok(())
}
235
/// Return the type of `tar` entry that corresponds to the kind of the tree-entry `mode`.
///
/// Trees and commits become directories and blobs become regular files, executable or not.
/// Links become symbolic links: `EntryType::Link` would be a hard link in `tar`, which isn't what a git link is.
#[cfg(any(feature = "tar", feature = "tar_gz"))]
fn tar_entry_type(mode: gix_object::tree::EntryMode) -> tar::EntryType {
    use gix_object::tree::EntryKind;
    use tar::EntryType;
    match mode.kind() {
        EntryKind::Tree | EntryKind::Commit => EntryType::Directory,
        EntryKind::Blob | EntryKind::BlobExecutable => EntryType::Regular,
        EntryKind::Link => EntryType::Symlink,
    }
}
247
/// Return `relative_path` with `prefix` put in front of it, or `relative_path` itself if there is no `prefix`.
///
/// `prefix` is prepended byte for byte, no separator is added between it and `relative_path`.
// Needed by both the `tar` and the `zip` writers, hence it must be available if any of them is compiled in.
#[cfg(any(feature = "tar", feature = "tar_gz", feature = "zip"))]
fn add_prefix<'a>(relative_path: &'a bstr::BStr, prefix: Option<&bstr::BString>) -> std::borrow::Cow<'a, bstr::BStr> {
    use std::borrow::Cow;
    match prefix {
        // Nothing to prepend, so there is no need to allocate.
        None => Cow::Borrowed(relative_path),
        Some(prefix) => {
            use bstr::ByteVec;
            let mut buf = prefix.clone();
            buf.push_str(relative_path);
            Cow::Owned(buf)
        }
    }
}