noodles_bgzf/
writer.rs

1//! BGZF writer.
2
3mod builder;
4mod compression_level;
5mod frame;
6
7pub use self::{builder::Builder, compression_level::CompressionLevel};
8
9use std::io::{self, Write};
10
11pub(crate) use self::frame::write_frame;
12use super::{gz, VirtualPosition, BGZF_HEADER_SIZE, BGZF_MAX_ISIZE};
13
14// The max DEFLATE overhead for 65536 bytes of data at compression level 0.
15//
16// For zlib (and derivatives) and libdeflate, this is 10 bytes; and for miniz_oxide, 15 bytes.
17const COMPRESSION_LEVEL_0_OVERHEAD: usize = 15;
18
19// The max size of the write buffer.
20//
21// The buffer that uses this size is the uncompressed data that is staged to be written as a BGZF
22// block. It is slightly smaller than the max allowed ISIZE to compensate for the gzip format and
23// DEFLATE overheads.
24pub(crate) const MAX_BUF_SIZE: usize =
25    BGZF_MAX_ISIZE - BGZF_HEADER_SIZE - gz::TRAILER_SIZE - COMPRESSION_LEVEL_0_OVERHEAD;
26
// § 4.1.2 End-of-file marker (2020-12-03)
//
// The marker is a complete gzip member whose ISIZE is 0.
pub(crate) const BGZF_EOF: [u8; 28] = [
    // gzip header
    0x1f, 0x8b, 0x08, 0x04, // ID1, ID2, CM = DEFLATE, FLG = FEXTRA
    0x00, 0x00, 0x00, 0x00, // MTIME = 0
    0x00, 0xff, // XFL = 0, OS = 255 (unknown)
    0x06, 0x00, // XLEN = 6
    // extra subfield
    0x42, 0x43, // SI1, SI2
    0x02, 0x00, // SLEN = 2
    0x1b, 0x00, // BSIZE = 27
    // compressed data and trailer
    0x03, 0x00, // CDATA
    0x00, 0x00, 0x00, 0x00, // CRC32 = 0x00000000
    0x00, 0x00, 0x00, 0x00, // ISIZE = 0
];
43
44#[cfg(feature = "libdeflate")]
45pub(crate) type CompressionLevelImpl = libdeflater::CompressionLvl;
46#[cfg(not(feature = "libdeflate"))]
47pub(crate) type CompressionLevelImpl = flate2::Compression;
48
49/// A BZGF writer.
50///
51/// This implements [`std::io::Write`], consuming uncompressed data and emitting compressed data.
52///
53/// # Examples
54///
55/// ```
56/// # use std::io::{self, Write};
57/// use noodles_bgzf as bgzf;
58///
59/// let mut writer = bgzf::Writer::new(Vec::new());
60/// writer.write_all(b"noodles-bgzf")?;
61///
62/// let data = writer.finish()?;
63/// # Ok::<(), io::Error>(())
64/// ```
65#[derive(Debug)]
66pub struct Writer<W>
67where
68    W: Write,
69{
70    inner: Option<W>,
71    position: u64,
72    staging_buf: Vec<u8>,
73    compression_buf: Vec<u8>,
74    compression_level: CompressionLevelImpl,
75}
76
77impl<W> Writer<W>
78where
79    W: Write,
80{
81    /// Creates a writer with a default compression level.
82    ///
83    /// # Examples
84    ///
85    /// ```
86    /// # use std::io;
87    /// use noodles_bgzf as bgzf;
88    /// let writer = bgzf::Writer::new(io::sink());
89    /// ```
90    pub fn new(inner: W) -> Self {
91        Builder::default().build_from_writer(inner)
92    }
93
94    /// Returns a reference to the underlying writer.
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// # use std::io;
100    /// use noodles_bgzf as bgzf;
101    /// let writer = bgzf::Writer::new(io::sink());
102    /// let _inner = writer.get_ref();
103    /// ```
104    pub fn get_ref(&self) -> &W {
105        self.inner.as_ref().unwrap()
106    }
107
108    /// Returns the underlying writer.
109    ///
110    /// # Examples
111    ///
112    /// ```
113    /// # use std::io;
114    /// use noodles_bgzf as bgzf;
115    /// let writer = bgzf::Writer::new(io::sink());
116    /// let _inner = writer.into_inner();
117    /// ```
118    pub fn into_inner(mut self) -> W {
119        self.inner.take().unwrap()
120    }
121
122    /// Returns the current position of the stream.
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// # use std::io;
128    /// use noodles_bgzf as bgzf;
129    /// let writer = bgzf::Writer::new(io::sink());
130    /// assert_eq!(writer.position(), 0);
131    /// ```
132    pub fn position(&self) -> u64 {
133        self.position
134    }
135
136    /// Returns the current virtual position of the stream.
137    ///
138    /// # Panics
139    ///
140    /// This panics if the stream flushed >= 256 TiB of compressed data.
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// # use std::io;
146    /// use noodles_bgzf as bgzf;
147    /// let writer = bgzf::Writer::new(io::sink());
148    /// assert_eq!(writer.virtual_position(), bgzf::VirtualPosition::from(0));
149    /// ```
150    pub fn virtual_position(&self) -> VirtualPosition {
151        // SAFETY: The uncompressed buffer is guaranteed to be <= `MAX_UNCOMPRESSED_POSITION`.
152        let uncompressed_position = self.staging_buf.len() as u16;
153        VirtualPosition::try_from((self.position, uncompressed_position)).unwrap()
154    }
155
156    fn flush_block(&mut self) -> io::Result<()> {
157        use crate::deflate;
158
159        let compressed_data = &mut self.compression_buf;
160        let crc32 = deflate::encode(&self.staging_buf, self.compression_level, compressed_data)?;
161
162        let inner = self.inner.as_mut().unwrap();
163        let uncompressed_len = self.staging_buf.len();
164        let block_size = write_frame(inner, compressed_data, crc32, uncompressed_len)?;
165
166        self.position += block_size as u64;
167
168        self.staging_buf.clear();
169
170        Ok(())
171    }
172
173    /// Attempts to finish the output stream by flushing any remaining buffers.
174    ///
175    /// This then appends the final BGZF EOF block.
176    ///
177    /// # Examples
178    ///
179    /// ```
180    /// # use std::io::{self, Write};
181    /// use noodles_bgzf as bgzf;
182    ///
183    /// let mut writer = bgzf::Writer::new(io::sink());
184    /// writer.write_all(b"noodles-bgzf")?;
185    ///
186    /// writer.try_finish()?;
187    /// # Ok::<(), io::Error>(())
188    /// ```
189    pub fn try_finish(&mut self) -> io::Result<()> {
190        self.flush()?;
191
192        let inner = self.inner.as_mut().unwrap();
193        let result = inner.write_all(&BGZF_EOF);
194
195        self.position += BGZF_EOF.len() as u64;
196
197        result
198    }
199
200    /// Returns the underlying writer after finishing the output stream.
201    ///
202    /// This method can only be called once. Any further usage of the writer may result in a panic.
203    ///
204    /// # Examples
205    ///
206    /// ```
207    /// # use std::io::{self, Write};
208    /// use noodles_bgzf as bgzf;
209    ///
210    /// let mut writer = bgzf::Writer::new(io::sink());
211    /// writer.write_all(b"noodles-bgzf")?;
212    ///
213    /// let data = writer.finish()?;
214    /// # Ok::<(), io::Error>(())
215    /// ```
216    pub fn finish(mut self) -> io::Result<W> {
217        self.try_finish()?;
218        let inner = self.inner.take().unwrap();
219        Ok(inner)
220    }
221
222    fn remaining(&self) -> usize {
223        MAX_BUF_SIZE - self.staging_buf.len()
224    }
225
226    fn has_remaining(&self) -> bool {
227        self.staging_buf.len() < MAX_BUF_SIZE
228    }
229}
230
231impl<W> Drop for Writer<W>
232where
233    W: Write,
234{
235    fn drop(&mut self) {
236        if self.inner.is_some() {
237            let _ = self.try_finish();
238        }
239    }
240}
241
242impl<W> Write for Writer<W>
243where
244    W: Write,
245{
246    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
247        let amt = self.remaining().min(buf.len());
248        self.staging_buf.extend(&buf[..amt]);
249
250        if !self.has_remaining() {
251            self.flush()?;
252        }
253
254        Ok(amt)
255    }
256
257    fn flush(&mut self) -> io::Result<()> {
258        if self.staging_buf.is_empty() {
259            Ok(())
260        } else {
261            self.flush_block()
262        }
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_virtual_position() -> Result<(), Box<dyn std::error::Error>> {
        let mut writer = Writer::new(Vec::new());

        // Nothing has been written yet.
        let actual = writer.virtual_position();
        assert_eq!(actual, VirtualPosition::from(0));

        // Staged data only moves the uncompressed offset.
        writer.write_all(b"noodles")?;

        let actual = writer.virtual_position();
        let expected = VirtualPosition::try_from((0, 7))?;
        assert_eq!(actual, expected);

        // After a flush, the position matches the length of the output and nothing is staged.
        writer.flush()?;

        let actual = writer.virtual_position();
        let compressed_len = writer.get_ref().len() as u64;
        let expected = VirtualPosition::try_from((compressed_len, 0))?;
        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_finish() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());
        writer.write_all(b"noodles")?;

        // The finished stream must end with the EOF block.
        let data = writer.finish()?;
        let (_, tail) = data.split_at(data.len() - BGZF_EOF.len());

        assert_eq!(tail, BGZF_EOF);

        Ok(())
    }
}