noodles_bgzf/writer.rs
1//! BGZF writer.
2
3mod builder;
4mod compression_level;
5mod frame;
6
7pub use self::{builder::Builder, compression_level::CompressionLevel};
8
9use std::io::{self, Write};
10
11pub(crate) use self::frame::write_frame;
12use super::{gz, VirtualPosition, BGZF_HEADER_SIZE, BGZF_MAX_ISIZE};
13
// The max DEFLATE overhead for 65536 bytes of data at compression level 0.
//
// For zlib (and derivatives) and libdeflate, this is 10 bytes; and for miniz_oxide, 15 bytes.
// The larger of the two values is used so the bound holds for either figure.
const COMPRESSION_LEVEL_0_OVERHEAD: usize = 15;
18
// The max size of the write buffer.
//
// The buffer that uses this size holds the uncompressed data that is staged to be written as a
// BGZF block. It is slightly smaller than the max allowed ISIZE to compensate for the gzip
// format and DEFLATE overheads: the BGZF header, the gzip trailer, and the worst-case expansion
// at compression level 0 are all subtracted.
pub(crate) const MAX_BUF_SIZE: usize =
    BGZF_MAX_ISIZE - BGZF_HEADER_SIZE - gz::TRAILER_SIZE - COMPRESSION_LEVEL_0_OVERHEAD;
26
// § 4.1.2 End-of-file marker (2020-12-03)
//
// This is a complete, 28-byte BGZF block with no payload (ISIZE = 0). It is appended verbatim
// when the stream is finished.
pub(crate) const BGZF_EOF: [u8; 28] = [
    0x1f, 0x8b, // ID1, ID2
    0x08, // CM = DEFLATE
    0x04, // FLG = FEXTRA
    0x00, 0x00, 0x00, 0x00, // MTIME = 0
    0x00, // XFL = 0
    0xff, // OS = 255 (unknown)
    0x06, 0x00, // XLEN = 6
    0x42, 0x43, // SI1, SI2
    0x02, 0x00, // SLEN = 2
    0x1b, 0x00, // BSIZE = 27 (one less than the 28-byte length of this block)
    0x03, 0x00, // CDATA (an empty DEFLATE stream)
    0x00, 0x00, 0x00, 0x00, // CRC32 = 0x00000000
    0x00, 0x00, 0x00, 0x00, // ISIZE = 0
];
43
// The backend-specific compression level type stored by the writer.
//
// With the `libdeflate` feature enabled, this is the libdeflater level type; otherwise, it is
// the flate2 one. Exactly one of the two aliases is compiled in.
#[cfg(feature = "libdeflate")]
pub(crate) type CompressionLevelImpl = libdeflater::CompressionLvl;
#[cfg(not(feature = "libdeflate"))]
pub(crate) type CompressionLevelImpl = flate2::Compression;
48
/// A BGZF writer.
///
/// This implements [`std::io::Write`], consuming uncompressed data and emitting compressed data.
///
/// Uncompressed input is collected in a staging buffer. Once that buffer is full (or the writer
/// is flushed), its contents are compressed and written to the underlying writer as one block.
///
/// # Examples
///
/// ```
/// # use std::io::{self, Write};
/// use noodles_bgzf as bgzf;
///
/// let mut writer = bgzf::Writer::new(Vec::new());
/// writer.write_all(b"noodles-bgzf")?;
///
/// let data = writer.finish()?;
/// # Ok::<(), io::Error>(())
/// ```
#[derive(Debug)]
pub struct Writer<W>
where
    W: Write,
{
    // The underlying writer. This is `None` only after it was taken by `finish` or `into_inner`.
    inner: Option<W>,
    // The running total of block sizes written to the underlying writer.
    position: u64,
    // Uncompressed data waiting to be compressed into the next block.
    staging_buf: Vec<u8>,
    // Scratch buffer that receives the compressed data of the block being written.
    compression_buf: Vec<u8>,
    // The compression level handed to the DEFLATE encoder for every block.
    compression_level: CompressionLevelImpl,
}
76
77impl<W> Writer<W>
78where
79 W: Write,
80{
81 /// Creates a writer with a default compression level.
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// # use std::io;
87 /// use noodles_bgzf as bgzf;
88 /// let writer = bgzf::Writer::new(io::sink());
89 /// ```
90 pub fn new(inner: W) -> Self {
91 Builder::default().build_from_writer(inner)
92 }
93
94 /// Returns a reference to the underlying writer.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// # use std::io;
100 /// use noodles_bgzf as bgzf;
101 /// let writer = bgzf::Writer::new(io::sink());
102 /// let _inner = writer.get_ref();
103 /// ```
104 pub fn get_ref(&self) -> &W {
105 self.inner.as_ref().unwrap()
106 }
107
108 /// Returns the underlying writer.
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// # use std::io;
114 /// use noodles_bgzf as bgzf;
115 /// let writer = bgzf::Writer::new(io::sink());
116 /// let _inner = writer.into_inner();
117 /// ```
118 pub fn into_inner(mut self) -> W {
119 self.inner.take().unwrap()
120 }
121
122 /// Returns the current position of the stream.
123 ///
124 /// # Examples
125 ///
126 /// ```
127 /// # use std::io;
128 /// use noodles_bgzf as bgzf;
129 /// let writer = bgzf::Writer::new(io::sink());
130 /// assert_eq!(writer.position(), 0);
131 /// ```
132 pub fn position(&self) -> u64 {
133 self.position
134 }
135
136 /// Returns the current virtual position of the stream.
137 ///
138 /// # Panics
139 ///
140 /// This panics if the stream flushed >= 256 TiB of compressed data.
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// # use std::io;
146 /// use noodles_bgzf as bgzf;
147 /// let writer = bgzf::Writer::new(io::sink());
148 /// assert_eq!(writer.virtual_position(), bgzf::VirtualPosition::from(0));
149 /// ```
150 pub fn virtual_position(&self) -> VirtualPosition {
151 // SAFETY: The uncompressed buffer is guaranteed to be <= `MAX_UNCOMPRESSED_POSITION`.
152 let uncompressed_position = self.staging_buf.len() as u16;
153 VirtualPosition::try_from((self.position, uncompressed_position)).unwrap()
154 }
155
156 fn flush_block(&mut self) -> io::Result<()> {
157 use crate::deflate;
158
159 let compressed_data = &mut self.compression_buf;
160 let crc32 = deflate::encode(&self.staging_buf, self.compression_level, compressed_data)?;
161
162 let inner = self.inner.as_mut().unwrap();
163 let uncompressed_len = self.staging_buf.len();
164 let block_size = write_frame(inner, compressed_data, crc32, uncompressed_len)?;
165
166 self.position += block_size as u64;
167
168 self.staging_buf.clear();
169
170 Ok(())
171 }
172
173 /// Attempts to finish the output stream by flushing any remaining buffers.
174 ///
175 /// This then appends the final BGZF EOF block.
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// # use std::io::{self, Write};
181 /// use noodles_bgzf as bgzf;
182 ///
183 /// let mut writer = bgzf::Writer::new(io::sink());
184 /// writer.write_all(b"noodles-bgzf")?;
185 ///
186 /// writer.try_finish()?;
187 /// # Ok::<(), io::Error>(())
188 /// ```
189 pub fn try_finish(&mut self) -> io::Result<()> {
190 self.flush()?;
191
192 let inner = self.inner.as_mut().unwrap();
193 let result = inner.write_all(&BGZF_EOF);
194
195 self.position += BGZF_EOF.len() as u64;
196
197 result
198 }
199
200 /// Returns the underlying writer after finishing the output stream.
201 ///
202 /// This method can only be called once. Any further usage of the writer may result in a panic.
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// # use std::io::{self, Write};
208 /// use noodles_bgzf as bgzf;
209 ///
210 /// let mut writer = bgzf::Writer::new(io::sink());
211 /// writer.write_all(b"noodles-bgzf")?;
212 ///
213 /// let data = writer.finish()?;
214 /// # Ok::<(), io::Error>(())
215 /// ```
216 pub fn finish(mut self) -> io::Result<W> {
217 self.try_finish()?;
218 let inner = self.inner.take().unwrap();
219 Ok(inner)
220 }
221
222 fn remaining(&self) -> usize {
223 MAX_BUF_SIZE - self.staging_buf.len()
224 }
225
226 fn has_remaining(&self) -> bool {
227 self.staging_buf.len() < MAX_BUF_SIZE
228 }
229}
230
231impl<W> Drop for Writer<W>
232where
233 W: Write,
234{
235 fn drop(&mut self) {
236 if self.inner.is_some() {
237 let _ = self.try_finish();
238 }
239 }
240}
241
242impl<W> Write for Writer<W>
243where
244 W: Write,
245{
246 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
247 let amt = self.remaining().min(buf.len());
248 self.staging_buf.extend(&buf[..amt]);
249
250 if !self.has_remaining() {
251 self.flush()?;
252 }
253
254 Ok(amt)
255 }
256
257 fn flush(&mut self) -> io::Result<()> {
258 if self.staging_buf.is_empty() {
259 Ok(())
260 } else {
261 self.flush_block()
262 }
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    // Checks the virtual position before writing, with staged data, and after a flush.
    #[test]
    fn test_virtual_position() -> Result<(), Box<dyn std::error::Error>> {
        let mut writer = Writer::new(Vec::new());

        // Nothing was staged or flushed yet.
        assert_eq!(writer.virtual_position(), VirtualPosition::from(0));

        // Staged data only moves the uncompressed part of the virtual position.
        writer.write_all(b"noodles")?;
        let staged = VirtualPosition::try_from((0, 7))?;
        assert_eq!(writer.virtual_position(), staged);

        // After a flush, the compressed part equals the number of bytes in the inner writer.
        writer.flush()?;
        let compressed_len = writer.get_ref().len() as u64;
        let flushed = VirtualPosition::try_from((compressed_len, 0))?;
        assert_eq!(writer.virtual_position(), flushed);

        Ok(())
    }

    // Checks that a finished stream ends with the BGZF EOF marker.
    #[test]
    fn test_finish() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());
        writer.write_all(b"noodles")?;

        let data = writer.finish()?;
        let (_, tail) = data.split_at(data.len() - BGZF_EOF.len());

        assert_eq!(tail, BGZF_EOF);

        Ok(())
    }
}
305}