noodles_bgzf/io/
reader.rs

1//! BGZF reader.
2
3mod builder;
4pub(crate) mod frame;
5
6pub use self::builder::Builder;
7
8use std::io::{self, BufRead, Read, Seek, SeekFrom};
9
10use super::Block;
11use crate::{gzi, VirtualPosition, BGZF_MAX_ISIZE};
12
/// A BGZF reader.
///
/// The reader implements both [`std::io::Read`] and [`std::io::BufRead`], consuming compressed
/// data and emitting uncompressed data. It is internally buffered by a single block, and to
/// correctly track (virtual) positions, the reader _cannot_ be double buffered (e.g., using
/// [`std::io::BufReader`]).
///
/// # Examples
///
/// ```no_run
/// # use std::{fs::File, io::{self, Read}};
/// use noodles_bgzf as bgzf;
/// let mut reader = File::open("data.gz").map(bgzf::io::Reader::new)?;
/// let mut data = Vec::new();
/// reader.read_to_end(&mut data)?;
/// # Ok::<(), io::Error>(())
/// ```
pub struct Reader<R> {
    // The underlying source of compressed data.
    inner: R,
    // Scratch buffer that receives each raw frame read from `inner` before it is parsed.
    buf: Vec<u8>,
    // Offset in the compressed stream of the next frame to be read. It is advanced by the size
    // of each block that is read and reset when seeking.
    position: u64,
    // The most recently parsed block, which also tracks the read cursor within its data.
    block: Block,
}
36
37impl<R> Reader<R> {
38    /// Returns a reference to the underlying reader.
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// # use std::io;
44    /// use noodles_bgzf as bgzf;
45    /// let reader = bgzf::io::Reader::new(io::empty());
46    /// let _inner = reader.get_ref();
47    /// ```
48    pub fn get_ref(&self) -> &R {
49        &self.inner
50    }
51
52    /// Returns a mutable reference to the underlying reader.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// # use std::io;
58    /// use noodles_bgzf as bgzf;
59    /// let mut reader = bgzf::io::Reader::new(io::empty());
60    /// let _inner = reader.get_mut();
61    /// ```
62    pub fn get_mut(&mut self) -> &mut R {
63        &mut self.inner
64    }
65
66    /// Unwraps and returns the underlying writer.
67    ///
68    /// # Examples
69    ///
70    /// ```
71    /// # use std::io;
72    /// use noodles_bgzf as bgzf;
73    /// let reader = bgzf::io::Reader::new(io::empty());
74    /// let _inner = reader.into_inner();
75    /// ```
76    pub fn into_inner(self) -> R {
77        self.inner
78    }
79}
80
81impl<R> Reader<R>
82where
83    R: Read,
84{
85    /// Creates a BGZF reader.
86    ///
87    /// # Examples
88    ///
89    /// ```
90    /// # use std::io;
91    /// use noodles_bgzf as bgzf;
92    /// let reader = bgzf::io::Reader::new(io::empty());
93    /// ```
94    pub fn new(inner: R) -> Self {
95        Builder.build_from_reader(inner)
96    }
97
98    /// Returns the current position of the stream.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// # use std::io;
104    /// use noodles_bgzf as bgzf;
105    /// let reader = bgzf::io::Reader::new(io::empty());
106    /// assert_eq!(reader.position(), 0);
107    /// ```
108    pub fn position(&self) -> u64 {
109        self.position
110    }
111
112    /// Returns the current virtual position of the stream.
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// # use std::io;
118    /// use noodles_bgzf as bgzf;
119    /// let reader = bgzf::io::Reader::new(io::empty());
120    /// assert_eq!(reader.virtual_position(), bgzf::VirtualPosition::from(0));
121    /// ```
122    pub fn virtual_position(&self) -> VirtualPosition {
123        self.block.virtual_position()
124    }
125
126    fn read_nonempty_block_with<F>(&mut self, mut f: F) -> io::Result<usize>
127    where
128        F: FnMut(&[u8], &mut Block) -> io::Result<()>,
129    {
130        use self::frame::read_frame_into;
131
132        while read_frame_into(&mut self.inner, &mut self.buf)?.is_some() {
133            f(&self.buf, &mut self.block)?;
134
135            self.block.set_position(self.position);
136            self.position += self.block.size();
137
138            if self.block.data().len() > 0 {
139                break;
140            }
141        }
142
143        Ok(self.block.data().len())
144    }
145
146    fn read_block(&mut self) -> io::Result<usize> {
147        use self::frame::parse_block;
148        self.read_nonempty_block_with(parse_block)
149    }
150
151    fn read_block_into_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
152        use self::frame::parse_block_into_buf;
153        self.read_nonempty_block_with(|src, block| parse_block_into_buf(src, block, buf))
154    }
155}
156
157impl<R> Reader<R>
158where
159    R: Read + Seek,
160{
161    /// Seeks the stream to the given virtual position.
162    ///
163    /// The underlying stream's cursor is first moved the the compressed position. A block is read,
164    /// decompressed, and has its own cursor moved to the uncompressed position.
165    ///
166    /// # Examples
167    ///
168    /// ```
169    /// # use std::io;
170    /// use noodles_bgzf as bgzf;
171    /// let mut reader = bgzf::io::Reader::new(io::empty());
172    /// reader.seek(bgzf::VirtualPosition::MIN)?;
173    /// # Ok::<(), io::Error>(())
174    /// ```
175    pub fn seek(&mut self, pos: VirtualPosition) -> io::Result<VirtualPosition> {
176        let (cpos, upos) = pos.into();
177
178        self.inner.seek(SeekFrom::Start(cpos))?;
179        self.position = cpos;
180
181        self.read_block()?;
182
183        self.block.data_mut().set_position(usize::from(upos));
184
185        Ok(pos)
186    }
187
188    /// Seeks the stream to the given uncompressed position.
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// # use std::io;
194    /// use noodles_bgzf::{self as bgzf, gzi};
195    ///
196    /// let mut reader = bgzf::io::Reader::new(io::empty());
197    ///
198    /// let index = gzi::Index::default();
199    /// reader.seek_by_uncompressed_position(&index, 0)?;
200    /// # Ok::<_, io::Error>(())
201    /// ```
202    pub fn seek_by_uncompressed_position(
203        &mut self,
204        index: &gzi::Index,
205        pos: u64,
206    ) -> io::Result<u64> {
207        let virtual_position = index.query(pos)?;
208        self.seek(virtual_position)?;
209        Ok(pos)
210    }
211}
212
213impl<R> Read for Reader<R>
214where
215    R: Read,
216{
217    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
218        // If a new block is about to be read and the given buffer is guaranteed to be larger than
219        // the next block, reading to the block buffer can be skipped. The uncompressed data is
220        // decoded into the given buffer to avoid having to subsequently recopy it from the block.
221        if !self.block.data().has_remaining() && buf.len() >= BGZF_MAX_ISIZE {
222            self.read_block_into_buf(buf)
223        } else {
224            let mut src = self.fill_buf()?;
225            let amt = src.read(buf)?;
226            self.consume(amt);
227            Ok(amt)
228        }
229    }
230
231    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
232        if let Some(src) = self.block.data().as_ref().get(..buf.len()) {
233            buf.copy_from_slice(src);
234            self.consume(src.len());
235            Ok(())
236        } else {
237            default_read_exact(self, buf)
238        }
239    }
240}
241
242impl<R> BufRead for Reader<R>
243where
244    R: Read,
245{
246    fn consume(&mut self, amt: usize) {
247        self.block.data_mut().consume(amt);
248    }
249
250    fn fill_buf(&mut self) -> io::Result<&[u8]> {
251        if !self.block.data().has_remaining() {
252            self.read_block()?;
253        }
254
255        Ok(self.block.data().as_ref())
256    }
257}
258
259impl<R> crate::io::Read for Reader<R>
260where
261    R: Read,
262{
263    fn virtual_position(&self) -> VirtualPosition {
264        self.block.virtual_position()
265    }
266}
267
// The impl body is empty: no items of `crate::io::BufRead` are defined here.
impl<R> crate::io::BufRead for Reader<R> where R: Read {}
269
270impl<R> crate::io::Seek for Reader<R>
271where
272    R: Read + Seek,
273{
274    fn seek_to_virtual_position(&mut self, pos: VirtualPosition) -> io::Result<VirtualPosition> {
275        self.seek(pos)
276    }
277
278    fn seek_with_index(&mut self, index: &gzi::Index, pos: SeekFrom) -> io::Result<u64> {
279        match pos {
280            SeekFrom::Start(pos) => self.seek_by_uncompressed_position(index, pos),
281            _ => unimplemented!(),
282        }
283    }
284}
285
/// Reads from `reader` until `buf` is completely filled.
///
/// Reads that fail with [`io::ErrorKind::Interrupted`] are retried.
///
/// # Errors
///
/// Returns an error of kind [`io::ErrorKind::UnexpectedEof`] if the reader reports the end of
/// the stream before `buf` is filled. Any other read error is returned as is.
pub(crate) fn default_read_exact<R>(reader: &mut R, mut buf: &mut [u8]) -> io::Result<()>
where
    R: Read,
{
    loop {
        if buf.is_empty() {
            return Ok(());
        }

        match reader.read(buf) {
            // The stream ended while there was still space left to fill.
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            // Continue with the part of the buffer that has not been filled yet.
            Ok(n) => buf = &mut buf[n..],
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
308
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    // A block holding b"noodles".
    #[rustfmt::skip]
    const NOODLES_BLOCK: [u8; 35] = [
        0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x06, 0x00, 0x42, 0x43,
        0x02, 0x00, 0x22, 0x00, 0xcb, 0xcb, 0xcf, 0x4f, 0xc9, 0x49, 0x2d, 0x06, 0x00, 0xa1,
        0x58, 0x2a, 0x80, 0x07, 0x00, 0x00, 0x00,
    ];

    // A block holding b"bgzf".
    #[rustfmt::skip]
    const BGZF_BLOCK: [u8; 32] = [
        0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x06, 0x00, 0x42, 0x43,
        0x02, 0x00, 0x1f, 0x00, 0x4b, 0x4a, 0xaf, 0x4a, 0x03, 0x00, 0x20, 0x68, 0xf2, 0x8c,
        0x04, 0x00, 0x00, 0x00,
    ];

    // A block holding b"". The same bytes serve as the EOF block.
    #[rustfmt::skip]
    const EMPTY_BLOCK: [u8; 28] = [
        0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x06, 0x00, 0x42, 0x43,
        0x02, 0x00, 0x1b, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    #[test]
    fn test_read_with_empty_block() -> io::Result<()> {
        // block 0 (b"noodles"), block 1 (b""), block 2 (b"bgzf"), EOF block
        let data = [
            &NOODLES_BLOCK[..],
            &EMPTY_BLOCK[..],
            &BGZF_BLOCK[..],
            &EMPTY_BLOCK[..],
        ]
        .concat();

        let mut reader = Reader::new(&data[..]);
        let mut actual = Vec::new();
        reader.read_to_end(&mut actual)?;

        assert_eq!(actual, b"noodlesbgzf");

        Ok(())
    }

    #[test]
    fn test_seek() -> Result<(), Box<dyn std::error::Error>> {
        // block 0 (b"noodles"), EOF block
        let data = [&NOODLES_BLOCK[..], &EMPTY_BLOCK[..]].concat();

        let eof = VirtualPosition::try_from((63, 0))?;

        let mut reader = Reader::new(Cursor::new(&data));

        let mut actual = Vec::new();
        reader.read_to_end(&mut actual)?;

        assert_eq!(reader.virtual_position(), eof);

        let target = VirtualPosition::try_from((0, 3))?;
        reader.seek(target)?;

        actual.clear();
        reader.read_to_end(&mut actual)?;

        assert_eq!(actual, b"dles");
        assert_eq!(reader.virtual_position(), eof);

        Ok(())
    }

    #[test]
    fn test_seek_by_uncompressed_position() -> io::Result<()> {
        // block 0 (b"noodles"), block 1 (b"bgzf"), EOF block
        let data = [&NOODLES_BLOCK[..], &BGZF_BLOCK[..], &EMPTY_BLOCK[..]].concat();

        let index = gzi::Index::from(vec![(35, 7)]);

        let mut reader = Reader::new(Cursor::new(&data));

        reader.seek_by_uncompressed_position(&index, 3)?;
        let mut first = [0; 4];
        reader.read_exact(&mut first)?;
        assert_eq!(&first, b"dles");

        reader.seek_by_uncompressed_position(&index, 8)?;
        let mut second = [0; 2];
        reader.read_exact(&mut second)?;
        assert_eq!(&second, b"gz");

        Ok(())
    }
}