noodles_bcf/io/
reader.rs

1//! BCF reader.
2
3mod builder;
4pub mod header;
5pub(crate) mod query;
6pub(crate) mod record;
7pub(crate) mod record_buf;
8mod record_bufs;
9
10pub use self::{builder::Builder, query::Query, record_bufs::RecordBufs};
11
12use std::{
13    io::{self, BufRead, Read},
14    iter, str,
15};
16
17use noodles_bgzf as bgzf;
18use noodles_core::Region;
19use noodles_csi::BinningIndex;
20use noodles_vcf::{self as vcf, header::string_maps::ContigStringMap, variant::RecordBuf};
21
22use self::{header::read_header, record::read_record, record_buf::read_record_buf};
23use crate::Record;
24
/// A BCF reader.
///
/// The BCF format consists of two parts: 1) a VCF header and 2) a list of records.
pub struct Reader<R> {
    // The underlying stream that the header and records are read from.
    inner: R,
    // Buffer handed to `read_record_buf` on every call; it lives in the reader so the same
    // allocation is passed each time.
    buf: Vec<u8>,
}
32
33impl<R> Reader<R> {
34    /// Returns a reference to the underlying reader.
35    ///
36    /// # Examples
37    ///
38    /// ```
39    /// # use std::io;
40    /// use noodles_bcf as bcf;
41    /// let reader = bcf::io::Reader::from(io::empty());
42    /// let _inner = reader.get_ref();
43    /// ```
44    pub fn get_ref(&self) -> &R {
45        &self.inner
46    }
47
48    /// Returns a mutable reference to the underlying reader.
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// # use std::io;
54    /// use noodles_bcf as bcf;
55    /// let mut reader = bcf::io::Reader::from(io::empty());
56    /// let _inner = reader.get_mut();
57    /// ```
58    pub fn get_mut(&mut self) -> &mut R {
59        &mut self.inner
60    }
61
62    /// Returns the underlying reader.
63    ///
64    /// # Examples
65    ///
66    /// ```
67    /// # use std::io;
68    /// use noodles_bcf as bcf;
69    /// let reader = bcf::io::Reader::from(io::empty());
70    /// let _inner = reader.into_inner();
71    /// ```
72    pub fn into_inner(self) -> R {
73        self.inner
74    }
75}
76
77impl<R> Reader<R>
78where
79    R: Read,
80{
81    /// Returns a BCF header reader.
82    ///
83    /// This creates an adapter that reads at most the length of the header, i.e., the BCF magic
84    /// number, the format version, and VCF header.
85    ///
86    /// It is more ergonomic to read the BCF header as a VCF header using [`Self::read_header`],
87    /// but this adapter allows for control of how the header is read, e.g., to read the raw VCF
88    /// header.
89    ///
90    /// The position of the stream is expected to be at the start.
91    ///
92    /// # Examples
93    ///
94    /// ```no_run
95    /// # use std::{fs::File, io::Read};
96    /// use noodles_bcf as bcf;
97    ///
98    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
99    ///
100    /// let mut header_reader = reader.header_reader();
101    /// header_reader.read_magic_number()?;
102    /// header_reader.read_format_version()?;
103    ///
104    /// let mut raw_vcf_header_reader = header_reader.raw_vcf_header_reader()?;
105    /// let mut raw_header = String::new();
106    /// raw_vcf_header_reader.read_to_string(&mut raw_header)?;
107    /// raw_vcf_header_reader.discard_to_end()?;
108    /// # Ok::<_, std::io::Error>(())
109    /// ```
110    pub fn header_reader(&mut self) -> header::Reader<&mut R> {
111        header::Reader::new(&mut self.inner)
112    }
113
114    /// Reads the VCF header.
115    ///
116    /// This verifies the BCF magic number, discards the file format version, and reads and parses
117    /// the raw VCF header. Associated string maps are also built from the raw header.
118    ///
119    /// The position of the stream is expected to be at the start.
120    ///
121    /// # Examples
122    ///
123    /// ```no_run
124    /// # use std::{fs::File, io};
125    /// use noodles_bcf as bcf;
126    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
127    /// let header = reader.read_header()?;
128    /// # Ok::<(), io::Error>(())
129    /// ```
130    pub fn read_header(&mut self) -> io::Result<vcf::Header> {
131        read_header(&mut self.inner)
132    }
133
134    /// Reads a single record.
135    ///
136    /// The stream is expected to be directly after the header or at the start of another record.
137    ///
138    /// It is more ergonomic to read records using an iterator (see [`Self::records`]), but using
139    /// this method directly allows the reuse of a single [`vcf::Record`] buffer.
140    ///
141    /// If successful, the record size is returned. If a record size of 0 is returned, the stream
142    /// reached EOF.
143    ///
144    /// # Examples
145    ///
146    /// ```no_run
147    /// # use std::{fs::File, io};
148    /// use noodles_bcf as bcf;
149    /// use noodles_vcf as vcf;
150    ///
151    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
152    /// let header = reader.read_header()?;
153    ///
154    /// let mut record = vcf::variant::RecordBuf::default();
155    /// reader.read_record_buf(&header, &mut record)?;
156    /// # Ok::<(), io::Error>(())
157    /// ```
158    pub fn read_record_buf(
159        &mut self,
160        header: &vcf::Header,
161        record: &mut RecordBuf,
162    ) -> io::Result<usize> {
163        read_record_buf(&mut self.inner, header, &mut self.buf, record)
164    }
165
166    /// Reads a single record without eagerly decoding (most of) its fields.
167    ///
168    /// The stream is expected to be directly after the header or at the start of another record.
169    ///
170    /// It is more ergnomic to read records using an iterator (see [`Self::records`]), but using
171    /// this method directly allows the reuse of a single [`Record`] buffer.
172    ///
173    /// If successful, the record size is returned. If a record size of 0 is returned, the stream
174    /// reached EOF.
175    ///
176    /// # Examples
177    ///
178    /// ```no_run
179    /// # use std::{fs::File, io};
180    /// use noodles_bcf as bcf;
181    ///
182    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
183    /// reader.read_header()?;
184    ///
185    /// let mut record = bcf::Record::default();
186    /// reader.read_record(&mut record)?;
187    /// # Ok::<(), io::Error>(())
188    /// ```
189    pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
190        read_record(&mut self.inner, record)
191    }
192
193    /// Returns an iterator over records starting from the current stream position.
194    ///
195    /// The stream is expected to be directly after the reference sequences or at the start of
196    /// another record.
197    ///
198    /// # Examples
199    ///
200    /// ```no_run
201    /// # use std::{fs::File, io};
202    /// use noodles_bcf as bcf;
203    ///
204    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
205    /// let header = reader.read_header()?;
206    ///
207    /// for result in reader.record_bufs(&header) {
208    ///     let record = result?;
209    ///     // ...
210    /// }
211    /// # Ok::<(), io::Error>(())
212    pub fn record_bufs<'r, 'h>(&'r mut self, header: &'h vcf::Header) -> RecordBufs<'r, 'h, R> {
213        RecordBufs::new(self, header)
214    }
215
216    /// Returns an iterator over lazy records starting from the current stream position.
217    ///
218    /// The stream is expected to be directly after the header or at the start of another record.
219    ///
220    /// # Examples
221    ///
222    /// ```no_run
223    /// # use std::{fs::File, io};
224    /// use noodles_bcf as bcf;
225    ///
226    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
227    /// reader.read_header()?;
228    ///
229    /// for result in reader.records() {
230    ///     let record = result?;
231    ///     println!("{:?}", record);
232    /// }
233    /// # Ok::<(), io::Error>(())
234    /// ```
235    pub fn records(&mut self) -> impl Iterator<Item = io::Result<Record>> + '_ {
236        let mut record = Record::default();
237
238        iter::from_fn(move || match self.read_record(&mut record) {
239            Ok(0) => None,
240            Ok(_) => Some(Ok(record.clone())),
241            Err(e) => Some(Err(e)),
242        })
243    }
244}
245
246impl<R> Reader<bgzf::Reader<R>>
247where
248    R: Read,
249{
250    /// Creates a BCF reader.
251    ///
252    /// # Examples
253    ///
254    /// ```
255    /// # use std::io;
256    /// use noodles_bcf as bcf;
257    /// let reader = bcf::io::Reader::new(io::empty());
258    /// ```
259    pub fn new(reader: R) -> Self {
260        Self::from(bgzf::Reader::new(reader))
261    }
262}
263
264impl<R> Reader<R>
265where
266    R: bgzf::io::BufRead + bgzf::io::Seek,
267{
268    /// Returns an iterator over records that intersects the given region.
269    ///
270    /// # Examples
271    ///
272    /// ```no_run
273    /// # use std::fs::File;
274    /// use noodles_bcf as bcf;
275    /// use noodles_core::Region;
276    /// use noodles_csi as csi;
277    ///
278    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
279    /// let header = reader.read_header()?;
280    ///
281    /// let index = csi::fs::read("sample.bcf.csi")?;
282    /// let region = "sq0:8-13".parse()?;
283    /// let query = reader.query(&header, &index, &region)?;
284    ///
285    /// for result in query {
286    ///     let record = result?;
287    ///     // ...
288    /// }
289    /// # Ok::<(), Box<dyn std::error::Error>>(())
290    /// ```
291    pub fn query<'r, 'h, I>(
292        &'r mut self,
293        header: &'h vcf::Header,
294        index: &I,
295        region: &Region,
296    ) -> io::Result<Query<'r, 'h, R>>
297    where
298        I: BinningIndex,
299    {
300        let reference_sequence_id = resolve_region(header.string_maps().contigs(), region)?;
301        let chunks = index.query(reference_sequence_id, region.interval())?;
302
303        Ok(Query::new(
304            &mut self.inner,
305            header,
306            chunks,
307            reference_sequence_id,
308            region.interval(),
309        ))
310    }
311}
312
313impl<R> From<R> for Reader<R> {
314    fn from(inner: R) -> Self {
315        Self {
316            inner,
317            buf: Vec::new(),
318        }
319    }
320}
321
322impl<R> vcf::variant::io::Read<R> for Reader<R>
323where
324    R: BufRead,
325{
326    fn read_variant_header(&mut self) -> io::Result<vcf::Header> {
327        self.read_header()
328    }
329
330    fn variant_records<'r, 'h: 'r>(
331        &'r mut self,
332        _: &'h vcf::Header,
333    ) -> Box<dyn Iterator<Item = io::Result<Box<dyn vcf::variant::Record>>> + 'r> {
334        Box::new(
335            self.records().map(|result| {
336                result.map(|record| Box::new(record) as Box<dyn vcf::variant::Record>)
337            }),
338        )
339    }
340}
341
342pub(crate) fn resolve_region(
343    contig_string_map: &ContigStringMap,
344    region: &Region,
345) -> io::Result<usize> {
346    let region_name = str::from_utf8(region.name())
347        .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
348
349    contig_string_map.get_index_of(region_name).ok_or_else(|| {
350        io::Error::new(
351            io::ErrorKind::InvalidInput,
352            format!("region does not exist in contigs: {region:?}"),
353        )
354    })
355}