noodles_bcf/io/reader.rs
//! BCF reader.
2
3mod builder;
4pub mod header;
5pub(crate) mod query;
6pub(crate) mod record;
7pub(crate) mod record_buf;
8mod record_bufs;
9
10pub use self::{builder::Builder, query::Query, record_bufs::RecordBufs};
11
12use std::{
13 io::{self, BufRead, Read},
14 iter, str,
15};
16
17use noodles_bgzf as bgzf;
18use noodles_core::Region;
19use noodles_csi::BinningIndex;
20use noodles_vcf::{self as vcf, header::string_maps::ContigStringMap, variant::RecordBuf};
21
22use self::{header::read_header, record::read_record, record_buf::read_record_buf};
23use crate::Record;
24
/// A BCF reader.
///
/// The BCF format is composed of two parts: 1) a VCF header and 2) a list of records.
pub struct Reader<R> {
    // The underlying reader that the header and records are read from.
    inner: R,
    // Buffer passed to `read_record_buf` on every call; it is kept on the reader rather than
    // being created per call.
    buf: Vec<u8>,
}
32
33impl<R> Reader<R> {
34 /// Returns a reference to the underlying reader.
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// # use std::io;
40 /// use noodles_bcf as bcf;
41 /// let reader = bcf::io::Reader::from(io::empty());
42 /// let _inner = reader.get_ref();
43 /// ```
44 pub fn get_ref(&self) -> &R {
45 &self.inner
46 }
47
48 /// Returns a mutable reference to the underlying reader.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// # use std::io;
54 /// use noodles_bcf as bcf;
55 /// let mut reader = bcf::io::Reader::from(io::empty());
56 /// let _inner = reader.get_mut();
57 /// ```
58 pub fn get_mut(&mut self) -> &mut R {
59 &mut self.inner
60 }
61
62 /// Returns the underlying reader.
63 ///
64 /// # Examples
65 ///
66 /// ```
67 /// # use std::io;
68 /// use noodles_bcf as bcf;
69 /// let reader = bcf::io::Reader::from(io::empty());
70 /// let _inner = reader.into_inner();
71 /// ```
72 pub fn into_inner(self) -> R {
73 self.inner
74 }
75}
76
77impl<R> Reader<R>
78where
79 R: Read,
80{
81 /// Returns a BCF header reader.
82 ///
83 /// This creates an adapter that reads at most the length of the header, i.e., the BCF magic
84 /// number, the format version, and VCF header.
85 ///
86 /// It is more ergonomic to read the BCF header as a VCF header using [`Self::read_header`],
87 /// but this adapter allows for control of how the header is read, e.g., to read the raw VCF
88 /// header.
89 ///
90 /// The position of the stream is expected to be at the start.
91 ///
92 /// # Examples
93 ///
94 /// ```no_run
95 /// # use std::{fs::File, io::Read};
96 /// use noodles_bcf as bcf;
97 ///
98 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
99 ///
100 /// let mut header_reader = reader.header_reader();
101 /// header_reader.read_magic_number()?;
102 /// header_reader.read_format_version()?;
103 ///
104 /// let mut raw_vcf_header_reader = header_reader.raw_vcf_header_reader()?;
105 /// let mut raw_header = String::new();
106 /// raw_vcf_header_reader.read_to_string(&mut raw_header)?;
107 /// raw_vcf_header_reader.discard_to_end()?;
108 /// # Ok::<_, std::io::Error>(())
109 /// ```
110 pub fn header_reader(&mut self) -> header::Reader<&mut R> {
111 header::Reader::new(&mut self.inner)
112 }
113
114 /// Reads the VCF header.
115 ///
116 /// This verifies the BCF magic number, discards the file format version, and reads and parses
117 /// the raw VCF header. Associated string maps are also built from the raw header.
118 ///
119 /// The position of the stream is expected to be at the start.
120 ///
121 /// # Examples
122 ///
123 /// ```no_run
124 /// # use std::{fs::File, io};
125 /// use noodles_bcf as bcf;
126 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
127 /// let header = reader.read_header()?;
128 /// # Ok::<(), io::Error>(())
129 /// ```
130 pub fn read_header(&mut self) -> io::Result<vcf::Header> {
131 read_header(&mut self.inner)
132 }
133
134 /// Reads a single record.
135 ///
136 /// The stream is expected to be directly after the header or at the start of another record.
137 ///
138 /// It is more ergonomic to read records using an iterator (see [`Self::records`]), but using
139 /// this method directly allows the reuse of a single [`vcf::Record`] buffer.
140 ///
141 /// If successful, the record size is returned. If a record size of 0 is returned, the stream
142 /// reached EOF.
143 ///
144 /// # Examples
145 ///
146 /// ```no_run
147 /// # use std::{fs::File, io};
148 /// use noodles_bcf as bcf;
149 /// use noodles_vcf as vcf;
150 ///
151 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
152 /// let header = reader.read_header()?;
153 ///
154 /// let mut record = vcf::variant::RecordBuf::default();
155 /// reader.read_record_buf(&header, &mut record)?;
156 /// # Ok::<(), io::Error>(())
157 /// ```
158 pub fn read_record_buf(
159 &mut self,
160 header: &vcf::Header,
161 record: &mut RecordBuf,
162 ) -> io::Result<usize> {
163 read_record_buf(&mut self.inner, header, &mut self.buf, record)
164 }
165
166 /// Reads a single record without eagerly decoding (most of) its fields.
167 ///
168 /// The stream is expected to be directly after the header or at the start of another record.
169 ///
170 /// It is more ergnomic to read records using an iterator (see [`Self::records`]), but using
171 /// this method directly allows the reuse of a single [`Record`] buffer.
172 ///
173 /// If successful, the record size is returned. If a record size of 0 is returned, the stream
174 /// reached EOF.
175 ///
176 /// # Examples
177 ///
178 /// ```no_run
179 /// # use std::{fs::File, io};
180 /// use noodles_bcf as bcf;
181 ///
182 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
183 /// reader.read_header()?;
184 ///
185 /// let mut record = bcf::Record::default();
186 /// reader.read_record(&mut record)?;
187 /// # Ok::<(), io::Error>(())
188 /// ```
189 pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
190 read_record(&mut self.inner, record)
191 }
192
193 /// Returns an iterator over records starting from the current stream position.
194 ///
195 /// The stream is expected to be directly after the reference sequences or at the start of
196 /// another record.
197 ///
198 /// # Examples
199 ///
200 /// ```no_run
201 /// # use std::{fs::File, io};
202 /// use noodles_bcf as bcf;
203 ///
204 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
205 /// let header = reader.read_header()?;
206 ///
207 /// for result in reader.record_bufs(&header) {
208 /// let record = result?;
209 /// // ...
210 /// }
211 /// # Ok::<(), io::Error>(())
212 pub fn record_bufs<'r, 'h>(&'r mut self, header: &'h vcf::Header) -> RecordBufs<'r, 'h, R> {
213 RecordBufs::new(self, header)
214 }
215
216 /// Returns an iterator over lazy records starting from the current stream position.
217 ///
218 /// The stream is expected to be directly after the header or at the start of another record.
219 ///
220 /// # Examples
221 ///
222 /// ```no_run
223 /// # use std::{fs::File, io};
224 /// use noodles_bcf as bcf;
225 ///
226 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
227 /// reader.read_header()?;
228 ///
229 /// for result in reader.records() {
230 /// let record = result?;
231 /// println!("{:?}", record);
232 /// }
233 /// # Ok::<(), io::Error>(())
234 /// ```
235 pub fn records(&mut self) -> impl Iterator<Item = io::Result<Record>> + '_ {
236 let mut record = Record::default();
237
238 iter::from_fn(move || match self.read_record(&mut record) {
239 Ok(0) => None,
240 Ok(_) => Some(Ok(record.clone())),
241 Err(e) => Some(Err(e)),
242 })
243 }
244}
245
246impl<R> Reader<bgzf::Reader<R>>
247where
248 R: Read,
249{
250 /// Creates a BCF reader.
251 ///
252 /// # Examples
253 ///
254 /// ```
255 /// # use std::io;
256 /// use noodles_bcf as bcf;
257 /// let reader = bcf::io::Reader::new(io::empty());
258 /// ```
259 pub fn new(reader: R) -> Self {
260 Self::from(bgzf::Reader::new(reader))
261 }
262}
263
264impl<R> Reader<R>
265where
266 R: bgzf::io::BufRead + bgzf::io::Seek,
267{
268 /// Returns an iterator over records that intersects the given region.
269 ///
270 /// # Examples
271 ///
272 /// ```no_run
273 /// # use std::fs::File;
274 /// use noodles_bcf as bcf;
275 /// use noodles_core::Region;
276 /// use noodles_csi as csi;
277 ///
278 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
279 /// let header = reader.read_header()?;
280 ///
281 /// let index = csi::fs::read("sample.bcf.csi")?;
282 /// let region = "sq0:8-13".parse()?;
283 /// let query = reader.query(&header, &index, ®ion)?;
284 ///
285 /// for result in query {
286 /// let record = result?;
287 /// // ...
288 /// }
289 /// # Ok::<(), Box<dyn std::error::Error>>(())
290 /// ```
291 pub fn query<'r, 'h, I>(
292 &'r mut self,
293 header: &'h vcf::Header,
294 index: &I,
295 region: &Region,
296 ) -> io::Result<Query<'r, 'h, R>>
297 where
298 I: BinningIndex,
299 {
300 let reference_sequence_id = resolve_region(header.string_maps().contigs(), region)?;
301 let chunks = index.query(reference_sequence_id, region.interval())?;
302
303 Ok(Query::new(
304 &mut self.inner,
305 header,
306 chunks,
307 reference_sequence_id,
308 region.interval(),
309 ))
310 }
311}
312
313impl<R> From<R> for Reader<R> {
314 fn from(inner: R) -> Self {
315 Self {
316 inner,
317 buf: Vec::new(),
318 }
319 }
320}
321
322impl<R> vcf::variant::io::Read<R> for Reader<R>
323where
324 R: BufRead,
325{
326 fn read_variant_header(&mut self) -> io::Result<vcf::Header> {
327 self.read_header()
328 }
329
330 fn variant_records<'r, 'h: 'r>(
331 &'r mut self,
332 _: &'h vcf::Header,
333 ) -> Box<dyn Iterator<Item = io::Result<Box<dyn vcf::variant::Record>>> + 'r> {
334 Box::new(
335 self.records().map(|result| {
336 result.map(|record| Box::new(record) as Box<dyn vcf::variant::Record>)
337 }),
338 )
339 }
340}
341
342pub(crate) fn resolve_region(
343 contig_string_map: &ContigStringMap,
344 region: &Region,
345) -> io::Result<usize> {
346 let region_name = str::from_utf8(region.name())
347 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
348
349 contig_string_map.get_index_of(region_name).ok_or_else(|| {
350 io::Error::new(
351 io::ErrorKind::InvalidInput,
352 format!("region does not exist in contigs: {region:?}"),
353 )
354 })
355}