noodles_bcf/io/reader/
header.rs

1//! BCF header reader.
2
3mod format_version;
4pub(crate) mod magic_number;
5pub mod vcf_header;
6
7use std::io::{self, BufRead, Read};
8
9use byteorder::{LittleEndian, ReadBytesExt};
10use noodles_vcf::{self as vcf, header::StringMaps};
11
12use self::{format_version::read_format_version, magic_number::read_magic_number};
13use crate::MAGIC_NUMBER;
14
15/// A BCF header reader.
16pub struct Reader<R> {
17    inner: R,
18}
19
20impl<R> Reader<R>
21where
22    R: Read,
23{
24    pub(super) fn new(inner: R) -> Self {
25        Self { inner }
26    }
27
28    /// Reads the magic number.
29    pub fn read_magic_number(&mut self) -> io::Result<[u8; MAGIC_NUMBER.len()]> {
30        read_magic_number(&mut self.inner)
31    }
32
33    /// Reads the format version.
34    pub fn read_format_version(&mut self) -> io::Result<(u8, u8)> {
35        read_format_version(&mut self.inner)
36    }
37
38    /// Returns a VCF header reader.
39    ///
40    /// The caller is responsible of discarding any extra padding in the header text, e.g., using
41    /// [`vcf_header::Reader::discard_to_end`].
42    pub fn raw_vcf_header_reader(&mut self) -> io::Result<vcf_header::Reader<&mut R>> {
43        let len = self.inner.read_u32::<LittleEndian>().map(u64::from)?;
44        Ok(vcf_header::Reader::new(&mut self.inner, len))
45    }
46}
47
48pub(super) fn read_header<R>(reader: &mut R) -> io::Result<vcf::Header>
49where
50    R: Read,
51{
52    let mut header_reader = Reader::new(reader);
53    read_header_inner(&mut header_reader)
54}
55
56fn read_header_inner<R>(reader: &mut Reader<R>) -> io::Result<vcf::Header>
57where
58    R: Read,
59{
60    reader
61        .read_magic_number()
62        .and_then(magic_number::validate)?;
63
64    reader.read_format_version()?;
65
66    let mut raw_vcf_header_reader = reader.raw_vcf_header_reader()?;
67    read_vcf_header(&mut raw_vcf_header_reader)
68}
69
70fn read_vcf_header<R>(reader: &mut vcf_header::Reader<R>) -> io::Result<vcf::Header>
71where
72    R: Read,
73{
74    let mut parser = vcf::header::Parser::default();
75    let mut string_maps = StringMaps::default();
76
77    let mut buf = Vec::new();
78
79    while read_line(reader, &mut buf)? != 0 {
80        let entry = parser
81            .parse_partial(&buf)
82            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
83
84        string_maps
85            .insert_entry(&entry)
86            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
87    }
88
89    reader.discard_to_end()?;
90
91    let mut header = parser
92        .finish()
93        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
94
95    *header.string_maps_mut() = string_maps;
96
97    Ok(header)
98}
99
/// Reads one line from `reader` into `dst`, without its line terminator.
///
/// `dst` is cleared first. A trailing line feed is removed, and so is a carriage return that
/// immediately precedes it. A carriage return that is not followed by a line feed is kept.
///
/// Returns the number of bytes consumed from `reader`, terminator included; `0` means the
/// reader had no more data.
fn read_line<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    const LF: u8 = b'\n';
    const CR: u8 = b'\r';

    dst.clear();

    let consumed = reader.read_until(LF, dst)?;

    // When nothing was read, `dst` is empty and neither check below matches.
    if dst.last() == Some(&LF) {
        dst.pop();

        if dst.last() == Some(&CR) {
            dst.pop();
        }
    }

    Ok(consumed)
}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal BCF header parses into a VCF header that only carries the file format.
    #[test]
    fn test_read_header() -> io::Result<()> {
        use vcf::header::FileFormat;

        // 60 bytes of header text followed by a NUL terminator, 61 bytes in total.
        const TEXT: &[u8] =
            b"##fileformat=VCFv4.3\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n\x00";

        let mut src = Vec::new();
        src.extend_from_slice(b"BCF"); // magic
        src.extend_from_slice(&[0x02, 0x02]); // major_version, minor_version
        src.extend_from_slice(&61u32.to_le_bytes()); // l_text
        src.extend_from_slice(TEXT); // text

        let expected = vcf::Header::builder()
            .set_file_format(FileFormat::new(4, 3))
            .build();

        let mut reader = src.as_slice();
        let actual = read_header(&mut reader)?;

        assert_eq!(actual, expected);

        Ok(())
    }
}