noodles_vcf/io/reader/
header.rs

1//! VCF header reader.
2
3use std::io::{self, BufRead, Read};
4
5use crate::{header, Header};
6
/// A VCF header reader.
///
/// This is created by calling [`super::Reader::header_reader`].
///
/// It wraps a buffered reader and yields only the leading lines that start with `#`. Once a line
/// starts with any other byte, or the input ends, it reports end of stream and leaves the
/// remaining bytes unconsumed in the wrapped reader.
#[derive(Debug)]
pub struct Reader<R> {
    /// The wrapped reader the raw header bytes are pulled from.
    inner: R,
    /// Whether the next byte handed out is the first byte of a line.
    ///
    /// The `#` prefix check is only performed while this is `true`.
    is_eol: bool,
}
14
15impl<R> Reader<R> {
16    pub(super) fn new(inner: R) -> Self {
17        Self {
18            inner,
19            is_eol: true,
20        }
21    }
22}
23
24impl<R> Read for Reader<R>
25where
26    R: BufRead,
27{
28    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
29        let mut src = self.fill_buf()?;
30        let amt = src.read(buf)?;
31
32        if !src.is_empty() {
33            self.is_eol = false;
34        }
35
36        self.consume(amt);
37
38        Ok(amt)
39    }
40}
41
42impl<R> BufRead for Reader<R>
43where
44    R: BufRead,
45{
46    fn fill_buf(&mut self) -> io::Result<&[u8]> {
47        use memchr::memchr;
48
49        const PREFIX: u8 = b'#';
50        const LINE_FEED: u8 = b'\n';
51
52        let src = self.inner.fill_buf()?;
53
54        let buf = if self.is_eol && src.first().map(|&b| b != PREFIX).unwrap_or(true) {
55            &[]
56        } else if let Some(i) = memchr(LINE_FEED, src) {
57            self.is_eol = true;
58            &src[..=i]
59        } else {
60            self.is_eol = false;
61            src
62        };
63
64        Ok(buf)
65    }
66
67    fn consume(&mut self, amt: usize) {
68        self.inner.consume(amt);
69    }
70}
71
72pub(super) fn read_header<R>(reader: &mut R) -> io::Result<Header>
73where
74    R: BufRead,
75{
76    let mut reader = Reader::new(reader);
77
78    let mut parser = header::Parser::default();
79    let mut buf = Vec::new();
80
81    while read_line(&mut reader, &mut buf)? != 0 {
82        parser
83            .parse_partial(&buf)
84            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
85    }
86
87    parser
88        .finish()
89        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
90}
91
/// Reads a single line from `reader` into `dst`, replacing its previous contents.
///
/// A trailing line feed, and a carriage return directly before it, are stripped from `dst`.
///
/// Returns the number of bytes read from `reader`, including any stripped line ending; `0`
/// means end of stream.
fn read_line<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    const LINE_FEED: u8 = b'\n';
    const CARRIAGE_RETURN: u8 = b'\r';

    dst.clear();

    let n = reader.read_until(LINE_FEED, dst)?;

    // When nothing was read, `dst` is still empty and neither check below matches.
    if dst.last() == Some(&LINE_FEED) {
        dst.pop();

        // Only a carriage return that is part of a `\r\n` pair is removed.
        if dst.last() == Some(&CARRIAGE_RETURN) {
            dst.pop();
        }
    }

    Ok(n)
}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `inner` in a header [`Reader`] and collects every byte it yields.
    fn read_raw_header<R>(inner: R) -> io::Result<Vec<u8>>
    where
        R: BufRead,
    {
        let mut reader = Reader::new(inner);
        let mut dst = Vec::new();
        reader.read_to_end(&mut dst)?;
        Ok(dst)
    }

    #[test]
    fn test_read_raw_header() -> io::Result<()> {
        // The header is followed by a single record, which must not be yielded.
        let data = b"##fileformat=VCFv4.3\n\
            ##fileDate=20200501\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n\
            sq0\t1\t.\tA\t.\t.\tPASS\t.\n";

        let expected = b"##fileformat=VCFv4.3\n\
            ##fileDate=20200501\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

        let actual = read_raw_header(&data[..])?;
        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_no_records() -> io::Result<()> {
        let data = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

        let expected = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

        let actual = read_raw_header(&data[..])?;
        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_multiple_buffer_fills() -> io::Result<()> {
        use std::io::BufReader;

        let data = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

        let expected = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

        // A 16-byte buffer is shorter than either line, so each line takes several fills.
        let inner = BufReader::with_capacity(16, &data[..]);
        let actual = read_raw_header(inner)?;
        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_no_header() -> io::Result<()> {
        // Empty input.
        let empty: &[u8] = &[];
        assert!(read_raw_header(empty)?.is_empty());

        // Input that starts directly with a record.
        let record_only = &b"sq0\t1\t.\tA\t.\t.\tPASS\t.\n"[..];
        assert!(read_raw_header(record_only)?.is_empty());

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_missing_end_of_line() -> io::Result<()> {
        let data = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";

        let expected = b"##fileformat=VCFv4.3\n\
            #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";

        let actual = read_raw_header(&data[..])?;
        assert_eq!(actual, expected);

        Ok(())
    }
}