noodles_vcf/io/reader/
header.rs1use std::io::{self, BufRead, Read};
4
5use crate::{header, Header};
6
/// A reader adapter that yields only the leading `#`-prefixed lines of a
/// stream.
pub struct Reader<R> {
    /// The wrapped source the bytes are pulled from.
    inner: R,
    /// Whether the last chunk handed out ended with a line feed, i.e., the
    /// next byte of `inner` starts a new line.
    is_eol: bool,
}
14
15impl<R> Reader<R> {
16 pub(super) fn new(inner: R) -> Self {
17 Self {
18 inner,
19 is_eol: true,
20 }
21 }
22}
23
24impl<R> Read for Reader<R>
25where
26 R: BufRead,
27{
28 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
29 let mut src = self.fill_buf()?;
30 let amt = src.read(buf)?;
31
32 if !src.is_empty() {
33 self.is_eol = false;
34 }
35
36 self.consume(amt);
37
38 Ok(amt)
39 }
40}
41
42impl<R> BufRead for Reader<R>
43where
44 R: BufRead,
45{
46 fn fill_buf(&mut self) -> io::Result<&[u8]> {
47 use memchr::memchr;
48
49 const PREFIX: u8 = b'#';
50 const LINE_FEED: u8 = b'\n';
51
52 let src = self.inner.fill_buf()?;
53
54 let buf = if self.is_eol && src.first().map(|&b| b != PREFIX).unwrap_or(true) {
55 &[]
56 } else if let Some(i) = memchr(LINE_FEED, src) {
57 self.is_eol = true;
58 &src[..=i]
59 } else {
60 self.is_eol = false;
61 src
62 };
63
64 Ok(buf)
65 }
66
67 fn consume(&mut self, amt: usize) {
68 self.inner.consume(amt);
69 }
70}
71
72pub(super) fn read_header<R>(reader: &mut R) -> io::Result<Header>
73where
74 R: BufRead,
75{
76 let mut reader = Reader::new(reader);
77
78 let mut parser = header::Parser::default();
79 let mut buf = Vec::new();
80
81 while read_line(&mut reader, &mut buf)? != 0 {
82 parser
83 .parse_partial(&buf)
84 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
85 }
86
87 parser
88 .finish()
89 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
90}
91
/// Reads a single line from `reader` into `dst`, without its line terminator.
///
/// `dst` is cleared first. A trailing `\n`, or `\r\n`, is removed from `dst`.
///
/// Returns the number of bytes read from `reader`, terminator included; `0`
/// means no more data was available.
fn read_line<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    const LINE_FEED: u8 = b'\n';
    const CARRIAGE_RETURN: u8 = b'\r';

    dst.clear();

    let n = reader.read_until(LINE_FEED, dst)?;

    // A carriage return is only stripped when it directly precedes a line
    // feed; a final line lacking `\n` is left untouched.
    if dst.last() == Some(&LINE_FEED) {
        dst.pop();

        if dst.last() == Some(&CARRIAGE_RETURN) {
            dst.pop();
        }
    }

    Ok(n)
}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `src` in a header `Reader` and returns everything it yields.
    fn read_raw_header<R>(mut src: R) -> io::Result<Vec<u8>>
    where
        R: BufRead,
    {
        let mut reader = Reader::new(&mut src);
        let mut dst = Vec::new();
        reader.read_to_end(&mut dst)?;
        Ok(dst)
    }

    #[test]
    fn test_read_raw_header() -> io::Result<()> {
        let data = b"##fileformat=VCFv4.3
##fileDate=20200501
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
sq0\t1\t.\tA\t.\t.\tPASS\t.
";

        let expected = b"##fileformat=VCFv4.3
##fileDate=20200501
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
";

        // The record line after the header must not be yielded.
        assert_eq!(read_raw_header(&data[..])?, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_no_records() -> io::Result<()> {
        let data = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
";

        let expected = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
";

        assert_eq!(read_raw_header(&data[..])?, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_multiple_buffer_fills() -> io::Result<()> {
        use std::io::BufReader;

        let data = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
";

        let expected = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
";

        // A 16-byte buffer is shorter than either line, so each line spans
        // several fills of the inner reader.
        let inner = BufReader::with_capacity(16, &data[..]);
        assert_eq!(read_raw_header(inner)?, expected);

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_no_header() -> io::Result<()> {
        // Empty input.
        assert!(read_raw_header(&b""[..])?.is_empty());

        // Input that starts directly with a record.
        assert!(read_raw_header(&b"sq0\t1\t.\tA\t.\t.\tPASS\t.\n"[..])?.is_empty());

        Ok(())
    }

    #[test]
    fn test_read_raw_header_with_missing_end_of_line() -> io::Result<()> {
        let data = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";

        let expected = b"##fileformat=VCFv4.3
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";

        assert_eq!(read_raw_header(&data[..])?, expected);

        Ok(())
    }
}