noodles_sam/io/reader/
header.rs

1//! SAM header reader.
2
3use std::io::{self, BufRead, Read};
4
5use bstr::ByteSlice;
6
7use super::read_line;
8use crate::{header, Header};
9
/// A SAM header reader.
///
/// This adapter exposes only the leading `@`-prefixed lines of the wrapped reader. Once a line
/// begins with any other byte (or the wrapped reader is exhausted), it reports end of input.
///
/// This is created by calling [`super::Reader::header_reader`].
pub struct Reader<R> {
    /// The wrapped reader that header bytes are pulled from.
    inner: R,
    /// Whether the next unread byte of `inner` is at the start of a line, i.e., the last chunk
    /// handed out ended with a line feed (or nothing has been handed out yet).
    is_eol: bool,
}
17
18impl<R> Reader<R> {
19    pub(super) fn new(inner: R) -> Self {
20        Self {
21            inner,
22            is_eol: true,
23        }
24    }
25}
26
27impl<R> Read for Reader<R>
28where
29    R: BufRead,
30{
31    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
32        let mut src = self.fill_buf()?;
33        let amt = src.read(buf)?;
34
35        if !src.is_empty() {
36            self.is_eol = false;
37        }
38
39        self.consume(amt);
40
41        Ok(amt)
42    }
43}
44
45impl<R> BufRead for Reader<R>
46where
47    R: BufRead,
48{
49    fn fill_buf(&mut self) -> io::Result<&[u8]> {
50        const PREFIX: u8 = b'@';
51        const LINE_FEED: u8 = b'\n';
52
53        let src = self.inner.fill_buf()?;
54
55        let buf = if self.is_eol && src.first().map(|&b| b != PREFIX).unwrap_or(true) {
56            &[]
57        } else if let Some(i) = src.as_bstr().find_byte(LINE_FEED) {
58            self.is_eol = true;
59            &src[..=i]
60        } else {
61            self.is_eol = false;
62            src
63        };
64
65        Ok(buf)
66    }
67
68    fn consume(&mut self, amt: usize) {
69        self.inner.consume(amt);
70    }
71}
72
73pub(super) fn read_header<R>(reader: &mut R) -> io::Result<Header>
74where
75    R: BufRead,
76{
77    let mut reader = Reader::new(reader);
78
79    let mut parser = header::Parser::default();
80    let mut buf = Vec::new();
81
82    while read_line(&mut reader, &mut buf)? != 0 {
83        parser
84            .parse_partial(&buf)
85            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
86
87        buf.clear();
88    }
89
90    Ok(parser.finish())
91}
92
#[cfg(test)]
mod tests {
    use std::{io::BufReader, num::NonZeroUsize};

    use super::*;
    use crate::header::record::value::{
        map::{self, header::Version, ReferenceSequence},
        Map,
    };

    #[test]
    fn test_read_header_with_no_header() -> io::Result<()> {
        // The input starts directly with an alignment record, so there are no `@` lines.
        let mut src = &b"*\t4\t*\t0\t255\t*\t*\t0\t0\t*\t*\n"[..];

        let header = read_header(&mut src)?;

        assert!(header.is_empty());

        Ok(())
    }

    #[test]
    fn test_read_header_with_no_records() -> io::Result<()> {
        // The input ends right after the header, with no records following it.
        let mut src = "@HD\tVN:1.6\n".as_bytes();

        let actual = read_header(&mut src)?;

        let expected = Header::builder()
            .set_header(Map::<map::Header>::new(Version::new(1, 6)))
            .build();

        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_read_header_with_multiple_buffer_fills() -> io::Result<()> {
        const SQ0_LN: NonZeroUsize = match NonZeroUsize::new(8) {
            Some(n) => n,
            None => unreachable!(),
        };

        // A 16-byte buffer is shorter than the `@SQ` line, so the header cannot be read from a
        // single buffer fill.
        let src = "@HD\tVN:1.6\n@SQ\tSN:sq0\tLN:8\n";
        let mut buffered = BufReader::with_capacity(16, src.as_bytes());

        let actual = read_header(&mut buffered)?;

        let expected = Header::builder()
            .set_header(Map::<map::Header>::new(Version::new(1, 6)))
            .add_reference_sequence("sq0", Map::<ReferenceSequence>::new(SQ0_LN))
            .build();

        assert_eq!(actual, expected);

        Ok(())
    }
}