// noodles_cram/crai/reader.rs
use std::io::{self, BufRead, BufReader, Read};

use flate2::read::GzDecoder;

use super::Index;

/// A CRAM index reader.
///
/// The wrapped stream is passed through a gzip decoder and then buffered, so the reader consumes
/// gzip-compressed input and exposes the decompressed text for line-oriented reading.
pub struct Reader<R> {
    // Buffered view over the gzip-decoded contents of the underlying stream.
    inner: BufReader<GzDecoder<R>>,
}

impl<R> Reader<R>
where
    R: Read,
{
    /// Creates a CRAM index reader.
    ///
    /// The given stream is wrapped in a gzip decoder, which in turn is wrapped in a buffered
    /// reader.
    ///
    /// # Examples
    ///
    /// ```
    /// use noodles_cram::crai;
    /// let data = [];
    /// let reader = crai::Reader::new(&data[..]);
    /// ```
    pub fn new(inner: R) -> Self {
        let decoder = GzDecoder::new(inner);
        let inner = BufReader::new(decoder);
        Self { inner }
    }

    /// Reads a CRAM index.
    ///
    /// The position of the stream is expected to be at the start. Lines are read until the end
    /// of the stream, and each line is parsed as one index record.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if reading a line fails, or an error of kind
    /// [`io::ErrorKind::InvalidData`] if a line cannot be parsed as a record.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// # use std::{fs::File, io};
    /// use noodles_cram::crai;
    /// let mut reader = File::open("sample.cram.crai").map(crai::Reader::new)?;
    /// let index = reader.read_index()?;
    /// # Ok::<(), io::Error>(())
    /// ```
    pub fn read_index(&mut self) -> io::Result<Index> {
        let mut records = Vec::new();
        // A single line buffer is reused for every record.
        let mut line = String::new();

        // A read of zero bytes signals the end of the stream.
        while read_line(&mut self.inner, &mut line)? != 0 {
            let record = line
                .parse()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

            records.push(record);
            line.clear();
        }

        Ok(records)
    }
}

/// Reads a single line from `reader` into `buf`, stripping its line terminator.
///
/// Only an actual terminator is removed: a trailing line feed (`\n`) and, if present, the
/// carriage return (`\r`) directly before it. A final line that is not newline-terminated is
/// left intact rather than losing its last character.
///
/// The line is appended to `buf`; callers that reuse the buffer must clear it between calls.
///
/// Returns the number of bytes read from `reader`, including any line terminator. A return
/// value of `0` indicates the end of the stream.
///
/// # Errors
///
/// Returns any error produced by [`BufRead::read_line`], e.g., an I/O failure or invalid UTF-8.
fn read_line<R>(reader: &mut R, buf: &mut String) -> io::Result<usize>
where
    R: BufRead,
{
    const LINE_FEED: char = '\n';
    const CARRIAGE_RETURN: char = '\r';

    let n = reader.read_line(buf)?;

    // Nothing was read at end of stream, so any existing buffer contents are left untouched.
    if n > 0 && buf.ends_with(LINE_FEED) {
        buf.pop();

        // Treat CRLF as a single line terminator.
        if buf.ends_with(CARRIAGE_RETURN) {
            buf.pop();
        }
    }

    Ok(n)
}

#[cfg(test)]
mod tests {
    use std::io::Write;

    use flate2::write::GzEncoder;
    use noodles_core::Position;

    use crate::crai::Record;

    use super::*;

    // Gzip-compresses `data` into an in-memory buffer using the default compression level.
    fn gzip(data: &[u8]) -> std::io::Result<Vec<u8>> {
        let mut encoder = GzEncoder::new(Vec::new(), Default::default());
        encoder.write_all(data)?;
        encoder.finish()
    }

    // Round-trips two tab-separated index lines through gzip and checks the parsed records.
    #[test]
    fn test_read_index() -> Result<(), Box<dyn std::error::Error>> {
        let raw = b"\
0\t10946\t6765\t17711\t233\t317811
0\t17711\t121393\t317811\t233\t317811
";

        let src = gzip(raw)?;
        let mut reader = Reader::new(&src[..]);
        let actual = reader.read_index()?;

        let first = Record::new(Some(0), Position::new(10946), 6765, 17711, 233, 317811);
        let second = Record::new(Some(0), Position::new(17711), 121393, 317811, 233, 317811);
        let expected = vec![first, second];

        assert_eq!(actual, expected);

        Ok(())
    }
}