// noodles_csi/io/indexed_records.rs

mod record;

use noodles_core::Region;

pub use self::record::Record;

use std::io::{self, BufRead, Lines};

use self::record::parse_record;
use crate::binning_index::index::{header::format::CoordinateSystem, Header};

use super::FilterByRegion;

/// An iterator over indexed records.
///
/// Wraps the line iterator of a buffered reader and yields one parsed [`Record`] per
/// non-comment line. All layout settings are captured from the index header when the
/// iterator is created.
pub struct IndexedRecords<R> {
    // Source of raw text lines.
    lines: Lines<R>,
    // Lines starting with this character are skipped instead of being parsed.
    line_comment_prefix: char,
    // Forwarded to `parse_record`; presumably the column holding the reference sequence
    // name (confirm against `record::parse_record`).
    reference_sequence_name_index: usize,
    // Forwarded to `parse_record`; presumably the column holding the start position.
    start_position_index: usize,
    // Forwarded to `parse_record`; `None` when the header defines no end position column.
    end_position_index: Option<usize>,
    // Coordinate system taken from the header format and forwarded to `parse_record`.
    coordinate_system: CoordinateSystem,
}

impl<R> IndexedRecords<R>
where
    R: BufRead,
{
    /// Creates an indexed records iterator.
    ///
    /// The reader is consumed and read line by line. The comment prefix, the column
    /// indices, and the coordinate system are copied out of `header` here, so the
    /// returned iterator does not borrow from it.
    pub fn new(reader: R, header: &Header) -> Self {
        let lines = reader.lines();

        // The header stores the prefix as a byte; line matching works on `char`.
        let line_comment_prefix = char::from(header.line_comment_prefix());

        let reference_sequence_name_index = header.reference_sequence_name_index();
        let start_position_index = header.start_position_index();
        let end_position_index = header.end_position_index();
        let coordinate_system = header.format().coordinate_system();

        Self {
            lines,
            line_comment_prefix,
            reference_sequence_name_index,
            start_position_index,
            end_position_index,
            coordinate_system,
        }
    }

    /// Creates an iterator that filters indexed records that intersect the given region.
    ///
    /// This consumes the iterator and hands it to [`FilterByRegion`].
    pub fn filter_by_region(self, region: &Region) -> FilterByRegion<Self, Record> {
        let records = self;
        FilterByRegion::new(records, region)
    }
}

impl<R> Iterator for IndexedRecords<R>
where
    R: BufRead,
{
    type Item = io::Result<Record>;

    /// Returns the next record, skipping comment lines.
    ///
    /// Yields `None` once the underlying reader is exhausted. A read error is passed
    /// through unchanged; a line that fails to parse is reported as an
    /// [`io::ErrorKind::InvalidData`] error wrapping the parse error.
    fn next(&mut self) -> Option<Self::Item> {
        let comment_prefix = self.line_comment_prefix;

        // Advance to the first item that is not a successfully read comment line.
        // Read errors are deliberately not filtered out so they reach the caller.
        let next_line = self
            .lines
            .find(|item| !matches!(item, Ok(text) if text.starts_with(comment_prefix)))?;

        Some(next_line.and_then(|text| {
            parse_record(
                text,
                self.reference_sequence_name_index,
                self.start_position_index,
                self.end_position_index,
                self.coordinate_system,
            )
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        }))
    }
}

#[cfg(test)]
mod tests {
    use bstr::BString;

    use super::*;

    /// Checks that data lines are yielded in order and that the `#`-prefixed line
    /// between them is skipped.
    #[test]
    fn test_next() -> Result<(), Box<dyn std::error::Error>> {
        let src = b"sq0\t8\t13
# noodles
sq0\t21\t34
";

        let header = Header::builder()
            .set_start_position_index(1)
            .set_end_position_index(Some(2))
            .set_reference_sequence_names([BString::from("sq0")].into_iter().collect())
            .build();

        // Drain the iterator by hand, bailing out on the first error.
        let mut records = Vec::new();

        for result in IndexedRecords::new(&src[..], &header) {
            records.push(result?);
        }

        let actual: Vec<_> = records.iter().map(|record| record.as_ref()).collect();

        assert_eq!(actual, ["sq0\t8\t13", "sq0\t21\t34"]);

        Ok(())
    }
}