noodles_vcf/io/reader/
query.rs

1use std::io;
2
3use noodles_bgzf as bgzf;
4use noodles_core::region::Interval;
5use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
6
7use super::Reader;
8use crate::{variant::Record as _, Header, Record};
9
10/// An iterator over records of a VCF reader that intersects a given region.
11///
12/// This is created by calling [`Reader::query`].
13pub struct Query<'r, 'h, R> {
14    reader: Reader<csi::io::Query<'r, R>>,
15    reference_sequence_name: Vec<u8>,
16    interval: Interval,
17    header: &'h Header,
18    record: Record,
19}
20
21impl<'r, 'h, R> Query<'r, 'h, R>
22where
23    R: bgzf::io::BufRead + bgzf::io::Seek,
24{
25    pub(super) fn new(
26        reader: &'r mut R,
27        chunks: Vec<Chunk>,
28        reference_sequence_name: Vec<u8>,
29        interval: Interval,
30        header: &'h Header,
31    ) -> Self {
32        Self {
33            reader: Reader::new(csi::io::Query::new(reader, chunks)),
34            reference_sequence_name,
35            interval,
36            header,
37            record: Record::default(),
38        }
39    }
40}
41
42impl<R> Iterator for Query<'_, '_, R>
43where
44    R: bgzf::io::BufRead + bgzf::io::Seek,
45{
46    type Item = io::Result<Record>;
47
48    fn next(&mut self) -> Option<Self::Item> {
49        match next_record(
50            &mut self.reader,
51            &mut self.record,
52            self.header,
53            &self.reference_sequence_name,
54            self.interval,
55        ) {
56            Ok(0) => None,
57            Ok(_) => Some(Ok(self.record.clone())),
58            Err(e) => Some(Err(e)),
59        }
60    }
61}
62
63pub(crate) fn intersects(
64    header: &Header,
65    record: &Record,
66    reference_sequence_name: &[u8],
67    region_interval: Interval,
68) -> io::Result<bool> {
69    let name = record.reference_sequence_name();
70
71    let Some(start) = record.variant_start().transpose()? else {
72        return Ok(false);
73    };
74
75    let end = record.variant_end(header)?;
76    let record_interval = Interval::from(start..=end);
77
78    Ok(name.as_bytes() == reference_sequence_name && record_interval.intersects(region_interval))
79}
80
81fn next_record<R>(
82    reader: &mut Reader<csi::io::Query<'_, R>>,
83    record: &mut Record,
84    header: &Header,
85    reference_sequence_name: &[u8],
86    interval: Interval,
87) -> io::Result<usize>
88where
89    R: bgzf::io::BufRead + bgzf::io::Seek,
90{
91    loop {
92        match reader.read_record(record)? {
93            0 => return Ok(0),
94            n => {
95                if intersects(header, record, reference_sequence_name, interval)? {
96                    return Ok(n);
97                }
98            }
99        }
100    }
101}