noodles_bcf/io/reader/
query.rs

1use std::io;
2
3use noodles_bgzf as bgzf;
4use noodles_core::region::Interval;
5use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
6use noodles_vcf::{self as vcf, variant::Record as _};
7
8use super::Reader;
9use crate::Record;
10
11/// An iterator over records of a BCF reader that intersects a given region.
12///
13/// This is created by calling [`super::Reader::query`].
14pub struct Query<'r, 'h, R> {
15    reader: Reader<csi::io::Query<'r, R>>,
16    header: &'h vcf::Header,
17    reference_sequence_id: usize,
18    interval: Interval,
19    record: Record,
20}
21
22impl<'r, 'h, R> Query<'r, 'h, R>
23where
24    R: bgzf::io::BufRead + bgzf::io::Seek,
25{
26    pub(super) fn new(
27        reader: &'r mut R,
28        header: &'h vcf::Header,
29        chunks: Vec<Chunk>,
30        reference_sequence_id: usize,
31        interval: Interval,
32    ) -> Self {
33        Self {
34            reader: Reader::from(csi::io::Query::new(reader, chunks)),
35            header,
36            reference_sequence_id,
37            interval,
38            record: Record::default(),
39        }
40    }
41}
42
43impl<R> Iterator for Query<'_, '_, R>
44where
45    R: bgzf::io::BufRead + bgzf::io::Seek,
46{
47    type Item = io::Result<Record>;
48
49    fn next(&mut self) -> Option<Self::Item> {
50        match next_record(
51            &mut self.reader,
52            &mut self.record,
53            self.header,
54            self.reference_sequence_id,
55            self.interval,
56        ) {
57            Ok(0) => None,
58            Ok(_) => Some(Ok(self.record.clone())),
59            Err(e) => Some(Err(e)),
60        }
61    }
62}
63
64fn intersects(
65    header: &vcf::Header,
66    record: &Record,
67    reference_sequence_id: usize,
68    region_interval: Interval,
69) -> io::Result<bool> {
70    let reference_sequence_name = record.reference_sequence_name(header.string_maps())?;
71
72    let id = header
73        .string_maps()
74        .contigs()
75        .get_index_of(reference_sequence_name)
76        .ok_or_else(|| {
77            io::Error::new(
78                io::ErrorKind::InvalidInput,
79                format!(
80                    "reference sequence name does not exist in contigs: {reference_sequence_name}"
81                ),
82            )
83        })?;
84
85    let Some(start) = record.variant_start().transpose()? else {
86        return Ok(false);
87    };
88
89    let end = record.variant_end(header)?;
90    let record_interval = Interval::from(start..=end);
91
92    Ok(id == reference_sequence_id && record_interval.intersects(region_interval))
93}
94
95fn next_record<R>(
96    reader: &mut Reader<csi::io::Query<'_, R>>,
97    record: &mut Record,
98    header: &vcf::Header,
99    reference_sequence_id: usize,
100    interval: Interval,
101) -> io::Result<usize>
102where
103    R: bgzf::io::BufRead + bgzf::io::Seek,
104{
105    loop {
106        match reader.read_record(record)? {
107            0 => return Ok(0),
108            n => {
109                if intersects(header, record, reference_sequence_id, interval)? {
110                    return Ok(n);
111                }
112            }
113        }
114    }
115}