noodles_bcf/io/reader/
query.rs1use std::io;
2
3use noodles_bgzf as bgzf;
4use noodles_core::region::Interval;
5use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
6use noodles_vcf::{self as vcf, variant::Record as _};
7
8use super::Reader;
9use crate::Record;
10
11pub struct Query<'r, 'h, R> {
15 reader: Reader<csi::io::Query<'r, R>>,
16 header: &'h vcf::Header,
17 reference_sequence_id: usize,
18 interval: Interval,
19 record: Record,
20}
21
22impl<'r, 'h, R> Query<'r, 'h, R>
23where
24 R: bgzf::io::BufRead + bgzf::io::Seek,
25{
26 pub(super) fn new(
27 reader: &'r mut R,
28 header: &'h vcf::Header,
29 chunks: Vec<Chunk>,
30 reference_sequence_id: usize,
31 interval: Interval,
32 ) -> Self {
33 Self {
34 reader: Reader::from(csi::io::Query::new(reader, chunks)),
35 header,
36 reference_sequence_id,
37 interval,
38 record: Record::default(),
39 }
40 }
41}
42
43impl<R> Iterator for Query<'_, '_, R>
44where
45 R: bgzf::io::BufRead + bgzf::io::Seek,
46{
47 type Item = io::Result<Record>;
48
49 fn next(&mut self) -> Option<Self::Item> {
50 match next_record(
51 &mut self.reader,
52 &mut self.record,
53 self.header,
54 self.reference_sequence_id,
55 self.interval,
56 ) {
57 Ok(0) => None,
58 Ok(_) => Some(Ok(self.record.clone())),
59 Err(e) => Some(Err(e)),
60 }
61 }
62}
63
64fn intersects(
65 header: &vcf::Header,
66 record: &Record,
67 reference_sequence_id: usize,
68 region_interval: Interval,
69) -> io::Result<bool> {
70 let reference_sequence_name = record.reference_sequence_name(header.string_maps())?;
71
72 let id = header
73 .string_maps()
74 .contigs()
75 .get_index_of(reference_sequence_name)
76 .ok_or_else(|| {
77 io::Error::new(
78 io::ErrorKind::InvalidInput,
79 format!(
80 "reference sequence name does not exist in contigs: {reference_sequence_name}"
81 ),
82 )
83 })?;
84
85 let Some(start) = record.variant_start().transpose()? else {
86 return Ok(false);
87 };
88
89 let end = record.variant_end(header)?;
90 let record_interval = Interval::from(start..=end);
91
92 Ok(id == reference_sequence_id && record_interval.intersects(region_interval))
93}
94
95fn next_record<R>(
96 reader: &mut Reader<csi::io::Query<'_, R>>,
97 record: &mut Record,
98 header: &vcf::Header,
99 reference_sequence_id: usize,
100 interval: Interval,
101) -> io::Result<usize>
102where
103 R: bgzf::io::BufRead + bgzf::io::Seek,
104{
105 loop {
106 match reader.read_record(record)? {
107 0 => return Ok(0),
108 n => {
109 if intersects(header, record, reference_sequence_id, interval)? {
110 return Ok(n);
111 }
112 }
113 }
114 }
115}