1use std::{fs::File, io, path::Path};
4
5use noodles_bgzf as bgzf;
6use noodles_csi::{
7 self as csi,
8 binning_index::{index::reference_sequence::bin::Chunk, Indexer},
9};
10use noodles_vcf::variant::Record as _;
11
12use super::io::Reader;
13use crate::Record;
14
15pub fn index<P>(src: P) -> io::Result<csi::Index>
29where
30 P: AsRef<Path>,
31{
32 let mut reader = File::open(src).map(Reader::new)?;
33 index_inner(&mut reader)
34}
35
36fn index_inner<R>(reader: &mut Reader<R>) -> io::Result<csi::Index>
37where
38 R: bgzf::io::Read,
39{
40 let header = reader.read_header()?;
41 let mut indexer = Indexer::default();
42
43 let mut record = Record::default();
44 let mut start_position = reader.get_ref().virtual_position();
45
46 while reader.read_record(&mut record)? != 0 {
47 let end_position = reader.get_ref().virtual_position();
48 let chunk = Chunk::new(start_position, end_position);
49
50 let reference_sequence_id = record.reference_sequence_id()?;
51
52 let start = record
53 .variant_start()
54 .transpose()?
55 .expect("missing variant start");
56
57 let end = record.variant_end(&header)?;
58
59 indexer.add_record(Some((reference_sequence_id, start, end, true)), chunk)?;
60
61 start_position = end_position;
62 }
63
64 Ok(indexer.build(header.contigs().len()))
65}
66
67#[cfg(test)]
68mod tests {
69 use noodles_core::Position;
70 use noodles_csi::{binning_index::ReferenceSequence as _, BinningIndex};
71 use noodles_vcf::{
72 self as vcf,
73 variant::{io::Write, RecordBuf},
74 };
75
76 use super::*;
77
78 #[test]
79 fn test_index() -> io::Result<()> {
80 let mut writer = crate::io::Writer::new(Vec::new());
81
82 let header = vcf::Header::builder()
83 .add_contig("sq0", Default::default())
84 .build();
85
86 writer.write_header(&header)?;
87
88 let record = RecordBuf::builder()
89 .set_reference_sequence_name("sq0")
90 .set_variant_start(Position::MIN)
91 .set_reference_bases("N")
92 .build();
93
94 writer.write_variant_record(&header, &record)?;
95 writer.try_finish()?;
96
97 let src = writer.into_inner().into_inner();
98 let mut reader = Reader::new(&src[..]);
99
100 let index = index_inner(&mut reader)?;
101
102 assert_eq!(index.min_shift(), 14);
103 assert_eq!(index.depth(), 5);
104 assert!(index.header().is_none());
105
106 let reference_sequences = index.reference_sequences();
107 assert_eq!(reference_sequences.len(), 1);
108
109 let sq0 = &reference_sequences[0];
110 let bins = sq0.bins();
111 assert_eq!(bins.len(), 1);
112 assert!(bins.contains_key(&4681));
113
114 assert_eq!(
115 sq0.metadata().map(|metadata| (
116 metadata.mapped_record_count(),
117 metadata.unmapped_record_count()
118 )),
119 Some((1, 0))
120 );
121
122 assert_eq!(index.unplaced_unmapped_record_count(), Some(0));
123
124 Ok(())
125 }
126}