noodles_vcf/
fs.rs

1//! VCF filesystem operations.
2
3use std::{fs::File, io, path::Path};
4
5use noodles_bgzf as bgzf;
6use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
7use noodles_tabix as tabix;
8
9use super::{io::Reader, variant::Record as _, Record};
10
11/// Indexes a bgzipped-compressed VCF file.
12///
13/// # Examples
14///
15/// ```no_run
16/// use noodles_vcf as vcf;
17/// let _index = vcf::fs::index("sample.vcf.gz")?;
18/// # Ok::<_, std::io::Error>(())
19/// ```
20pub fn index<P>(src: P) -> io::Result<tabix::Index>
21where
22    P: AsRef<Path>,
23{
24    let mut reader = File::open(src)
25        .map(bgzf::io::Reader::new)
26        .map(Reader::new)?;
27
28    index_inner(&mut reader)
29}
30
31fn index_inner<R>(reader: &mut Reader<R>) -> io::Result<tabix::Index>
32where
33    R: bgzf::io::BufRead,
34{
35    let header = reader.read_header()?;
36
37    let mut indexer = tabix::index::Indexer::default();
38    indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
39
40    let mut record = Record::default();
41    let mut start_position = reader.get_ref().virtual_position();
42
43    while reader.read_record(&mut record)? != 0 {
44        let end_position = reader.get_ref().virtual_position();
45        let chunk = Chunk::new(start_position, end_position);
46
47        let reference_sequence_name = record.reference_sequence_name();
48
49        let start = record
50            .variant_start()
51            .transpose()?
52            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing position"))?;
53
54        let end = record.variant_end(&header)?;
55
56        indexer.add_record(reference_sequence_name, start, end, chunk)?;
57
58        start_position = end_position;
59    }
60
61    Ok(indexer.build())
62}