noodles_vcf/
fs.rs

1//! VCF filesystem operations.
2
3use std::{fs::File, io, path::Path};
4
5use noodles_bgzf as bgzf;
6use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
7use noodles_tabix as tabix;
8
9use super::{io::Reader, variant::Record as _, Record};
10
11/// Indexes a bgzipped-compressed VCF file.
12///
13/// # Examples
14///
15/// ```no_run
16/// use noodles_vcf as vcf;
17/// let _index = vcf::fs::index("sample.vcf.gz")?;
18/// # Ok::<_, std::io::Error>(())
19/// ```
20pub fn index<P>(src: P) -> io::Result<tabix::Index>
21where
22    P: AsRef<Path>,
23{
24    let mut reader = File::open(src).map(bgzf::Reader::new).map(Reader::new)?;
25    index_inner(&mut reader)
26}
27
28fn index_inner<R>(reader: &mut Reader<R>) -> io::Result<tabix::Index>
29where
30    R: bgzf::io::BufRead,
31{
32    let header = reader.read_header()?;
33
34    let mut indexer = tabix::index::Indexer::default();
35    indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
36
37    let mut record = Record::default();
38    let mut start_position = reader.get_ref().virtual_position();
39
40    while reader.read_record(&mut record)? != 0 {
41        let end_position = reader.get_ref().virtual_position();
42        let chunk = Chunk::new(start_position, end_position);
43
44        let reference_sequence_name = record.reference_sequence_name();
45
46        let start = record
47            .variant_start()
48            .transpose()?
49            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing position"))?;
50
51        let end = record.variant_end(&header)?;
52
53        indexer.add_record(reference_sequence_name, start, end, chunk)?;
54
55        start_position = end_position;
56    }
57
58    Ok(indexer.build())
59}