noodles_bcf/
fs.rs

1//! BCF filesystem operations.
2
3use std::{fs::File, io, path::Path};
4
5use noodles_bgzf as bgzf;
6use noodles_csi::{
7    self as csi,
8    binning_index::{index::reference_sequence::bin::Chunk, Indexer},
9};
10use noodles_vcf::variant::Record as _;
11
12use super::io::Reader;
13use crate::Record;
14
15/// Indexes a BCF file.
16///
17/// The input must be coordinate-sorted.
18///
19/// See also [`csi::fs::write`] to write the resulting [`csi::Index`].
20///
21/// # Examples
22///
23/// ```no_run
24/// use noodles_bcf as bcf;
25/// let _index = bcf::fs::index("sample.bcf")?;
26/// Ok::<_, std::io::Error>(())
27/// ```
28pub fn index<P>(src: P) -> io::Result<csi::Index>
29where
30    P: AsRef<Path>,
31{
32    let mut reader = File::open(src).map(Reader::new)?;
33    index_inner(&mut reader)
34}
35
36fn index_inner<R>(reader: &mut Reader<R>) -> io::Result<csi::Index>
37where
38    R: bgzf::io::Read,
39{
40    let header = reader.read_header()?;
41    let mut indexer = Indexer::default();
42
43    let mut record = Record::default();
44    let mut start_position = reader.get_ref().virtual_position();
45
46    while reader.read_record(&mut record)? != 0 {
47        let end_position = reader.get_ref().virtual_position();
48        let chunk = Chunk::new(start_position, end_position);
49
50        let reference_sequence_id = record.reference_sequence_id()?;
51
52        let start = record
53            .variant_start()
54            .transpose()?
55            .expect("missing variant start");
56
57        let end = record.variant_end(&header)?;
58
59        indexer.add_record(Some((reference_sequence_id, start, end, true)), chunk)?;
60
61        start_position = end_position;
62    }
63
64    Ok(indexer.build(header.contigs().len()))
65}
66
67#[cfg(test)]
68mod tests {
69    use noodles_core::Position;
70    use noodles_csi::{binning_index::ReferenceSequence as _, BinningIndex};
71    use noodles_vcf::{
72        self as vcf,
73        variant::{io::Write, RecordBuf},
74    };
75
76    use super::*;
77
78    #[test]
79    fn test_index() -> io::Result<()> {
80        let mut writer = crate::io::Writer::new(Vec::new());
81
82        let header = vcf::Header::builder()
83            .add_contig("sq0", Default::default())
84            .build();
85
86        writer.write_header(&header)?;
87
88        let record = RecordBuf::builder()
89            .set_reference_sequence_name("sq0")
90            .set_variant_start(Position::MIN)
91            .set_reference_bases("N")
92            .build();
93
94        writer.write_variant_record(&header, &record)?;
95        writer.try_finish()?;
96
97        let src = writer.into_inner().into_inner();
98        let mut reader = Reader::new(&src[..]);
99
100        let index = index_inner(&mut reader)?;
101
102        assert_eq!(index.min_shift(), 14);
103        assert_eq!(index.depth(), 5);
104        assert!(index.header().is_none());
105
106        let reference_sequences = index.reference_sequences();
107        assert_eq!(reference_sequences.len(), 1);
108
109        let sq0 = &reference_sequences[0];
110        let bins = sq0.bins();
111        assert_eq!(bins.len(), 1);
112        assert!(bins.contains_key(&4681));
113
114        assert_eq!(
115            sq0.metadata().map(|metadata| (
116                metadata.mapped_record_count(),
117                metadata.unmapped_record_count()
118            )),
119            Some((1, 0))
120        );
121
122        assert_eq!(index.unplaced_unmapped_record_count(), Some(0));
123
124        Ok(())
125    }
126}