1use std::{fs::File, io, path::Path};
4
5use noodles_bgzf as bgzf;
6use noodles_core::Position;
7use noodles_csi::{
8 self as csi,
9 binning_index::{index::reference_sequence::bin::Chunk, Indexer},
10};
11
12use super::{
13 alignment::Record as _,
14 header::record::value::map::header::{sort_order::COORDINATE, tag::SORT_ORDER},
15 io::Reader,
16 Header, Record,
17};
18
19pub fn index<P>(src: P) -> io::Result<csi::Index>
34where
35 P: AsRef<Path>,
36{
37 let mut reader = File::open(src).map(bgzf::Reader::new).map(Reader::new)?;
38 index_inner(&mut reader)
39}
40
41fn index_inner<R>(reader: &mut Reader<R>) -> io::Result<csi::Index>
42where
43 R: bgzf::io::BufRead,
44{
45 let header = reader.read_header()?;
46
47 if !is_coordinate_sorted(&header) {
48 return Err(io::Error::new(
49 io::ErrorKind::InvalidData,
50 format!(
51 "invalid sort order: expected {:?}, got {:?}",
52 Some(COORDINATE),
53 header
54 .header()
55 .and_then(|hdr| hdr.other_fields().get(&SORT_ORDER))
56 ),
57 ));
58 }
59
60 let mut indexer = Indexer::default();
61
62 let mut record = Record::default();
63 let mut start_position = reader.get_ref().virtual_position();
64
65 while reader.read_record(&mut record)? != 0 {
66 let end_position = reader.get_ref().virtual_position();
67 let chunk = Chunk::new(start_position, end_position);
68
69 let alignment_context = match alignment_context(&header, &record)? {
70 (Some(id), Some(start), Some(end)) => {
71 let is_mapped = !record.flags()?.is_unmapped();
72 Some((id, start, end, is_mapped))
73 }
74 _ => None,
75 };
76
77 indexer.add_record(alignment_context, chunk)?;
78
79 start_position = end_position;
80 }
81
82 Ok(indexer.build(header.reference_sequences().len()))
83}
84
85fn is_coordinate_sorted(header: &Header) -> bool {
86 header
87 .header()
88 .and_then(|hdr| hdr.other_fields().get(&SORT_ORDER))
89 .map(|sort_order| sort_order == COORDINATE)
90 .unwrap_or_default()
91}
92
93fn alignment_context(
94 header: &Header,
95 record: &Record,
96) -> io::Result<(Option<usize>, Option<Position>, Option<Position>)> {
97 Ok((
98 record.reference_sequence_id(header).transpose()?,
99 record.alignment_start().transpose()?,
100 record.alignment_end().transpose()?,
101 ))
102}
103
104#[cfg(test)]
105mod tests {
106 use super::*;
107 use crate::header::record::value::{
108 map::{self, builder::BuildError},
109 Map,
110 };
111
112 #[test]
113 fn test_is_coordinate_sorted() -> Result<(), BuildError> {
114 let header = Header::default();
115 assert!(!is_coordinate_sorted(&header));
116
117 let header = Header::builder()
118 .set_header(
119 Map::<map::Header>::builder()
120 .insert(SORT_ORDER, COORDINATE)
121 .build()?,
122 )
123 .build();
124
125 assert!(is_coordinate_sorted(&header));
126
127 Ok(())
128 }
129}