1use std::{cmp, collections::HashMap, fs::File, io, path::Path};
4
5use noodles_core::Position;
6use noodles_fasta as fasta;
7use noodles_sam as sam;
8
9use super::{
10 container::{slice, CompressionHeader},
11 crai,
12 io::{reader::container::Slice, Reader},
13};
14use crate::io::reader::Container;
15
16pub fn index<P>(src: P) -> io::Result<crai::Index>
26where
27 P: AsRef<Path>,
28{
29 let mut reader = File::open(src).map(Reader::new)?;
30 let header = reader.read_header()?;
31
32 let mut index = Vec::new();
33
34 let mut container = Container::default();
35 let mut container_position = reader.position()?;
36
37 loop {
38 let container_len = match reader.read_container(&mut container)? {
39 0 => break,
40 n => n,
41 };
42
43 let compression_header = container.compression_header()?;
44
45 let landmarks = container.header().landmarks();
46 let slice_count = landmarks.len();
47
48 for (i, result) in container.slices().enumerate() {
49 let slice = result?;
50 let landmark = landmarks[i];
51
52 let slice_length = if i < slice_count - 1 {
53 landmarks[i + 1] - landmark
54 } else {
55 container_len - landmark
56 };
57
58 push_index_records(
59 &mut index,
60 &header,
61 &compression_header,
62 &slice,
63 container_position,
64 landmark as u64,
65 slice_length as u64,
66 )?;
67 }
68
69 container_position = reader.position()?;
70 }
71
72 Ok(index)
73}
74
75fn push_index_records(
76 index: &mut crai::Index,
77 header: &sam::Header,
78 compression_header: &CompressionHeader,
79 slice: &Slice,
80 container_position: u64,
81 landmark: u64,
82 slice_length: u64,
83) -> io::Result<()> {
84 if slice.header().reference_sequence_context().is_many() {
85 push_index_records_for_multi_reference_slice(
86 index,
87 header,
88 compression_header,
89 slice,
90 container_position,
91 landmark,
92 slice_length,
93 )
94 } else {
95 push_index_record_for_single_reference_slice(
96 index,
97 slice.header(),
98 container_position,
99 landmark,
100 slice_length,
101 )
102 }
103}
104
105#[derive(Debug)]
106struct SliceReferenceSequenceAlignmentRangeInclusive {
107 start: Option<Position>,
108 end: Option<Position>,
109}
110
111impl Default for SliceReferenceSequenceAlignmentRangeInclusive {
112 fn default() -> Self {
113 Self {
114 start: Position::new(usize::MAX),
115 end: None,
116 }
117 }
118}
119
120fn push_index_records_for_multi_reference_slice(
121 index: &mut crai::Index,
122 header: &sam::Header,
123 compression_header: &CompressionHeader,
124 slice: &Slice,
125 container_position: u64,
126 landmark: u64,
127 slice_length: u64,
128) -> io::Result<()> {
129 let mut reference_sequence_ids: HashMap<
130 Option<usize>,
131 SliceReferenceSequenceAlignmentRangeInclusive,
132 > = HashMap::new();
133
134 let (core_data_src, external_data_srcs) = slice.decode_blocks()?;
135
136 for record in slice.records(
137 fasta::Repository::default(), header,
139 compression_header,
140 &core_data_src,
141 &external_data_srcs,
142 )? {
143 let range = reference_sequence_ids
144 .entry(record.reference_sequence_id)
145 .or_default();
146
147 range.start = cmp::min(range.start, record.alignment_start);
148
149 let alignment_end = record.alignment_end();
150 range.end = cmp::max(range.end, alignment_end);
151 }
152
153 let mut sorted_reference_sequence_ids: Vec<_> =
154 reference_sequence_ids.keys().copied().collect();
155 sorted_reference_sequence_ids.sort_unstable();
156
157 for reference_sequence_id in sorted_reference_sequence_ids {
158 let (alignment_start, alignment_span) = if reference_sequence_id.is_some() {
159 let range = &reference_sequence_ids[&reference_sequence_id];
160
161 if let (Some(start), Some(end)) = (range.start, range.end) {
162 let span = usize::from(end) - usize::from(start) + 1;
163 (Some(start), span)
164 } else {
165 todo!("unhandled interval: {:?}", range);
166 }
167 } else {
168 (None, 0)
169 };
170
171 let record = crai::Record::new(
172 reference_sequence_id,
173 alignment_start,
174 alignment_span,
175 container_position,
176 landmark,
177 slice_length,
178 );
179
180 index.push(record);
181 }
182
183 Ok(())
184}
185
186fn push_index_record_for_single_reference_slice(
187 index: &mut crai::Index,
188 slice_header: &slice::Header,
189 container_position: u64,
190 landmark: u64,
191 slice_length: u64,
192) -> io::Result<()> {
193 use crate::container::ReferenceSequenceContext;
194
195 let (reference_sequence_id, alignment_start, alignment_span) =
196 match slice_header.reference_sequence_context() {
197 ReferenceSequenceContext::Some(context) => {
198 let reference_sequence_id = Some(context.reference_sequence_id());
199 let alignment_start = Some(context.alignment_start());
200 let alignment_span = context.alignment_span();
201 (reference_sequence_id, alignment_start, alignment_span)
202 }
203 ReferenceSequenceContext::None => (None, None, 0),
204 ReferenceSequenceContext::Many => unreachable!(),
205 };
206
207 let record = crai::Record::new(
208 reference_sequence_id,
209 alignment_start,
210 alignment_span,
211 container_position,
212 landmark,
213 slice_length,
214 );
215
216 index.push(record);
217
218 Ok(())
219}