1mod cigar;
4mod data;
5pub(crate) mod feature;
6mod flags;
7mod mate_flags;
8mod quality_scores;
9mod sequence;
10
11use std::{borrow::Cow, io};
12
13use bstr::{BStr, ByteSlice};
14use noodles_core::Position;
15use noodles_fasta as fasta;
16use noodles_sam::{
17 self as sam,
18 alignment::{
19 record::{data::field::Tag, MappingQuality},
20 record_buf::data::field::Value,
21 },
22};
23
24use self::{cigar::Cigar, data::Data, quality_scores::QualityScores, sequence::Sequence};
25pub(crate) use self::{feature::Feature, flags::Flags, mate_flags::MateFlags};
26use crate::{
27 container::compression_header::preservation_map::SubstitutionMatrix,
28 io::reader::container::slice::ReferenceSequence,
29};
30
#[derive(Clone, Debug, PartialEq)]
/// A CRAM record.
///
/// Fields that carry the `'c` lifetime (`header`, `name`, `sequence`, `features` and
/// `quality_scores`) may borrow data that is owned outside of the record.
pub struct Record<'c> {
    /// Record identifier.
    // NOTE(review): how IDs are assigned is not visible in this file; confirm with the reader.
    pub(crate) id: u64,
    /// SAM header handed to the data view built by `data()`, which panics when this is `None`.
    pub(crate) header: Option<&'c sam::Header>,
    /// Reference sequence used by `sequence()` to rebuild the bases when `sequence` is empty.
    pub(crate) reference_sequence: Option<ReferenceSequence>,
    /// Substitution matrix passed along when the bases are rebuilt in `sequence()`.
    pub(crate) substitution_matrix: SubstitutionMatrix,
    /// SAM/BAM alignment flags, as reported by `flags()`.
    pub(crate) bam_flags: sam::alignment::record::Flags,
    /// CRAM-specific flags; consulted by `quality_scores()` to tell whether the scores are
    /// stored as an array.
    pub(crate) cram_flags: Flags,
    /// Reference sequence ID, if any.
    pub(crate) reference_sequence_id: Option<usize>,
    /// Read length; the starting point of the alignment span calculation and an input to the
    /// CIGAR, sequence and quality score views.
    pub(crate) read_length: usize,
    /// Alignment start position, if any.
    pub(crate) alignment_start: Option<Position>,
    /// Read group ID, if any; passed to the data view together with `data`.
    pub(crate) read_group_id: Option<usize>,
    /// Read name, if any.
    pub(crate) name: Option<Cow<'c, [u8]>>,
    /// Mate flags.
    // NOTE(review): not read anywhere in this file; exact semantics live in `mate_flags`.
    pub(crate) mate_flags: MateFlags,
    /// Reference sequence ID of the mate, if any.
    pub(crate) mate_reference_sequence_id: Option<usize>,
    /// Alignment start position of the mate, if any.
    pub(crate) mate_alignment_start: Option<Position>,
    /// Template length, as reported by `template_length()`.
    pub(crate) template_length: i32,
    /// Mate distance, if any.
    // NOTE(review): not read anywhere in this file; presumably the distance to the mate
    // record within the slice. Confirm against the reader.
    pub(crate) mate_distance: Option<usize>,
    /// Tag-value pairs backing the data view built by `data()`.
    pub(crate) data: Vec<(Tag, Value)>,
    /// Stored bases. When empty, `sequence()` rebuilds the bases from the reference sequence
    /// and `features` instead.
    pub(crate) sequence: &'c [u8],
    /// Read features; inputs to the CIGAR, sequence, quality score and alignment span
    /// calculations.
    pub(crate) features: Vec<Feature<'c>>,
    /// Mapping quality, if any.
    pub(crate) mapping_quality: Option<MappingQuality>,
    /// Stored quality scores; returned directly by `quality_scores()` when the record is
    /// unmapped or the CRAM flags say the scores are stored as an array.
    pub(crate) quality_scores: &'c [u8],
}
56
57impl Record<'_> {
58 fn alignment_span(&self) -> usize {
59 calculate_alignment_span(self.read_length, &self.features)
60 }
61
62 pub(crate) fn alignment_end(&self) -> Option<Position> {
63 self.alignment_start.and_then(|alignment_start| {
64 let end = usize::from(alignment_start) + self.alignment_span() - 1;
65 Position::new(end)
66 })
67 }
68}
69
70impl Default for Record<'_> {
71 fn default() -> Self {
72 Self {
73 id: 0,
74 header: None,
75 reference_sequence: None,
76 substitution_matrix: SubstitutionMatrix::default(),
77 bam_flags: sam::alignment::record::Flags::UNMAPPED,
78 cram_flags: Flags::default(),
79 reference_sequence_id: None,
80 read_length: 0,
81 alignment_start: None,
82 read_group_id: None,
83 name: None,
84 mate_flags: MateFlags::default(),
85 mate_reference_sequence_id: None,
86 mate_alignment_start: None,
87 template_length: 0,
88 mate_distance: None,
89 data: Vec::new(),
90 sequence: &[],
91 features: Vec::new(),
92 mapping_quality: None,
93 quality_scores: &[],
94 }
95 }
96}
97
98impl sam::alignment::Record for Record<'_> {
99 fn name(&self) -> Option<&BStr> {
100 self.name.as_deref().map(|name| name.as_bstr())
101 }
102
103 fn flags(&self) -> io::Result<sam::alignment::record::Flags> {
104 Ok(self.bam_flags)
105 }
106
107 fn reference_sequence_id<'r, 'h: 'r>(
108 &'r self,
109 _: &'h sam::Header,
110 ) -> Option<io::Result<usize>> {
111 self.reference_sequence_id.map(Ok)
112 }
113
114 fn alignment_start(&self) -> Option<io::Result<Position>> {
115 self.alignment_start.map(Ok)
116 }
117
118 fn mapping_quality(&self) -> Option<io::Result<MappingQuality>> {
119 self.mapping_quality.map(Ok)
120 }
121
122 fn cigar(&self) -> Box<dyn sam::alignment::record::Cigar + '_> {
123 Box::new(Cigar::new(
124 &self.features,
125 self.bam_flags.is_unmapped(),
126 self.read_length,
127 ))
128 }
129
130 fn mate_reference_sequence_id<'r, 'h: 'r>(
131 &'r self,
132 _: &'h sam::Header,
133 ) -> Option<io::Result<usize>> {
134 self.mate_reference_sequence_id.map(Ok)
135 }
136
137 fn mate_alignment_start(&self) -> Option<io::Result<Position>> {
138 self.mate_alignment_start.map(Ok)
139 }
140
141 fn template_length(&self) -> io::Result<i32> {
142 Ok(self.template_length)
143 }
144
145 fn sequence(&self) -> Box<dyn sam::alignment::record::Sequence + '_> {
146 if self.sequence.is_empty() {
147 let (reference_sequence, alignment_start) = match self.reference_sequence.as_ref() {
148 Some(ReferenceSequence::Embedded {
149 reference_start,
150 sequence,
151 }) => {
152 let alignment_start = usize::from(self.alignment_start.unwrap());
153 let offset = usize::from(*reference_start);
154 let offset_alignment_start =
155 Position::new(alignment_start - offset + 1).unwrap();
156 (sequence.clone(), offset_alignment_start)
157 }
158 Some(ReferenceSequence::External { sequence, .. }) => {
159 (sequence.clone(), self.alignment_start.unwrap())
160 }
161 None => (fasta::record::Sequence::default(), Position::MIN),
162 };
163
164 Box::new(Sequence::new(
165 Some(reference_sequence),
166 self.substitution_matrix.clone(),
167 &self.features,
168 alignment_start,
169 self.read_length,
170 ))
171 } else {
172 Box::new(Bases(self.sequence))
173 }
174 }
175
176 fn quality_scores(&self) -> Box<dyn sam::alignment::record::QualityScores + '_> {
177 if self.bam_flags.is_unmapped() || self.cram_flags.quality_scores_are_stored_as_array() {
178 Box::new(Scores(self.quality_scores))
179 } else {
180 Box::new(QualityScores::new(&self.features, self.read_length))
181 }
182 }
183
184 fn data(&self) -> Box<dyn sam::alignment::record::Data + '_> {
185 Box::new(Data::new(
186 self.header.unwrap(),
187 &self.data,
188 self.read_group_id,
189 ))
190 }
191
192 fn alignment_span(&self) -> Option<io::Result<usize>> {
193 Some(Ok(self.alignment_span()))
194 }
195
196 fn alignment_end(&self) -> Option<io::Result<Position>> {
197 self.alignment_end().map(Ok)
198 }
199}
200
201struct Bases<'c>(&'c [u8]);
202
203impl sam::alignment::record::Sequence for Bases<'_> {
204 fn is_empty(&self) -> bool {
205 self.0.is_empty()
206 }
207
208 fn len(&self) -> usize {
209 self.0.len()
210 }
211
212 fn get(&self, i: usize) -> Option<u8> {
213 self.0.get(i).copied()
214 }
215
216 fn split_at_checked(
217 &self,
218 _mid: usize,
219 ) -> Option<(
220 Box<dyn sam::alignment::record::Sequence + '_>,
221 Box<dyn sam::alignment::record::Sequence + '_>,
222 )> {
223 todo!()
224 }
225
226 fn iter(&self) -> Box<dyn Iterator<Item = u8> + '_> {
227 Box::new(self.0.iter().copied())
228 }
229}
230
231struct Scores<'c>(&'c [u8]);
232
233impl sam::alignment::record::QualityScores for Scores<'_> {
234 fn is_empty(&self) -> bool {
235 self.0.is_empty()
236 }
237
238 fn len(&self) -> usize {
239 self.0.len()
240 }
241
242 fn iter(&self) -> Box<dyn Iterator<Item = io::Result<u8>> + '_> {
243 Box::new(self.0.iter().copied().map(Ok))
244 }
245}
246
247pub(crate) fn calculate_alignment_span(read_length: usize, features: &[Feature]) -> usize {
248 features
249 .iter()
250 .fold(read_length, |alignment_span, feature| match feature {
251 Feature::Insertion { bases, .. } => alignment_span - bases.len(),
252 Feature::InsertBase { .. } => alignment_span - 1,
253 Feature::Deletion { len, .. } => alignment_span + len,
254 Feature::ReferenceSkip { len, .. } => alignment_span + len,
255 Feature::SoftClip { bases, .. } => alignment_span - bases.len(),
256 _ => alignment_span,
257 })
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_alignment_span() -> Result<(), noodles_core::position::TryFromIntError> {
        // Without features, the span is the read length.
        assert_eq!(calculate_alignment_span(4, &[]), 4);

        // A hard clip does not change the span.
        let hard_clip = Feature::HardClip {
            position: Position::try_from(1)?,
            len: 4,
        };
        assert_eq!(calculate_alignment_span(4, &[hard_clip]), 4);

        // 20 - 2 (insertion) - 1 (insert base) + 3 (deletion) + 5 (reference skip)
        //    - 4 (soft clip) = 21
        let insertion = Feature::Insertion {
            position: Position::try_from(1)?,
            bases: b"AC",
        };
        let insert_base = Feature::InsertBase {
            position: Position::try_from(4)?,
            base: b'G',
        };
        let deletion = Feature::Deletion {
            position: Position::try_from(6)?,
            len: 3,
        };
        let reference_skip = Feature::ReferenceSkip {
            position: Position::try_from(10)?,
            len: 5,
        };
        let soft_clip = Feature::SoftClip {
            position: Position::try_from(16)?,
            bases: b"ACGT",
        };
        let mixed = [insertion, insert_base, deletion, reference_skip, soft_clip];
        assert_eq!(calculate_alignment_span(20, &mixed), 21);

        Ok(())
    }
}