1mod cigar;
4pub(crate) mod data;
5pub(crate) mod feature;
6mod flags;
7mod mate_flags;
8mod quality_scores;
9mod sequence;
10
11use std::{borrow::Cow, io};
12
13use bstr::{BStr, ByteSlice};
14use noodles_core::Position;
15use noodles_fasta as fasta;
16use noodles_sam::{
17 self as sam,
18 alignment::record::{data::field::Tag, MappingQuality},
19};
20
21use self::{
22 cigar::Cigar,
23 data::{field::Value, Data},
24 quality_scores::QualityScores,
25 sequence::Sequence,
26};
27pub(crate) use self::{feature::Feature, flags::Flags, mate_flags::MateFlags};
28use crate::{
29 container::compression_header::preservation_map::SubstitutionMatrix,
30 io::reader::container::slice::ReferenceSequence,
31};
32
33#[derive(Clone, Debug, PartialEq)]
35pub struct Record<'c> {
36 pub(crate) id: u64,
37 pub(crate) header: Option<&'c sam::Header>,
38 pub(crate) reference_sequence: Option<ReferenceSequence>,
39 pub(crate) substitution_matrix: SubstitutionMatrix,
40 pub(crate) bam_flags: sam::alignment::record::Flags,
41 pub(crate) cram_flags: Flags,
42 pub(crate) reference_sequence_id: Option<usize>,
43 pub(crate) read_length: usize,
44 pub(crate) alignment_start: Option<Position>,
45 pub(crate) read_group_id: Option<usize>,
46 pub(crate) name: Option<Cow<'c, [u8]>>,
47 pub(crate) mate_flags: MateFlags,
48 pub(crate) mate_reference_sequence_id: Option<usize>,
49 pub(crate) mate_alignment_start: Option<Position>,
50 pub(crate) template_length: i32,
51 pub(crate) mate_distance: Option<usize>,
52 pub(crate) data: Vec<(Tag, Value<'c>)>,
53 pub(crate) sequence: &'c [u8],
54 pub(crate) features: Vec<Feature<'c>>,
55 pub(crate) mapping_quality: Option<MappingQuality>,
56 pub(crate) quality_scores: &'c [u8],
57}
58
59impl Record<'_> {
60 fn alignment_span(&self) -> usize {
61 calculate_alignment_span(self.read_length, &self.features)
62 }
63
64 pub(crate) fn alignment_end(&self) -> Option<Position> {
65 self.alignment_start.and_then(|alignment_start| {
66 let end = usize::from(alignment_start) + self.alignment_span() - 1;
67 Position::new(end)
68 })
69 }
70}
71
72impl Default for Record<'_> {
73 fn default() -> Self {
74 Self {
75 id: 0,
76 header: None,
77 reference_sequence: None,
78 substitution_matrix: SubstitutionMatrix::default(),
79 bam_flags: sam::alignment::record::Flags::UNMAPPED,
80 cram_flags: Flags::default(),
81 reference_sequence_id: None,
82 read_length: 0,
83 alignment_start: None,
84 read_group_id: None,
85 name: None,
86 mate_flags: MateFlags::default(),
87 mate_reference_sequence_id: None,
88 mate_alignment_start: None,
89 template_length: 0,
90 mate_distance: None,
91 data: Vec::new(),
92 sequence: &[],
93 features: Vec::new(),
94 mapping_quality: None,
95 quality_scores: &[],
96 }
97 }
98}
99
100impl sam::alignment::Record for Record<'_> {
101 fn name(&self) -> Option<&BStr> {
102 self.name.as_deref().map(|name| name.as_bstr())
103 }
104
105 fn flags(&self) -> io::Result<sam::alignment::record::Flags> {
106 Ok(self.bam_flags)
107 }
108
109 fn reference_sequence_id<'r, 'h: 'r>(
110 &'r self,
111 _: &'h sam::Header,
112 ) -> Option<io::Result<usize>> {
113 self.reference_sequence_id.map(Ok)
114 }
115
116 fn alignment_start(&self) -> Option<io::Result<Position>> {
117 self.alignment_start.map(Ok)
118 }
119
120 fn mapping_quality(&self) -> Option<io::Result<MappingQuality>> {
121 self.mapping_quality.map(Ok)
122 }
123
124 fn cigar(&self) -> Box<dyn sam::alignment::record::Cigar + '_> {
125 Box::new(Cigar::new(
126 &self.features,
127 self.bam_flags.is_unmapped(),
128 self.read_length,
129 ))
130 }
131
132 fn mate_reference_sequence_id<'r, 'h: 'r>(
133 &'r self,
134 _: &'h sam::Header,
135 ) -> Option<io::Result<usize>> {
136 self.mate_reference_sequence_id.map(Ok)
137 }
138
139 fn mate_alignment_start(&self) -> Option<io::Result<Position>> {
140 self.mate_alignment_start.map(Ok)
141 }
142
143 fn template_length(&self) -> io::Result<i32> {
144 Ok(self.template_length)
145 }
146
147 fn sequence(&self) -> Box<dyn sam::alignment::record::Sequence + '_> {
148 if self.bam_flags.is_unmapped() || self.cram_flags.sequence_is_missing() {
149 Box::new(Bases(self.sequence))
150 } else {
151 let (reference_sequence, alignment_start) = match self.reference_sequence.as_ref() {
152 Some(ReferenceSequence::Embedded {
153 reference_start,
154 sequence,
155 }) => {
156 let alignment_start = usize::from(self.alignment_start.unwrap());
157 let offset = usize::from(*reference_start);
158 let offset_alignment_start =
159 Position::new(alignment_start - offset + 1).unwrap();
160 (sequence.clone(), offset_alignment_start)
161 }
162 Some(ReferenceSequence::External { sequence, .. }) => {
163 (sequence.clone(), self.alignment_start.unwrap())
164 }
165 None => (fasta::record::Sequence::default(), Position::MIN),
166 };
167
168 Box::new(Sequence::new(
169 Some(reference_sequence),
170 self.substitution_matrix.clone(),
171 &self.features,
172 alignment_start,
173 self.read_length,
174 ))
175 }
176 }
177
178 fn quality_scores(&self) -> Box<dyn sam::alignment::record::QualityScores + '_> {
179 if self.bam_flags.is_unmapped() || self.cram_flags.quality_scores_are_stored_as_array() {
180 Box::new(Scores(self.quality_scores))
181 } else {
182 Box::new(QualityScores::new(&self.features, self.read_length))
183 }
184 }
185
186 fn data(&self) -> Box<dyn sam::alignment::record::Data + '_> {
187 if let Some(header) = self.header {
188 Box::new(Data::new(header, &self.data, self.read_group_id))
189 } else {
190 Box::new(sam::alignment::record_buf::Data::default())
191 }
192 }
193
194 fn alignment_span(&self) -> Option<io::Result<usize>> {
195 Some(Ok(self.alignment_span()))
196 }
197
198 fn alignment_end(&self) -> Option<io::Result<Position>> {
199 self.alignment_end().map(Ok)
200 }
201}
202
203struct Bases<'c>(&'c [u8]);
204
205impl sam::alignment::record::Sequence for Bases<'_> {
206 fn is_empty(&self) -> bool {
207 self.0.is_empty()
208 }
209
210 fn len(&self) -> usize {
211 self.0.len()
212 }
213
214 fn get(&self, i: usize) -> Option<u8> {
215 self.0.get(i).copied()
216 }
217
218 fn split_at_checked(
219 &self,
220 _mid: usize,
221 ) -> Option<(
222 Box<dyn sam::alignment::record::Sequence + '_>,
223 Box<dyn sam::alignment::record::Sequence + '_>,
224 )> {
225 todo!()
226 }
227
228 fn iter(&self) -> Box<dyn Iterator<Item = u8> + '_> {
229 Box::new(self.0.iter().copied())
230 }
231}
232
233struct Scores<'c>(&'c [u8]);
234
235impl sam::alignment::record::QualityScores for Scores<'_> {
236 fn is_empty(&self) -> bool {
237 self.0.is_empty()
238 }
239
240 fn len(&self) -> usize {
241 self.0.len()
242 }
243
244 fn iter(&self) -> Box<dyn Iterator<Item = io::Result<u8>> + '_> {
245 Box::new(self.0.iter().copied().map(Ok))
246 }
247}
248
249pub(crate) fn calculate_alignment_span(read_length: usize, features: &[Feature]) -> usize {
250 features
251 .iter()
252 .fold(read_length, |alignment_span, feature| match feature {
253 Feature::Insertion { bases, .. } => alignment_span - bases.len(),
254 Feature::InsertBase { .. } => alignment_span - 1,
255 Feature::Deletion { len, .. } => alignment_span + len,
256 Feature::ReferenceSkip { len, .. } => alignment_span + len,
257 Feature::SoftClip { bases, .. } => alignment_span - bases.len(),
258 _ => alignment_span,
259 })
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_alignment_span() -> Result<(), noodles_core::position::TryFromIntError> {
        // Shorthand for building a feature position.
        let at = |n: usize| Position::try_from(n);

        // No features: the span is the read length.
        assert_eq!(calculate_alignment_span(4, &[]), 4);

        // Hard clips do not affect the span.
        let hard_clip_only = [Feature::HardClip {
            position: at(1)?,
            len: 4,
        }];
        assert_eq!(calculate_alignment_span(4, &hard_clip_only), 4);

        // 20 - 2 (insertion) - 1 (insert base) + 3 (deletion)
        //    + 5 (reference skip) - 4 (soft clip) = 21
        let mixed = [
            Feature::Insertion {
                position: at(1)?,
                bases: b"AC",
            },
            Feature::InsertBase {
                position: at(4)?,
                base: b'G',
            },
            Feature::Deletion {
                position: at(6)?,
                len: 3,
            },
            Feature::ReferenceSkip {
                position: at(10)?,
                len: 5,
            },
            Feature::SoftClip {
                position: at(16)?,
                bases: b"ACGT",
            },
        ];
        assert_eq!(calculate_alignment_span(20, &mixed), 21);

        Ok(())
    }
}