noodles_cram/
record.rs

1//! CRAM record and fields.
2
3mod cigar;
4pub(crate) mod data;
5pub(crate) mod feature;
6mod flags;
7mod mate_flags;
8mod quality_scores;
9mod sequence;
10
11use std::{borrow::Cow, io};
12
13use bstr::{BStr, ByteSlice};
14use noodles_core::Position;
15use noodles_fasta as fasta;
16use noodles_sam::{
17    self as sam,
18    alignment::record::{data::field::Tag, MappingQuality},
19};
20
21use self::{
22    cigar::Cigar,
23    data::{field::Value, Data},
24    quality_scores::QualityScores,
25    sequence::Sequence,
26};
27pub(crate) use self::{feature::Feature, flags::Flags, mate_flags::MateFlags};
28use crate::{
29    container::compression_header::preservation_map::SubstitutionMatrix,
30    io::reader::container::slice::ReferenceSequence,
31};
32
33/// A CRAM record.
34#[derive(Clone, Debug, PartialEq)]
35pub struct Record<'c> {
36    pub(crate) id: u64,
37    pub(crate) header: Option<&'c sam::Header>,
38    pub(crate) reference_sequence: Option<ReferenceSequence>,
39    pub(crate) substitution_matrix: SubstitutionMatrix,
40    pub(crate) bam_flags: sam::alignment::record::Flags,
41    pub(crate) cram_flags: Flags,
42    pub(crate) reference_sequence_id: Option<usize>,
43    pub(crate) read_length: usize,
44    pub(crate) alignment_start: Option<Position>,
45    pub(crate) read_group_id: Option<usize>,
46    pub(crate) name: Option<Cow<'c, [u8]>>,
47    pub(crate) mate_flags: MateFlags,
48    pub(crate) mate_reference_sequence_id: Option<usize>,
49    pub(crate) mate_alignment_start: Option<Position>,
50    pub(crate) template_length: i32,
51    pub(crate) mate_distance: Option<usize>,
52    pub(crate) data: Vec<(Tag, Value<'c>)>,
53    pub(crate) sequence: &'c [u8],
54    pub(crate) features: Vec<Feature<'c>>,
55    pub(crate) mapping_quality: Option<MappingQuality>,
56    pub(crate) quality_scores: &'c [u8],
57}
58
59impl Record<'_> {
60    fn alignment_span(&self) -> usize {
61        calculate_alignment_span(self.read_length, &self.features)
62    }
63
64    pub(crate) fn alignment_end(&self) -> Option<Position> {
65        self.alignment_start.and_then(|alignment_start| {
66            let end = usize::from(alignment_start) + self.alignment_span() - 1;
67            Position::new(end)
68        })
69    }
70}
71
72impl Default for Record<'_> {
73    fn default() -> Self {
74        Self {
75            id: 0,
76            header: None,
77            reference_sequence: None,
78            substitution_matrix: SubstitutionMatrix::default(),
79            bam_flags: sam::alignment::record::Flags::UNMAPPED,
80            cram_flags: Flags::default(),
81            reference_sequence_id: None,
82            read_length: 0,
83            alignment_start: None,
84            read_group_id: None,
85            name: None,
86            mate_flags: MateFlags::default(),
87            mate_reference_sequence_id: None,
88            mate_alignment_start: None,
89            template_length: 0,
90            mate_distance: None,
91            data: Vec::new(),
92            sequence: &[],
93            features: Vec::new(),
94            mapping_quality: None,
95            quality_scores: &[],
96        }
97    }
98}
99
100impl sam::alignment::Record for Record<'_> {
101    fn name(&self) -> Option<&BStr> {
102        self.name.as_deref().map(|name| name.as_bstr())
103    }
104
105    fn flags(&self) -> io::Result<sam::alignment::record::Flags> {
106        Ok(self.bam_flags)
107    }
108
109    fn reference_sequence_id<'r, 'h: 'r>(
110        &'r self,
111        _: &'h sam::Header,
112    ) -> Option<io::Result<usize>> {
113        self.reference_sequence_id.map(Ok)
114    }
115
116    fn alignment_start(&self) -> Option<io::Result<Position>> {
117        self.alignment_start.map(Ok)
118    }
119
120    fn mapping_quality(&self) -> Option<io::Result<MappingQuality>> {
121        self.mapping_quality.map(Ok)
122    }
123
124    fn cigar(&self) -> Box<dyn sam::alignment::record::Cigar + '_> {
125        Box::new(Cigar::new(
126            &self.features,
127            self.bam_flags.is_unmapped(),
128            self.read_length,
129        ))
130    }
131
132    fn mate_reference_sequence_id<'r, 'h: 'r>(
133        &'r self,
134        _: &'h sam::Header,
135    ) -> Option<io::Result<usize>> {
136        self.mate_reference_sequence_id.map(Ok)
137    }
138
139    fn mate_alignment_start(&self) -> Option<io::Result<Position>> {
140        self.mate_alignment_start.map(Ok)
141    }
142
143    fn template_length(&self) -> io::Result<i32> {
144        Ok(self.template_length)
145    }
146
147    fn sequence(&self) -> Box<dyn sam::alignment::record::Sequence + '_> {
148        if self.bam_flags.is_unmapped() || self.cram_flags.sequence_is_missing() {
149            Box::new(Bases(self.sequence))
150        } else {
151            let (reference_sequence, alignment_start) = match self.reference_sequence.as_ref() {
152                Some(ReferenceSequence::Embedded {
153                    reference_start,
154                    sequence,
155                }) => {
156                    let alignment_start = usize::from(self.alignment_start.unwrap());
157                    let offset = usize::from(*reference_start);
158                    let offset_alignment_start =
159                        Position::new(alignment_start - offset + 1).unwrap();
160                    (sequence.clone(), offset_alignment_start)
161                }
162                Some(ReferenceSequence::External { sequence, .. }) => {
163                    (sequence.clone(), self.alignment_start.unwrap())
164                }
165                None => (fasta::record::Sequence::default(), Position::MIN),
166            };
167
168            Box::new(Sequence::new(
169                Some(reference_sequence),
170                self.substitution_matrix.clone(),
171                &self.features,
172                alignment_start,
173                self.read_length,
174            ))
175        }
176    }
177
178    fn quality_scores(&self) -> Box<dyn sam::alignment::record::QualityScores + '_> {
179        if self.bam_flags.is_unmapped() || self.cram_flags.quality_scores_are_stored_as_array() {
180            Box::new(Scores(self.quality_scores))
181        } else {
182            Box::new(QualityScores::new(&self.features, self.read_length))
183        }
184    }
185
186    fn data(&self) -> Box<dyn sam::alignment::record::Data + '_> {
187        if let Some(header) = self.header {
188            Box::new(Data::new(header, &self.data, self.read_group_id))
189        } else {
190            Box::new(sam::alignment::record_buf::Data::default())
191        }
192    }
193
194    fn alignment_span(&self) -> Option<io::Result<usize>> {
195        Some(Ok(self.alignment_span()))
196    }
197
198    fn alignment_end(&self) -> Option<io::Result<Position>> {
199        self.alignment_end().map(Ok)
200    }
201}
202
203struct Bases<'c>(&'c [u8]);
204
205impl sam::alignment::record::Sequence for Bases<'_> {
206    fn is_empty(&self) -> bool {
207        self.0.is_empty()
208    }
209
210    fn len(&self) -> usize {
211        self.0.len()
212    }
213
214    fn get(&self, i: usize) -> Option<u8> {
215        self.0.get(i).copied()
216    }
217
218    fn split_at_checked(
219        &self,
220        _mid: usize,
221    ) -> Option<(
222        Box<dyn sam::alignment::record::Sequence + '_>,
223        Box<dyn sam::alignment::record::Sequence + '_>,
224    )> {
225        todo!()
226    }
227
228    fn iter(&self) -> Box<dyn Iterator<Item = u8> + '_> {
229        Box::new(self.0.iter().copied())
230    }
231}
232
233struct Scores<'c>(&'c [u8]);
234
235impl sam::alignment::record::QualityScores for Scores<'_> {
236    fn is_empty(&self) -> bool {
237        self.0.is_empty()
238    }
239
240    fn len(&self) -> usize {
241        self.0.len()
242    }
243
244    fn iter(&self) -> Box<dyn Iterator<Item = io::Result<u8>> + '_> {
245        Box::new(self.0.iter().copied().map(Ok))
246    }
247}
248
249pub(crate) fn calculate_alignment_span(read_length: usize, features: &[Feature]) -> usize {
250    features
251        .iter()
252        .fold(read_length, |alignment_span, feature| match feature {
253            Feature::Insertion { bases, .. } => alignment_span - bases.len(),
254            Feature::InsertBase { .. } => alignment_span - 1,
255            Feature::Deletion { len, .. } => alignment_span + len,
256            Feature::ReferenceSkip { len, .. } => alignment_span + len,
257            Feature::SoftClip { bases, .. } => alignment_span - bases.len(),
258            _ => alignment_span,
259        })
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_alignment_span() -> Result<(), noodles_core::position::TryFromIntError> {
        let at = |n: usize| Position::try_from(n);

        // Without features, the span is the read length.
        let no_features = [];
        assert_eq!(calculate_alignment_span(4, &no_features), 4);

        // A hard clip does not contribute to the span.
        let hard_clip_only = [Feature::HardClip {
            position: at(1)?,
            len: 4,
        }];
        assert_eq!(calculate_alignment_span(4, &hard_clip_only), 4);

        // 20 - 2 (insertion) - 1 (inserted base) + 3 (deletion)
        //    + 5 (reference skip) - 4 (soft clip) = 21
        let mixed = [
            Feature::Insertion {
                position: at(1)?,
                bases: b"AC",
            },
            Feature::InsertBase {
                position: at(4)?,
                base: b'G',
            },
            Feature::Deletion {
                position: at(6)?,
                len: 3,
            },
            Feature::ReferenceSkip {
                position: at(10)?,
                len: 5,
            },
            Feature::SoftClip {
                position: at(16)?,
                bases: b"ACGT",
            },
        ];
        assert_eq!(calculate_alignment_span(20, &mixed), 21);

        Ok(())
    }
}
303}