noodles_cram/
record.rs

1//! CRAM record and fields.
2
3mod cigar;
4mod data;
5pub(crate) mod feature;
6mod flags;
7mod mate_flags;
8mod quality_scores;
9mod sequence;
10
11use std::{borrow::Cow, io};
12
13use bstr::{BStr, ByteSlice};
14use noodles_core::Position;
15use noodles_fasta as fasta;
16use noodles_sam::{
17    self as sam,
18    alignment::{
19        record::{data::field::Tag, MappingQuality},
20        record_buf::data::field::Value,
21    },
22};
23
24use self::{cigar::Cigar, data::Data, quality_scores::QualityScores, sequence::Sequence};
25pub(crate) use self::{feature::Feature, flags::Flags, mate_flags::MateFlags};
26use crate::{
27    container::compression_header::preservation_map::SubstitutionMatrix,
28    io::reader::container::slice::ReferenceSequence,
29};
30
/// A CRAM record.
///
/// Borrowed fields are tied to the `'c` lifetime. The alignment-record view of this data is
/// provided by the `sam::alignment::Record` implementation in this module.
#[derive(Clone, Debug, PartialEq)]
pub struct Record<'c> {
    /// Record identifier.
    // NOTE(review): presumably a running counter assigned by the decoder — confirm.
    pub(crate) id: u64,
    /// SAM header handed to the data view. `data()` unwraps this, so it must be set before
    /// that accessor is called.
    pub(crate) header: Option<&'c sam::Header>,
    /// Reference bases (embedded or external) consulted by `sequence()` when `sequence` is
    /// empty.
    pub(crate) reference_sequence: Option<ReferenceSequence>,
    /// Substitution matrix passed to the feature-based sequence view.
    pub(crate) substitution_matrix: SubstitutionMatrix,
    /// SAM/BAM flags, returned as-is by `flags()`. The unmapped bit also selects how the CIGAR
    /// and quality scores views are built.
    pub(crate) bam_flags: sam::alignment::record::Flags,
    /// CRAM-specific flags; consulted by `quality_scores()`.
    pub(crate) cram_flags: Flags,
    /// Reference sequence ID, if any.
    pub(crate) reference_sequence_id: Option<usize>,
    /// Read length; the starting point for the alignment span calculation.
    pub(crate) read_length: usize,
    /// Alignment start position, if any.
    pub(crate) alignment_start: Option<Position>,
    /// Read group ID, forwarded to the data view.
    pub(crate) read_group_id: Option<usize>,
    /// Read name, either borrowed or owned.
    pub(crate) name: Option<Cow<'c, [u8]>>,
    /// Mate flags.
    // NOTE(review): presumably only meaningful for records with an attached mate — confirm.
    pub(crate) mate_flags: MateFlags,
    /// Mate reference sequence ID, if any.
    pub(crate) mate_reference_sequence_id: Option<usize>,
    /// Mate alignment start position, if any.
    pub(crate) mate_alignment_start: Option<Position>,
    /// Template length, returned as-is by `template_length()`.
    pub(crate) template_length: i32,
    /// Distance to the mate.
    // NOTE(review): presumably the number of records to skip to reach the downstream mate —
    // confirm against the decoder.
    pub(crate) mate_distance: Option<usize>,
    /// Tag-value pairs wrapped by the data view.
    pub(crate) data: Vec<(Tag, Value)>,
    /// Bases stored directly on the record. When empty, `sequence()` builds a view from the
    /// read features and the reference sequence instead.
    pub(crate) sequence: &'c [u8],
    /// Read features; these drive the CIGAR, alignment span, and feature-based sequence and
    /// quality scores views.
    pub(crate) features: Vec<Feature<'c>>,
    /// Mapping quality, if any.
    pub(crate) mapping_quality: Option<MappingQuality>,
    /// Quality scores stored directly on the record. `quality_scores()` returns them as-is when
    /// the record is unmapped or the CRAM flags say the scores are stored as an array.
    pub(crate) quality_scores: &'c [u8],
}
56
57impl Record<'_> {
58    fn alignment_span(&self) -> usize {
59        calculate_alignment_span(self.read_length, &self.features)
60    }
61
62    pub(crate) fn alignment_end(&self) -> Option<Position> {
63        self.alignment_start.and_then(|alignment_start| {
64            let end = usize::from(alignment_start) + self.alignment_span() - 1;
65            Position::new(end)
66        })
67    }
68}
69
70impl Default for Record<'_> {
71    fn default() -> Self {
72        Self {
73            id: 0,
74            header: None,
75            reference_sequence: None,
76            substitution_matrix: SubstitutionMatrix::default(),
77            bam_flags: sam::alignment::record::Flags::UNMAPPED,
78            cram_flags: Flags::default(),
79            reference_sequence_id: None,
80            read_length: 0,
81            alignment_start: None,
82            read_group_id: None,
83            name: None,
84            mate_flags: MateFlags::default(),
85            mate_reference_sequence_id: None,
86            mate_alignment_start: None,
87            template_length: 0,
88            mate_distance: None,
89            data: Vec::new(),
90            sequence: &[],
91            features: Vec::new(),
92            mapping_quality: None,
93            quality_scores: &[],
94        }
95    }
96}
97
98impl sam::alignment::Record for Record<'_> {
99    fn name(&self) -> Option<&BStr> {
100        self.name.as_deref().map(|name| name.as_bstr())
101    }
102
103    fn flags(&self) -> io::Result<sam::alignment::record::Flags> {
104        Ok(self.bam_flags)
105    }
106
107    fn reference_sequence_id<'r, 'h: 'r>(
108        &'r self,
109        _: &'h sam::Header,
110    ) -> Option<io::Result<usize>> {
111        self.reference_sequence_id.map(Ok)
112    }
113
114    fn alignment_start(&self) -> Option<io::Result<Position>> {
115        self.alignment_start.map(Ok)
116    }
117
118    fn mapping_quality(&self) -> Option<io::Result<MappingQuality>> {
119        self.mapping_quality.map(Ok)
120    }
121
122    fn cigar(&self) -> Box<dyn sam::alignment::record::Cigar + '_> {
123        Box::new(Cigar::new(
124            &self.features,
125            self.bam_flags.is_unmapped(),
126            self.read_length,
127        ))
128    }
129
130    fn mate_reference_sequence_id<'r, 'h: 'r>(
131        &'r self,
132        _: &'h sam::Header,
133    ) -> Option<io::Result<usize>> {
134        self.mate_reference_sequence_id.map(Ok)
135    }
136
137    fn mate_alignment_start(&self) -> Option<io::Result<Position>> {
138        self.mate_alignment_start.map(Ok)
139    }
140
141    fn template_length(&self) -> io::Result<i32> {
142        Ok(self.template_length)
143    }
144
145    fn sequence(&self) -> Box<dyn sam::alignment::record::Sequence + '_> {
146        if self.sequence.is_empty() {
147            let (reference_sequence, alignment_start) = match self.reference_sequence.as_ref() {
148                Some(ReferenceSequence::Embedded {
149                    reference_start,
150                    sequence,
151                }) => {
152                    let alignment_start = usize::from(self.alignment_start.unwrap());
153                    let offset = usize::from(*reference_start);
154                    let offset_alignment_start =
155                        Position::new(alignment_start - offset + 1).unwrap();
156                    (sequence.clone(), offset_alignment_start)
157                }
158                Some(ReferenceSequence::External { sequence, .. }) => {
159                    (sequence.clone(), self.alignment_start.unwrap())
160                }
161                None => (fasta::record::Sequence::default(), Position::MIN),
162            };
163
164            Box::new(Sequence::new(
165                Some(reference_sequence),
166                self.substitution_matrix.clone(),
167                &self.features,
168                alignment_start,
169                self.read_length,
170            ))
171        } else {
172            Box::new(Bases(self.sequence))
173        }
174    }
175
176    fn quality_scores(&self) -> Box<dyn sam::alignment::record::QualityScores + '_> {
177        if self.bam_flags.is_unmapped() || self.cram_flags.quality_scores_are_stored_as_array() {
178            Box::new(Scores(self.quality_scores))
179        } else {
180            Box::new(QualityScores::new(&self.features, self.read_length))
181        }
182    }
183
184    fn data(&self) -> Box<dyn sam::alignment::record::Data + '_> {
185        Box::new(Data::new(
186            self.header.unwrap(),
187            &self.data,
188            self.read_group_id,
189        ))
190    }
191
192    fn alignment_span(&self) -> Option<io::Result<usize>> {
193        Some(Ok(self.alignment_span()))
194    }
195
196    fn alignment_end(&self) -> Option<io::Result<Position>> {
197        self.alignment_end().map(Ok)
198    }
199}
200
201struct Bases<'c>(&'c [u8]);
202
203impl sam::alignment::record::Sequence for Bases<'_> {
204    fn is_empty(&self) -> bool {
205        self.0.is_empty()
206    }
207
208    fn len(&self) -> usize {
209        self.0.len()
210    }
211
212    fn get(&self, i: usize) -> Option<u8> {
213        self.0.get(i).copied()
214    }
215
216    fn split_at_checked(
217        &self,
218        _mid: usize,
219    ) -> Option<(
220        Box<dyn sam::alignment::record::Sequence + '_>,
221        Box<dyn sam::alignment::record::Sequence + '_>,
222    )> {
223        todo!()
224    }
225
226    fn iter(&self) -> Box<dyn Iterator<Item = u8> + '_> {
227        Box::new(self.0.iter().copied())
228    }
229}
230
231struct Scores<'c>(&'c [u8]);
232
233impl sam::alignment::record::QualityScores for Scores<'_> {
234    fn is_empty(&self) -> bool {
235        self.0.is_empty()
236    }
237
238    fn len(&self) -> usize {
239        self.0.len()
240    }
241
242    fn iter(&self) -> Box<dyn Iterator<Item = io::Result<u8>> + '_> {
243        Box::new(self.0.iter().copied().map(Ok))
244    }
245}
246
247pub(crate) fn calculate_alignment_span(read_length: usize, features: &[Feature]) -> usize {
248    features
249        .iter()
250        .fold(read_length, |alignment_span, feature| match feature {
251            Feature::Insertion { bases, .. } => alignment_span - bases.len(),
252            Feature::InsertBase { .. } => alignment_span - 1,
253            Feature::Deletion { len, .. } => alignment_span + len,
254            Feature::ReferenceSkip { len, .. } => alignment_span + len,
255            Feature::SoftClip { bases, .. } => alignment_span - bases.len(),
256            _ => alignment_span,
257        })
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_alignment_span() -> Result<(), noodles_core::position::TryFromIntError> {
        // Without features, the span is the read length.
        let no_features = [];
        assert_eq!(calculate_alignment_span(4, &no_features), 4);

        // A hard clip does not affect the span.
        let hard_clipped = [Feature::HardClip {
            position: Position::try_from(1)?,
            len: 4,
        }];
        assert_eq!(calculate_alignment_span(4, &hard_clipped), 4);

        // 20 - 2 (insertion) - 1 (insert base) + 3 (deletion) + 5 (reference skip)
        //    - 4 (soft clip) = 21
        let mixed = [
            Feature::Insertion {
                position: Position::try_from(1)?,
                bases: b"AC",
            },
            Feature::InsertBase {
                position: Position::try_from(4)?,
                base: b'G',
            },
            Feature::Deletion {
                position: Position::try_from(6)?,
                len: 3,
            },
            Feature::ReferenceSkip {
                position: Position::try_from(10)?,
                len: 5,
            },
            Feature::SoftClip {
                position: Position::try_from(16)?,
                bases: b"ACGT",
            },
        ];
        assert_eq!(calculate_alignment_span(20, &mixed), 21);

        Ok(())
    }
}