noodles_cram/container/compression_header/data_series_encodings/
data_series.rs

1use std::{error, fmt};
2
3use crate::container::block;
4
5pub static STANDARD_DATA_SERIES: &[DataSeries; 28] = &[
6    DataSeries::BamFlags,
7    DataSeries::CramFlags,
8    DataSeries::ReferenceSequenceIds,
9    DataSeries::ReadLengths,
10    DataSeries::AlignmentStarts,
11    DataSeries::ReadGroupIds,
12    DataSeries::Names,
13    DataSeries::MateFlags,
14    DataSeries::MateReferenceSequenceIds,
15    DataSeries::MateAlignmentStarts,
16    DataSeries::TemplateLengths,
17    DataSeries::MateDistances,
18    DataSeries::TagSetIds,
19    DataSeries::FeatureCounts,
20    DataSeries::FeatureCodes,
21    DataSeries::FeaturePositionDeltas,
22    DataSeries::DeletionLengths,
23    DataSeries::StretchesOfBases,
24    DataSeries::StretchesOfQualityScores,
25    DataSeries::BaseSubstitutionCodes,
26    DataSeries::InsertionBases,
27    DataSeries::ReferenceSkipLengths,
28    DataSeries::PaddingLengths,
29    DataSeries::HardClipLengths,
30    DataSeries::SoftClipBases,
31    DataSeries::MappingQualities,
32    DataSeries::Bases,
33    DataSeries::QualityScores,
34];
35
/// A data series of a CRAM container compression header.
///
/// Each variant has a two-letter code, shown in backticks at the start of its description.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DataSeries {
    /// `BF`: BAM bit flags.
    BamFlags,
    /// `CF`: CRAM bit flags.
    CramFlags,
    /// `RI`: reference ID.
    ReferenceSequenceIds,
    /// `RL`: read lengths.
    ReadLengths,
    /// `AP`: in-seq positions.
    AlignmentStarts,
    /// `RG`: read groups.
    ReadGroupIds,
    /// `RN`: read names.
    Names,
    /// `MF`: next mate bit flags.
    MateFlags,
    /// `NS`: next fragment reference sequence ID.
    MateReferenceSequenceIds,
    /// `NP`: next mate alignment start.
    MateAlignmentStarts,
    /// `TS`: template size.
    TemplateLengths,
    /// `NF`: distance to next fragment.
    MateDistances,
    /// `TL`: tag IDs.
    TagSetIds,
    /// `FN`: number of read features.
    FeatureCounts,
    /// `FC`: read feature codes.
    FeatureCodes,
    /// `FP`: in-read positions.
    FeaturePositionDeltas,
    /// `DL`: deletion lengths.
    DeletionLengths,
    /// `BB`: stretches of bases.
    StretchesOfBases,
    /// `QQ`: stretches of quality scores.
    StretchesOfQualityScores,
    /// `BS`: base substitution codes.
    BaseSubstitutionCodes,
    /// `IN`: insertion.
    InsertionBases,
    /// `RS`: reference skip length.
    ReferenceSkipLengths,
    /// `PD`: padding.
    PaddingLengths,
    /// `HC`: hard clip.
    HardClipLengths,
    /// `SC`: soft clip.
    SoftClipBases,
    /// `MQ`: mapping qualities.
    MappingQualities,
    /// `BA`: bases.
    Bases,
    /// `QS`: quality scores.
    QualityScores,
    /// `TC`: read tag counts.
    ///
    /// Legacy data series from CRAM 1.0.
    ReservedTc,
    /// `TN`: read tag names and types.
    ///
    /// Legacy data series from CRAM 1.0.
    ReservedTn,
}
104
/// An error returned when a 2-byte array does not match any data series code.
///
/// The rejected bytes are stored and reported by the [`fmt::Display`] implementation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TryFromByteArrayError([u8; 2]);

impl error::Error for TryFromByteArrayError {}

impl fmt::Display for TryFromByteArrayError {
    /// Writes a fixed prefix followed by the rejected bytes in alternate hexadecimal debug form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("invalid data series: {:#x?}", self.0))
    }
}
115
116impl TryFrom<[u8; 2]> for DataSeries {
117    type Error = TryFromByteArrayError;
118
119    fn try_from(b: [u8; 2]) -> Result<Self, Self::Error> {
120        match b {
121            [b'B', b'F'] => Ok(Self::BamFlags),
122            [b'C', b'F'] => Ok(Self::CramFlags),
123            [b'R', b'I'] => Ok(Self::ReferenceSequenceIds),
124            [b'R', b'L'] => Ok(Self::ReadLengths),
125            [b'A', b'P'] => Ok(Self::AlignmentStarts),
126            [b'R', b'G'] => Ok(Self::ReadGroupIds),
127            [b'R', b'N'] => Ok(Self::Names),
128            [b'M', b'F'] => Ok(Self::MateFlags),
129            [b'N', b'S'] => Ok(Self::MateReferenceSequenceIds),
130            [b'N', b'P'] => Ok(Self::MateAlignmentStarts),
131            [b'T', b'S'] => Ok(Self::TemplateLengths),
132            [b'N', b'F'] => Ok(Self::MateDistances),
133            [b'T', b'L'] => Ok(Self::TagSetIds),
134            [b'F', b'N'] => Ok(Self::FeatureCounts),
135            [b'F', b'C'] => Ok(Self::FeatureCodes),
136            [b'F', b'P'] => Ok(Self::FeaturePositionDeltas),
137            [b'D', b'L'] => Ok(Self::DeletionLengths),
138            [b'B', b'B'] => Ok(Self::StretchesOfBases),
139            [b'Q', b'Q'] => Ok(Self::StretchesOfQualityScores),
140            [b'B', b'S'] => Ok(Self::BaseSubstitutionCodes),
141            [b'I', b'N'] => Ok(Self::InsertionBases),
142            [b'R', b'S'] => Ok(Self::ReferenceSkipLengths),
143            [b'P', b'D'] => Ok(Self::PaddingLengths),
144            [b'H', b'C'] => Ok(Self::HardClipLengths),
145            [b'S', b'C'] => Ok(Self::SoftClipBases),
146            [b'M', b'Q'] => Ok(Self::MappingQualities),
147            [b'B', b'A'] => Ok(Self::Bases),
148            [b'Q', b'S'] => Ok(Self::QualityScores),
149            [b'T', b'C'] => Ok(Self::ReservedTc),
150            [b'T', b'N'] => Ok(Self::ReservedTn),
151            _ => Err(TryFromByteArrayError(b)),
152        }
153    }
154}
155
156impl From<DataSeries> for [u8; 2] {
157    fn from(data_series: DataSeries) -> Self {
158        match data_series {
159            DataSeries::BamFlags => [b'B', b'F'],
160            DataSeries::CramFlags => [b'C', b'F'],
161            DataSeries::ReferenceSequenceIds => [b'R', b'I'],
162            DataSeries::ReadLengths => [b'R', b'L'],
163            DataSeries::AlignmentStarts => [b'A', b'P'],
164            DataSeries::ReadGroupIds => [b'R', b'G'],
165            DataSeries::Names => [b'R', b'N'],
166            DataSeries::MateFlags => [b'M', b'F'],
167            DataSeries::MateReferenceSequenceIds => [b'N', b'S'],
168            DataSeries::MateAlignmentStarts => [b'N', b'P'],
169            DataSeries::TemplateLengths => [b'T', b'S'],
170            DataSeries::MateDistances => [b'N', b'F'],
171            DataSeries::TagSetIds => [b'T', b'L'],
172            DataSeries::FeatureCounts => [b'F', b'N'],
173            DataSeries::FeatureCodes => [b'F', b'C'],
174            DataSeries::FeaturePositionDeltas => [b'F', b'P'],
175            DataSeries::DeletionLengths => [b'D', b'L'],
176            DataSeries::StretchesOfBases => [b'B', b'B'],
177            DataSeries::StretchesOfQualityScores => [b'Q', b'Q'],
178            DataSeries::BaseSubstitutionCodes => [b'B', b'S'],
179            DataSeries::InsertionBases => [b'I', b'N'],
180            DataSeries::ReferenceSkipLengths => [b'R', b'S'],
181            DataSeries::PaddingLengths => [b'P', b'D'],
182            DataSeries::HardClipLengths => [b'H', b'C'],
183            DataSeries::SoftClipBases => [b'S', b'C'],
184            DataSeries::MappingQualities => [b'M', b'Q'],
185            DataSeries::Bases => [b'B', b'A'],
186            DataSeries::QualityScores => [b'Q', b'S'],
187            DataSeries::ReservedTc => [b'T', b'C'],
188            DataSeries::ReservedTn => [b'T', b'N'],
189        }
190    }
191}
192
193impl From<DataSeries> for block::ContentId {
194    fn from(data_series: DataSeries) -> Self {
195        match data_series {
196            DataSeries::BamFlags => 1,
197            DataSeries::CramFlags => 2,
198            DataSeries::ReferenceSequenceIds => 3,
199            DataSeries::ReadLengths => 4,
200            DataSeries::AlignmentStarts => 5,
201            DataSeries::ReadGroupIds => 6,
202            DataSeries::Names => 7,
203            DataSeries::MateFlags => 8,
204            DataSeries::MateReferenceSequenceIds => 9,
205            DataSeries::MateAlignmentStarts => 10,
206            DataSeries::TemplateLengths => 11,
207            DataSeries::MateDistances => 12,
208            DataSeries::TagSetIds => 13,
209            DataSeries::FeatureCounts => 14,
210            DataSeries::FeatureCodes => 15,
211            DataSeries::FeaturePositionDeltas => 16,
212            DataSeries::DeletionLengths => 17,
213            DataSeries::StretchesOfBases => 18,
214            DataSeries::StretchesOfQualityScores => 19,
215            DataSeries::BaseSubstitutionCodes => 20,
216            DataSeries::InsertionBases => 21,
217            DataSeries::ReferenceSkipLengths => 22,
218            DataSeries::PaddingLengths => 23,
219            DataSeries::HardClipLengths => 24,
220            DataSeries::SoftClipBases => 25,
221            DataSeries::MappingQualities => 26,
222            DataSeries::Bases => 27,
223            DataSeries::QualityScores => 28,
224            DataSeries::ReservedTc => 29,
225            DataSeries::ReservedTn => 30,
226        }
227    }
228}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Every two-letter code paired with the variant it names.
    const CODES: [([u8; 2], DataSeries); 30] = [
        (*b"BF", DataSeries::BamFlags),
        (*b"CF", DataSeries::CramFlags),
        (*b"RI", DataSeries::ReferenceSequenceIds),
        (*b"RL", DataSeries::ReadLengths),
        (*b"AP", DataSeries::AlignmentStarts),
        (*b"RG", DataSeries::ReadGroupIds),
        (*b"RN", DataSeries::Names),
        (*b"MF", DataSeries::MateFlags),
        (*b"NS", DataSeries::MateReferenceSequenceIds),
        (*b"NP", DataSeries::MateAlignmentStarts),
        (*b"TS", DataSeries::TemplateLengths),
        (*b"NF", DataSeries::MateDistances),
        (*b"TL", DataSeries::TagSetIds),
        (*b"FN", DataSeries::FeatureCounts),
        (*b"FC", DataSeries::FeatureCodes),
        (*b"FP", DataSeries::FeaturePositionDeltas),
        (*b"DL", DataSeries::DeletionLengths),
        (*b"BB", DataSeries::StretchesOfBases),
        (*b"QQ", DataSeries::StretchesOfQualityScores),
        (*b"BS", DataSeries::BaseSubstitutionCodes),
        (*b"IN", DataSeries::InsertionBases),
        (*b"RS", DataSeries::ReferenceSkipLengths),
        (*b"PD", DataSeries::PaddingLengths),
        (*b"HC", DataSeries::HardClipLengths),
        (*b"SC", DataSeries::SoftClipBases),
        (*b"MQ", DataSeries::MappingQualities),
        (*b"BA", DataSeries::Bases),
        (*b"QS", DataSeries::QualityScores),
        (*b"TN", DataSeries::ReservedTn),
        (*b"TC", DataSeries::ReservedTc),
    ];

    #[test]
    fn test_try_from_byte_array_for_data_series() {
        for (code, expected) in CODES {
            assert_eq!(
                DataSeries::try_from(code),
                Ok(expected),
                "code: {:?}",
                code
            );
        }

        // A code that names no data series is handed back inside the error.
        let unknown = [b'X', b'Y'];
        assert_eq!(
            DataSeries::try_from(unknown),
            Err(TryFromByteArrayError(unknown))
        );
    }

    #[test]
    fn test_from_data_series_for_u8_array() {
        for (expected, data_series) in CODES {
            assert_eq!(
                <[u8; 2]>::from(data_series),
                expected,
                "data series: {:?}",
                data_series
            );
        }
    }
}