noodles_sam/record/
fields.rs

1//! SAM record field.
2
3mod bounds;
4
5use std::io;
6
7use bstr::{BStr, ByteSlice};
8use lexical_core::FromLexical;
9use noodles_core::Position;
10
11pub(crate) use self::bounds::Bounds;
12use super::{Cigar, Data, QualityScores, Sequence};
13use crate::Header;
14
15const MISSING: &[u8] = b"*";
16
17#[derive(Clone, Eq, PartialEq)]
18pub(crate) struct Fields {
19    pub(crate) buf: Vec<u8>,
20    pub(crate) bounds: Bounds,
21}
22
23impl Fields {
24    pub fn name(&self) -> Option<&BStr> {
25        match &self.buf[self.bounds.name_range()] {
26            MISSING => None,
27            buf => Some(buf.as_bstr()),
28        }
29    }
30
31    pub fn flags(&self) -> io::Result<u16> {
32        let src = &self.buf[self.bounds.flags_range()];
33        lexical_core::parse(src).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
34    }
35
36    pub fn reference_sequence_id(&self, header: &Header) -> Option<io::Result<usize>> {
37        self.reference_sequence_name()
38            .map(|reference_sequence_name| {
39                get_reference_sequence_id(header, reference_sequence_name.as_ref())
40            })
41    }
42
43    pub fn reference_sequence_name(&self) -> Option<&BStr> {
44        match &self.buf[self.bounds.reference_sequence_name_range()] {
45            MISSING => None,
46            buf => Some(buf.as_bstr()),
47        }
48    }
49
50    pub fn alignment_start(&self) -> Option<io::Result<Position>> {
51        const MISSING: &[u8] = b"0";
52
53        match &self.buf[self.bounds.alignment_start_range()] {
54            MISSING => None,
55            buf => Some(parse_position(buf)),
56        }
57    }
58
59    pub fn mapping_quality(&self) -> Option<io::Result<u8>> {
60        const MISSING: &[u8] = b"255";
61
62        match &self.buf[self.bounds.mapping_quality_range()] {
63            MISSING => None,
64            buf => Some(parse_int(buf)),
65        }
66    }
67
68    pub fn cigar(&self) -> Cigar<'_> {
69        match &self.buf[self.bounds.cigar_range()] {
70            MISSING => Cigar::new(b""),
71            buf => Cigar::new(buf),
72        }
73    }
74
75    pub fn mate_reference_sequence_id(&self, header: &Header) -> Option<io::Result<usize>> {
76        self.mate_reference_sequence_name()
77            .map(|mate_reference_sequence_name| {
78                get_reference_sequence_id(header, mate_reference_sequence_name.as_ref())
79            })
80    }
81
82    pub fn mate_reference_sequence_name(&self) -> Option<&BStr> {
83        const EQ: &[u8] = b"=";
84
85        match &self.buf[self.bounds.mate_reference_sequence_name_range()] {
86            MISSING => None,
87            EQ => self.reference_sequence_name(),
88            buf => Some(buf.as_bstr()),
89        }
90    }
91
92    pub fn mate_alignment_start(&self) -> Option<io::Result<Position>> {
93        const MISSING: &[u8] = b"0";
94
95        match &self.buf[self.bounds.mate_alignment_start_range()] {
96            MISSING => None,
97            buf => Some(parse_position(buf)),
98        }
99    }
100
101    pub fn template_length(&self) -> io::Result<i32> {
102        let buf = &self.buf[self.bounds.template_length_range()];
103        parse_int(buf)
104    }
105
106    pub fn sequence(&self) -> Sequence<'_> {
107        let buf = match &self.buf[self.bounds.sequence_range()] {
108            MISSING => b"",
109            buf => buf,
110        };
111
112        Sequence::new(buf)
113    }
114
115    pub fn quality_scores(&self) -> QualityScores<'_> {
116        let buf = match &self.buf[self.bounds.quality_scores_range()] {
117            MISSING => b"",
118            buf => buf,
119        };
120
121        QualityScores::new(buf)
122    }
123
124    pub fn data(&self) -> Data<'_> {
125        let buf = &self.buf[self.bounds.data_range()];
126        Data::new(buf)
127    }
128}
129
130impl Default for Fields {
131    fn default() -> Self {
132        Self {
133            buf: Vec::from(*b"*4*0255**00**"),
134            bounds: Bounds::default(),
135        }
136    }
137}
138
139fn get_reference_sequence_id(header: &Header, reference_sequence_name: &[u8]) -> io::Result<usize> {
140    header
141        .reference_sequences()
142        .get_index_of(reference_sequence_name)
143        .ok_or_else(|| {
144            io::Error::new(
145                io::ErrorKind::InvalidData,
146                "invalid reference sequence name",
147            )
148        })
149}
150
151fn parse_position(buf: &[u8]) -> io::Result<Position> {
152    parse_int::<usize>(buf).and_then(|n| {
153        Position::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
154    })
155}
156
157fn parse_int<N: FromLexical>(buf: &[u8]) -> io::Result<N> {
158    lexical_core::parse(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
159}