mod bounds;
use std::{io, mem};
use self::bounds::Bounds;
use super::{Cigar, Data, Name, QualityScores, Sequence};
/// The raw encoded bytes of a record together with the offsets of its variable-length fields.
#[derive(Clone, Eq, PartialEq)]
pub(crate) struct Fields {
    /// Raw record bytes; every accessor slices directly into this buffer.
    pub(crate) buf: Vec<u8>,
    /// End offsets (into `buf`) of the name, CIGAR, sequence and quality scores fields.
    pub(crate) bounds: Bounds,
}
impl Fields {
/// Returns the reference sequence ID, or `None` when the stored value is `-1`.
///
/// The value is read as a little-endian `i32` from the fixed
/// `REFERENCE_SEQUENCE_ID_RANGE` of the buffer.
pub(super) fn reference_sequence_id(&self) -> Option<i32> {
    let raw = <[u8; 4]>::try_from(&self.buf[bounds::REFERENCE_SEQUENCE_ID_RANGE]).unwrap();
    get_reference_sequence_id(raw)
}
/// Returns the raw alignment start, or `None` when the stored value is `-1`.
///
/// The value is read as a little-endian `i32` from the fixed
/// `ALIGNMENT_START_RANGE` of the buffer and returned as stored.
pub(super) fn alignment_start(&self) -> Option<i32> {
    let raw = <[u8; 4]>::try_from(&self.buf[bounds::ALIGNMENT_START_RANGE]).unwrap();
    get_position(raw)
}
/// Returns the mapping quality, or `None` when the stored byte is the missing marker (255).
pub(super) fn mapping_quality(&self) -> Option<u8> {
    const MISSING: u8 = 255;

    let raw = self.buf[bounds::MAPPING_QUALITY_INDEX];
    Some(raw).filter(|&quality| quality != MISSING)
}
/// Returns the flags, read as a little-endian `u16` from `FLAGS_RANGE`.
pub(super) fn flags(&self) -> u16 {
    let raw = <[u8; 2]>::try_from(&self.buf[bounds::FLAGS_RANGE]).unwrap();
    u16::from_le_bytes(raw)
}
/// Returns the mate's reference sequence ID, or `None` when the stored value is `-1`.
///
/// The value is read as a little-endian `i32` from the fixed
/// `MATE_REFERENCE_SEQUENCE_ID_RANGE` of the buffer.
pub(super) fn mate_reference_sequence_id(&self) -> Option<i32> {
    let raw = <[u8; 4]>::try_from(&self.buf[bounds::MATE_REFERENCE_SEQUENCE_ID_RANGE]).unwrap();
    get_reference_sequence_id(raw)
}
/// Returns the mate's raw alignment start, or `None` when the stored value is `-1`.
///
/// The value is read as a little-endian `i32` from the fixed
/// `MATE_ALIGNMENT_START_RANGE` of the buffer and returned as stored.
pub(super) fn mate_alignment_start(&self) -> Option<i32> {
    let raw = <[u8; 4]>::try_from(&self.buf[bounds::MATE_ALIGNMENT_START_RANGE]).unwrap();
    get_position(raw)
}
/// Returns the template length, read as a little-endian `i32` from `TEMPLATE_LENGTH_RANGE`.
pub(super) fn template_length(&self) -> i32 {
    let raw = <[u8; 4]>::try_from(&self.buf[bounds::TEMPLATE_LENGTH_RANGE]).unwrap();
    i32::from_le_bytes(raw)
}
/// Returns the record name, or `None` when the name bytes are exactly `*` followed by NUL.
///
/// Any other content is handed to `Name::new` as the raw bytes of the name range.
pub(super) fn name(&self) -> Option<Name<'_>> {
    const MISSING: &[u8] = &[b'*', 0x00];

    let raw = &self.buf[self.bounds.name_range()];

    if raw == MISSING {
        None
    } else {
        Some(Name::new(raw))
    }
}
/// Returns the CIGAR operations of the record.
///
/// Normally these are the bytes of the CIGAR range. When that range holds exactly two
/// operations — a soft clip whose length equals the sequence length followed by a skip —
/// the data field is consulted via `get_raw_cigar`, and if it yields a raw CIGAR, that one
/// is returned instead. Any failure or absence there falls back to the inline bytes.
pub(super) fn cigar(&self) -> Cigar<'_> {
    use super::data::get_raw_cigar;

    const SKIP: u8 = 3;
    const SOFT_CLIP: u8 = 4;
    const OP_SIZE: usize = mem::size_of::<u32>();

    // Splits an encoded operation into its kind (low 4 bits) and length (remaining bits).
    fn split_op(raw: &[u8]) -> (u8, usize) {
        let word = u32::from_le_bytes(raw.try_into().unwrap());
        let kind = (word & 0x0f) as u8;
        let len = usize::try_from(word >> 4).unwrap();
        (kind, len)
    }

    let inline = &self.buf[self.bounds.cigar_range()];

    // Only a two-operation CIGAR can be the placeholder pattern.
    if inline.len() != 2 * OP_SIZE {
        return Cigar::new(inline);
    }

    let base_count = self.sequence().len();
    let (first, second) = inline.split_at(OP_SIZE);
    let (first_kind, first_len) = split_op(first);
    let (second_kind, _) = split_op(second);

    let is_placeholder =
        first_kind == SOFT_CLIP && first_len == base_count && second_kind == SKIP;

    if !is_placeholder {
        return Cigar::new(inline);
    }

    let mut data_src = &self.buf[self.bounds.data_range()];

    match get_raw_cigar(&mut data_src) {
        Ok(Some(raw)) => Cigar::new(raw),
        _ => Cigar::new(inline),
    }
}
/// Returns the sequence of the record.
///
/// The base count passed to `Sequence::new` is the width of the quality scores range.
pub(super) fn sequence(&self) -> Sequence<'_> {
    let packed = &self.buf[self.bounds.sequence_range()];
    let scores = self.bounds.quality_scores_range();
    Sequence::new(packed, scores.end - scores.start)
}
/// Returns the quality scores, wrapping the bytes of the quality scores range.
pub(super) fn quality_scores(&self) -> QualityScores<'_> {
    QualityScores::new(&self.buf[self.bounds.quality_scores_range()])
}
/// Returns the data field, wrapping the bytes of the data range.
pub(super) fn data(&self) -> Data<'_> {
    Data::new(&self.buf[self.bounds.data_range()])
}
/// Recomputes `bounds` from the current contents of `buf`.
///
/// # Errors
///
/// Returns whatever error the free function `index` reports for the buffer.
pub(crate) fn index(&mut self) -> io::Result<()> {
    index(self.buf.as_slice(), &mut self.bounds)
}
}
impl Default for Fields {
    /// Builds the fields of an empty record: 32 bytes of fixed-length fields followed by the
    /// missing name marker (`*` and NUL), with every bound set to the end of the buffer.
    fn default() -> Self {
        let buf = vec![
            // Fixed-length fields (32 bytes).
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0x02, 0xff, 0x48, 0x12, 0x00, 0x00, 0x04, 0x00,
            0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
            // Missing name.
            b'*', 0x00,
        ];

        let end = buf.len();

        Self {
            bounds: Bounds {
                name_end: end,
                cigar_end: end,
                sequence_end: end,
                quality_scores_end: end,
            },
            buf,
        }
    }
}
impl TryFrom<Vec<u8>> for Fields {
type Error = io::Error;
fn try_from(buf: Vec<u8>) -> Result<Self, Self::Error> {
let mut fields = Self {
buf,
bounds: Bounds {
name_end: 0,
cigar_end: 0,
sequence_end: 0,
quality_scores_end: 0,
},
};
fields.index()?;
Ok(fields)
}
}
/// Decodes a little-endian `i32` reference sequence ID, mapping `-1` (unmapped) to `None`.
fn get_reference_sequence_id(src: [u8; 4]) -> Option<i32> {
    const UNMAPPED: i32 = -1;

    Some(i32::from_le_bytes(src)).filter(|&id| id != UNMAPPED)
}
/// Decodes a little-endian `i32` position, mapping `-1` (missing) to `None`.
///
/// The decoded value is returned as stored; no further conversion is applied here.
fn get_position(src: [u8; 4]) -> Option<i32> {
    const MISSING: i32 = -1;

    Some(i32::from_le_bytes(src)).filter(|&position| position != MISSING)
}
/// Computes the end offsets of the variable-length fields of the record in `buf` and stores
/// them in `bounds`.
///
/// The name length, CIGAR operation count and read length are read from their fixed
/// positions in `buf`; each end offset is the previous one plus the encoded length of the
/// field (4 bytes per CIGAR operation, one byte per two bases for the sequence, one byte per
/// base for the quality scores).
///
/// All offset arithmetic is checked: on targets where `usize` is 32 bits wide, a large read
/// length could otherwise wrap around and produce bounds that pass the final length check
/// but are out of order or out of range.
///
/// # Errors
///
/// Returns `UnexpectedEof` when `buf` is shorter than the fixed-length fields or than the
/// computed end of the quality scores, and `InvalidData` when the read length does not fit
/// in `usize`. As before, `bounds` may have been partially updated when an error is returned.
fn index(buf: &[u8], bounds: &mut Bounds) -> io::Result<()> {
    const MIN_BUF_LENGTH: usize = bounds::TEMPLATE_LENGTH_RANGE.end;

    // An offset that does not fit in `usize` necessarily lies past the end of `buf`, so
    // overflow is reported the same way as any other truncated record.
    fn eof() -> io::Error {
        io::Error::from(io::ErrorKind::UnexpectedEof)
    }

    if buf.len() < MIN_BUF_LENGTH {
        return Err(eof());
    }

    let read_name_len = usize::from(buf[bounds::NAME_LENGTH_INDEX]);
    bounds.name_end = bounds::TEMPLATE_LENGTH_RANGE
        .end
        .checked_add(read_name_len)
        .ok_or_else(eof)?;

    let src = &buf[bounds::CIGAR_OP_COUNT_RANGE];
    let cigar_op_count = usize::from(u16::from_le_bytes(src.try_into().unwrap()));
    let cigar_len = cigar_op_count
        .checked_mul(mem::size_of::<u32>())
        .ok_or_else(eof)?;
    bounds.cigar_end = bounds.name_end.checked_add(cigar_len).ok_or_else(eof)?;

    let src = &buf[bounds::READ_LENGTH_RANGE];
    let base_count = usize::try_from(u32::from_le_bytes(src.try_into().unwrap()))
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

    // ceil(base_count / 2), written so that it cannot overflow when `base_count` is
    // `usize::MAX` (unlike `(base_count + 1) / 2`).
    let sequence_len = base_count / 2 + base_count % 2;
    bounds.sequence_end = bounds
        .cigar_end
        .checked_add(sequence_len)
        .ok_or_else(eof)?;
    bounds.quality_scores_end = bounds
        .sequence_end
        .checked_add(base_count)
        .ok_or_else(eof)?;

    if buf.len() < bounds.quality_scores_end {
        Err(eof())
    } else {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // A record with a missing name, a single CIGAR operation, four bases and no data.
    static DATA: &[u8] = &[
        // fixed-length fields (0..32)
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0x02, 0xff, 0x48, 0x12, 0x01, 0x00, 0x04, 0x00,
        0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
        // name (32..34)
        b'*', 0x00,
        // CIGAR (34..38)
        0x40, 0x00, 0x00, 0x00,
        // sequence (38..40)
        0x12, 0x48,
        // quality scores (40..44)
        b'N', b'D', b'L', b'S',
    ];

    #[test]
    fn test_cigar() -> io::Result<()> {
        let fields = Fields::try_from(DATA.to_vec())?;

        let actual = fields.cigar();
        assert_eq!(actual.as_ref(), &DATA[34..38]);

        Ok(())
    }

    #[test]
    fn test_cigar_with_2_cigar_ops() -> io::Result<()> {
        // Two inline operations that do not form the soft clip + skip placeholder.
        let data = [
            // fixed-length fields (0..32)
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0x02, 0xff, 0x48, 0x12, 0x02, 0x00, 0x04, 0x00,
            0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
            // name (32..34)
            b'*', 0x00,
            // CIGAR (34..42)
            0x20, 0x00, 0x00, 0x00,
            0x20, 0x00, 0x00, 0x00,
            // sequence (42..44)
            0x12, 0x48,
            // quality scores (44..48)
            b'N', b'D', b'L', b'S',
        ];

        let fields = Fields::try_from(data.to_vec())?;

        let actual = fields.cigar();
        assert_eq!(actual.as_ref(), &data[34..42]);

        Ok(())
    }

    #[test]
    fn test_cigar_with_overflowing_cigar() -> io::Result<()> {
        // The inline CIGAR is a soft clip of the full read length followed by a skip, so the
        // operations are expected to come from the data field instead.
        let data = [
            // fixed-length fields (0..32)
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0x02, 0xff, 0x48, 0x12, 0x02, 0x00, 0x04, 0x00,
            0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
            // name (32..34)
            b'*', 0x00,
            // CIGAR (34..42)
            0x44, 0x00, 0x00, 0x00,
            0x23, 0x00, 0x00, 0x00,
            // sequence (42..44)
            0x12, 0x48,
            // quality scores (44..48)
            b'N', b'D', b'L', b'S',
            // data (48..60)
            b'C', b'G', b'B', b'I',
            0x01, 0x00, 0x00, 0x00,
            0x40, 0x00, 0x00, 0x00,
        ];

        let fields = Fields::try_from(data.to_vec())?;

        let actual = fields.cigar();
        assert_eq!(actual.as_ref(), &data[56..]);

        Ok(())
    }

    #[test]
    fn test_index() -> io::Result<()> {
        let mut fields = Fields::default();

        fields.buf.clear();
        fields.buf.extend_from_slice(DATA);
        fields.index()?;

        let bounds = &fields.bounds;
        assert_eq!(bounds.name_range(), 32..34);
        assert_eq!(bounds.cigar_range(), 34..38);
        assert_eq!(bounds.sequence_range(), 38..40);
        assert_eq!(bounds.quality_scores_range(), 40..44);
        assert_eq!(bounds.data_range(), 44..);

        Ok(())
    }
}