1use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17use std::u32;
18
19use byteorder::{LittleEndian, ReadBytesExt};
20
21use crate::bam::Error;
22use crate::bam::HeaderView;
23use crate::errors::Result;
24use crate::htslib;
25use crate::utils;
26#[cfg(feature = "serde_feature")]
27use serde::{self, Deserialize, Serialize};
28
29use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
30use bio_types::genome;
31use bio_types::sequence::SequenceRead;
32use bio_types::sequence::SequenceReadPairOrientation;
33use bio_types::strand::ReqStrand;
34
35macro_rules! flag {
37 ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
38 pub fn $get(&self) -> bool {
39 self.inner().core.flag & $bit != 0
40 }
41
42 pub fn $set(&mut self) {
43 self.inner_mut().core.flag |= $bit;
44 }
45
46 pub fn $unset(&mut self) {
47 self.inner_mut().core.flag &= !$bit;
48 }
49 };
50}
51
52pub struct Record {
54 pub inner: htslib::bam1_t,
55 own: bool,
56 cigar: Option<CigarStringView>,
57 header: Option<Rc<HeaderView>>,
58}
59
60unsafe impl Send for Record {}
61unsafe impl Sync for Record {}
62
63impl Clone for Record {
64 fn clone(&self) -> Self {
65 let mut copy = Record::new();
66 unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
67 copy
68 }
69}
70
71impl PartialEq for Record {
72 fn eq(&self, other: &Record) -> bool {
73 self.tid() == other.tid()
74 && self.pos() == other.pos()
75 && self.bin() == other.bin()
76 && self.mapq() == other.mapq()
77 && self.flags() == other.flags()
78 && self.mtid() == other.mtid()
79 && self.mpos() == other.mpos()
80 && self.insert_size() == other.insert_size()
81 && self.data() == other.data()
82 && self.inner().core.l_extranul == other.inner().core.l_extranul
83 }
84}
85
86impl Eq for Record {}
87
88impl fmt::Debug for Record {
89 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
90 fmt.write_fmt(format_args!(
91 "Record(tid: {}, pos: {})",
92 self.tid(),
93 self.pos()
94 ))
95 }
96}
97
98impl Default for Record {
99 fn default() -> Self {
100 Self::new()
101 }
102}
103
104#[inline]
105fn extranul_from_qname(qname: &[u8]) -> usize {
106 let qlen = qname.len() + 1;
107 if qlen % 4 != 0 {
108 4 - qlen % 4
109 } else {
110 0
111 }
112}
113
114impl Record {
115 pub fn new() -> Self {
117 let mut record = Record {
118 inner: unsafe { MaybeUninit::zeroed().assume_init() },
119 own: true,
120 cigar: None,
121 header: None,
122 };
123 record.set_qname(b"");
126 record.set_unmapped();
129 record.set_tid(-1);
130 record.set_pos(-1);
131 record.set_mpos(-1);
132 record.set_mtid(-1);
133 record
134 }
135
136 pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
137 Record {
138 inner: {
139 #[allow(clippy::uninit_assumed_init)]
140 let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
141 unsafe {
142 ::libc::memcpy(
143 &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
144 from as *const ::libc::c_void,
145 size_of::<htslib::bam1_t>(),
146 );
147 }
148 inner
149 },
150 own: false,
151 cigar: None,
152 header: None,
153 }
154 }
155
156 pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
158 let mut record = Self::new();
159
160 let mut sam_copy = Vec::with_capacity(sam.len() + 1);
161 sam_copy.extend(sam);
162 sam_copy.push(0);
163
164 let mut sam_string = htslib::kstring_t {
165 s: sam_copy.as_ptr() as *mut c_char,
166 l: sam_copy.len(),
167 m: sam_copy.len(),
168 };
169
170 let succ = unsafe {
171 htslib::sam_parse1(
172 &mut sam_string,
173 header_view.inner_ptr() as *mut htslib::bam_hdr_t,
174 record.inner_ptr_mut(),
175 )
176 };
177
178 if succ == 0 {
179 Ok(record)
180 } else {
181 Err(Error::BamParseSAM {
182 rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
183 })
184 }
185 }
186
187 pub fn set_header(&mut self, header: Rc<HeaderView>) {
188 self.header = Some(header);
189 }
190
191 pub(super) fn data(&self) -> &[u8] {
192 unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
193 }
194
195 #[inline]
196 pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
197 &mut self.inner
198 }
199
200 #[inline]
201 pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
202 &mut self.inner as *mut htslib::bam1_t
203 }
204
205 #[inline]
206 pub fn inner(&self) -> &htslib::bam1_t {
207 &self.inner
208 }
209
210 #[inline]
211 pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
212 &self.inner as *const htslib::bam1_t
213 }
214
215 pub fn tid(&self) -> i32 {
217 self.inner().core.tid
218 }
219
220 pub fn set_tid(&mut self, tid: i32) {
222 self.inner_mut().core.tid = tid;
223 }
224
225 pub fn pos(&self) -> i64 {
227 self.inner().core.pos
228 }
229
230 pub fn set_pos(&mut self, pos: i64) {
232 self.inner_mut().core.pos = pos;
233 }
234
235 pub fn bin(&self) -> u16 {
236 self.inner().core.bin
237 }
238
239 pub fn set_bin(&mut self, bin: u16) {
240 self.inner_mut().core.bin = bin;
241 }
242
243 pub fn mapq(&self) -> u8 {
245 self.inner().core.qual
246 }
247
248 pub fn set_mapq(&mut self, mapq: u8) {
250 self.inner_mut().core.qual = mapq;
251 }
252
253 pub fn strand(&self) -> ReqStrand {
255 let reverse = self.flags() & 0x10 != 0;
256 if reverse {
257 ReqStrand::Reverse
258 } else {
259 ReqStrand::Forward
260 }
261 }
262
263 pub fn flags(&self) -> u16 {
265 self.inner().core.flag
266 }
267
268 pub fn set_flags(&mut self, flags: u16) {
270 self.inner_mut().core.flag = flags;
271 }
272
273 pub fn unset_flags(&mut self) {
275 self.inner_mut().core.flag = 0;
276 }
277
278 pub fn mtid(&self) -> i32 {
280 self.inner().core.mtid
281 }
282
283 pub fn set_mtid(&mut self, mtid: i32) {
285 self.inner_mut().core.mtid = mtid;
286 }
287
288 pub fn mpos(&self) -> i64 {
290 self.inner().core.mpos
291 }
292
293 pub fn set_mpos(&mut self, mpos: i64) {
295 self.inner_mut().core.mpos = mpos;
296 }
297
298 pub fn insert_size(&self) -> i64 {
300 self.inner().core.isize_
301 }
302
303 pub fn set_insert_size(&mut self, insert_size: i64) {
305 self.inner_mut().core.isize_ = insert_size;
306 }
307
308 fn qname_capacity(&self) -> usize {
309 self.inner().core.l_qname as usize
310 }
311
312 fn qname_len(&self) -> usize {
313 self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
315 }
316
317 pub fn qname(&self) -> &[u8] {
319 &self.data()[..self.qname_len()]
320 }
321
322 pub fn set_data(&mut self, new_data: &[u8]) {
324 self.cigar = None;
325
326 self.inner_mut().l_data = new_data.len() as i32;
327 if (self.inner().m_data as i32) < self.inner().l_data {
328 let l_data = self.inner().l_data;
330 self.realloc_var_data(l_data as usize);
331 }
332
333 let data =
335 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
336 utils::copy_memory(new_data, data);
337 }
338
339 pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
347 assert!(qname.len() < 255);
348 assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
349
350 self.cigar = None;
351
352 let cigar_width = if let Some(cigar_string) = cigar {
353 cigar_string.len()
354 } else {
355 0
356 } * 4;
357 let q_len = qname.len() + 1;
358 let extranul = extranul_from_qname(qname);
359
360 let orig_aux_offset = self.qname_capacity()
361 + 4 * self.cigar_len()
362 + (self.seq_len() + 1) / 2
363 + self.seq_len();
364 let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
365 assert!(orig_aux_offset <= self.inner.l_data as usize);
366 let aux_len = self.inner.l_data as usize - orig_aux_offset;
367 self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
368 if (self.inner().m_data as i32) < self.inner().l_data {
369 let l_data = self.inner().l_data;
371 self.realloc_var_data(l_data as usize);
372 }
373
374 if aux_len > 0 && orig_aux_offset != new_aux_offset {
376 let data =
377 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
378 data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
379 }
380
381 let data =
382 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
383
384 utils::copy_memory(qname, data);
386 for i in 0..=extranul {
387 data[qname.len() + i] = b'\0';
388 }
389 let mut i = q_len + extranul;
390 self.inner_mut().core.l_qname = i as u16;
391 self.inner_mut().core.l_extranul = extranul as u8;
392
393 if let Some(cigar_string) = cigar {
395 let cigar_data = unsafe {
396 #[allow(clippy::cast_ptr_alignment)]
398 slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
399 };
400 for (i, c) in cigar_string.iter().enumerate() {
401 cigar_data[i] = c.encode();
402 }
403 self.inner_mut().core.n_cigar = cigar_string.len() as u32;
404 i += cigar_string.len() * 4;
405 } else {
406 self.inner_mut().core.n_cigar = 0;
407 };
408
409 {
411 for j in (0..seq.len()).step_by(2) {
412 data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
413 | (if j + 1 < seq.len() {
414 ENCODE_BASE[seq[j + 1] as usize]
415 } else {
416 0
417 });
418 }
419 self.inner_mut().core.l_qseq = seq.len() as i32;
420 i += (seq.len() + 1) / 2;
421 }
422
423 utils::copy_memory(qual, &mut data[i..]);
425 }
426
427 pub fn set_qname(&mut self, new_qname: &[u8]) {
429 assert!(new_qname.len() < 252);
431
432 let old_q_len = self.qname_capacity();
433 let extranul = extranul_from_qname(new_qname);
435 let new_q_len = new_qname.len() + 1 + extranul;
436
437 let other_len = self.inner_mut().l_data - old_q_len as i32;
439
440 if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
441 self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
442 } else if new_q_len > old_q_len {
443 self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
444
445 if (self.inner().m_data as i32) < self.inner().l_data {
447 let l_data = self.inner().l_data;
449 self.realloc_var_data(l_data as usize);
450 }
451 }
452
453 if new_q_len != old_q_len {
454 unsafe {
456 let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
457
458 ::libc::memmove(
459 data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
460 data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
461 other_len as usize,
462 );
463 }
464 }
465
466 let data =
468 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
469 utils::copy_memory(new_qname, data);
470 for i in 0..=extranul {
471 data[new_q_len - i - 1] = b'\0';
472 }
473 self.inner_mut().core.l_qname = new_q_len as u16;
474 self.inner_mut().core.l_extranul = extranul as u8;
475 }
476
477 fn realloc_var_data(&mut self, new_len: usize) {
478 let new_len = new_len as u32;
480 let new_request = new_len + 32 - (new_len % 32);
481
482 let ptr = unsafe {
483 ::libc::realloc(
484 self.inner().data as *mut ::libc::c_void,
485 new_request as usize,
486 ) as *mut u8
487 };
488
489 if ptr.is_null() {
490 panic!("ran out of memory in rust_htslib trying to realloc");
491 }
492
493 self.inner_mut().m_data = new_request;
496 self.inner_mut().data = ptr;
497
498 self.own = true;
500 }
501
502 pub fn cigar_len(&self) -> usize {
503 self.inner().core.n_cigar as usize
504 }
505
506 pub fn raw_cigar(&self) -> &[u32] {
509 #[allow(clippy::cast_ptr_alignment)]
511 unsafe {
512 slice::from_raw_parts(
513 self.data()[self.qname_capacity()..].as_ptr() as *const u32,
514 self.cigar_len(),
515 )
516 }
517 }
518
519 pub fn cigar(&self) -> CigarStringView {
521 match self.cigar {
522 Some(ref c) => c.clone(),
523 None => self.unpack_cigar(),
524 }
525 }
526
527 pub fn cigar_cached(&self) -> Option<&CigarStringView> {
529 self.cigar.as_ref()
530 }
531
532 pub fn cache_cigar(&mut self) {
534 self.cigar = Some(self.unpack_cigar())
535 }
536
537 fn unpack_cigar(&self) -> CigarStringView {
539 CigarString(
540 self.raw_cigar()
541 .iter()
542 .map(|&c| {
543 let len = c >> 4;
544 match c & 0b1111 {
545 0 => Cigar::Match(len),
546 1 => Cigar::Ins(len),
547 2 => Cigar::Del(len),
548 3 => Cigar::RefSkip(len),
549 4 => Cigar::SoftClip(len),
550 5 => Cigar::HardClip(len),
551 6 => Cigar::Pad(len),
552 7 => Cigar::Equal(len),
553 8 => Cigar::Diff(len),
554 _ => panic!("Unexpected cigar operation"),
555 }
556 })
557 .collect(),
558 )
559 .into_view(self.pos())
560 }
561
562 pub fn seq_len(&self) -> usize {
563 self.inner().core.l_qseq as usize
564 }
565
566 fn seq_data(&self) -> &[u8] {
567 let offset = self.qname_capacity() + self.cigar_len() * 4;
568 &self.data()[offset..][..(self.seq_len() + 1) / 2]
569 }
570
571 pub fn seq(&self) -> Seq<'_> {
573 Seq {
574 encoded: self.seq_data(),
575 len: self.seq_len(),
576 }
577 }
578
579 pub fn qual(&self) -> &[u8] {
583 &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
584 [..self.seq_len()]
585 }
586
587 pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
592 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
593 let aux = unsafe {
594 htslib::bam_aux_get(
595 &self.inner as *const htslib::bam1_t,
596 c_str.as_ptr() as *mut c_char,
597 )
598 };
599 unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
600 }
601
602 unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
603 const TAG_LEN: isize = 2;
604 const TYPE_ID_LEN: isize = 1;
606
607 if aux.is_null() {
608 return Err(Error::BamAuxTagNotFound);
609 }
610
611 let (data, type_size) = match *aux {
612 b'A' => {
613 let type_size = size_of::<u8>();
614 (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
615 }
616 b'c' => {
617 let type_size = size_of::<i8>();
618 (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
619 }
620 b'C' => {
621 let type_size = size_of::<u8>();
622 (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
623 }
624 b's' => {
625 let type_size = size_of::<i16>();
626 (
627 Aux::I16(
628 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
629 .read_i16::<LittleEndian>()
630 .map_err(|_| Error::BamAuxParsingError)?,
631 ),
632 type_size,
633 )
634 }
635 b'S' => {
636 let type_size = size_of::<u16>();
637 (
638 Aux::U16(
639 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
640 .read_u16::<LittleEndian>()
641 .map_err(|_| Error::BamAuxParsingError)?,
642 ),
643 type_size,
644 )
645 }
646 b'i' => {
647 let type_size = size_of::<i32>();
648 (
649 Aux::I32(
650 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
651 .read_i32::<LittleEndian>()
652 .map_err(|_| Error::BamAuxParsingError)?,
653 ),
654 type_size,
655 )
656 }
657 b'I' => {
658 let type_size = size_of::<u32>();
659 (
660 Aux::U32(
661 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
662 .read_u32::<LittleEndian>()
663 .map_err(|_| Error::BamAuxParsingError)?,
664 ),
665 type_size,
666 )
667 }
668 b'f' => {
669 let type_size = size_of::<f32>();
670 (
671 Aux::Float(
672 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
673 .read_f32::<LittleEndian>()
674 .map_err(|_| Error::BamAuxParsingError)?,
675 ),
676 type_size,
677 )
678 }
679 b'd' => {
680 let type_size = size_of::<f64>();
681 (
682 Aux::Double(
683 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684 .read_f64::<LittleEndian>()
685 .map_err(|_| Error::BamAuxParsingError)?,
686 ),
687 type_size,
688 )
689 }
690 b'Z' | b'H' => {
691 let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
692 let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
693 (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
694 }
695 b'B' => {
696 const ARRAY_INNER_TYPE_LEN: isize = 1;
697 const ARRAY_COUNT_LEN: isize = 4;
698
699 let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
701
702 let length =
703 slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
704 .read_u32::<LittleEndian>()
705 .map_err(|_| Error::BamAuxParsingError)? as usize;
706
707 let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
709 b'c' => (
710 Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
711 aux.offset(array_data_offset),
712 length,
713 ))),
714 length,
715 ),
716 b'C' => (
717 Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
718 aux.offset(array_data_offset),
719 length,
720 ))),
721 length,
722 ),
723 b's' => (
724 Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
725 aux.offset(array_data_offset),
726 length * size_of::<i16>(),
727 ))),
728 length * std::mem::size_of::<i16>(),
729 ),
730 b'S' => (
731 Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
732 aux.offset(array_data_offset),
733 length * size_of::<u16>(),
734 ))),
735 length * std::mem::size_of::<u16>(),
736 ),
737 b'i' => (
738 Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
739 aux.offset(array_data_offset),
740 length * size_of::<i32>(),
741 ))),
742 length * std::mem::size_of::<i32>(),
743 ),
744 b'I' => (
745 Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
746 aux.offset(array_data_offset),
747 length * size_of::<u32>(),
748 ))),
749 length * std::mem::size_of::<u32>(),
750 ),
751 b'f' => (
752 Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
753 aux.offset(array_data_offset),
754 length * size_of::<f32>(),
755 ))),
756 length * std::mem::size_of::<f32>(),
757 ),
758 _ => {
759 return Err(Error::BamAuxUnknownType);
760 }
761 };
762 (
763 array_data,
764 ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
766 )
767 }
768 _ => {
769 return Err(Error::BamAuxUnknownType);
770 }
771 };
772
773 Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
775 }
776
777 pub fn aux_iter(&self) -> AuxIter {
782 AuxIter {
783 aux: &self.data()[
786 self.qname_capacity()
788 + self.cigar_len() * std::mem::size_of::<u32>()
790 + (self.seq_len() + 1) / 2
792 + self.seq_len()..],
794 }
795 }
796
797 pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
799 if self.aux(tag).is_ok() {
803 return Err(Error::BamAuxTagAlreadyPresent);
804 }
805
806 let ctag = tag.as_ptr() as *mut c_char;
807 let ret = unsafe {
808 match value {
809 Aux::Char(v) => htslib::bam_aux_append(
810 self.inner_ptr_mut(),
811 ctag,
812 b'A' as c_char,
813 size_of::<u8>() as i32,
814 [v].as_mut_ptr() as *mut u8,
815 ),
816 Aux::I8(v) => htslib::bam_aux_append(
817 self.inner_ptr_mut(),
818 ctag,
819 b'c' as c_char,
820 size_of::<i8>() as i32,
821 [v].as_mut_ptr() as *mut u8,
822 ),
823 Aux::U8(v) => htslib::bam_aux_append(
824 self.inner_ptr_mut(),
825 ctag,
826 b'C' as c_char,
827 size_of::<u8>() as i32,
828 [v].as_mut_ptr() as *mut u8,
829 ),
830 Aux::I16(v) => htslib::bam_aux_append(
831 self.inner_ptr_mut(),
832 ctag,
833 b's' as c_char,
834 size_of::<i16>() as i32,
835 [v].as_mut_ptr() as *mut u8,
836 ),
837 Aux::U16(v) => htslib::bam_aux_append(
838 self.inner_ptr_mut(),
839 ctag,
840 b'S' as c_char,
841 size_of::<u16>() as i32,
842 [v].as_mut_ptr() as *mut u8,
843 ),
844 Aux::I32(v) => htslib::bam_aux_append(
845 self.inner_ptr_mut(),
846 ctag,
847 b'i' as c_char,
848 size_of::<i32>() as i32,
849 [v].as_mut_ptr() as *mut u8,
850 ),
851 Aux::U32(v) => htslib::bam_aux_append(
852 self.inner_ptr_mut(),
853 ctag,
854 b'I' as c_char,
855 size_of::<u32>() as i32,
856 [v].as_mut_ptr() as *mut u8,
857 ),
858 Aux::Float(v) => htslib::bam_aux_append(
859 self.inner_ptr_mut(),
860 ctag,
861 b'f' as c_char,
862 size_of::<f32>() as i32,
863 [v].as_mut_ptr() as *mut u8,
864 ),
865 Aux::Double(v) => htslib::bam_aux_append(
867 self.inner_ptr_mut(),
868 ctag,
869 b'd' as c_char,
870 size_of::<f64>() as i32,
871 [v].as_mut_ptr() as *mut u8,
872 ),
873 Aux::String(v) => {
874 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
875 htslib::bam_aux_append(
876 self.inner_ptr_mut(),
877 ctag,
878 b'Z' as c_char,
879 (v.len() + 1) as i32,
880 c_str.as_ptr() as *mut u8,
881 )
882 }
883 Aux::HexByteArray(v) => {
884 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
885 htslib::bam_aux_append(
886 self.inner_ptr_mut(),
887 ctag,
888 b'H' as c_char,
889 (v.len() + 1) as i32,
890 c_str.as_ptr() as *mut u8,
891 )
892 }
893 Aux::ArrayI8(aux_array) => match aux_array {
895 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
896 self.inner_ptr_mut(),
897 ctag,
898 b'c',
899 inner.len() as u32,
900 inner.slice.as_ptr() as *mut ::libc::c_void,
901 ),
902 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
903 self.inner_ptr_mut(),
904 ctag,
905 b'c',
906 inner.len() as u32,
907 inner.slice.as_ptr() as *mut ::libc::c_void,
908 ),
909 },
910 Aux::ArrayU8(aux_array) => match aux_array {
911 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
912 self.inner_ptr_mut(),
913 ctag,
914 b'C',
915 inner.len() as u32,
916 inner.slice.as_ptr() as *mut ::libc::c_void,
917 ),
918 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
919 self.inner_ptr_mut(),
920 ctag,
921 b'C',
922 inner.len() as u32,
923 inner.slice.as_ptr() as *mut ::libc::c_void,
924 ),
925 },
926 Aux::ArrayI16(aux_array) => match aux_array {
927 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
928 self.inner_ptr_mut(),
929 ctag,
930 b's',
931 inner.len() as u32,
932 inner.slice.as_ptr() as *mut ::libc::c_void,
933 ),
934 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
935 self.inner_ptr_mut(),
936 ctag,
937 b's',
938 inner.len() as u32,
939 inner.slice.as_ptr() as *mut ::libc::c_void,
940 ),
941 },
942 Aux::ArrayU16(aux_array) => match aux_array {
943 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
944 self.inner_ptr_mut(),
945 ctag,
946 b'S',
947 inner.len() as u32,
948 inner.slice.as_ptr() as *mut ::libc::c_void,
949 ),
950 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
951 self.inner_ptr_mut(),
952 ctag,
953 b'S',
954 inner.len() as u32,
955 inner.slice.as_ptr() as *mut ::libc::c_void,
956 ),
957 },
958 Aux::ArrayI32(aux_array) => match aux_array {
959 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
960 self.inner_ptr_mut(),
961 ctag,
962 b'i',
963 inner.len() as u32,
964 inner.slice.as_ptr() as *mut ::libc::c_void,
965 ),
966 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
967 self.inner_ptr_mut(),
968 ctag,
969 b'i',
970 inner.len() as u32,
971 inner.slice.as_ptr() as *mut ::libc::c_void,
972 ),
973 },
974 Aux::ArrayU32(aux_array) => match aux_array {
975 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
976 self.inner_ptr_mut(),
977 ctag,
978 b'I',
979 inner.len() as u32,
980 inner.slice.as_ptr() as *mut ::libc::c_void,
981 ),
982 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
983 self.inner_ptr_mut(),
984 ctag,
985 b'I',
986 inner.len() as u32,
987 inner.slice.as_ptr() as *mut ::libc::c_void,
988 ),
989 },
990 Aux::ArrayFloat(aux_array) => match aux_array {
991 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
992 self.inner_ptr_mut(),
993 ctag,
994 b'f',
995 inner.len() as u32,
996 inner.slice.as_ptr() as *mut ::libc::c_void,
997 ),
998 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
999 self.inner_ptr_mut(),
1000 ctag,
1001 b'f',
1002 inner.len() as u32,
1003 inner.slice.as_ptr() as *mut ::libc::c_void,
1004 ),
1005 },
1006 }
1007 };
1008
1009 if ret < 0 {
1010 Err(Error::BamAux)
1011 } else {
1012 Ok(())
1013 }
1014 }
1015
1016 pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1018 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1019 let aux = unsafe {
1020 htslib::bam_aux_get(
1021 &self.inner as *const htslib::bam1_t,
1022 c_str.as_ptr() as *mut c_char,
1023 )
1024 };
1025 unsafe {
1026 if aux.is_null() {
1027 Err(Error::BamAuxTagNotFound)
1028 } else {
1029 htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1030 Ok(())
1031 }
1032 }
1033 }
1034
1035 pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1067 BaseModificationsIter::new(self)
1068 }
1069
1070 pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1074 BaseModificationsPositionIter::new(self)
1075 }
1076
1077 pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1081 if self.is_paired()
1082 && !self.is_unmapped()
1083 && !self.is_mate_unmapped()
1084 && self.tid() == self.mtid()
1085 {
1086 if self.pos() == self.mpos() {
1087 return SequenceReadPairOrientation::None;
1089 }
1090
1091 let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1092 (
1093 self.pos(),
1094 self.mpos(),
1095 !self.is_reverse(),
1096 !self.is_mate_reverse(),
1097 )
1098 } else {
1099 (
1100 self.mpos(),
1101 self.pos(),
1102 !self.is_mate_reverse(),
1103 !self.is_reverse(),
1104 )
1105 };
1106
1107 if pos_1 < pos_2 {
1108 match (fwd_1, fwd_2) {
1109 (true, true) => SequenceReadPairOrientation::F1F2,
1110 (true, false) => SequenceReadPairOrientation::F1R2,
1111 (false, true) => SequenceReadPairOrientation::R1F2,
1112 (false, false) => SequenceReadPairOrientation::R1R2,
1113 }
1114 } else {
1115 match (fwd_2, fwd_1) {
1116 (true, true) => SequenceReadPairOrientation::F2F1,
1117 (true, false) => SequenceReadPairOrientation::F2R1,
1118 (false, true) => SequenceReadPairOrientation::R2F1,
1119 (false, false) => SequenceReadPairOrientation::R2R1,
1120 }
1121 }
1122 } else {
1123 SequenceReadPairOrientation::None
1124 }
1125 }
1126
1127 flag!(is_paired, set_paired, unset_paired, 1u16);
1128 flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1129 flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1130 flag!(
1131 is_mate_unmapped,
1132 set_mate_unmapped,
1133 unset_mate_unmapped,
1134 8u16
1135 );
1136 flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1137 flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1138 flag!(
1139 is_first_in_template,
1140 set_first_in_template,
1141 unset_first_in_template,
1142 64u16
1143 );
1144 flag!(
1145 is_last_in_template,
1146 set_last_in_template,
1147 unset_last_in_template,
1148 128u16
1149 );
1150 flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1151 flag!(
1152 is_quality_check_failed,
1153 set_quality_check_failed,
1154 unset_quality_check_failed,
1155 512u16
1156 );
1157 flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1158 flag!(
1159 is_supplementary,
1160 set_supplementary,
1161 unset_supplementary,
1162 2048u16
1163 );
1164}
1165
1166impl Drop for Record {
1167 fn drop(&mut self) {
1168 if self.own {
1169 unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1170 }
1171 }
1172}
1173
1174impl SequenceRead for Record {
1175 fn name(&self) -> &[u8] {
1176 self.qname()
1177 }
1178
1179 fn base(&self, i: usize) -> u8 {
1180 *decode_base_unchecked(encoded_base(self.seq_data(), i))
1181 }
1182
1183 fn base_qual(&self, i: usize) -> u8 {
1184 self.qual()[i]
1185 }
1186
1187 fn len(&self) -> usize {
1188 self.seq_len()
1189 }
1190
1191 fn is_empty(&self) -> bool {
1192 self.len() == 0
1193 }
1194}
1195
1196impl genome::AbstractInterval for Record {
1197 fn contig(&self) -> &str {
1199 let tid = self.tid();
1200 if tid < 0 {
1201 panic!("invalid tid, must be at least zero");
1202 }
1203 str::from_utf8(
1204 self.header
1205 .as_ref()
1206 .expect(
1207 "header must be set (this is the case if the record has been read from a file)",
1208 )
1209 .tid2name(tid as u32),
1210 )
1211 .expect("unable to interpret contig name as UTF-8")
1212 }
1213
1214 fn range(&self) -> ops::Range<genome::Position> {
1216 let end_pos = self
1217 .cigar_cached()
1218 .expect("cigar has not been cached yet, call cache_cigar() first")
1219 .end_pos() as u64;
1220
1221 if self.pos() < 0 {
1222 panic!("invalid position, must be positive")
1223 }
1224
1225 self.pos() as u64..end_pos
1226 }
1227}
1228
1229#[derive(Debug, PartialEq)]
1284pub enum Aux<'a> {
1285 Char(u8),
1286 I8(i8),
1287 U8(u8),
1288 I16(i16),
1289 U16(u16),
1290 I32(i32),
1291 U32(u32),
1292 Float(f32),
1293 Double(f64), String(&'a str),
1295 HexByteArray(&'a str),
1296 ArrayI8(AuxArray<'a, i8>),
1297 ArrayU8(AuxArray<'a, u8>),
1298 ArrayI16(AuxArray<'a, i16>),
1299 ArrayU16(AuxArray<'a, u16>),
1300 ArrayI32(AuxArray<'a, i32>),
1301 ArrayU32(AuxArray<'a, u32>),
1302 ArrayFloat(AuxArray<'a, f32>),
1303}
1304
1305unsafe impl<'a> Send for Aux<'a> {}
1306unsafe impl<'a> Sync for Aux<'a> {}
1307
1308pub trait AuxArrayElement: Copy {
1310 fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1311}
1312
1313impl AuxArrayElement for i8 {
1314 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1315 std::io::Cursor::new(bytes).read_i8().ok()
1316 }
1317}
1318impl AuxArrayElement for u8 {
1319 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1320 std::io::Cursor::new(bytes).read_u8().ok()
1321 }
1322}
1323impl AuxArrayElement for i16 {
1324 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1325 std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1326 }
1327}
1328impl AuxArrayElement for u16 {
1329 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1330 std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1331 }
1332}
1333impl AuxArrayElement for i32 {
1334 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1335 std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1336 }
1337}
1338impl AuxArrayElement for u32 {
1339 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1340 std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1341 }
1342}
1343impl AuxArrayElement for f32 {
1344 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1345 std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1346 }
1347}
1348
1349#[derive(Debug)]
1393pub enum AuxArray<'a, T> {
1394 TargetType(AuxArrayTargetType<'a, T>),
1395 RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1396}
1397
1398impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1399where
1400 T: AuxArrayElement + PartialEq,
1401{
1402 fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1403 use AuxArray::*;
1404 match (self, other) {
1405 (TargetType(v), TargetType(v_other)) => v == v_other,
1406 (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1407 (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1408 (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1409 }
1410 }
1411}
1412
1413impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1415where
1416 I: AuxArrayElement,
1417 T: AsRef<[I]> + ?Sized,
1418{
1419 fn from(src: &'a T) -> Self {
1420 AuxArray::TargetType(AuxArrayTargetType {
1421 slice: src.as_ref(),
1422 })
1423 }
1424}
1425
1426impl<'a, T> AuxArray<'a, T>
1427where
1428 T: AuxArrayElement,
1429{
1430 pub fn get(&self, index: usize) -> Option<T> {
1432 match self {
1433 AuxArray::TargetType(v) => v.get(index),
1434 AuxArray::RawLeBytes(v) => v.get(index),
1435 }
1436 }
1437
1438 pub fn len(&self) -> usize {
1440 match self {
1441 AuxArray::TargetType(a) => a.len(),
1442 AuxArray::RawLeBytes(a) => a.len(),
1443 }
1444 }
1445
1446 pub fn is_empty(&self) -> bool {
1448 self.len() == 0
1449 }
1450
1451 pub fn iter(&self) -> AuxArrayIter<T> {
1453 AuxArrayIter {
1454 index: 0,
1455 array: self,
1456 }
1457 }
1458
1459 fn from_bytes(bytes: &'a [u8]) -> Self {
1461 Self::RawLeBytes(AuxArrayRawLeBytes {
1462 slice: bytes,
1463 phantom_data: PhantomData,
1464 })
1465 }
1466}
1467
1468#[doc(hidden)]
1470#[derive(Debug, PartialEq)]
1471pub struct AuxArrayTargetType<'a, T> {
1472 slice: &'a [T],
1473}
1474
1475impl<'a, T> AuxArrayTargetType<'a, T>
1476where
1477 T: AuxArrayElement,
1478{
1479 fn get(&self, index: usize) -> Option<T> {
1480 self.slice.get(index).copied()
1481 }
1482
1483 fn len(&self) -> usize {
1484 self.slice.len()
1485 }
1486}
1487
1488#[doc(hidden)]
1490#[derive(Debug, PartialEq)]
1491pub struct AuxArrayRawLeBytes<'a, T> {
1492 slice: &'a [u8],
1493 phantom_data: PhantomData<T>,
1494}
1495
1496impl<'a, T> AuxArrayRawLeBytes<'a, T>
1497where
1498 T: AuxArrayElement,
1499{
1500 fn get(&self, index: usize) -> Option<T> {
1501 let type_size = std::mem::size_of::<T>();
1502 if index * type_size + type_size > self.slice.len() {
1503 return None;
1504 }
1505 T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1506 }
1507
1508 fn len(&self) -> usize {
1509 self.slice.len() / std::mem::size_of::<T>()
1510 }
1511}
1512
1513pub struct AuxArrayIter<'a, T> {
1517 index: usize,
1518 array: &'a AuxArray<'a, T>,
1519}
1520
1521impl<T> Iterator for AuxArrayIter<'_, T>
1522where
1523 T: AuxArrayElement,
1524{
1525 type Item = T;
1526 fn next(&mut self) -> Option<Self::Item> {
1527 let value = self.array.get(self.index);
1528 self.index += 1;
1529 value
1530 }
1531
1532 fn size_hint(&self) -> (usize, Option<usize>) {
1533 let array_length = self.array.len() - self.index;
1534 (array_length, Some(array_length))
1535 }
1536}
1537
1538pub struct AuxIter<'a> {
1549 aux: &'a [u8],
1550}
1551
1552impl<'a> Iterator for AuxIter<'a> {
1553 type Item = Result<(&'a [u8], Aux<'a>)>;
1554
1555 fn next(&mut self) -> Option<Self::Item> {
1556 if self.aux.is_empty() {
1558 return None;
1559 }
1560 if (1..=3).contains(&self.aux.len()) {
1562 self.aux = &[];
1564 return Some(Err(Error::BamAuxParsingError));
1565 }
1566 let tag = &self.aux[..2];
1567 Some(unsafe {
1568 let data_ptr = self.aux[2..].as_ptr();
1569 Record::read_aux_field(data_ptr)
1570 .map(|(aux, offset)| {
1571 self.aux = &self.aux[offset..];
1572 (tag, aux)
1573 })
1574 .map_err(|e| {
1575 self.aux = &[];
1577 e
1578 })
1579 })
1580 }
1581}
1582
1583static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1584static ENCODE_BASE: [u8; 256] = [
1585 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1586 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1587 1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1588 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1589 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1590 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1591 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1592 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1593 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1594 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1595 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1596];
1597
1598#[inline]
1599fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1600 (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1601}
1602
1603#[inline]
1604unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1605 (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1606}
1607
1608#[inline]
1609fn decode_base_unchecked(base: u8) -> &'static u8 {
1610 unsafe { DECODE_BASE.get_unchecked(base as usize) }
1611}
1612
1613#[derive(Debug, Copy, Clone)]
1615pub struct Seq<'a> {
1616 pub encoded: &'a [u8],
1617 len: usize,
1618}
1619
1620impl<'a> Seq<'a> {
1621 #[inline]
1623 pub fn encoded_base(&self, i: usize) -> u8 {
1624 encoded_base(self.encoded, i)
1625 }
1626
1627 #[inline]
1629 pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1630 encoded_base_unchecked(self.encoded, i)
1631 }
1632
1633 #[inline]
1637 pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1638 *decode_base_unchecked(self.encoded_base_unchecked(i))
1639 }
1640
1641 pub fn as_bytes(&self) -> Vec<u8> {
1643 (0..self.len()).map(|i| self[i]).collect()
1644 }
1645
1646 pub fn len(&self) -> usize {
1648 self.len
1649 }
1650
1651 pub fn is_empty(&self) -> bool {
1652 self.len() == 0
1653 }
1654}
1655
1656impl<'a> ops::Index<usize> for Seq<'a> {
1657 type Output = u8;
1658
1659 fn index(&self, index: usize) -> &u8 {
1661 decode_base_unchecked(self.encoded_base(index))
1662 }
1663}
1664
1665unsafe impl<'a> Send for Seq<'a> {}
1666unsafe impl<'a> Sync for Seq<'a> {}
1667
1668#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1669#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1670pub enum Cigar {
1671 Match(u32), Ins(u32), Del(u32), RefSkip(u32), SoftClip(u32), HardClip(u32), Pad(u32), Equal(u32), Diff(u32), }
1681
1682impl Cigar {
1683 fn encode(self) -> u32 {
1684 match self {
1685 Cigar::Match(len) => len << 4, Cigar::Ins(len) => len << 4 | 1,
1687 Cigar::Del(len) => len << 4 | 2,
1688 Cigar::RefSkip(len) => len << 4 | 3,
1689 Cigar::SoftClip(len) => len << 4 | 4,
1690 Cigar::HardClip(len) => len << 4 | 5,
1691 Cigar::Pad(len) => len << 4 | 6,
1692 Cigar::Equal(len) => len << 4 | 7,
1693 Cigar::Diff(len) => len << 4 | 8,
1694 }
1695 }
1696
1697 pub fn len(self) -> u32 {
1699 match self {
1700 Cigar::Match(len) => len,
1701 Cigar::Ins(len) => len,
1702 Cigar::Del(len) => len,
1703 Cigar::RefSkip(len) => len,
1704 Cigar::SoftClip(len) => len,
1705 Cigar::HardClip(len) => len,
1706 Cigar::Pad(len) => len,
1707 Cigar::Equal(len) => len,
1708 Cigar::Diff(len) => len,
1709 }
1710 }
1711
1712 pub fn is_empty(self) -> bool {
1713 self.len() == 0
1714 }
1715
1716 pub fn char(self) -> char {
1718 match self {
1719 Cigar::Match(_) => 'M',
1720 Cigar::Ins(_) => 'I',
1721 Cigar::Del(_) => 'D',
1722 Cigar::RefSkip(_) => 'N',
1723 Cigar::SoftClip(_) => 'S',
1724 Cigar::HardClip(_) => 'H',
1725 Cigar::Pad(_) => 'P',
1726 Cigar::Equal(_) => '=',
1727 Cigar::Diff(_) => 'X',
1728 }
1729 }
1730}
1731
1732impl fmt::Display for Cigar {
1733 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1734 fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1735 }
1736}
1737
1738unsafe impl Send for Cigar {}
1739unsafe impl Sync for Cigar {}
1740
1741custom_derive! {
1742 #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1761 #[derive(NewtypeDeref,
1762 NewtypeDerefMut,
1763 NewtypeIndex(usize),
1764 NewtypeIndexMut(usize),
1765 NewtypeFrom,
1766 PartialEq,
1767 PartialOrd,
1768 Eq,
1769 NewtypeDebug,
1770 Clone,
1771 Hash
1772 )]
1773 pub struct CigarString(pub Vec<Cigar>);
1774}
1775
1776impl CigarString {
1777 pub fn into_view(self, pos: i64) -> CigarStringView {
1779 CigarStringView::new(self, pos)
1780 }
1781
1782 pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1787 match alignment.mode {
1788 AlignmentMode::Global => {
1789 panic!(" Bam cigar fn not supported for Global Alignment mode")
1790 }
1791 AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1792 _ => {}
1793 }
1794
1795 let mut cigar = Vec::new();
1796 if alignment.operations.is_empty() {
1797 return CigarString(cigar);
1798 }
1799
1800 let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1801 AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1802 AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1803 AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1804 AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1805 _ => {}
1806 };
1807
1808 if alignment.xstart > 0 {
1809 cigar.push(if hard_clip {
1810 Cigar::HardClip(alignment.xstart as u32)
1811 } else {
1812 Cigar::SoftClip(alignment.xstart as u32)
1813 });
1814 }
1815
1816 let mut last = alignment.operations[0];
1817 let mut k = 1u32;
1818 for &op in alignment.operations[1..].iter() {
1819 if op == last {
1820 k += 1;
1821 } else {
1822 add_op(last, k, &mut cigar);
1823 k = 1;
1824 }
1825 last = op;
1826 }
1827 add_op(last, k, &mut cigar);
1828 if alignment.xlen > alignment.xend {
1829 cigar.push(if hard_clip {
1830 Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1831 } else {
1832 Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1833 });
1834 }
1835
1836 CigarString(cigar)
1837 }
1838}
1839
1840impl TryFrom<&[u8]> for CigarString {
1841 type Error = Error;
1842
1843 fn try_from(bytes: &[u8]) -> Result<Self> {
1864 let mut inner = Vec::new();
1865 let mut i = 0;
1866 let text_len = bytes.len();
1867 while i < text_len {
1868 let mut j = i;
1869 while j < text_len && bytes[j].is_ascii_digit() {
1870 j += 1;
1871 }
1872 if i == j {
1874 return Err(Error::BamParseCigar {
1875 msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1876 });
1877 }
1878 let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1880 msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1881 })?;
1882 let n = s.parse().map_err(|_| Error::BamParseCigar {
1883 msg: format!("Unable to parse &str '{:?}' to u32.", s),
1884 })?;
1885 let op = &bytes[j];
1887 inner.push(match op {
1888 b'M' => Cigar::Match(n),
1889 b'I' => Cigar::Ins(n),
1890 b'D' => Cigar::Del(n),
1891 b'N' => Cigar::RefSkip(n),
1892 b'H' => {
1893 if i == 0 || j + 1 == text_len {
1894 Cigar::HardClip(n)
1895 } else {
1896 return Err(Error::BamParseCigar {
1897 msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1898 .to_owned(),
1899 });
1900 }
1901 }
1902 b'S' => {
1903 if i == 0
1904 || j + 1 == text_len
1905 || bytes[i-1] == b'H'
1906 || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1907 Cigar::SoftClip(n)
1908 } else {
1909 return Err(Error::BamParseCigar {
1910 msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1911 .to_owned(),
1912 });
1913 }
1914 },
1915 b'P' => Cigar::Pad(n),
1916 b'=' => Cigar::Equal(n),
1917 b'X' => Cigar::Diff(n),
1918 op => {
1919 return Err(Error::BamParseCigar {
1920 msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1921 })
1922 }
1923 });
1924 i = j + 1;
1925 }
1926 Ok(CigarString(inner))
1927 }
1928}
1929
1930impl TryFrom<&str> for CigarString {
1931 type Error = Error;
1932
1933 fn try_from(text: &str) -> Result<Self> {
1954 let bytes = text.as_bytes();
1955 if text.chars().count() != bytes.len() {
1956 return Err(Error::BamParseCigar {
1957 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1958 });
1959 }
1960 CigarString::try_from(bytes)
1961 }
1962}
1963
1964impl<'a> CigarString {
1965 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1966 self.into_iter()
1967 }
1968}
1969
1970impl<'a> IntoIterator for &'a CigarString {
1971 type Item = &'a Cigar;
1972 type IntoIter = ::std::slice::Iter<'a, Cigar>;
1973
1974 fn into_iter(self) -> Self::IntoIter {
1975 self.0.iter()
1976 }
1977}
1978
1979impl fmt::Display for CigarString {
1980 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1981 for op in self {
1982 fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
1983 }
1984 Ok(())
1985 }
1986}
1987
1988fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
1990 match (cigar.next(), cigar.next()) {
1991 (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
1992 *s as i64
1993 }
1994 _ => 0,
1995 }
1996}
1997
1998#[derive(Eq, PartialEq, Clone, Debug)]
1999pub struct CigarStringView {
2000 inner: CigarString,
2001 pos: i64,
2002}
2003
2004impl CigarStringView {
2005 pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2007 CigarStringView { inner: c, pos }
2008 }
2009
2010 pub fn end_pos(&self) -> i64 {
2012 let mut pos = self.pos;
2013 for c in self {
2014 match c {
2015 Cigar::Match(l)
2016 | Cigar::RefSkip(l)
2017 | Cigar::Del(l)
2018 | Cigar::Equal(l)
2019 | Cigar::Diff(l) => pos += *l as i64,
2020 Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2022 }
2023 }
2024 pos
2025 }
2026
2027 pub fn pos(&self) -> i64 {
2029 self.pos
2030 }
2031
2032 pub fn leading_softclips(&self) -> i64 {
2034 calc_softclips(self.iter())
2035 }
2036
2037 pub fn trailing_softclips(&self) -> i64 {
2039 calc_softclips(self.iter().rev())
2040 }
2041
2042 pub fn leading_hardclips(&self) -> i64 {
2044 self.first().map_or(0, |cigar| {
2045 if let Cigar::HardClip(s) = cigar {
2046 *s as i64
2047 } else {
2048 0
2049 }
2050 })
2051 }
2052
2053 pub fn trailing_hardclips(&self) -> i64 {
2055 self.last().map_or(0, |cigar| {
2056 if let Cigar::HardClip(s) = cigar {
2057 *s as i64
2058 } else {
2059 0
2060 }
2061 })
2062 }
2063
2064 pub fn read_pos(
2074 &self,
2075 ref_pos: u32,
2076 include_softclips: bool,
2077 include_dels: bool,
2078 ) -> Result<Option<u32>> {
2079 let mut rpos = self.pos as u32; let mut qpos = 0u32; let mut j = 0; for (i, c) in self.iter().enumerate() {
2086 match c {
2087 Cigar::Match(_) |
2088 Cigar::Diff(_) |
2089 Cigar::Equal(_) |
2090 Cigar::Ins(_) => {
2093 j = i;
2094 break;
2095 },
2096 Cigar::SoftClip(l) => {
2097 j = i;
2098 if include_softclips {
2099 rpos = rpos.saturating_sub(*l);
2103 }
2104 break;
2105 },
2106 Cigar::Del(l) => {
2107 rpos += l;
2115 },
2116 Cigar::RefSkip(_) => {
2117 return Err(Error::BamUnexpectedCigarOperation {
2118 msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2119 });
2120 },
2121 Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2122 return Err(Error::BamUnexpectedCigarOperation{
2123 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2124 });
2125 },
2126 Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2128 Cigar::Pad(_) | Cigar::HardClip(_) => ()
2130 }
2131 }
2132
2133 let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2134 cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2135 };
2136
2137 while rpos <= ref_pos && j < self.len() {
2138 match self[j] {
2139 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2141 qpos += ref_pos - rpos;
2144 return Ok(Some(qpos));
2145 }
2146 Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2147 qpos += ref_pos - rpos;
2148 return Ok(Some(qpos));
2149 }
2150 Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2151 return Ok(Some(qpos));
2153 }
2154 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2156 rpos += l;
2157 qpos += l;
2158 j += 1;
2159 }
2160 Cigar::SoftClip(l) => {
2161 qpos += l;
2162 j += 1;
2163 if include_softclips {
2164 rpos += l;
2165 }
2166 }
2167 Cigar::Ins(l) => {
2168 qpos += l;
2169 j += 1;
2170 }
2171 Cigar::RefSkip(l) | Cigar::Del(l) => {
2172 rpos += l;
2173 j += 1;
2174 }
2175 Cigar::Pad(_) => {
2176 j += 1;
2177 }
2178 Cigar::HardClip(_) if j < self.len() - 1 => {
2179 return Err(Error::BamUnexpectedCigarOperation{
2180 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2181 });
2182 }
2183 Cigar::HardClip(_) => return Ok(None),
2184 }
2185 }
2186
2187 Ok(None)
2188 }
2189
2190 pub fn take(self) -> CigarString {
2192 self.inner
2193 }
2194}
2195
2196impl ops::Deref for CigarStringView {
2197 type Target = CigarString;
2198
2199 fn deref(&self) -> &CigarString {
2200 &self.inner
2201 }
2202}
2203
2204impl ops::Index<usize> for CigarStringView {
2205 type Output = Cigar;
2206
2207 fn index(&self, index: usize) -> &Cigar {
2208 self.inner.index(index)
2209 }
2210}
2211
2212impl ops::IndexMut<usize> for CigarStringView {
2213 fn index_mut(&mut self, index: usize) -> &mut Cigar {
2214 self.inner.index_mut(index)
2215 }
2216}
2217
2218impl<'a> CigarStringView {
2219 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2220 self.inner.into_iter()
2221 }
2222}
2223
2224impl<'a> IntoIterator for &'a CigarStringView {
2225 type Item = &'a Cigar;
2226 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2227
2228 fn into_iter(self) -> Self::IntoIter {
2229 self.inner.into_iter()
2230 }
2231}
2232
2233impl fmt::Display for CigarStringView {
2234 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2235 self.inner.fmt(fmt)
2236 }
2237}
2238
2239pub struct BaseModificationMetadata {
2240 pub strand: i32,
2241 pub implicit: i32,
2242 pub canonical: u8,
2243}
2244
2245pub struct BaseModificationState<'a> {
2248 record: &'a Record,
2249 state: *mut htslib::hts_base_mod_state,
2250 buffer: Vec<htslib::hts_base_mod>,
2251 buffer_pos: i32,
2252}
2253
2254impl BaseModificationState<'_> {
2255 fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2260 let mut bm = unsafe {
2261 BaseModificationState {
2262 record: r,
2263 state: hts_sys::hts_base_mod_state_alloc(),
2264 buffer: Vec::new(),
2265 buffer_pos: -1,
2266 }
2267 };
2268
2269 if bm.state.is_null() {
2270 panic!("Unable to allocate memory for hts_base_mod_state");
2271 }
2272
2273 unsafe {
2275 let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2276 if ret != 0 {
2277 return Err(Error::BamBaseModificationTagNotFound);
2278 }
2279 }
2280
2281 let types = bm.recorded();
2282 bm.buffer.reserve(types.len());
2283 return Ok(bm);
2284 }
2285
2286 pub fn buffer_next_mods(&mut self) -> Result<usize> {
2287 unsafe {
2288 let ret = hts_sys::bam_next_basemod(
2289 self.record.inner_ptr(),
2290 self.state,
2291 self.buffer.as_mut_ptr(),
2292 self.buffer.capacity() as i32,
2293 &mut self.buffer_pos,
2294 );
2295
2296 if ret < 0 {
2297 return Err(Error::BamBaseModificationIterationFailed);
2298 }
2299
2300 if ret as usize > self.buffer.capacity() {
2304 return Err(Error::BamBaseModificationTooManyMods);
2305 }
2306
2307 self.buffer.set_len(ret as usize);
2310
2311 return Ok(ret as usize);
2312 }
2313 }
2314
2315 pub fn recorded<'a>(&self) -> &'a [i32] {
2318 unsafe {
2319 let mut n: i32 = 0;
2320 let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2321
2322 if data_ptr.is_null() {
2324 panic!("Unable to obtain pointer to base modifications");
2325 }
2326 assert!(n >= 0);
2327 return slice::from_raw_parts(data_ptr, n as usize);
2328 }
2329 }
2330
2331 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2337 unsafe {
2338 let mut strand: i32 = 0;
2339 let mut implicit: i32 = 0;
2340 let mut canonical: c_char = 0;
2342
2343 let ret = hts_sys::bam_mods_query_type(
2344 self.state,
2345 code,
2346 &mut strand,
2347 &mut implicit,
2348 &mut canonical,
2349 );
2350 if ret == -1 {
2351 return Err(Error::BamBaseModificationTypeNotFound);
2352 } else {
2353 return Ok(BaseModificationMetadata {
2354 strand,
2355 implicit,
2356 canonical: canonical.try_into().unwrap(),
2357 });
2358 }
2359 }
2360 }
2361}
2362
2363impl Drop for BaseModificationState<'_> {
2364 fn drop<'a>(&mut self) {
2365 unsafe {
2366 hts_sys::hts_base_mod_state_free(self.state);
2367 }
2368 }
2369}
2370
2371pub struct BaseModificationsPositionIter<'a> {
2374 mod_state: BaseModificationState<'a>,
2375}
2376
2377impl BaseModificationsPositionIter<'_> {
2378 fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
2379 let state = BaseModificationState::new(r)?;
2380 Ok(BaseModificationsPositionIter { mod_state: state })
2381 }
2382
2383 pub fn recorded<'a>(&self) -> &'a [i32] {
2384 return self.mod_state.recorded();
2385 }
2386
2387 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2388 return self.mod_state.query_type(code);
2389 }
2390}
2391
2392impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2393 type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2394
2395 fn next(&mut self) -> Option<Self::Item> {
2396 let ret = self.mod_state.buffer_next_mods();
2397
2398 match ret {
2403 Ok(num_mods) => {
2404 if num_mods == 0 {
2405 return None;
2406 } else {
2407 let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2408 return Some(Ok(data));
2409 }
2410 }
2411 Err(e) => return Some(Err(e)),
2412 }
2413 }
2414}
2415
2416pub struct BaseModificationsIter<'a> {
2419 mod_state: BaseModificationState<'a>,
2420 buffer_idx: usize,
2421}
2422
2423impl BaseModificationsIter<'_> {
2424 fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2425 let state = BaseModificationState::new(r)?;
2426 Ok(BaseModificationsIter {
2427 mod_state: state,
2428 buffer_idx: 0,
2429 })
2430 }
2431
2432 pub fn recorded<'a>(&self) -> &'a [i32] {
2433 return self.mod_state.recorded();
2434 }
2435
2436 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2437 return self.mod_state.query_type(code);
2438 }
2439}
2440
2441impl<'a> Iterator for BaseModificationsIter<'a> {
2442 type Item = Result<(i32, hts_sys::hts_base_mod)>;
2443
2444 fn next(&mut self) -> Option<Self::Item> {
2445 if self.buffer_idx == self.mod_state.buffer.len() {
2446 let ret = self.mod_state.buffer_next_mods();
2449
2450 match ret {
2451 Ok(num_mods) => {
2452 if num_mods == 0 {
2453 return None;
2455 } else {
2456 self.buffer_idx = 0;
2458 }
2459 }
2460 Err(e) => return Some(Err(e)),
2461 }
2462 }
2463
2464 assert!(self.buffer_idx < self.mod_state.buffer.len());
2466 let data = (
2467 self.mod_state.buffer_pos,
2468 self.mod_state.buffer[self.buffer_idx],
2469 );
2470 self.buffer_idx += 1;
2471 return Some(Ok(data));
2472 }
2473}
2474
2475#[cfg(test)]
2476mod tests {
2477 use super::*;
2478
2479 #[test]
2480 fn test_cigar_string() {
2481 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2482
2483 assert_eq!(cigar[0], Cigar::Match(100));
2484 assert_eq!(format!("{}", cigar), "100M10S");
2485 for op in &cigar {
2486 println!("{}", op);
2487 }
2488 }
2489
2490 #[test]
2491 fn test_cigar_string_view_pos() {
2492 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2493 assert_eq!(cigar.pos(), 5);
2494 }
2495
2496 #[test]
2497 fn test_cigar_string_leading_softclips() {
2498 let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2499 assert_eq!(cigar.leading_softclips(), 10);
2500 let cigar2 = CigarString(vec![
2501 Cigar::HardClip(5),
2502 Cigar::SoftClip(10),
2503 Cigar::Match(100),
2504 ])
2505 .into_view(0);
2506 assert_eq!(cigar2.leading_softclips(), 10);
2507 }
2508
2509 #[test]
2510 fn test_cigar_string_trailing_softclips() {
2511 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2512 assert_eq!(cigar.trailing_softclips(), 10);
2513 let cigar2 = CigarString(vec![
2514 Cigar::Match(100),
2515 Cigar::SoftClip(10),
2516 Cigar::HardClip(5),
2517 ])
2518 .into_view(0);
2519 assert_eq!(cigar2.trailing_softclips(), 10);
2520 }
2521
2522 #[test]
2523 fn test_cigar_read_pos() {
2524 let vpos = 5; let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2532 assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2533
2534 let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2542 assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2543 assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2544
2545 let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2554 assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2555 assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2556
2557 let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2563 assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2564
2565 let c05 = CigarString(vec![
2571 Cigar::Equal(2),
2572 Cigar::Del(2),
2573 Cigar::Diff(1),
2574 Cigar::Equal(2),
2575 ])
2576 .into_view(0);
2577 assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2578
2579 let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2585 assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2586 assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2587
2588 let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2594 assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2595 assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2596
2597 let c08 = CigarString(vec![
2603 Cigar::Equal(1),
2604 Cigar::Diff(1),
2605 Cigar::RefSkip(3),
2606 Cigar::Match(2),
2607 ])
2608 .into_view(2);
2609 assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2610 assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2611
2612 let c09 = CigarString(vec![
2618 Cigar::HardClip(3),
2619 Cigar::Equal(2),
2620 Cigar::HardClip(3),
2621 Cigar::Equal(2),
2622 ])
2623 .into_view(2);
2624 assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2625
2626 let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2632 assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2633
2634 let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2640 assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2641
2642 let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2648 assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2649
2650 let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2656 assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2657
2658 let vpos2 = 15;
2660 let c14 = CigarString(vec![
2665 Cigar::HardClip(5),
2666 Cigar::SoftClip(3),
2667 Cigar::Equal(1),
2668 Cigar::Pad(2),
2669 Cigar::Match(1),
2670 Cigar::Diff(1),
2671 Cigar::Ins(3),
2672 Cigar::Match(2),
2673 Cigar::Del(1),
2674 Cigar::Ins(2),
2675 Cigar::Equal(2),
2676 Cigar::RefSkip(3),
2677 Cigar::Match(3),
2678 Cigar::Equal(2),
2679 Cigar::SoftClip(5),
2680 Cigar::HardClip(2),
2681 ])
2682 .into_view(0);
2683 assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2684
2685 let c15 =
2691 CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2692 assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2693
2694 let c16 =
2697 CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2698 assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2699 }
2700
2701 #[test]
2702 fn test_clone() {
2703 let mut rec = Record::new();
2704 rec.set_pos(300);
2705 rec.set_qname(b"read1");
2706 let clone = rec.clone();
2707 assert_eq!(rec, clone);
2708 }
2709
2710 #[test]
2711 fn test_flags() {
2712 let mut rec = Record::new();
2713
2714 rec.set_paired();
2715 assert_eq!(rec.is_paired(), true);
2716
2717 rec.set_supplementary();
2718 assert_eq!(rec.is_supplementary(), true);
2719 assert_eq!(rec.is_supplementary(), true);
2720
2721 rec.unset_paired();
2722 assert_eq!(rec.is_paired(), false);
2723 assert_eq!(rec.is_supplementary(), true);
2724
2725 rec.unset_supplementary();
2726 assert_eq!(rec.is_paired(), false);
2727 assert_eq!(rec.is_supplementary(), false);
2728 }
2729
2730 #[test]
2731 fn test_cigar_parse() {
2732 let cigar = "1S20M1D2I3X1=2H";
2733 let parsed = CigarString::try_from(cigar).unwrap();
2734 assert_eq!(parsed.to_string(), cigar);
2735 }
2736}
2737
2738#[cfg(test)]
2739mod alignment_cigar_tests {
2740 use super::*;
2741 use crate::bam::{Read, Reader};
2742 use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2743 use bio_types::alignment::{Alignment, AlignmentMode};
2744
2745 #[test]
2746 fn test_cigar() {
2747 let alignment = Alignment {
2748 score: 5,
2749 xstart: 3,
2750 ystart: 0,
2751 xend: 9,
2752 yend: 10,
2753 ylen: 10,
2754 xlen: 10,
2755 operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2756 mode: AlignmentMode::Semiglobal,
2757 };
2758 assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2759 assert_eq!(
2760 CigarString::from_alignment(&alignment, false).0,
2761 vec![
2762 Cigar::SoftClip(3),
2763 Cigar::Equal(3),
2764 Cigar::Diff(1),
2765 Cigar::Ins(2),
2766 Cigar::Del(2),
2767 Cigar::SoftClip(1),
2768 ]
2769 );
2770
2771 let alignment = Alignment {
2772 score: 5,
2773 xstart: 0,
2774 ystart: 5,
2775 xend: 4,
2776 yend: 10,
2777 ylen: 10,
2778 xlen: 5,
2779 operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2780 mode: AlignmentMode::Custom,
2781 };
2782 assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2783 assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2784 assert_eq!(
2785 CigarString::from_alignment(&alignment, false).0,
2786 vec![
2787 Cigar::Equal(1),
2788 Cigar::Diff(2),
2789 Cigar::Ins(1),
2790 Cigar::Del(2),
2791 Cigar::SoftClip(1),
2792 ]
2793 );
2794 assert_eq!(
2795 CigarString::from_alignment(&alignment, true).0,
2796 vec![
2797 Cigar::Equal(1),
2798 Cigar::Diff(2),
2799 Cigar::Ins(1),
2800 Cigar::Del(2),
2801 Cigar::HardClip(1),
2802 ]
2803 );
2804
2805 let alignment = Alignment {
2806 score: 5,
2807 xstart: 0,
2808 ystart: 5,
2809 xend: 3,
2810 yend: 8,
2811 ylen: 10,
2812 xlen: 3,
2813 operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2814 mode: AlignmentMode::Custom,
2815 };
2816 assert_eq!(alignment.cigar(false), "1X1=1X");
2817 assert_eq!(
2818 CigarString::from_alignment(&alignment, false).0,
2819 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2820 );
2821
2822 let alignment = Alignment {
2823 score: 5,
2824 xstart: 0,
2825 ystart: 5,
2826 xend: 3,
2827 yend: 8,
2828 ylen: 10,
2829 xlen: 3,
2830 operations: vec![Subst, Match, Subst],
2831 mode: AlignmentMode::Semiglobal,
2832 };
2833 assert_eq!(alignment.cigar(false), "1X1=1X");
2834 assert_eq!(
2835 CigarString::from_alignment(&alignment, false).0,
2836 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2837 );
2838 }
2839
2840 #[test]
2841 fn test_read_orientation_f1r2() {
2842 let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2843
2844 for res in bam.records() {
2845 let record = res.unwrap();
2846 assert_eq!(
2847 record.read_pair_orientation(),
2848 SequenceReadPairOrientation::F1R2
2849 );
2850 }
2851 }
2852
2853 #[test]
2854 fn test_read_orientation_f2r1() {
2855 let mut bam = Reader::from_path(&"test/test_nonstandard_orientation.sam").unwrap();
2856
2857 for res in bam.records() {
2858 let record = res.unwrap();
2859 assert_eq!(
2860 record.read_pair_orientation(),
2861 SequenceReadPairOrientation::F2R1
2862 );
2863 }
2864 }
2865
2866 #[test]
2867 fn test_read_orientation_supplementary() {
2868 let mut bam = Reader::from_path(&"test/test_orientation_supplementary.sam").unwrap();
2869
2870 for res in bam.records() {
2871 let record = res.unwrap();
2872 assert_eq!(
2873 record.read_pair_orientation(),
2874 SequenceReadPairOrientation::F2R1
2875 );
2876 }
2877 }
2878
2879 #[test]
2880 pub fn test_cigar_parsing_non_ascii_error() {
2881 let cigar_str = "43ጷ";
2882 let expected_error = Err(Error::BamParseCigar {
2883 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2884 });
2885
2886 let result = CigarString::try_from(cigar_str);
2887 assert_eq!(expected_error, result);
2888 }
2889
2890 #[test]
2891 pub fn test_cigar_parsing() {
2892 let cigar_strs = vec![
2894 "1H10M4D100I300N1102=10P25X11S", "100M", "", "1H1=1H", "1S1=1S", "11H11S11=11S11H", "10H",
2901 "10S",
2902 ];
2903 let cigars = vec![
2905 CigarString(vec![
2906 Cigar::HardClip(1),
2907 Cigar::Match(10),
2908 Cigar::Del(4),
2909 Cigar::Ins(100),
2910 Cigar::RefSkip(300),
2911 Cigar::Equal(1102),
2912 Cigar::Pad(10),
2913 Cigar::Diff(25),
2914 Cigar::SoftClip(11),
2915 ]),
2916 CigarString(vec![Cigar::Match(100)]),
2917 CigarString(vec![]),
2918 CigarString(vec![
2919 Cigar::HardClip(1),
2920 Cigar::Equal(1),
2921 Cigar::HardClip(1),
2922 ]),
2923 CigarString(vec![
2924 Cigar::SoftClip(1),
2925 Cigar::Equal(1),
2926 Cigar::SoftClip(1),
2927 ]),
2928 CigarString(vec![
2929 Cigar::HardClip(11),
2930 Cigar::SoftClip(11),
2931 Cigar::Equal(11),
2932 Cigar::SoftClip(11),
2933 Cigar::HardClip(11),
2934 ]),
2935 CigarString(vec![Cigar::HardClip(10)]),
2936 CigarString(vec![Cigar::SoftClip(10)]),
2937 ];
2938 for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2940 let cigar_parse = CigarString::try_from(cigar_str)
2941 .expect(&format!("Unable to parse cigar: {}", cigar_str));
2942 assert_eq!(&cigar_parse, truth);
2943 }
2944 }
2945}
2946
2947#[cfg(test)]
2948mod basemod_tests {
2949 use crate::bam::{Read, Reader};
2950
2951 #[test]
2952 pub fn test_count_recorded() {
2953 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2954
2955 for r in bam.records() {
2956 let record = r.unwrap();
2957 if let Ok(mods) = record.basemods_iter() {
2958 let n = mods.recorded().len();
2959 assert_eq!(n, 3);
2960 };
2961 }
2962 }
2963
2964 #[test]
2965 pub fn test_query_type() {
2966 let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2967
2968 let mut n_fwd = 0;
2969 let mut n_rev = 0;
2970
2971 for r in bam.records() {
2972 let record = r.unwrap();
2973 if let Ok(mods) = record.basemods_iter() {
2974 for mod_code in mods.recorded() {
2975 if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2976 if mod_metadata.strand == 0 {
2977 n_fwd += 1;
2978 }
2979 if mod_metadata.strand == 1 {
2980 n_rev += 1;
2981 }
2982 }
2983 }
2984 };
2985 }
2986 assert_eq!(n_fwd, 2);
2987 assert_eq!(n_rev, 2);
2988 }
2989
2990 #[test]
2991 pub fn test_mod_iter() {
2992 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2993 let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
2994 let mut i = 0;
2995
2996 for r in bam.records() {
2997 let record = r.unwrap();
2998 for res in record.basemods_iter().unwrap() {
2999 if let Ok((position, _m)) = res {
3000 assert_eq!(position, expected_positions[i]);
3001 i += 1;
3002 }
3003 }
3004 }
3005 }
3006
3007 #[test]
3008 pub fn test_position_iter() {
3009 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3010 let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3011 let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3012 let mut i = 0;
3013
3014 for r in bam.records() {
3015 let record = r.unwrap();
3016 for res in record.basemods_position_iter().unwrap() {
3017 if let Ok((position, elements)) = res {
3018 assert_eq!(position, expected_positions[i]);
3019 assert_eq!(elements.len(), expected_counts[i]);
3020 i += 1;
3021 }
3022 }
3023 }
3024 }
3025}