rust_htslib/bam/
record.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17use std::u32;
18
19use byteorder::{LittleEndian, ReadBytesExt};
20
21use crate::bam::Error;
22use crate::bam::HeaderView;
23use crate::errors::Result;
24use crate::htslib;
25use crate::utils;
26#[cfg(feature = "serde_feature")]
27use serde::{self, Deserialize, Serialize};
28
29use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
30use bio_types::genome;
31use bio_types::sequence::SequenceRead;
32use bio_types::sequence::SequenceReadPairOrientation;
33use bio_types::strand::ReqStrand;
34
35/// A macro creating methods for flag access.
36macro_rules! flag {
37    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
38        pub fn $get(&self) -> bool {
39            self.inner().core.flag & $bit != 0
40        }
41
42        pub fn $set(&mut self) {
43            self.inner_mut().core.flag |= $bit;
44        }
45
46        pub fn $unset(&mut self) {
47            self.inner_mut().core.flag &= !$bit;
48        }
49    };
50}
51
52/// A BAM record.
53pub struct Record {
54    pub inner: htslib::bam1_t,
55    own: bool,
56    cigar: Option<CigarStringView>,
57    header: Option<Rc<HeaderView>>,
58}
59
60unsafe impl Send for Record {}
61unsafe impl Sync for Record {}
62
63impl Clone for Record {
64    fn clone(&self) -> Self {
65        let mut copy = Record::new();
66        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
67        copy
68    }
69}
70
71impl PartialEq for Record {
72    fn eq(&self, other: &Record) -> bool {
73        self.tid() == other.tid()
74            && self.pos() == other.pos()
75            && self.bin() == other.bin()
76            && self.mapq() == other.mapq()
77            && self.flags() == other.flags()
78            && self.mtid() == other.mtid()
79            && self.mpos() == other.mpos()
80            && self.insert_size() == other.insert_size()
81            && self.data() == other.data()
82            && self.inner().core.l_extranul == other.inner().core.l_extranul
83    }
84}
85
86impl Eq for Record {}
87
88impl fmt::Debug for Record {
89    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
90        fmt.write_fmt(format_args!(
91            "Record(tid: {}, pos: {})",
92            self.tid(),
93            self.pos()
94        ))
95    }
96}
97
98impl Default for Record {
99    fn default() -> Self {
100        Self::new()
101    }
102}
103
104#[inline]
105fn extranul_from_qname(qname: &[u8]) -> usize {
106    let qlen = qname.len() + 1;
107    if qlen % 4 != 0 {
108        4 - qlen % 4
109    } else {
110        0
111    }
112}
113
114impl Record {
115    /// Create an empty BAM record.
116    pub fn new() -> Self {
117        let mut record = Record {
118            inner: unsafe { MaybeUninit::zeroed().assume_init() },
119            own: true,
120            cigar: None,
121            header: None,
122        };
123        // The read/query name needs to be set as empty to properly initialize
124        // the record
125        record.set_qname(b"");
126        // Developer note: these are needed so the returned record is properly
127        // initialized as unmapped.
128        record.set_unmapped();
129        record.set_tid(-1);
130        record.set_pos(-1);
131        record.set_mpos(-1);
132        record.set_mtid(-1);
133        record
134    }
135
136    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
137        Record {
138            inner: {
139                #[allow(clippy::uninit_assumed_init)]
140                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
141                unsafe {
142                    ::libc::memcpy(
143                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
144                        from as *const ::libc::c_void,
145                        size_of::<htslib::bam1_t>(),
146                    );
147                }
148                inner
149            },
150            own: false,
151            cigar: None,
152            header: None,
153        }
154    }
155
156    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
157    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
158        let mut record = Self::new();
159
160        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
161        sam_copy.extend(sam);
162        sam_copy.push(0);
163
164        let mut sam_string = htslib::kstring_t {
165            s: sam_copy.as_ptr() as *mut c_char,
166            l: sam_copy.len(),
167            m: sam_copy.len(),
168        };
169
170        let succ = unsafe {
171            htslib::sam_parse1(
172                &mut sam_string,
173                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
174                record.inner_ptr_mut(),
175            )
176        };
177
178        if succ == 0 {
179            Ok(record)
180        } else {
181            Err(Error::BamParseSAM {
182                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
183            })
184        }
185    }
186
187    pub fn set_header(&mut self, header: Rc<HeaderView>) {
188        self.header = Some(header);
189    }
190
191    pub(super) fn data(&self) -> &[u8] {
192        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
193    }
194
195    #[inline]
196    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
197        &mut self.inner
198    }
199
200    #[inline]
201    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
202        &mut self.inner as *mut htslib::bam1_t
203    }
204
205    #[inline]
206    pub fn inner(&self) -> &htslib::bam1_t {
207        &self.inner
208    }
209
210    #[inline]
211    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
212        &self.inner as *const htslib::bam1_t
213    }
214
215    /// Get target id.
216    pub fn tid(&self) -> i32 {
217        self.inner().core.tid
218    }
219
220    /// Set target id.
221    pub fn set_tid(&mut self, tid: i32) {
222        self.inner_mut().core.tid = tid;
223    }
224
225    /// Get position (0-based).
226    pub fn pos(&self) -> i64 {
227        self.inner().core.pos
228    }
229
230    /// Set position (0-based).
231    pub fn set_pos(&mut self, pos: i64) {
232        self.inner_mut().core.pos = pos;
233    }
234
235    pub fn bin(&self) -> u16 {
236        self.inner().core.bin
237    }
238
239    pub fn set_bin(&mut self, bin: u16) {
240        self.inner_mut().core.bin = bin;
241    }
242
243    /// Get MAPQ.
244    pub fn mapq(&self) -> u8 {
245        self.inner().core.qual
246    }
247
248    /// Set MAPQ.
249    pub fn set_mapq(&mut self, mapq: u8) {
250        self.inner_mut().core.qual = mapq;
251    }
252
253    /// Get strand information from record flags.
254    pub fn strand(&self) -> ReqStrand {
255        let reverse = self.flags() & 0x10 != 0;
256        if reverse {
257            ReqStrand::Reverse
258        } else {
259            ReqStrand::Forward
260        }
261    }
262
263    /// Get raw flags.
264    pub fn flags(&self) -> u16 {
265        self.inner().core.flag
266    }
267
268    /// Set raw flags.
269    pub fn set_flags(&mut self, flags: u16) {
270        self.inner_mut().core.flag = flags;
271    }
272
273    /// Unset all flags.
274    pub fn unset_flags(&mut self) {
275        self.inner_mut().core.flag = 0;
276    }
277
278    /// Get target id of mate.
279    pub fn mtid(&self) -> i32 {
280        self.inner().core.mtid
281    }
282
283    /// Set target id of mate.
284    pub fn set_mtid(&mut self, mtid: i32) {
285        self.inner_mut().core.mtid = mtid;
286    }
287
288    /// Get mate position.
289    pub fn mpos(&self) -> i64 {
290        self.inner().core.mpos
291    }
292
293    /// Set mate position.
294    pub fn set_mpos(&mut self, mpos: i64) {
295        self.inner_mut().core.mpos = mpos;
296    }
297
298    /// Get insert size.
299    pub fn insert_size(&self) -> i64 {
300        self.inner().core.isize_
301    }
302
303    /// Set insert size.
304    pub fn set_insert_size(&mut self, insert_size: i64) {
305        self.inner_mut().core.isize_ = insert_size;
306    }
307
308    fn qname_capacity(&self) -> usize {
309        self.inner().core.l_qname as usize
310    }
311
312    fn qname_len(&self) -> usize {
313        // discount all trailing zeros (the default one and extra nulls)
314        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
315    }
316
317    /// Get qname (read name). Complexity: O(1).
318    pub fn qname(&self) -> &[u8] {
319        &self.data()[..self.qname_len()]
320    }
321
322    /// Set the variable length data buffer
323    pub fn set_data(&mut self, new_data: &[u8]) {
324        self.cigar = None;
325
326        self.inner_mut().l_data = new_data.len() as i32;
327        if (self.inner().m_data as i32) < self.inner().l_data {
328            // Verbosity due to lexical borrowing
329            let l_data = self.inner().l_data;
330            self.realloc_var_data(l_data as usize);
331        }
332
333        // Copy new data into buffer
334        let data =
335            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
336        utils::copy_memory(new_data, data);
337    }
338
339    /// Set variable length data (qname, cigar, seq, qual).
340    /// The aux data is left unchanged.
341    /// `qual` is Phred-scaled quality values, without any offset.
342    /// NOTE: seq.len() must equal qual.len() or this method
343    /// will panic. If you don't have quality values use
344    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
345    /// be recognized as missing QVs by `samtools`.
346    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
347        assert!(qname.len() < 255);
348        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
349
350        self.cigar = None;
351
352        let cigar_width = if let Some(cigar_string) = cigar {
353            cigar_string.len()
354        } else {
355            0
356        } * 4;
357        let q_len = qname.len() + 1;
358        let extranul = extranul_from_qname(qname);
359
360        let orig_aux_offset = self.qname_capacity()
361            + 4 * self.cigar_len()
362            + (self.seq_len() + 1) / 2
363            + self.seq_len();
364        let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
365        assert!(orig_aux_offset <= self.inner.l_data as usize);
366        let aux_len = self.inner.l_data as usize - orig_aux_offset;
367        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
368        if (self.inner().m_data as i32) < self.inner().l_data {
369            // Verbosity due to lexical borrowing
370            let l_data = self.inner().l_data;
371            self.realloc_var_data(l_data as usize);
372        }
373
374        // Copy the aux data.
375        if aux_len > 0 && orig_aux_offset != new_aux_offset {
376            let data =
377                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
378            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
379        }
380
381        let data =
382            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
383
384        // qname
385        utils::copy_memory(qname, data);
386        for i in 0..=extranul {
387            data[qname.len() + i] = b'\0';
388        }
389        let mut i = q_len + extranul;
390        self.inner_mut().core.l_qname = i as u16;
391        self.inner_mut().core.l_extranul = extranul as u8;
392
393        // cigar
394        if let Some(cigar_string) = cigar {
395            let cigar_data = unsafe {
396                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
397                #[allow(clippy::cast_ptr_alignment)]
398                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
399            };
400            for (i, c) in cigar_string.iter().enumerate() {
401                cigar_data[i] = c.encode();
402            }
403            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
404            i += cigar_string.len() * 4;
405        } else {
406            self.inner_mut().core.n_cigar = 0;
407        };
408
409        // seq
410        {
411            for j in (0..seq.len()).step_by(2) {
412                data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
413                    | (if j + 1 < seq.len() {
414                        ENCODE_BASE[seq[j + 1] as usize]
415                    } else {
416                        0
417                    });
418            }
419            self.inner_mut().core.l_qseq = seq.len() as i32;
420            i += (seq.len() + 1) / 2;
421        }
422
423        // qual
424        utils::copy_memory(qual, &mut data[i..]);
425    }
426
427    /// Replace current qname with a new one.
428    pub fn set_qname(&mut self, new_qname: &[u8]) {
429        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
430        assert!(new_qname.len() < 252);
431
432        let old_q_len = self.qname_capacity();
433        // We're going to add a terminal NUL
434        let extranul = extranul_from_qname(new_qname);
435        let new_q_len = new_qname.len() + 1 + extranul;
436
437        // Length of data after qname
438        let other_len = self.inner_mut().l_data - old_q_len as i32;
439
440        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
441            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
442        } else if new_q_len > old_q_len {
443            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
444
445            // Reallocate if necessary
446            if (self.inner().m_data as i32) < self.inner().l_data {
447                // Verbosity due to lexical borrowing
448                let l_data = self.inner().l_data;
449                self.realloc_var_data(l_data as usize);
450            }
451        }
452
453        if new_q_len != old_q_len {
454            // Move other data to new location
455            unsafe {
456                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
457
458                ::libc::memmove(
459                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
460                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
461                    other_len as usize,
462                );
463            }
464        }
465
466        // Copy qname data
467        let data =
468            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
469        utils::copy_memory(new_qname, data);
470        for i in 0..=extranul {
471            data[new_q_len - i - 1] = b'\0';
472        }
473        self.inner_mut().core.l_qname = new_q_len as u16;
474        self.inner_mut().core.l_extranul = extranul as u8;
475    }
476
477    fn realloc_var_data(&mut self, new_len: usize) {
478        // pad request
479        let new_len = new_len as u32;
480        let new_request = new_len + 32 - (new_len % 32);
481
482        let ptr = unsafe {
483            ::libc::realloc(
484                self.inner().data as *mut ::libc::c_void,
485                new_request as usize,
486            ) as *mut u8
487        };
488
489        if ptr.is_null() {
490            panic!("ran out of memory in rust_htslib trying to realloc");
491        }
492
493        // don't update m_data until we know we have
494        // a successful allocation.
495        self.inner_mut().m_data = new_request;
496        self.inner_mut().data = ptr;
497
498        // we now own inner.data
499        self.own = true;
500    }
501
502    pub fn cigar_len(&self) -> usize {
503        self.inner().core.n_cigar as usize
504    }
505
506    /// Get reference to raw cigar string representation (as stored in BAM file).
507    /// Usually, the method `Record::cigar` should be used instead.
508    pub fn raw_cigar(&self) -> &[u32] {
509        //cigar is always aligned to 4 bytes - so this is safe
510        #[allow(clippy::cast_ptr_alignment)]
511        unsafe {
512            slice::from_raw_parts(
513                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
514                self.cigar_len(),
515            )
516        }
517    }
518
519    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
520    pub fn cigar(&self) -> CigarStringView {
521        match self.cigar {
522            Some(ref c) => c.clone(),
523            None => self.unpack_cigar(),
524        }
525    }
526
527    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
528    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
529        self.cigar.as_ref()
530    }
531
532    /// Decode the cigar string and cache it inside the `Record`
533    pub fn cache_cigar(&mut self) {
534        self.cigar = Some(self.unpack_cigar())
535    }
536
537    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
538    fn unpack_cigar(&self) -> CigarStringView {
539        CigarString(
540            self.raw_cigar()
541                .iter()
542                .map(|&c| {
543                    let len = c >> 4;
544                    match c & 0b1111 {
545                        0 => Cigar::Match(len),
546                        1 => Cigar::Ins(len),
547                        2 => Cigar::Del(len),
548                        3 => Cigar::RefSkip(len),
549                        4 => Cigar::SoftClip(len),
550                        5 => Cigar::HardClip(len),
551                        6 => Cigar::Pad(len),
552                        7 => Cigar::Equal(len),
553                        8 => Cigar::Diff(len),
554                        _ => panic!("Unexpected cigar operation"),
555                    }
556                })
557                .collect(),
558        )
559        .into_view(self.pos())
560    }
561
562    pub fn seq_len(&self) -> usize {
563        self.inner().core.l_qseq as usize
564    }
565
566    fn seq_data(&self) -> &[u8] {
567        let offset = self.qname_capacity() + self.cigar_len() * 4;
568        &self.data()[offset..][..(self.seq_len() + 1) / 2]
569    }
570
571    /// Get read sequence. Complexity: O(1).
572    pub fn seq(&self) -> Seq<'_> {
573        Seq {
574            encoded: self.seq_data(),
575            len: self.seq_len(),
576        }
577    }
578
579    /// Get base qualities (PHRED-scaled probability that base is wrong).
580    /// This does not entail any offsets, hence the qualities can be used directly without
581    /// e.g. subtracting 33. Complexity: O(1).
582    pub fn qual(&self) -> &[u8] {
583        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
584            [..self.seq_len()]
585    }
586
587    /// Look up an auxiliary field by its tag.
588    ///
589    /// Only the first two bytes of a given tag are used for the look-up of a field.
590    /// See [`Aux`] for more details.
591    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
592        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
593        let aux = unsafe {
594            htslib::bam_aux_get(
595                &self.inner as *const htslib::bam1_t,
596                c_str.as_ptr() as *mut c_char,
597            )
598        };
599        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
600    }
601
602    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
603        const TAG_LEN: isize = 2;
604        // Used for skipping type identifier
605        const TYPE_ID_LEN: isize = 1;
606
607        if aux.is_null() {
608            return Err(Error::BamAuxTagNotFound);
609        }
610
611        let (data, type_size) = match *aux {
612            b'A' => {
613                let type_size = size_of::<u8>();
614                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
615            }
616            b'c' => {
617                let type_size = size_of::<i8>();
618                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
619            }
620            b'C' => {
621                let type_size = size_of::<u8>();
622                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
623            }
624            b's' => {
625                let type_size = size_of::<i16>();
626                (
627                    Aux::I16(
628                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
629                            .read_i16::<LittleEndian>()
630                            .map_err(|_| Error::BamAuxParsingError)?,
631                    ),
632                    type_size,
633                )
634            }
635            b'S' => {
636                let type_size = size_of::<u16>();
637                (
638                    Aux::U16(
639                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
640                            .read_u16::<LittleEndian>()
641                            .map_err(|_| Error::BamAuxParsingError)?,
642                    ),
643                    type_size,
644                )
645            }
646            b'i' => {
647                let type_size = size_of::<i32>();
648                (
649                    Aux::I32(
650                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
651                            .read_i32::<LittleEndian>()
652                            .map_err(|_| Error::BamAuxParsingError)?,
653                    ),
654                    type_size,
655                )
656            }
657            b'I' => {
658                let type_size = size_of::<u32>();
659                (
660                    Aux::U32(
661                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
662                            .read_u32::<LittleEndian>()
663                            .map_err(|_| Error::BamAuxParsingError)?,
664                    ),
665                    type_size,
666                )
667            }
668            b'f' => {
669                let type_size = size_of::<f32>();
670                (
671                    Aux::Float(
672                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
673                            .read_f32::<LittleEndian>()
674                            .map_err(|_| Error::BamAuxParsingError)?,
675                    ),
676                    type_size,
677                )
678            }
679            b'd' => {
680                let type_size = size_of::<f64>();
681                (
682                    Aux::Double(
683                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684                            .read_f64::<LittleEndian>()
685                            .map_err(|_| Error::BamAuxParsingError)?,
686                    ),
687                    type_size,
688                )
689            }
690            b'Z' | b'H' => {
691                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
692                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
693                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
694            }
695            b'B' => {
696                const ARRAY_INNER_TYPE_LEN: isize = 1;
697                const ARRAY_COUNT_LEN: isize = 4;
698
699                // Used for skipping metadata
700                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
701
702                let length =
703                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
704                        .read_u32::<LittleEndian>()
705                        .map_err(|_| Error::BamAuxParsingError)? as usize;
706
707                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
708                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
709                    b'c' => (
710                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
711                            aux.offset(array_data_offset),
712                            length,
713                        ))),
714                        length,
715                    ),
716                    b'C' => (
717                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
718                            aux.offset(array_data_offset),
719                            length,
720                        ))),
721                        length,
722                    ),
723                    b's' => (
724                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
725                            aux.offset(array_data_offset),
726                            length * size_of::<i16>(),
727                        ))),
728                        length * std::mem::size_of::<i16>(),
729                    ),
730                    b'S' => (
731                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
732                            aux.offset(array_data_offset),
733                            length * size_of::<u16>(),
734                        ))),
735                        length * std::mem::size_of::<u16>(),
736                    ),
737                    b'i' => (
738                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
739                            aux.offset(array_data_offset),
740                            length * size_of::<i32>(),
741                        ))),
742                        length * std::mem::size_of::<i32>(),
743                    ),
744                    b'I' => (
745                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
746                            aux.offset(array_data_offset),
747                            length * size_of::<u32>(),
748                        ))),
749                        length * std::mem::size_of::<u32>(),
750                    ),
751                    b'f' => (
752                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
753                            aux.offset(array_data_offset),
754                            length * size_of::<f32>(),
755                        ))),
756                        length * std::mem::size_of::<f32>(),
757                    ),
758                    _ => {
759                        return Err(Error::BamAuxUnknownType);
760                    }
761                };
762                (
763                    array_data,
764                    // Offset: array-specific metadata + array size
765                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
766                )
767            }
768            _ => {
769                return Err(Error::BamAuxUnknownType);
770            }
771        };
772
773        // Offset: metadata + type size
774        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
775    }
776
777    /// Returns an iterator over the auxiliary fields of the record.
778    ///
779    /// When an error occurs, the `Err` variant will be returned
780    /// and the iterator will not be able to advance anymore.
781    pub fn aux_iter(&self) -> AuxIter {
782        AuxIter {
783            // In order to get to the aux data section of a `bam::Record`
784            // we need to skip fields in front of it
785            aux: &self.data()[
786                // NUL terminated read name:
787                self.qname_capacity()
788                // CIGAR (uint32_t):
789                + self.cigar_len() * std::mem::size_of::<u32>()
790                // Read sequence (4-bit encoded):
791                + (self.seq_len() + 1) / 2
792                // Base qualities (char):
793                + self.seq_len()..],
794        }
795    }
796
797    /// Add auxiliary data.
798    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
799        // Don't allow pushing aux data when the given tag is already present in the record.
800        // `htslib` seems to allow this (for non-array values), which can lead to problems
801        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
802        if self.aux(tag).is_ok() {
803            return Err(Error::BamAuxTagAlreadyPresent);
804        }
805
806        let ctag = tag.as_ptr() as *mut c_char;
807        let ret = unsafe {
808            match value {
809                Aux::Char(v) => htslib::bam_aux_append(
810                    self.inner_ptr_mut(),
811                    ctag,
812                    b'A' as c_char,
813                    size_of::<u8>() as i32,
814                    [v].as_mut_ptr() as *mut u8,
815                ),
816                Aux::I8(v) => htslib::bam_aux_append(
817                    self.inner_ptr_mut(),
818                    ctag,
819                    b'c' as c_char,
820                    size_of::<i8>() as i32,
821                    [v].as_mut_ptr() as *mut u8,
822                ),
823                Aux::U8(v) => htslib::bam_aux_append(
824                    self.inner_ptr_mut(),
825                    ctag,
826                    b'C' as c_char,
827                    size_of::<u8>() as i32,
828                    [v].as_mut_ptr() as *mut u8,
829                ),
830                Aux::I16(v) => htslib::bam_aux_append(
831                    self.inner_ptr_mut(),
832                    ctag,
833                    b's' as c_char,
834                    size_of::<i16>() as i32,
835                    [v].as_mut_ptr() as *mut u8,
836                ),
837                Aux::U16(v) => htslib::bam_aux_append(
838                    self.inner_ptr_mut(),
839                    ctag,
840                    b'S' as c_char,
841                    size_of::<u16>() as i32,
842                    [v].as_mut_ptr() as *mut u8,
843                ),
844                Aux::I32(v) => htslib::bam_aux_append(
845                    self.inner_ptr_mut(),
846                    ctag,
847                    b'i' as c_char,
848                    size_of::<i32>() as i32,
849                    [v].as_mut_ptr() as *mut u8,
850                ),
851                Aux::U32(v) => htslib::bam_aux_append(
852                    self.inner_ptr_mut(),
853                    ctag,
854                    b'I' as c_char,
855                    size_of::<u32>() as i32,
856                    [v].as_mut_ptr() as *mut u8,
857                ),
858                Aux::Float(v) => htslib::bam_aux_append(
859                    self.inner_ptr_mut(),
860                    ctag,
861                    b'f' as c_char,
862                    size_of::<f32>() as i32,
863                    [v].as_mut_ptr() as *mut u8,
864                ),
865                // Not part of specs but implemented in `htslib`:
866                Aux::Double(v) => htslib::bam_aux_append(
867                    self.inner_ptr_mut(),
868                    ctag,
869                    b'd' as c_char,
870                    size_of::<f64>() as i32,
871                    [v].as_mut_ptr() as *mut u8,
872                ),
873                Aux::String(v) => {
874                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
875                    htslib::bam_aux_append(
876                        self.inner_ptr_mut(),
877                        ctag,
878                        b'Z' as c_char,
879                        (v.len() + 1) as i32,
880                        c_str.as_ptr() as *mut u8,
881                    )
882                }
883                Aux::HexByteArray(v) => {
884                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
885                    htslib::bam_aux_append(
886                        self.inner_ptr_mut(),
887                        ctag,
888                        b'H' as c_char,
889                        (v.len() + 1) as i32,
890                        c_str.as_ptr() as *mut u8,
891                    )
892                }
893                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
894                Aux::ArrayI8(aux_array) => match aux_array {
895                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
896                        self.inner_ptr_mut(),
897                        ctag,
898                        b'c',
899                        inner.len() as u32,
900                        inner.slice.as_ptr() as *mut ::libc::c_void,
901                    ),
902                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
903                        self.inner_ptr_mut(),
904                        ctag,
905                        b'c',
906                        inner.len() as u32,
907                        inner.slice.as_ptr() as *mut ::libc::c_void,
908                    ),
909                },
910                Aux::ArrayU8(aux_array) => match aux_array {
911                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
912                        self.inner_ptr_mut(),
913                        ctag,
914                        b'C',
915                        inner.len() as u32,
916                        inner.slice.as_ptr() as *mut ::libc::c_void,
917                    ),
918                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
919                        self.inner_ptr_mut(),
920                        ctag,
921                        b'C',
922                        inner.len() as u32,
923                        inner.slice.as_ptr() as *mut ::libc::c_void,
924                    ),
925                },
926                Aux::ArrayI16(aux_array) => match aux_array {
927                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
928                        self.inner_ptr_mut(),
929                        ctag,
930                        b's',
931                        inner.len() as u32,
932                        inner.slice.as_ptr() as *mut ::libc::c_void,
933                    ),
934                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
935                        self.inner_ptr_mut(),
936                        ctag,
937                        b's',
938                        inner.len() as u32,
939                        inner.slice.as_ptr() as *mut ::libc::c_void,
940                    ),
941                },
942                Aux::ArrayU16(aux_array) => match aux_array {
943                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
944                        self.inner_ptr_mut(),
945                        ctag,
946                        b'S',
947                        inner.len() as u32,
948                        inner.slice.as_ptr() as *mut ::libc::c_void,
949                    ),
950                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
951                        self.inner_ptr_mut(),
952                        ctag,
953                        b'S',
954                        inner.len() as u32,
955                        inner.slice.as_ptr() as *mut ::libc::c_void,
956                    ),
957                },
958                Aux::ArrayI32(aux_array) => match aux_array {
959                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
960                        self.inner_ptr_mut(),
961                        ctag,
962                        b'i',
963                        inner.len() as u32,
964                        inner.slice.as_ptr() as *mut ::libc::c_void,
965                    ),
966                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
967                        self.inner_ptr_mut(),
968                        ctag,
969                        b'i',
970                        inner.len() as u32,
971                        inner.slice.as_ptr() as *mut ::libc::c_void,
972                    ),
973                },
974                Aux::ArrayU32(aux_array) => match aux_array {
975                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
976                        self.inner_ptr_mut(),
977                        ctag,
978                        b'I',
979                        inner.len() as u32,
980                        inner.slice.as_ptr() as *mut ::libc::c_void,
981                    ),
982                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
983                        self.inner_ptr_mut(),
984                        ctag,
985                        b'I',
986                        inner.len() as u32,
987                        inner.slice.as_ptr() as *mut ::libc::c_void,
988                    ),
989                },
990                Aux::ArrayFloat(aux_array) => match aux_array {
991                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
992                        self.inner_ptr_mut(),
993                        ctag,
994                        b'f',
995                        inner.len() as u32,
996                        inner.slice.as_ptr() as *mut ::libc::c_void,
997                    ),
998                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
999                        self.inner_ptr_mut(),
1000                        ctag,
1001                        b'f',
1002                        inner.len() as u32,
1003                        inner.slice.as_ptr() as *mut ::libc::c_void,
1004                    ),
1005                },
1006            }
1007        };
1008
1009        if ret < 0 {
1010            Err(Error::BamAux)
1011        } else {
1012            Ok(())
1013        }
1014    }
1015
1016    // Delete auxiliary tag.
1017    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1018        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1019        let aux = unsafe {
1020            htslib::bam_aux_get(
1021                &self.inner as *const htslib::bam1_t,
1022                c_str.as_ptr() as *mut c_char,
1023            )
1024        };
1025        unsafe {
1026            if aux.is_null() {
1027                Err(Error::BamAuxTagNotFound)
1028            } else {
1029                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1030                Ok(())
1031            }
1032        }
1033    }
1034
1035    /// Access the base modifications associated with this Record through the MM tag.
1036    /// Example:
1037    /// ```
1038    ///    use rust_htslib::bam::{Read, Reader, Record};
1039    ///    let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
1040    ///    let mut mod_count = 0;
1041    ///    for r in bam.records() {
1042    ///        let record = r.unwrap();
1043    ///        if let Ok(mods) = record.basemods_iter() {
1044    ///            // print metadata for the modifications present in this record
1045    ///            for mod_code in mods.recorded() {
1046    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1047    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1048    ///                              mod_code, *mod_code as u8 as char,
1049    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1050    ///                }
1051    ///            }
1052    ///
1053    ///            // iterate over the modifications in this record
1054    ///            // the modifications are returned as a tuple with the
1055    ///            // position within SEQ and an hts_base_mod struct
1056    ///            for res in mods {
1057    ///                if let Ok( (position, m) ) = res {
1058    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1059    ///                    mod_count += 1;
1060    ///                }
1061    ///            }
1062    ///        };
1063    ///    }
1064    ///    assert_eq!(mod_count, 14);
1065    /// ```
1066    pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1067        BaseModificationsIter::new(self)
1068    }
1069
1070    /// An iterator that returns all of the modifications for each position as a vector.
1071    /// This is useful for the case where multiple possible modifications can be annotated
1072    /// at a single position (for example a C could be 5-mC or 5-hmC)
1073    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1074        BaseModificationsPositionIter::new(self)
1075    }
1076
1077    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1078    /// is not paired, mates are not mapping to the same contig, or mates start at the
1079    /// same position.
1080    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1081        if self.is_paired()
1082            && !self.is_unmapped()
1083            && !self.is_mate_unmapped()
1084            && self.tid() == self.mtid()
1085        {
1086            if self.pos() == self.mpos() {
1087                // both reads start at the same position, we cannot decide on the orientation.
1088                return SequenceReadPairOrientation::None;
1089            }
1090
1091            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1092                (
1093                    self.pos(),
1094                    self.mpos(),
1095                    !self.is_reverse(),
1096                    !self.is_mate_reverse(),
1097                )
1098            } else {
1099                (
1100                    self.mpos(),
1101                    self.pos(),
1102                    !self.is_mate_reverse(),
1103                    !self.is_reverse(),
1104                )
1105            };
1106
1107            if pos_1 < pos_2 {
1108                match (fwd_1, fwd_2) {
1109                    (true, true) => SequenceReadPairOrientation::F1F2,
1110                    (true, false) => SequenceReadPairOrientation::F1R2,
1111                    (false, true) => SequenceReadPairOrientation::R1F2,
1112                    (false, false) => SequenceReadPairOrientation::R1R2,
1113                }
1114            } else {
1115                match (fwd_2, fwd_1) {
1116                    (true, true) => SequenceReadPairOrientation::F2F1,
1117                    (true, false) => SequenceReadPairOrientation::F2R1,
1118                    (false, true) => SequenceReadPairOrientation::R2F1,
1119                    (false, false) => SequenceReadPairOrientation::R2R1,
1120                }
1121            }
1122        } else {
1123            SequenceReadPairOrientation::None
1124        }
1125    }
1126
1127    flag!(is_paired, set_paired, unset_paired, 1u16);
1128    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1129    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1130    flag!(
1131        is_mate_unmapped,
1132        set_mate_unmapped,
1133        unset_mate_unmapped,
1134        8u16
1135    );
1136    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1137    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1138    flag!(
1139        is_first_in_template,
1140        set_first_in_template,
1141        unset_first_in_template,
1142        64u16
1143    );
1144    flag!(
1145        is_last_in_template,
1146        set_last_in_template,
1147        unset_last_in_template,
1148        128u16
1149    );
1150    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1151    flag!(
1152        is_quality_check_failed,
1153        set_quality_check_failed,
1154        unset_quality_check_failed,
1155        512u16
1156    );
1157    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1158    flag!(
1159        is_supplementary,
1160        set_supplementary,
1161        unset_supplementary,
1162        2048u16
1163    );
1164}
1165
1166impl Drop for Record {
1167    fn drop(&mut self) {
1168        if self.own {
1169            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1170        }
1171    }
1172}
1173
1174impl SequenceRead for Record {
1175    fn name(&self) -> &[u8] {
1176        self.qname()
1177    }
1178
1179    fn base(&self, i: usize) -> u8 {
1180        *decode_base_unchecked(encoded_base(self.seq_data(), i))
1181    }
1182
1183    fn base_qual(&self, i: usize) -> u8 {
1184        self.qual()[i]
1185    }
1186
1187    fn len(&self) -> usize {
1188        self.seq_len()
1189    }
1190
1191    fn is_empty(&self) -> bool {
1192        self.len() == 0
1193    }
1194}
1195
1196impl genome::AbstractInterval for Record {
1197    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1198    fn contig(&self) -> &str {
1199        let tid = self.tid();
1200        if tid < 0 {
1201            panic!("invalid tid, must be at least zero");
1202        }
1203        str::from_utf8(
1204            self.header
1205                .as_ref()
1206                .expect(
1207                    "header must be set (this is the case if the record has been read from a file)",
1208                )
1209                .tid2name(tid as u32),
1210        )
1211        .expect("unable to interpret contig name as UTF-8")
1212    }
1213
1214    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1215    fn range(&self) -> ops::Range<genome::Position> {
1216        let end_pos = self
1217            .cigar_cached()
1218            .expect("cigar has not been cached yet, call cache_cigar() first")
1219            .end_pos() as u64;
1220
1221        if self.pos() < 0 {
1222            panic!("invalid position, must be positive")
1223        }
1224
1225        self.pos() as u64..end_pos
1226    }
1227}
1228
1229/// Auxiliary record data
1230///
1231/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1232///
1233/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1234///
1235/// # Examples
1236///
1237/// ```
1238/// use rust_htslib::{
1239///     bam,
1240///     bam::record::{Aux, AuxArray},
1241///     errors::Error,
1242/// };
1243///
1244/// //Set up BAM record
1245/// let bam_header = bam::Header::new();
1246/// let mut record = bam::Record::from_sam(
1247///     &mut bam::HeaderView::from_header(&bam_header),
1248///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1249/// )
1250/// .unwrap();
1251///
1252/// // Add an integer field
1253/// let aux_integer_field = Aux::I32(1234);
1254/// record.push_aux(b"XI", aux_integer_field).unwrap();
1255///
1256/// match record.aux(b"XI") {
1257///     Ok(value) => {
1258///         // Typically, callers expect an aux field to be of a certain type.
1259///         // If that's not the case, the value can be `match`ed exhaustively.
1260///         if let Aux::I32(v) = value {
1261///             assert_eq!(v, 1234);
1262///         }
1263///     }
1264///     Err(e) => {
1265///         panic!("Error reading aux field: {}", e);
1266///     }
1267/// }
1268///
1269/// // Add an array field
1270/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1271/// let slice_of_data = &array_like_data;
1272/// let aux_array: AuxArray<f32> = slice_of_data.into();
1273/// let aux_array_field = Aux::ArrayFloat(aux_array);
1274/// record.push_aux(b"XA", aux_array_field).unwrap();
1275///
1276/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1277///     let read_array = array.iter().collect::<Vec<_>>();
1278///     assert_eq!(read_array, array_like_data);
1279/// } else {
1280///     panic!("Could not read array data");
1281/// }
1282/// ```
1283#[derive(Debug, PartialEq)]
1284pub enum Aux<'a> {
1285    Char(u8),
1286    I8(i8),
1287    U8(u8),
1288    I16(i16),
1289    U16(u16),
1290    I32(i32),
1291    U32(u32),
1292    Float(f32),
1293    Double(f64), // Not part of specs but implemented in `htslib`
1294    String(&'a str),
1295    HexByteArray(&'a str),
1296    ArrayI8(AuxArray<'a, i8>),
1297    ArrayU8(AuxArray<'a, u8>),
1298    ArrayI16(AuxArray<'a, i16>),
1299    ArrayU16(AuxArray<'a, u16>),
1300    ArrayI32(AuxArray<'a, i32>),
1301    ArrayU32(AuxArray<'a, u32>),
1302    ArrayFloat(AuxArray<'a, f32>),
1303}
1304
1305unsafe impl<'a> Send for Aux<'a> {}
1306unsafe impl<'a> Sync for Aux<'a> {}
1307
1308/// Types that can be used in aux arrays.
1309pub trait AuxArrayElement: Copy {
1310    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1311}
1312
1313impl AuxArrayElement for i8 {
1314    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1315        std::io::Cursor::new(bytes).read_i8().ok()
1316    }
1317}
1318impl AuxArrayElement for u8 {
1319    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1320        std::io::Cursor::new(bytes).read_u8().ok()
1321    }
1322}
1323impl AuxArrayElement for i16 {
1324    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1325        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1326    }
1327}
1328impl AuxArrayElement for u16 {
1329    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1330        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1331    }
1332}
1333impl AuxArrayElement for i32 {
1334    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1335        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1336    }
1337}
1338impl AuxArrayElement for u32 {
1339    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1340        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1341    }
1342}
1343impl AuxArrayElement for f32 {
1344    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1345        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1346    }
1347}
1348
1349/// Provides access to aux arrays.
1350///
1351/// Provides methods to either retrieve single elements or an iterator over the
1352/// array.
1353///
1354/// This type is used for wrapping both, array data that was read from a
1355/// BAM record and slices of data that are going to be stored in one.
1356///
1357/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1358/// can be constructed via the `From` trait which is implemented for all
1359/// supported types (see [`AuxArrayElement`] for a list).
1360///
1361/// # Examples
1362///
1363/// ```
1364/// use rust_htslib::{
1365///     bam,
1366///     bam::record::{Aux, AuxArray},
1367/// };
1368///
1369/// //Set up BAM record
1370/// let bam_header = bam::Header::new();
1371/// let mut record = bam::Record::from_sam(
1372///     &mut bam::HeaderView::from_header(&bam_header),
1373///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1374/// ).unwrap();
1375///
1376/// let data = vec![0.4, 0.3, 0.2, 0.1];
1377/// let slice_of_data = &data;
1378/// let aux_array: AuxArray<f32> = slice_of_data.into();
1379/// let aux_field = Aux::ArrayFloat(aux_array);
1380/// record.push_aux(b"XA", aux_field);
1381///
1382/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1383///     // Retrieve the second element from the array
1384///     assert_eq!(array.get(1).unwrap(), 0.3);
1385///     // Iterate over the array and collect it into a `Vec`
1386///     let read_array = array.iter().collect::<Vec<_>>();
1387///     assert_eq!(read_array, data);
1388/// } else {
1389///     panic!("Could not read array data");
1390/// }
1391/// ```
1392#[derive(Debug)]
1393pub enum AuxArray<'a, T> {
1394    TargetType(AuxArrayTargetType<'a, T>),
1395    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1396}
1397
1398impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1399where
1400    T: AuxArrayElement + PartialEq,
1401{
1402    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1403        use AuxArray::*;
1404        match (self, other) {
1405            (TargetType(v), TargetType(v_other)) => v == v_other,
1406            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1407            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1408            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1409        }
1410    }
1411}
1412
1413/// Create AuxArrays from slices of allowed target types.
1414impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1415where
1416    I: AuxArrayElement,
1417    T: AsRef<[I]> + ?Sized,
1418{
1419    fn from(src: &'a T) -> Self {
1420        AuxArray::TargetType(AuxArrayTargetType {
1421            slice: src.as_ref(),
1422        })
1423    }
1424}
1425
1426impl<'a, T> AuxArray<'a, T>
1427where
1428    T: AuxArrayElement,
1429{
1430    /// Returns the element at a position or None if out of bounds.
1431    pub fn get(&self, index: usize) -> Option<T> {
1432        match self {
1433            AuxArray::TargetType(v) => v.get(index),
1434            AuxArray::RawLeBytes(v) => v.get(index),
1435        }
1436    }
1437
1438    /// Returns the number of elements in the array.
1439    pub fn len(&self) -> usize {
1440        match self {
1441            AuxArray::TargetType(a) => a.len(),
1442            AuxArray::RawLeBytes(a) => a.len(),
1443        }
1444    }
1445
1446    /// Returns true if the array contains no elements.
1447    pub fn is_empty(&self) -> bool {
1448        self.len() == 0
1449    }
1450
1451    /// Returns an iterator over the array.
1452    pub fn iter(&self) -> AuxArrayIter<T> {
1453        AuxArrayIter {
1454            index: 0,
1455            array: self,
1456        }
1457    }
1458
1459    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1460    fn from_bytes(bytes: &'a [u8]) -> Self {
1461        Self::RawLeBytes(AuxArrayRawLeBytes {
1462            slice: bytes,
1463            phantom_data: PhantomData,
1464        })
1465    }
1466}
1467
1468/// Encapsulates slice of target type.
1469#[doc(hidden)]
1470#[derive(Debug, PartialEq)]
1471pub struct AuxArrayTargetType<'a, T> {
1472    slice: &'a [T],
1473}
1474
1475impl<'a, T> AuxArrayTargetType<'a, T>
1476where
1477    T: AuxArrayElement,
1478{
1479    fn get(&self, index: usize) -> Option<T> {
1480        self.slice.get(index).copied()
1481    }
1482
1483    fn len(&self) -> usize {
1484        self.slice.len()
1485    }
1486}
1487
1488/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1489#[doc(hidden)]
1490#[derive(Debug, PartialEq)]
1491pub struct AuxArrayRawLeBytes<'a, T> {
1492    slice: &'a [u8],
1493    phantom_data: PhantomData<T>,
1494}
1495
1496impl<'a, T> AuxArrayRawLeBytes<'a, T>
1497where
1498    T: AuxArrayElement,
1499{
1500    fn get(&self, index: usize) -> Option<T> {
1501        let type_size = std::mem::size_of::<T>();
1502        if index * type_size + type_size > self.slice.len() {
1503            return None;
1504        }
1505        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1506    }
1507
1508    fn len(&self) -> usize {
1509        self.slice.len() / std::mem::size_of::<T>()
1510    }
1511}
1512
1513/// Aux array iterator
1514///
1515/// This struct is created by the [`AuxArray::iter`] method.
1516pub struct AuxArrayIter<'a, T> {
1517    index: usize,
1518    array: &'a AuxArray<'a, T>,
1519}
1520
1521impl<T> Iterator for AuxArrayIter<'_, T>
1522where
1523    T: AuxArrayElement,
1524{
1525    type Item = T;
1526    fn next(&mut self) -> Option<Self::Item> {
1527        let value = self.array.get(self.index);
1528        self.index += 1;
1529        value
1530    }
1531
1532    fn size_hint(&self) -> (usize, Option<usize>) {
1533        let array_length = self.array.len() - self.index;
1534        (array_length, Some(array_length))
1535    }
1536}
1537
1538/// Auxiliary data iterator
1539///
1540/// This struct is created by the [`Record::aux_iter`] method.
1541///
1542/// This iterator returns `Result`s that wrap tuples containing
1543/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1544/// well as an `Aux` enum that wraps the associated value.
1545///
1546/// When an error occurs, the `Err` variant will be returned
1547/// and the iterator will not be able to advance anymore.
1548pub struct AuxIter<'a> {
1549    aux: &'a [u8],
1550}
1551
1552impl<'a> Iterator for AuxIter<'a> {
1553    type Item = Result<(&'a [u8], Aux<'a>)>;
1554
1555    fn next(&mut self) -> Option<Self::Item> {
1556        // We're finished
1557        if self.aux.is_empty() {
1558            return None;
1559        }
1560        // Incomplete aux data
1561        if (1..=3).contains(&self.aux.len()) {
1562            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1563            self.aux = &[];
1564            return Some(Err(Error::BamAuxParsingError));
1565        }
1566        let tag = &self.aux[..2];
1567        Some(unsafe {
1568            let data_ptr = self.aux[2..].as_ptr();
1569            Record::read_aux_field(data_ptr)
1570                .map(|(aux, offset)| {
1571                    self.aux = &self.aux[offset..];
1572                    (tag, aux)
1573                })
1574                .map_err(|e| {
1575                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1576                    self.aux = &[];
1577                    e
1578                })
1579        })
1580    }
1581}
1582
1583static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1584static ENCODE_BASE: [u8; 256] = [
1585    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1586    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1587    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1588    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1589    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1590    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1591    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1592    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1593    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1594    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1595    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1596];
1597
1598#[inline]
1599fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1600    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1601}
1602
1603#[inline]
1604unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1605    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1606}
1607
1608#[inline]
1609fn decode_base_unchecked(base: u8) -> &'static u8 {
1610    unsafe { DECODE_BASE.get_unchecked(base as usize) }
1611}
1612
1613/// The sequence of a record.
1614#[derive(Debug, Copy, Clone)]
1615pub struct Seq<'a> {
1616    pub encoded: &'a [u8],
1617    len: usize,
1618}
1619
1620impl<'a> Seq<'a> {
1621    /// Return encoded base. Complexity: O(1).
1622    #[inline]
1623    pub fn encoded_base(&self, i: usize) -> u8 {
1624        encoded_base(self.encoded, i)
1625    }
1626
1627    /// Return encoded base. Complexity: O(1).
1628    #[inline]
1629    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1630        encoded_base_unchecked(self.encoded, i)
1631    }
1632
1633    /// Obtain decoded base without performing bounds checking.
1634    /// Use index based access seq()[i], for checked, safe access.
1635    /// Complexity: O(1).
1636    #[inline]
1637    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1638        *decode_base_unchecked(self.encoded_base_unchecked(i))
1639    }
1640
1641    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1642    pub fn as_bytes(&self) -> Vec<u8> {
1643        (0..self.len()).map(|i| self[i]).collect()
1644    }
1645
1646    /// Return length (in bases) of the sequence.
1647    pub fn len(&self) -> usize {
1648        self.len
1649    }
1650
1651    pub fn is_empty(&self) -> bool {
1652        self.len() == 0
1653    }
1654}
1655
1656impl<'a> ops::Index<usize> for Seq<'a> {
1657    type Output = u8;
1658
1659    /// Return decoded base at given position within read. Complexity: O(1).
1660    fn index(&self, index: usize) -> &u8 {
1661        decode_base_unchecked(self.encoded_base(index))
1662    }
1663}
1664
1665unsafe impl<'a> Send for Seq<'a> {}
1666unsafe impl<'a> Sync for Seq<'a> {}
1667
1668#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1669#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1670pub enum Cigar {
1671    Match(u32),    // M
1672    Ins(u32),      // I
1673    Del(u32),      // D
1674    RefSkip(u32),  // N
1675    SoftClip(u32), // S
1676    HardClip(u32), // H
1677    Pad(u32),      // P
1678    Equal(u32),    // =
1679    Diff(u32),     // X
1680}
1681
1682impl Cigar {
1683    fn encode(self) -> u32 {
1684        match self {
1685            Cigar::Match(len) => len << 4, // | 0,
1686            Cigar::Ins(len) => len << 4 | 1,
1687            Cigar::Del(len) => len << 4 | 2,
1688            Cigar::RefSkip(len) => len << 4 | 3,
1689            Cigar::SoftClip(len) => len << 4 | 4,
1690            Cigar::HardClip(len) => len << 4 | 5,
1691            Cigar::Pad(len) => len << 4 | 6,
1692            Cigar::Equal(len) => len << 4 | 7,
1693            Cigar::Diff(len) => len << 4 | 8,
1694        }
1695    }
1696
1697    /// Return the length of the CIGAR.
1698    pub fn len(self) -> u32 {
1699        match self {
1700            Cigar::Match(len) => len,
1701            Cigar::Ins(len) => len,
1702            Cigar::Del(len) => len,
1703            Cigar::RefSkip(len) => len,
1704            Cigar::SoftClip(len) => len,
1705            Cigar::HardClip(len) => len,
1706            Cigar::Pad(len) => len,
1707            Cigar::Equal(len) => len,
1708            Cigar::Diff(len) => len,
1709        }
1710    }
1711
1712    pub fn is_empty(self) -> bool {
1713        self.len() == 0
1714    }
1715
1716    /// Return the character representing the CIGAR.
1717    pub fn char(self) -> char {
1718        match self {
1719            Cigar::Match(_) => 'M',
1720            Cigar::Ins(_) => 'I',
1721            Cigar::Del(_) => 'D',
1722            Cigar::RefSkip(_) => 'N',
1723            Cigar::SoftClip(_) => 'S',
1724            Cigar::HardClip(_) => 'H',
1725            Cigar::Pad(_) => 'P',
1726            Cigar::Equal(_) => '=',
1727            Cigar::Diff(_) => 'X',
1728        }
1729    }
1730}
1731
1732impl fmt::Display for Cigar {
1733    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1734        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1735    }
1736}
1737
1738unsafe impl Send for Cigar {}
1739unsafe impl Sync for Cigar {}
1740
1741custom_derive! {
1742    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1743    ///
1744    /// # Example
1745    ///
1746    /// ```
1747    /// use rust_htslib::bam::record::{Cigar, CigarString};
1748    ///
1749    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1750    ///
1751    /// // access by index
1752    /// assert_eq!(cigar[0], Cigar::Match(100));
1753    /// // format into classical string representation
1754    /// assert_eq!(format!("{}", cigar), "100M10S");
1755    /// // iterate
1756    /// for op in &cigar {
1757    ///    println!("{}", op);
1758    /// }
1759    /// ```
1760    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1761    #[derive(NewtypeDeref,
1762            NewtypeDerefMut,
1763             NewtypeIndex(usize),
1764             NewtypeIndexMut(usize),
1765             NewtypeFrom,
1766             PartialEq,
1767             PartialOrd,
1768             Eq,
1769             NewtypeDebug,
1770             Clone,
1771             Hash
1772    )]
1773    pub struct CigarString(pub Vec<Cigar>);
1774}
1775
1776impl CigarString {
1777    /// Create a `CigarStringView` from this CigarString at position `pos`
1778    pub fn into_view(self, pos: i64) -> CigarStringView {
1779        CigarStringView::new(self, pos)
1780    }
1781
1782    /// Calculate the bam cigar from the alignment struct. x is the target string
1783    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
1784    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
1785    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
1786    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1787        match alignment.mode {
1788            AlignmentMode::Global => {
1789                panic!(" Bam cigar fn not supported for Global Alignment mode")
1790            }
1791            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1792            _ => {}
1793        }
1794
1795        let mut cigar = Vec::new();
1796        if alignment.operations.is_empty() {
1797            return CigarString(cigar);
1798        }
1799
1800        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1801            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1802            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1803            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1804            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1805            _ => {}
1806        };
1807
1808        if alignment.xstart > 0 {
1809            cigar.push(if hard_clip {
1810                Cigar::HardClip(alignment.xstart as u32)
1811            } else {
1812                Cigar::SoftClip(alignment.xstart as u32)
1813            });
1814        }
1815
1816        let mut last = alignment.operations[0];
1817        let mut k = 1u32;
1818        for &op in alignment.operations[1..].iter() {
1819            if op == last {
1820                k += 1;
1821            } else {
1822                add_op(last, k, &mut cigar);
1823                k = 1;
1824            }
1825            last = op;
1826        }
1827        add_op(last, k, &mut cigar);
1828        if alignment.xlen > alignment.xend {
1829            cigar.push(if hard_clip {
1830                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1831            } else {
1832                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1833            });
1834        }
1835
1836        CigarString(cigar)
1837    }
1838}
1839
1840impl TryFrom<&[u8]> for CigarString {
1841    type Error = Error;
1842
1843    /// Create a CigarString from given &[u8].
1844    /// # Example
1845    /// ```
1846    /// use rust_htslib::bam::record::*;
1847    /// use rust_htslib::bam::record::CigarString;
1848    /// use rust_htslib::bam::record::Cigar::*;
1849    /// use std::convert::TryFrom;
1850    ///
1851    /// let cigar_str = "2H10M5X3=2H".as_bytes();
1852    /// let cigar = CigarString::try_from(cigar_str)
1853    ///     .expect("Unable to parse cigar string.");
1854    /// let expected_cigar = CigarString(vec![
1855    ///     HardClip(2),
1856    ///     Match(10),
1857    ///     Diff(5),
1858    ///     Equal(3),
1859    ///     HardClip(2),
1860    /// ]);
1861    /// assert_eq!(cigar, expected_cigar);
1862    /// ```
1863    fn try_from(bytes: &[u8]) -> Result<Self> {
1864        let mut inner = Vec::new();
1865        let mut i = 0;
1866        let text_len = bytes.len();
1867        while i < text_len {
1868            let mut j = i;
1869            while j < text_len && bytes[j].is_ascii_digit() {
1870                j += 1;
1871            }
1872            // check that length is provided
1873            if i == j {
1874                return Err(Error::BamParseCigar {
1875                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1876                });
1877            }
1878            // get the length of the operation
1879            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1880                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1881            })?;
1882            let n = s.parse().map_err(|_| Error::BamParseCigar {
1883                msg: format!("Unable to parse &str '{:?}' to u32.", s),
1884            })?;
1885            // get the operation
1886            let op = &bytes[j];
1887            inner.push(match op {
1888                b'M' => Cigar::Match(n),
1889                b'I' => Cigar::Ins(n),
1890                b'D' => Cigar::Del(n),
1891                b'N' => Cigar::RefSkip(n),
1892                b'H' => {
1893                    if i == 0 || j + 1 == text_len {
1894                        Cigar::HardClip(n)
1895                    } else {
1896                        return Err(Error::BamParseCigar {
1897                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1898                                .to_owned(),
1899                        });
1900                    }
1901                }
1902                b'S' => {
1903                    if i == 0
1904                        || j + 1 == text_len
1905                        || bytes[i-1] == b'H'
1906                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1907                        Cigar::SoftClip(n)
1908                    } else {
1909                        return Err(Error::BamParseCigar {
1910                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1911                            .to_owned(),
1912                        });
1913                    }
1914                },
1915                b'P' => Cigar::Pad(n),
1916                b'=' => Cigar::Equal(n),
1917                b'X' => Cigar::Diff(n),
1918                op => {
1919                    return Err(Error::BamParseCigar {
1920                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1921                    })
1922                }
1923            });
1924            i = j + 1;
1925        }
1926        Ok(CigarString(inner))
1927    }
1928}
1929
1930impl TryFrom<&str> for CigarString {
1931    type Error = Error;
1932
1933    /// Create a CigarString from given &str.
1934    /// # Example
1935    /// ```
1936    /// use rust_htslib::bam::record::*;
1937    /// use rust_htslib::bam::record::CigarString;
1938    /// use rust_htslib::bam::record::Cigar::*;
1939    /// use std::convert::TryFrom;
1940    ///
1941    /// let cigar_str = "2H10M5X3=2H";
1942    /// let cigar = CigarString::try_from(cigar_str)
1943    ///     .expect("Unable to parse cigar string.");
1944    /// let expected_cigar = CigarString(vec![
1945    ///     HardClip(2),
1946    ///     Match(10),
1947    ///     Diff(5),
1948    ///     Equal(3),
1949    ///     HardClip(2),
1950    /// ]);
1951    /// assert_eq!(cigar, expected_cigar);
1952    /// ```
1953    fn try_from(text: &str) -> Result<Self> {
1954        let bytes = text.as_bytes();
1955        if text.chars().count() != bytes.len() {
1956            return Err(Error::BamParseCigar {
1957                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1958            });
1959        }
1960        CigarString::try_from(bytes)
1961    }
1962}
1963
1964impl<'a> CigarString {
1965    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1966        self.into_iter()
1967    }
1968}
1969
1970impl<'a> IntoIterator for &'a CigarString {
1971    type Item = &'a Cigar;
1972    type IntoIter = ::std::slice::Iter<'a, Cigar>;
1973
1974    fn into_iter(self) -> Self::IntoIter {
1975        self.0.iter()
1976    }
1977}
1978
1979impl fmt::Display for CigarString {
1980    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1981        for op in self {
1982            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
1983        }
1984        Ok(())
1985    }
1986}
1987
1988// Get number of leading/trailing softclips if a CigarString taking hardclips into account
1989fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
1990    match (cigar.next(), cigar.next()) {
1991        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
1992            *s as i64
1993        }
1994        _ => 0,
1995    }
1996}
1997
1998#[derive(Eq, PartialEq, Clone, Debug)]
1999pub struct CigarStringView {
2000    inner: CigarString,
2001    pos: i64,
2002}
2003
2004impl CigarStringView {
2005    /// Construct a new CigarStringView from a CigarString at a position
2006    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2007        CigarStringView { inner: c, pos }
2008    }
2009
2010    /// Get (exclusive) end position of alignment.
2011    pub fn end_pos(&self) -> i64 {
2012        let mut pos = self.pos;
2013        for c in self {
2014            match c {
2015                Cigar::Match(l)
2016                | Cigar::RefSkip(l)
2017                | Cigar::Del(l)
2018                | Cigar::Equal(l)
2019                | Cigar::Diff(l) => pos += *l as i64,
2020                // these don't add to end_pos on reference
2021                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2022            }
2023        }
2024        pos
2025    }
2026
2027    /// Get the start position of the alignment (0-based).
2028    pub fn pos(&self) -> i64 {
2029        self.pos
2030    }
2031
2032    /// Get number of bases softclipped at the beginning of the alignment.
2033    pub fn leading_softclips(&self) -> i64 {
2034        calc_softclips(self.iter())
2035    }
2036
2037    /// Get number of bases softclipped at the end of the alignment.
2038    pub fn trailing_softclips(&self) -> i64 {
2039        calc_softclips(self.iter().rev())
2040    }
2041
2042    /// Get number of bases hardclipped at the beginning of the alignment.
2043    pub fn leading_hardclips(&self) -> i64 {
2044        self.first().map_or(0, |cigar| {
2045            if let Cigar::HardClip(s) = cigar {
2046                *s as i64
2047            } else {
2048                0
2049            }
2050        })
2051    }
2052
2053    /// Get number of bases hardclipped at the end of the alignment.
2054    pub fn trailing_hardclips(&self) -> i64 {
2055        self.last().map_or(0, |cigar| {
2056            if let Cigar::HardClip(s) = cigar {
2057                *s as i64
2058            } else {
2059                0
2060            }
2061        })
2062    }
2063
2064    /// For a given position in the reference, get corresponding position within read.
2065    /// If reference position is outside of the read alignment, return None.
2066    ///
2067    /// # Arguments
2068    ///
2069    /// * `ref_pos` - the reference position
2070    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2071    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2072    ///
2073    pub fn read_pos(
2074        &self,
2075        ref_pos: u32,
2076        include_softclips: bool,
2077        include_dels: bool,
2078    ) -> Result<Option<u32>> {
2079        let mut rpos = self.pos as u32; // reference position
2080        let mut qpos = 0u32; // position within read
2081        let mut j = 0; // index into cigar operation vector
2082
2083        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2084        // because all augmentations of qpos and rpos before that are invalid
2085        for (i, c) in self.iter().enumerate() {
2086            match c {
2087                Cigar::Match(_) |
2088                Cigar::Diff(_)  |
2089                Cigar::Equal(_) |
2090                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2091                // before matching positions
2092                Cigar::Ins(_) => {
2093                    j = i;
2094                    break;
2095                },
2096                Cigar::SoftClip(l) => {
2097                    j = i;
2098                    if include_softclips {
2099                        // Alignment starts with softclip and we want to include it in the
2100                        // projection of the reference position. However, the POS field does not
2101                        // include the softclip. Hence we have to subtract its length.
2102                        rpos = rpos.saturating_sub(*l);
2103                    }
2104                    break;
2105                },
2106                Cigar::Del(l) => {
2107                    // METHOD: leading deletions can happen in case of trimmed reads where
2108                    // a primer has been removed AFTER read mapping.
2109                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2110                    // with fgbio. While leading deletions should be impossible with
2111                    // normal read mapping, they make perfect sense with primer trimming
2112                    // because the mapper still had the evidence to decide in favor of
2113                    // the deletion via the primer sequence.
2114                    rpos += l;
2115                },
2116                Cigar::RefSkip(_) => {
2117                    return Err(Error::BamUnexpectedCigarOperation {
2118                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2119                    });
2120                },
2121                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2122                    return Err(Error::BamUnexpectedCigarOperation{
2123                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2124                    });
2125                },
2126                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2127                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2128                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2129                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2130            }
2131        }
2132
2133        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2134            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2135        };
2136
2137        while rpos <= ref_pos && j < self.len() {
2138            match self[j] {
2139                // potential SNV evidence
2140                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2141                    // difference between desired position and first position of current cigar
2142                    // operation
2143                    qpos += ref_pos - rpos;
2144                    return Ok(Some(qpos));
2145                }
2146                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2147                    qpos += ref_pos - rpos;
2148                    return Ok(Some(qpos));
2149                }
2150                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2151                    // qpos shall resemble the start of the deletion
2152                    return Ok(Some(qpos));
2153                }
2154                // for others, just increase pos and qpos as needed
2155                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2156                    rpos += l;
2157                    qpos += l;
2158                    j += 1;
2159                }
2160                Cigar::SoftClip(l) => {
2161                    qpos += l;
2162                    j += 1;
2163                    if include_softclips {
2164                        rpos += l;
2165                    }
2166                }
2167                Cigar::Ins(l) => {
2168                    qpos += l;
2169                    j += 1;
2170                }
2171                Cigar::RefSkip(l) | Cigar::Del(l) => {
2172                    rpos += l;
2173                    j += 1;
2174                }
2175                Cigar::Pad(_) => {
2176                    j += 1;
2177                }
2178                Cigar::HardClip(_) if j < self.len() - 1 => {
2179                    return Err(Error::BamUnexpectedCigarOperation{
2180                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2181                    });
2182                }
2183                Cigar::HardClip(_) => return Ok(None),
2184            }
2185        }
2186
2187        Ok(None)
2188    }
2189
2190    /// transfer ownership of the Cigar out of the CigarView
2191    pub fn take(self) -> CigarString {
2192        self.inner
2193    }
2194}
2195
2196impl ops::Deref for CigarStringView {
2197    type Target = CigarString;
2198
2199    fn deref(&self) -> &CigarString {
2200        &self.inner
2201    }
2202}
2203
2204impl ops::Index<usize> for CigarStringView {
2205    type Output = Cigar;
2206
2207    fn index(&self, index: usize) -> &Cigar {
2208        self.inner.index(index)
2209    }
2210}
2211
2212impl ops::IndexMut<usize> for CigarStringView {
2213    fn index_mut(&mut self, index: usize) -> &mut Cigar {
2214        self.inner.index_mut(index)
2215    }
2216}
2217
2218impl<'a> CigarStringView {
2219    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2220        self.inner.into_iter()
2221    }
2222}
2223
2224impl<'a> IntoIterator for &'a CigarStringView {
2225    type Item = &'a Cigar;
2226    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2227
2228    fn into_iter(self) -> Self::IntoIter {
2229        self.inner.into_iter()
2230    }
2231}
2232
2233impl fmt::Display for CigarStringView {
2234    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2235        self.inner.fmt(fmt)
2236    }
2237}
2238
2239pub struct BaseModificationMetadata {
2240    pub strand: i32,
2241    pub implicit: i32,
2242    pub canonical: u8,
2243}
2244
2245/// struct containing the internal state required to access
2246/// the base modifications for a bam::Record
2247pub struct BaseModificationState<'a> {
2248    record: &'a Record,
2249    state: *mut htslib::hts_base_mod_state,
2250    buffer: Vec<htslib::hts_base_mod>,
2251    buffer_pos: i32,
2252}
2253
2254impl BaseModificationState<'_> {
2255    /// Initialize a new BaseModification struct from a bam::Record
2256    /// This function allocates memory for the state structure
2257    /// and initializes the iterator to the start of the modification
2258    /// records.
2259    fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2260        let mut bm = unsafe {
2261            BaseModificationState {
2262                record: r,
2263                state: hts_sys::hts_base_mod_state_alloc(),
2264                buffer: Vec::new(),
2265                buffer_pos: -1,
2266            }
2267        };
2268
2269        if bm.state.is_null() {
2270            panic!("Unable to allocate memory for hts_base_mod_state");
2271        }
2272
2273        // parse the MM tag to initialize the state
2274        unsafe {
2275            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2276            if ret != 0 {
2277                return Err(Error::BamBaseModificationTagNotFound);
2278            }
2279        }
2280
2281        let types = bm.recorded();
2282        bm.buffer.reserve(types.len());
2283        return Ok(bm);
2284    }
2285
2286    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2287        unsafe {
2288            let ret = hts_sys::bam_next_basemod(
2289                self.record.inner_ptr(),
2290                self.state,
2291                self.buffer.as_mut_ptr(),
2292                self.buffer.capacity() as i32,
2293                &mut self.buffer_pos,
2294            );
2295
2296            if ret < 0 {
2297                return Err(Error::BamBaseModificationIterationFailed);
2298            }
2299
2300            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2301            // return the actual number of modifications found. We return an error to the caller
2302            // in the case where there was insufficient storage to return all mods.
2303            if ret as usize > self.buffer.capacity() {
2304                return Err(Error::BamBaseModificationTooManyMods);
2305            }
2306
2307            // we read the modifications directly into the vector, which does
2308            // not update the length so needs to be manually set
2309            self.buffer.set_len(ret as usize);
2310
2311            return Ok(ret as usize);
2312        }
2313    }
2314
2315    /// Return an array containing the modification codes listed for this record.
2316    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2317    pub fn recorded<'a>(&self) -> &'a [i32] {
2318        unsafe {
2319            let mut n: i32 = 0;
2320            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2321
2322            // htslib should not return a null pointer, even when there are no base mods
2323            if data_ptr.is_null() {
2324                panic!("Unable to obtain pointer to base modifications");
2325            }
2326            assert!(n >= 0);
2327            return slice::from_raw_parts(data_ptr, n as usize);
2328        }
2329    }
2330
2331    /// Return metadata for the specified character code indicating the strand
2332    /// the base modification was called on, whether the tag uses implicit mode
2333    /// and the ascii code for the canonical base.
2334    /// If there are multiple modifications with the same code this will return the data
2335    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2336    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2337        unsafe {
2338            let mut strand: i32 = 0;
2339            let mut implicit: i32 = 0;
2340            // This may be i8 or u8 in hts_sys.
2341            let mut canonical: c_char = 0;
2342
2343            let ret = hts_sys::bam_mods_query_type(
2344                self.state,
2345                code,
2346                &mut strand,
2347                &mut implicit,
2348                &mut canonical,
2349            );
2350            if ret == -1 {
2351                return Err(Error::BamBaseModificationTypeNotFound);
2352            } else {
2353                return Ok(BaseModificationMetadata {
2354                    strand,
2355                    implicit,
2356                    canonical: canonical.try_into().unwrap(),
2357                });
2358            }
2359        }
2360    }
2361}
2362
2363impl Drop for BaseModificationState<'_> {
2364    fn drop<'a>(&mut self) {
2365        unsafe {
2366            hts_sys::hts_base_mod_state_free(self.state);
2367        }
2368    }
2369}
2370
2371/// Iterator over the base modifications that returns
2372/// a vector for all of the mods at each position
2373pub struct BaseModificationsPositionIter<'a> {
2374    mod_state: BaseModificationState<'a>,
2375}
2376
2377impl BaseModificationsPositionIter<'_> {
2378    fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
2379        let state = BaseModificationState::new(r)?;
2380        Ok(BaseModificationsPositionIter { mod_state: state })
2381    }
2382
2383    pub fn recorded<'a>(&self) -> &'a [i32] {
2384        return self.mod_state.recorded();
2385    }
2386
2387    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2388        return self.mod_state.query_type(code);
2389    }
2390}
2391
2392impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2393    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2394
2395    fn next(&mut self) -> Option<Self::Item> {
2396        let ret = self.mod_state.buffer_next_mods();
2397
2398        // Three possible things happened in buffer_next_mods:
2399        // 1. the htslib API call was successful but there are no more mods
2400        // 2. ths htslib API call was successful and we read some mods
2401        // 3. the htslib API call failed, we propogate the error wrapped in an option
2402        match ret {
2403            Ok(num_mods) => {
2404                if num_mods == 0 {
2405                    return None;
2406                } else {
2407                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2408                    return Some(Ok(data));
2409                }
2410            }
2411            Err(e) => return Some(Err(e)),
2412        }
2413    }
2414}
2415
2416/// Iterator over the base modifications that returns
2417/// the next modification found, one by one
2418pub struct BaseModificationsIter<'a> {
2419    mod_state: BaseModificationState<'a>,
2420    buffer_idx: usize,
2421}
2422
2423impl BaseModificationsIter<'_> {
2424    fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2425        let state = BaseModificationState::new(r)?;
2426        Ok(BaseModificationsIter {
2427            mod_state: state,
2428            buffer_idx: 0,
2429        })
2430    }
2431
2432    pub fn recorded<'a>(&self) -> &'a [i32] {
2433        return self.mod_state.recorded();
2434    }
2435
2436    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2437        return self.mod_state.query_type(code);
2438    }
2439}
2440
2441impl<'a> Iterator for BaseModificationsIter<'a> {
2442    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2443
2444    fn next(&mut self) -> Option<Self::Item> {
2445        if self.buffer_idx == self.mod_state.buffer.len() {
2446            // need to use the internal state to read the next
2447            // set of modifications into the buffer
2448            let ret = self.mod_state.buffer_next_mods();
2449
2450            match ret {
2451                Ok(num_mods) => {
2452                    if num_mods == 0 {
2453                        // done iterating
2454                        return None;
2455                    } else {
2456                        // we read some mods, reset the position in the buffer then fall through
2457                        self.buffer_idx = 0;
2458                    }
2459                }
2460                Err(e) => return Some(Err(e)),
2461            }
2462        }
2463
2464        // if we got here when there are mods buffered that we haven't emitted yet
2465        assert!(self.buffer_idx < self.mod_state.buffer.len());
2466        let data = (
2467            self.mod_state.buffer_pos,
2468            self.mod_state.buffer[self.buffer_idx],
2469        );
2470        self.buffer_idx += 1;
2471        return Some(Ok(data));
2472    }
2473}
2474
2475#[cfg(test)]
2476mod tests {
2477    use super::*;
2478
2479    #[test]
2480    fn test_cigar_string() {
2481        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2482
2483        assert_eq!(cigar[0], Cigar::Match(100));
2484        assert_eq!(format!("{}", cigar), "100M10S");
2485        for op in &cigar {
2486            println!("{}", op);
2487        }
2488    }
2489
2490    #[test]
2491    fn test_cigar_string_view_pos() {
2492        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2493        assert_eq!(cigar.pos(), 5);
2494    }
2495
2496    #[test]
2497    fn test_cigar_string_leading_softclips() {
2498        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2499        assert_eq!(cigar.leading_softclips(), 10);
2500        let cigar2 = CigarString(vec![
2501            Cigar::HardClip(5),
2502            Cigar::SoftClip(10),
2503            Cigar::Match(100),
2504        ])
2505        .into_view(0);
2506        assert_eq!(cigar2.leading_softclips(), 10);
2507    }
2508
2509    #[test]
2510    fn test_cigar_string_trailing_softclips() {
2511        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2512        assert_eq!(cigar.trailing_softclips(), 10);
2513        let cigar2 = CigarString(vec![
2514            Cigar::Match(100),
2515            Cigar::SoftClip(10),
2516            Cigar::HardClip(5),
2517        ])
2518        .into_view(0);
2519        assert_eq!(cigar2.trailing_softclips(), 10);
2520    }
2521
2522    #[test]
2523    fn test_cigar_read_pos() {
2524        let vpos = 5; // variant position
2525
2526        // Ignore leading HardClip
2527        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2528        // var:                       V
2529        // c01: 7H                 M  M
2530        // qpos:                  00 01
2531        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2532        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2533
2534        // Skip leading SoftClip or use as pre-POS matches
2535        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2536        // var:                       V
2537        // c02: 5H2S         M  M  M  M  M  M
2538        // qpos:  00        02 03 04 05 06 07
2539        // c02: 5H     S  S  M  M  M  M  M  M
2540        // qpos:      00 01 02 03 04 05 06 07
2541        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2542        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2543        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2544
2545        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2546        // pre-POS matches
2547        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2548        // var:                       V
2549        // c03:  3S                      M  M
2550        // qpos: 00                     03 04
2551        // c03:                 S  S  S  M  M
2552        // qpos:               00 01 02 03 04
2553        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2554        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2555        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2556
2557        // Skip leading Insertion before variant position
2558        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2559        // var:                       V
2560        // c04:  3I                X  X  X
2561        // qpos: 00               03 04 05
2562        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2563        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2564
2565        // Matches and deletion before variant position
2566        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2567        // var:                       V
2568        // c05:        =  =  D  D  X  =  =
2569        // qpos:      00 01       02 03 04 05
2570        let c05 = CigarString(vec![
2571            Cigar::Equal(2),
2572            Cigar::Del(2),
2573            Cigar::Diff(1),
2574            Cigar::Equal(2),
2575        ])
2576        .into_view(0);
2577        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2578
2579        // single nucleotide Deletion covering variant position
2580        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2581        // var:                       V
2582        // c06:                 =  =  D  X  X
2583        // qpos:               00 01    02 03
2584        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2585        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2586        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2587
2588        // three nucleotide Deletion covering variant position
2589        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2590        // var:                       V
2591        // c07:              =  =  D  D  D  M  M
2592        // qpos:            00 01          02 03
2593        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2594        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2595        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2596
2597        // three nucleotide RefSkip covering variant position
2598        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2599        // var:                       V
2600        // c08:              =  X  N  N  N  M  M
2601        // qpos:            00 01          02 03
2602        let c08 = CigarString(vec![
2603            Cigar::Equal(1),
2604            Cigar::Diff(1),
2605            Cigar::RefSkip(3),
2606            Cigar::Match(2),
2607        ])
2608        .into_view(2);
2609        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2610        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2611
2612        // internal hard clip before variant pos
2613        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2614        // var:                          V
2615        // c09: 3H           =  = 3H  =  =
2616        // qpos:            00 01    02 03
2617        let c09 = CigarString(vec![
2618            Cigar::HardClip(3),
2619            Cigar::Equal(2),
2620            Cigar::HardClip(3),
2621            Cigar::Equal(2),
2622        ])
2623        .into_view(2);
2624        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2625
2626        // Deletion right before variant position
2627        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2628        // var:                       V
2629        // c10:           M  M  D  D  M  M
2630        // qpos:         00 01       02 03
2631        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2632        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2633
2634        // Insertion right before variant position
2635        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2636        // var:                          V
2637        // c11:                 M  M 3I  M
2638        // qpos:               00 01 02 05 06
2639        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2640        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2641
2642        // Insertion right after variant position
2643        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2644        // var:                       V
2645        // c12:                 M  M  M 2I  =
2646        // qpos:               00 01 02 03 05
2647        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2648        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2649
2650        // Deletion right after variant position
2651        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2652        // var:                       V
2653        // c13:                 M  M  M  D  =
2654        // qpos:               00 01 02    03
2655        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2656        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2657
2658        // A messy and complicated example, including a Pad operation
2659        let vpos2 = 15;
2660        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2661        // var:                                                           V
2662        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2663        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2664        let c14 = CigarString(vec![
2665            Cigar::HardClip(5),
2666            Cigar::SoftClip(3),
2667            Cigar::Equal(1),
2668            Cigar::Pad(2),
2669            Cigar::Match(1),
2670            Cigar::Diff(1),
2671            Cigar::Ins(3),
2672            Cigar::Match(2),
2673            Cigar::Del(1),
2674            Cigar::Ins(2),
2675            Cigar::Equal(2),
2676            Cigar::RefSkip(3),
2677            Cigar::Match(3),
2678            Cigar::Equal(2),
2679            Cigar::SoftClip(5),
2680            Cigar::HardClip(2),
2681        ])
2682        .into_view(0);
2683        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2684
2685        // HardClip after Pad
2686        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2687        // var:                       V
2688        // c15: 5P1H            =  =  =
2689        // qpos:               00 01 02
2690        let c15 =
2691            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2692        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2693
2694        // only HardClip and Pad operations
2695        // c16: 7H5P2H
2696        let c16 =
2697            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2698        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2699    }
2700
2701    #[test]
2702    fn test_clone() {
2703        let mut rec = Record::new();
2704        rec.set_pos(300);
2705        rec.set_qname(b"read1");
2706        let clone = rec.clone();
2707        assert_eq!(rec, clone);
2708    }
2709
2710    #[test]
2711    fn test_flags() {
2712        let mut rec = Record::new();
2713
2714        rec.set_paired();
2715        assert_eq!(rec.is_paired(), true);
2716
2717        rec.set_supplementary();
2718        assert_eq!(rec.is_supplementary(), true);
2719        assert_eq!(rec.is_supplementary(), true);
2720
2721        rec.unset_paired();
2722        assert_eq!(rec.is_paired(), false);
2723        assert_eq!(rec.is_supplementary(), true);
2724
2725        rec.unset_supplementary();
2726        assert_eq!(rec.is_paired(), false);
2727        assert_eq!(rec.is_supplementary(), false);
2728    }
2729
2730    #[test]
2731    fn test_cigar_parse() {
2732        let cigar = "1S20M1D2I3X1=2H";
2733        let parsed = CigarString::try_from(cigar).unwrap();
2734        assert_eq!(parsed.to_string(), cigar);
2735    }
2736}
2737
2738#[cfg(test)]
2739mod alignment_cigar_tests {
2740    use super::*;
2741    use crate::bam::{Read, Reader};
2742    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2743    use bio_types::alignment::{Alignment, AlignmentMode};
2744
2745    #[test]
2746    fn test_cigar() {
2747        let alignment = Alignment {
2748            score: 5,
2749            xstart: 3,
2750            ystart: 0,
2751            xend: 9,
2752            yend: 10,
2753            ylen: 10,
2754            xlen: 10,
2755            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2756            mode: AlignmentMode::Semiglobal,
2757        };
2758        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2759        assert_eq!(
2760            CigarString::from_alignment(&alignment, false).0,
2761            vec![
2762                Cigar::SoftClip(3),
2763                Cigar::Equal(3),
2764                Cigar::Diff(1),
2765                Cigar::Ins(2),
2766                Cigar::Del(2),
2767                Cigar::SoftClip(1),
2768            ]
2769        );
2770
2771        let alignment = Alignment {
2772            score: 5,
2773            xstart: 0,
2774            ystart: 5,
2775            xend: 4,
2776            yend: 10,
2777            ylen: 10,
2778            xlen: 5,
2779            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2780            mode: AlignmentMode::Custom,
2781        };
2782        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2783        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2784        assert_eq!(
2785            CigarString::from_alignment(&alignment, false).0,
2786            vec![
2787                Cigar::Equal(1),
2788                Cigar::Diff(2),
2789                Cigar::Ins(1),
2790                Cigar::Del(2),
2791                Cigar::SoftClip(1),
2792            ]
2793        );
2794        assert_eq!(
2795            CigarString::from_alignment(&alignment, true).0,
2796            vec![
2797                Cigar::Equal(1),
2798                Cigar::Diff(2),
2799                Cigar::Ins(1),
2800                Cigar::Del(2),
2801                Cigar::HardClip(1),
2802            ]
2803        );
2804
2805        let alignment = Alignment {
2806            score: 5,
2807            xstart: 0,
2808            ystart: 5,
2809            xend: 3,
2810            yend: 8,
2811            ylen: 10,
2812            xlen: 3,
2813            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2814            mode: AlignmentMode::Custom,
2815        };
2816        assert_eq!(alignment.cigar(false), "1X1=1X");
2817        assert_eq!(
2818            CigarString::from_alignment(&alignment, false).0,
2819            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2820        );
2821
2822        let alignment = Alignment {
2823            score: 5,
2824            xstart: 0,
2825            ystart: 5,
2826            xend: 3,
2827            yend: 8,
2828            ylen: 10,
2829            xlen: 3,
2830            operations: vec![Subst, Match, Subst],
2831            mode: AlignmentMode::Semiglobal,
2832        };
2833        assert_eq!(alignment.cigar(false), "1X1=1X");
2834        assert_eq!(
2835            CigarString::from_alignment(&alignment, false).0,
2836            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2837        );
2838    }
2839
2840    #[test]
2841    fn test_read_orientation_f1r2() {
2842        let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2843
2844        for res in bam.records() {
2845            let record = res.unwrap();
2846            assert_eq!(
2847                record.read_pair_orientation(),
2848                SequenceReadPairOrientation::F1R2
2849            );
2850        }
2851    }
2852
2853    #[test]
2854    fn test_read_orientation_f2r1() {
2855        let mut bam = Reader::from_path(&"test/test_nonstandard_orientation.sam").unwrap();
2856
2857        for res in bam.records() {
2858            let record = res.unwrap();
2859            assert_eq!(
2860                record.read_pair_orientation(),
2861                SequenceReadPairOrientation::F2R1
2862            );
2863        }
2864    }
2865
2866    #[test]
2867    fn test_read_orientation_supplementary() {
2868        let mut bam = Reader::from_path(&"test/test_orientation_supplementary.sam").unwrap();
2869
2870        for res in bam.records() {
2871            let record = res.unwrap();
2872            assert_eq!(
2873                record.read_pair_orientation(),
2874                SequenceReadPairOrientation::F2R1
2875            );
2876        }
2877    }
2878
2879    #[test]
2880    pub fn test_cigar_parsing_non_ascii_error() {
2881        let cigar_str = "43ጷ";
2882        let expected_error = Err(Error::BamParseCigar {
2883                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2884            });
2885
2886        let result = CigarString::try_from(cigar_str);
2887        assert_eq!(expected_error, result);
2888    }
2889
2890    #[test]
2891    pub fn test_cigar_parsing() {
2892        // parsing test cases
2893        let cigar_strs = vec![
2894            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
2895            "100M",                          // test a single op
2896            "",                              // test empty input
2897            "1H1=1H",                        // test simple hardclip
2898            "1S1=1S",                        // test simple softclip
2899            "11H11S11=11S11H",               // test complex softclip
2900            "10H",
2901            "10S",
2902        ];
2903        // expected results
2904        let cigars = vec![
2905            CigarString(vec![
2906                Cigar::HardClip(1),
2907                Cigar::Match(10),
2908                Cigar::Del(4),
2909                Cigar::Ins(100),
2910                Cigar::RefSkip(300),
2911                Cigar::Equal(1102),
2912                Cigar::Pad(10),
2913                Cigar::Diff(25),
2914                Cigar::SoftClip(11),
2915            ]),
2916            CigarString(vec![Cigar::Match(100)]),
2917            CigarString(vec![]),
2918            CigarString(vec![
2919                Cigar::HardClip(1),
2920                Cigar::Equal(1),
2921                Cigar::HardClip(1),
2922            ]),
2923            CigarString(vec![
2924                Cigar::SoftClip(1),
2925                Cigar::Equal(1),
2926                Cigar::SoftClip(1),
2927            ]),
2928            CigarString(vec![
2929                Cigar::HardClip(11),
2930                Cigar::SoftClip(11),
2931                Cigar::Equal(11),
2932                Cigar::SoftClip(11),
2933                Cigar::HardClip(11),
2934            ]),
2935            CigarString(vec![Cigar::HardClip(10)]),
2936            CigarString(vec![Cigar::SoftClip(10)]),
2937        ];
2938        // compare
2939        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2940            let cigar_parse = CigarString::try_from(cigar_str)
2941                .expect(&format!("Unable to parse cigar: {}", cigar_str));
2942            assert_eq!(&cigar_parse, truth);
2943        }
2944    }
2945}
2946
2947#[cfg(test)]
2948mod basemod_tests {
2949    use crate::bam::{Read, Reader};
2950
2951    #[test]
2952    pub fn test_count_recorded() {
2953        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2954
2955        for r in bam.records() {
2956            let record = r.unwrap();
2957            if let Ok(mods) = record.basemods_iter() {
2958                let n = mods.recorded().len();
2959                assert_eq!(n, 3);
2960            };
2961        }
2962    }
2963
2964    #[test]
2965    pub fn test_query_type() {
2966        let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2967
2968        let mut n_fwd = 0;
2969        let mut n_rev = 0;
2970
2971        for r in bam.records() {
2972            let record = r.unwrap();
2973            if let Ok(mods) = record.basemods_iter() {
2974                for mod_code in mods.recorded() {
2975                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2976                        if mod_metadata.strand == 0 {
2977                            n_fwd += 1;
2978                        }
2979                        if mod_metadata.strand == 1 {
2980                            n_rev += 1;
2981                        }
2982                    }
2983                }
2984            };
2985        }
2986        assert_eq!(n_fwd, 2);
2987        assert_eq!(n_rev, 2);
2988    }
2989
2990    #[test]
2991    pub fn test_mod_iter() {
2992        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2993        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
2994        let mut i = 0;
2995
2996        for r in bam.records() {
2997            let record = r.unwrap();
2998            for res in record.basemods_iter().unwrap() {
2999                if let Ok((position, _m)) = res {
3000                    assert_eq!(position, expected_positions[i]);
3001                    i += 1;
3002                }
3003            }
3004        }
3005    }
3006
3007    #[test]
3008    pub fn test_position_iter() {
3009        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3010        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3011        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3012        let mut i = 0;
3013
3014        for r in bam.records() {
3015            let record = r.unwrap();
3016            for res in record.basemods_position_iter().unwrap() {
3017                if let Ok((position, elements)) = res {
3018                    assert_eq!(position, expected_positions[i]);
3019                    assert_eq!(elements.len(), expected_counts[i]);
3020                    i += 1;
3021                }
3022            }
3023        }
3024    }
3025}