rust_htslib/bam/
mod.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Module for working with SAM, BAM, and CRAM files.
7
8pub mod buffer;
9pub mod ext;
10pub mod header;
11pub mod index;
12pub mod pileup;
13pub mod record;
14
15#[cfg(feature = "serde_feature")]
16pub mod record_serde;
17
18use std::ffi;
19use std::os::raw::c_char;
20use std::path::Path;
21use std::rc::Rc;
22use std::slice;
23use std::str;
24
25use url::Url;
26
27use crate::errors::{Error, Result};
28use crate::htslib;
29use crate::tpool::ThreadPool;
30use crate::utils::path_as_bytes;
31
32pub use crate::bam::buffer::RecordBuffer;
33pub use crate::bam::header::Header;
34pub use crate::bam::record::Record;
35use hts_sys::{hts_fmt_option, sam_fields};
36use std::convert::{TryFrom, TryInto};
37use std::mem::MaybeUninit;
38
39/// # Safety
40///
41/// Implementation for `Read::set_threads` and `Writer::set_threads`.
42unsafe fn set_threads(htsfile: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
43    assert!(n_threads != 0, "n_threads must be > 0");
44
45    if htslib::hts_set_threads(htsfile, n_threads as i32) != 0 {
46        Err(Error::SetThreads)
47    } else {
48        Ok(())
49    }
50}
51
52unsafe fn set_thread_pool(htsfile: *mut htslib::htsFile, tpool: &ThreadPool) -> Result<()> {
53    let mut b = tpool.handle.borrow_mut();
54
55    if htslib::hts_set_thread_pool(htsfile, &mut b.inner as *mut _) != 0 {
56        Err(Error::ThreadPool)
57    } else {
58        Ok(())
59    }
60}
61
62/// # Safety
63///
64/// Set the reference FAI index path in a `htslib::htsFile` struct for reading CRAM format.
65pub unsafe fn set_fai_filename<P: AsRef<Path>>(
66    htsfile: *mut htslib::htsFile,
67    fasta_path: P,
68) -> Result<()> {
69    let path = if let Some(ext) = fasta_path.as_ref().extension() {
70        fasta_path
71            .as_ref()
72            .with_extension(format!("{}.fai", ext.to_str().unwrap()))
73    } else {
74        fasta_path.as_ref().with_extension(".fai")
75    };
76    let p: &Path = path.as_ref();
77    let c_str = ffi::CString::new(p.to_str().unwrap().as_bytes()).unwrap();
78    if htslib::hts_set_fai_filename(htsfile, c_str.as_ptr()) == 0 {
79        Ok(())
80    } else {
81        Err(Error::BamInvalidReferencePath { path: p.to_owned() })
82    }
83}
84
85/// A trait for a BAM reader with a read method.
86pub trait Read: Sized {
87    /// Read next BAM record into given record.
88    /// Use this method in combination with a single allocated record to avoid the reallocations
89    /// occurring with the iterator.
90    ///
91    /// # Arguments
92    ///
93    /// * `record` - the record to be filled
94    ///
95    /// # Returns
96    ///
97    /// Some(Ok(())) if the record was read and None if no more records to read
98    ///
99    /// Example:
100    /// ```
101    /// use rust_htslib::errors::Error;
102    /// use rust_htslib::bam::{Read, IndexedReader, Record};
103    ///
104    /// let mut bam = IndexedReader::from_path(&"test/test.bam").unwrap();
105    /// bam.fetch((0, 1000, 2000)); // reads on tid 0, from 1000bp to 2000bp
106    /// let mut record = Record::new();
107    /// while let Some(result) = bam.read(&mut record) {
108    ///     match result {
109    ///         Ok(_) => {
110    ///             println!("Read sequence: {:?}", record.seq().as_bytes());
111    ///         }
112    ///         Err(_) => panic!("BAM parsing failed...")
113    ///     }
114    /// }
115    /// ```
116    ///
117    /// Consider using [`rc_records`](#tymethod.rc_records) instead.
118    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
119
120    /// Iterator over the records of the seeked region.
121    /// Note that, while being convenient, this is less efficient than pre-allocating a
122    /// `Record` and reading into it with the `read` method, since every iteration involves
123    /// the allocation of a new `Record`.
124    ///
125    /// This is about 10% slower than record in micro benchmarks.
126    ///
127    /// Consider using [`rc_records`](#tymethod.rc_records) instead.
128    fn records(&mut self) -> Records<'_, Self>;
129
130    /// Records iterator using an Rc to avoid allocating a Record each turn.
131    /// This is about 1% slower than the [`read`](#tymethod.read) based API in micro benchmarks,
132    /// but has nicer ergonomics (and might not actually be slower in your applications).
133    ///
134    /// Example:
135    /// ```
136    /// use rust_htslib::errors::Error;
137    /// use rust_htslib::bam::{Read, Reader, Record};
138    /// use rust_htslib::htslib; // for BAM_F*
139    /// let mut bam = Reader::from_path(&"test/test.bam").unwrap();
140    ///
141    /// for read in
142    ///     bam.rc_records()
143    ///     .map(|x| x.expect("Failure parsing Bam file"))
144    ///     .filter(|read|
145    ///         read.flags()
146    ///          & (htslib::BAM_FUNMAP
147    ///              | htslib::BAM_FSECONDARY
148    ///              | htslib::BAM_FQCFAIL
149    ///              | htslib::BAM_FDUP) as u16
150    ///          == 0
151    ///     )
152    ///     .filter(|read| !read.is_reverse()) {
153    ///     println!("Found a forward read: {:?}", read.qname());
154    /// }
155    ///
156    /// //or to add the read qnames into a Vec
157    /// let collected: Vec<_> = bam.rc_records().map(|read| read.unwrap().qname().to_vec()).collect();
158    ///
159    ///
160    /// ```
161    fn rc_records(&mut self) -> RcRecords<'_, Self>;
162
163    /// Iterator over pileups.
164    fn pileup(&mut self) -> pileup::Pileups<'_, Self>;
165
166    /// Return the htsFile struct
167    fn htsfile(&self) -> *mut htslib::htsFile;
168
169    /// Return the header.
170    fn header(&self) -> &HeaderView;
171
172    /// Seek to the given virtual offset in the file
173    fn seek(&mut self, offset: i64) -> Result<()> {
174        let htsfile = unsafe { self.htsfile().as_ref() }.expect("bug: null pointer to htsFile");
175        let ret = match htsfile.format.format {
176            htslib::htsExactFormat_cram => unsafe {
177                i64::from(htslib::cram_seek(
178                    htsfile.fp.cram,
179                    offset as libc::off_t,
180                    libc::SEEK_SET,
181                ))
182            },
183            _ => unsafe { htslib::bgzf_seek(htsfile.fp.bgzf, offset, libc::SEEK_SET) },
184        };
185
186        if ret == 0 {
187            Ok(())
188        } else {
189            Err(Error::FileSeek)
190        }
191    }
192
193    /// Report the current virtual offset
194    fn tell(&self) -> i64 {
195        // this reimplements the bgzf_tell macro
196        let htsfile = unsafe { self.htsfile().as_ref() }.expect("bug: null pointer to htsFile");
197        let bgzf = unsafe { *htsfile.fp.bgzf };
198        (bgzf.block_address << 16) | (i64::from(bgzf.block_offset) & 0xFFFF)
199    }
200
201    /// Activate multi-threaded BAM read support in htslib. This should permit faster
202    /// reading of large BAM files.
203    ///
204    /// Setting `nthreads` to `0` does not change the current state.  Note that it is not
205    /// possible to set the number of background threads below `1` once it has been set.
206    ///
207    /// # Arguments
208    ///
209    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
210    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
211        unsafe { set_threads(self.htsfile(), n_threads) }
212    }
213
214    /// Use a shared thread-pool for writing. This permits controlling the total
215    /// thread count when multiple readers and writers are working simultaneously.
216    /// A thread pool can be created with `crate::tpool::ThreadPool::new(n_threads)`
217    ///
218    /// # Arguments
219    ///
220    /// * `tpool` - thread pool to use for compression work.
221    fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()>;
222
223    /// If the underlying file is in CRAM format, allows modifying CRAM options.
224    /// Note that this method does *not* check that the underlying file actually is in CRAM format.
225    ///
226    /// # Examples
227    ///
228    /// Set the required fields to RNAME and FLAG,
229    /// potentially allowing htslib to skip over the rest,
230    /// resulting in faster iteration:
231    /// ```
232    /// use rust_htslib::bam::{Read, Reader};
233    /// use hts_sys;
234    /// let mut cram = Reader::from_path(&"test/test_cram.cram").unwrap();
235    /// cram.set_cram_options(hts_sys::hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS,
236    ///             hts_sys::sam_fields_SAM_RNAME | hts_sys::sam_fields_SAM_FLAG).unwrap();
237    /// ```
238    fn set_cram_options(&mut self, fmt_opt: hts_fmt_option, fields: sam_fields) -> Result<()> {
239        unsafe {
240            if hts_sys::hts_set_opt(self.htsfile(), fmt_opt, fields) != 0 {
241                Err(Error::HtsSetOpt)
242            } else {
243                Ok(())
244            }
245        }
246    }
247}
248
249/// A BAM reader.
250#[derive(Debug)]
251pub struct Reader {
252    htsfile: *mut htslib::htsFile,
253    header: Rc<HeaderView>,
254    tpool: Option<ThreadPool>,
255}
256
257unsafe impl Send for Reader {}
258
259impl Reader {
260    /// Create a new Reader from path.
261    ///
262    /// # Arguments
263    ///
264    /// * `path` - the path to open.
265    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
266        Self::new(&path_as_bytes(path, true)?)
267    }
268
269    /// Create a new Reader from STDIN.
270    pub fn from_stdin() -> Result<Self> {
271        Self::new(b"-")
272    }
273
274    /// Create a new Reader from URL.
275    pub fn from_url(url: &Url) -> Result<Self> {
276        Self::new(url.as_str().as_bytes())
277    }
278
279    /// Create a new Reader.
280    ///
281    /// # Arguments
282    ///
283    /// * `path` - the path to open. Use "-" for stdin.
284    fn new(path: &[u8]) -> Result<Self> {
285        let htsfile = hts_open(path, b"r")?;
286
287        let header = unsafe { htslib::sam_hdr_read(htsfile) };
288        if header.is_null() {
289            return Err(Error::BamOpen {
290                target: String::from_utf8_lossy(path).to_string(),
291            });
292        }
293
294        // Invalidate the `text` representation of the header
295        unsafe {
296            let _ = htslib::sam_hdr_line_name(header, b"SQ".as_ptr().cast::<c_char>(), 0);
297        }
298
299        Ok(Reader {
300            htsfile,
301            header: Rc::new(HeaderView::new(header)),
302            tpool: None,
303        })
304    }
305
306    extern "C" fn pileup_read(
307        data: *mut ::std::os::raw::c_void,
308        record: *mut htslib::bam1_t,
309    ) -> i32 {
310        let mut _self = unsafe { (data as *mut Self).as_mut().unwrap() };
311        unsafe {
312            htslib::sam_read1(
313                _self.htsfile(),
314                _self.header().inner_ptr() as *mut hts_sys::sam_hdr_t,
315                record,
316            )
317        }
318    }
319
320    /// Iterator over the records between the (optional) virtual offsets `start` and `end`
321    ///
322    /// # Arguments
323    ///
324    /// * `start` - Optional starting virtual offset to seek to. Throws an error if it is not
325    /// a valid virtual offset.
326    ///
327    /// * `end` - Read until the virtual offset is less than `end`
328    pub fn iter_chunk(&mut self, start: Option<i64>, end: Option<i64>) -> ChunkIterator<'_, Self> {
329        if let Some(pos) = start {
330            self.seek(pos)
331                .expect("Failed to seek to the starting position");
332        };
333
334        ChunkIterator { reader: self, end }
335    }
336
337    /// Set the reference path for reading CRAM files.
338    ///
339    /// # Arguments
340    ///
341    /// * `path` - path to the FASTA reference
342    pub fn set_reference<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
343        unsafe { set_fai_filename(self.htsfile, path) }
344    }
345}
346
347impl Read for Reader {
348    /// Read the next BAM record into the given `Record`.
349    /// Returns `None` if there are no more records.
350    ///
351    /// This method is useful if you want to read records as fast as possible as the
352    /// `Record` can be reused. A more ergonomic approach is to use the [records](Reader::records)
353    /// iterator.
354    ///
355    /// # Errors
356    /// If there are any issues with reading the next record an error will be returned.
357    ///
358    /// # Examples
359    ///
360    /// ```
361    /// use rust_htslib::errors::Error;
362    /// use rust_htslib::bam::{Read, Reader, Record};
363    ///
364    /// let mut bam = Reader::from_path(&"test/test.bam")?;
365    /// let mut record = Record::new();
366    ///
367    /// // Print the TID of each record
368    /// while let Some(r) = bam.read(&mut record) {
369    ///    r.expect("Failed to parse record");
370    ///    println!("TID: {}", record.tid())
371    /// }
372    /// # Ok::<(), Error>(())
373    /// ```
374    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
375        match unsafe {
376            htslib::sam_read1(
377                self.htsfile,
378                self.header().inner_ptr() as *mut hts_sys::sam_hdr_t,
379                record.inner_ptr_mut(),
380            )
381        } {
382            -1 => None,
383            -2 => Some(Err(Error::BamTruncatedRecord)),
384            -4 => Some(Err(Error::BamInvalidRecord)),
385            _ => {
386                record.set_header(Rc::clone(&self.header));
387
388                Some(Ok(()))
389            }
390        }
391    }
392
393    /// Iterator over the records of the fetched region.
394    /// Note that, while being convenient, this is less efficient than pre-allocating a
395    /// `Record` and reading into it with the `read` method, since every iteration involves
396    /// the allocation of a new `Record`.
397    fn records(&mut self) -> Records<'_, Self> {
398        Records { reader: self }
399    }
400
401    fn rc_records(&mut self) -> RcRecords<'_, Self> {
402        RcRecords {
403            reader: self,
404            record: Rc::new(record::Record::new()),
405        }
406    }
407
408    fn pileup(&mut self) -> pileup::Pileups<'_, Self> {
409        let _self = self as *const Self;
410        let itr = unsafe {
411            htslib::bam_plp_init(
412                Some(Reader::pileup_read),
413                _self as *mut ::std::os::raw::c_void,
414            )
415        };
416        pileup::Pileups::new(self, itr)
417    }
418
419    fn htsfile(&self) -> *mut htslib::htsFile {
420        self.htsfile
421    }
422
423    fn header(&self) -> &HeaderView {
424        &self.header
425    }
426
427    fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
428        unsafe { set_thread_pool(self.htsfile(), tpool)? }
429        self.tpool = Some(tpool.clone());
430        Ok(())
431    }
432}
433
434impl Drop for Reader {
435    fn drop(&mut self) {
436        unsafe {
437            htslib::hts_close(self.htsfile);
438        }
439    }
440}
441
442/// Conversion type for start/stop coordinates
443/// only public because it's leaked by the conversions
444#[doc(hidden)]
445pub struct FetchCoordinate(i64);
446
447//the old sam spec
448impl From<i32> for FetchCoordinate {
449    fn from(coord: i32) -> FetchCoordinate {
450        FetchCoordinate(coord as i64)
451    }
452}
453
454// to support un-annotated literals (type interference fails on those)
455impl From<u32> for FetchCoordinate {
456    fn from(coord: u32) -> FetchCoordinate {
457        FetchCoordinate(coord as i64)
458    }
459}
460
461//the new sam spec
462impl From<i64> for FetchCoordinate {
463    fn from(coord: i64) -> FetchCoordinate {
464        FetchCoordinate(coord)
465    }
466}
467
468//what some of our header methods return
469impl From<u64> for FetchCoordinate {
470    fn from(coord: u64) -> FetchCoordinate {
471        FetchCoordinate(coord.try_into().expect("Coordinate exceeded 2^^63-1"))
472    }
473}
474
475/// Enum for [IndexdReader.fetch()](struct.IndexedReader.html#method.fetch) arguments.
476///
477/// tids may be converted From<>:
478/// * i32 (correct as per spec)
479/// * u32 (because of header.tid. Will panic if above 2^31-1).
480///
481///Coordinates may be (via FetchCoordinate)
482/// * i32 (as of the sam v1 spec)
483/// * i64 (as of the htslib 'large coordinate' extension (even though they are not supported in BAM)
484/// * u32 (because that's what rust literals will default to)
485/// * u64 (because of header.target_len(). Will panic if above 2^^63-1).
486#[derive(Debug)]
487pub enum FetchDefinition<'a> {
488    /// tid, start, stop,
489    Region(i32, i64, i64),
490    /// 'named-reference', start, stop tuple.
491    RegionString(&'a [u8], i64, i64),
492    ///complete reference. May be i32 or u32 (which panics if above 2^31-')
493    CompleteTid(i32),
494    ///complete reference by name (&[u8] or &str)
495    String(&'a [u8]),
496    /// Every read
497    All,
498    /// Only reads with the BAM flag BAM_FUNMAP (which might not be all reads with reference = -1)
499    Unmapped,
500}
501
502impl<'a, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(i32, X, Y)>
503    for FetchDefinition<'a>
504{
505    fn from(tup: (i32, X, Y)) -> FetchDefinition<'a> {
506        let start: FetchCoordinate = tup.1.into();
507        let stop: FetchCoordinate = tup.2.into();
508        FetchDefinition::Region(tup.0, start.0, stop.0)
509    }
510}
511
512impl<'a, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(u32, X, Y)>
513    for FetchDefinition<'a>
514{
515    fn from(tup: (u32, X, Y)) -> FetchDefinition<'a> {
516        let start: FetchCoordinate = tup.1.into();
517        let stop: FetchCoordinate = tup.2.into();
518        FetchDefinition::Region(
519            tup.0.try_into().expect("Tid exceeded 2^31-1"),
520            start.0,
521            stop.0,
522        )
523    }
524}
525
526//non tuple impls
527impl<'a> From<i32> for FetchDefinition<'a> {
528    fn from(tid: i32) -> FetchDefinition<'a> {
529        FetchDefinition::CompleteTid(tid)
530    }
531}
532
533impl<'a> From<u32> for FetchDefinition<'a> {
534    fn from(tid: u32) -> FetchDefinition<'a> {
535        let tid: i32 = tid.try_into().expect("tid exceeded 2^31-1");
536        FetchDefinition::CompleteTid(tid)
537    }
538}
539
540impl<'a> From<&'a str> for FetchDefinition<'a> {
541    fn from(s: &'a str) -> FetchDefinition<'a> {
542        FetchDefinition::String(s.as_bytes())
543    }
544}
545
546//also accept &[u8;n] literals
547impl<'a> From<&'a [u8]> for FetchDefinition<'a> {
548    fn from(s: &'a [u8]) -> FetchDefinition<'a> {
549        FetchDefinition::String(s)
550    }
551}
552
553//also accept &[u8;n] literals
554impl<'a, T: AsRef<[u8]>> From<&'a T> for FetchDefinition<'a> {
555    fn from(s: &'a T) -> FetchDefinition<'a> {
556        FetchDefinition::String(s.as_ref())
557    }
558}
559
560impl<'a, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(&'a str, X, Y)>
561    for FetchDefinition<'a>
562{
563    fn from(tup: (&'a str, X, Y)) -> FetchDefinition<'a> {
564        let start: FetchCoordinate = tup.1.into();
565        let stop: FetchCoordinate = tup.2.into();
566        FetchDefinition::RegionString(tup.0.as_bytes(), start.0, stop.0)
567    }
568}
569
570impl<'a, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(&'a [u8], X, Y)>
571    for FetchDefinition<'a>
572{
573    fn from(tup: (&'a [u8], X, Y)) -> FetchDefinition<'a> {
574        let start: FetchCoordinate = tup.1.into();
575        let stop: FetchCoordinate = tup.2.into();
576        FetchDefinition::RegionString(tup.0, start.0, stop.0)
577    }
578}
579
580//also accept &[u8;n] literals
581impl<'a, T: AsRef<[u8]>, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(&'a T, X, Y)>
582    for FetchDefinition<'a>
583{
584    fn from(tup: (&'a T, X, Y)) -> FetchDefinition<'a> {
585        let start: FetchCoordinate = tup.1.into();
586        let stop: FetchCoordinate = tup.2.into();
587        FetchDefinition::RegionString(tup.0.as_ref(), start.0, stop.0)
588    }
589}
590
591#[derive(Debug)]
592pub struct IndexedReader {
593    htsfile: *mut htslib::htsFile,
594    header: Rc<HeaderView>,
595    idx: Rc<IndexView>,
596    itr: Option<*mut htslib::hts_itr_t>,
597    tpool: Option<ThreadPool>,
598}
599
600unsafe impl Send for IndexedReader {}
601
602impl IndexedReader {
603    /// Create a new Reader from path.
604    ///
605    /// # Arguments
606    ///
607    /// * `path` - the path to open.
608    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
609        Self::new(&path_as_bytes(path, true)?)
610    }
611
612    pub fn from_path_and_index<P: AsRef<Path>>(path: P, index_path: P) -> Result<Self> {
613        Self::new_with_index_path(
614            &path_as_bytes(path, true)?,
615            &path_as_bytes(index_path, true)?,
616        )
617    }
618
619    pub fn from_url(url: &Url) -> Result<Self> {
620        Self::new(url.as_str().as_bytes())
621    }
622
623    /// Create a new Reader.
624    ///
625    /// # Arguments
626    ///
627    /// * `path` - the path. Use "-" for stdin.
628    fn new(path: &[u8]) -> Result<Self> {
629        let htsfile = hts_open(path, b"r")?;
630        let header = unsafe { htslib::sam_hdr_read(htsfile) };
631        let c_str = ffi::CString::new(path).unwrap();
632        let idx = unsafe { htslib::sam_index_load(htsfile, c_str.as_ptr()) };
633        if idx.is_null() {
634            Err(Error::BamInvalidIndex {
635                target: str::from_utf8(path).unwrap().to_owned(),
636            })
637        } else {
638            Ok(IndexedReader {
639                htsfile,
640                header: Rc::new(HeaderView::new(header)),
641                idx: Rc::new(IndexView::new(idx)),
642                itr: None,
643                tpool: None,
644            })
645        }
646    }
647    /// Create a new Reader.
648    ///
649    /// # Arguments
650    ///
651    /// * `path` - the path. Use "-" for stdin.
652    /// * `index_path` - the index path to use
653    fn new_with_index_path(path: &[u8], index_path: &[u8]) -> Result<Self> {
654        let htsfile = hts_open(path, b"r")?;
655        let header = unsafe { htslib::sam_hdr_read(htsfile) };
656        let c_str_path = ffi::CString::new(path).unwrap();
657        let c_str_index_path = ffi::CString::new(index_path).unwrap();
658        let idx = unsafe {
659            htslib::sam_index_load2(htsfile, c_str_path.as_ptr(), c_str_index_path.as_ptr())
660        };
661        if idx.is_null() {
662            Err(Error::BamInvalidIndex {
663                target: str::from_utf8(path).unwrap().to_owned(),
664            })
665        } else {
666            Ok(IndexedReader {
667                htsfile,
668                header: Rc::new(HeaderView::new(header)),
669                idx: Rc::new(IndexView::new(idx)),
670                itr: None,
671                tpool: None,
672            })
673        }
674    }
675
676    /// Define the region from which .read() or .records will retrieve reads.
677    ///
678    /// Both iterating (with [.records()](trait.Read.html#tymethod.records)) and looping without allocation (with [.read()](trait.Read.html#tymethod.read) are a two stage process:
679    /// 1. 'fetch' the region of interest
680    /// 2. iter/loop through the reads.
681    ///
682    /// Example:
683    /// ```
684    /// use rust_htslib::bam::{IndexedReader, Read};
685    /// let mut bam = IndexedReader::from_path(&"test/test.bam").unwrap();
686    /// bam.fetch(("chrX", 10000, 20000)); // coordinates 10000..20000 on reference named "chrX"
687    /// for read in bam.records() {
688    ///     println!("read name: {:?}", read.unwrap().qname());
689    /// }
690    /// ```
691    ///
692    /// The arguments may be anything that can be converted into a FetchDefinition
693    /// such as
694    ///
695    /// * fetch(tid: u32) -> fetch everything on this reference
696    /// * fetch(reference_name: &[u8] | &str) -> fetch everything on this reference
697    /// * fetch((tid: i32, start: i64, stop: i64)): -> fetch in this region on this tid
698    /// * fetch((reference_name: &[u8] | &str, start: i64, stop: i64) -> fetch in this region on this tid
699    /// * fetch(FetchDefinition::All) or fetch(".") -> Fetch overything
700    /// * fetch(FetchDefinition::Unmapped) or fetch("*") -> Fetch unmapped (as signified by the 'unmapped' flag in the BAM - might be unreliable with some aligners.
701    ///
702    /// The start / stop coordinates will take i64 (the correct type as of htslib's 'large
703    /// coordinates' expansion), i32, u32, and u64 (with a possible panic! if the coordinate
704    /// won't fit an i64).
705    ///
706    /// `start` and `stop` are zero-based. `start` is inclusive, `stop` is exclusive.
707    ///
708    /// This replaces the old fetch and fetch_str implementations.
709    pub fn fetch<'a, T: Into<FetchDefinition<'a>>>(&mut self, fetch_definition: T) -> Result<()> {
710        //this 'compile time redirect' safes us
711        //from monomorphing the 'meat' of the fetch function
712        self._inner_fetch(fetch_definition.into())
713    }
714
715    fn _inner_fetch(&mut self, fetch_definition: FetchDefinition) -> Result<()> {
716        match fetch_definition {
717            FetchDefinition::Region(tid, start, stop) => {
718                self._fetch_by_coord_tuple(tid, start, stop)
719            }
720            FetchDefinition::RegionString(s, start, stop) => {
721                let tid = self.header().tid(s);
722                match tid {
723                    Some(tid) => self._fetch_by_coord_tuple(tid as i32, start, stop),
724                    None => Err(Error::Fetch),
725                }
726            }
727            FetchDefinition::CompleteTid(tid) => {
728                let len = self.header().target_len(tid as u32);
729                match len {
730                    Some(len) => self._fetch_by_coord_tuple(tid, 0, len as i64),
731                    None => Err(Error::Fetch),
732                }
733            }
734            FetchDefinition::String(s) => {
735                // either a target-name or a samtools style definition
736                let tid = self.header().tid(s);
737                match tid {
738                    Some(tid) => {
739                        //'large position' spec says target len must will fit into an i64.
740                        let len: i64 = self.header.target_len(tid).unwrap().try_into().unwrap();
741                        self._fetch_by_coord_tuple(tid as i32, 0, len)
742                    }
743                    None => self._fetch_by_str(s),
744                }
745            }
746            FetchDefinition::All => self._fetch_by_str(b"."),
747            FetchDefinition::Unmapped => self._fetch_by_str(b"*"),
748        }
749    }
750
751    fn _fetch_by_coord_tuple(&mut self, tid: i32, beg: i64, end: i64) -> Result<()> {
752        if let Some(itr) = self.itr {
753            unsafe { htslib::hts_itr_destroy(itr) }
754        }
755        let itr = unsafe { htslib::sam_itr_queryi(self.index().inner_ptr(), tid, beg, end) };
756        if itr.is_null() {
757            self.itr = None;
758            Err(Error::Fetch)
759        } else {
760            self.itr = Some(itr);
761            Ok(())
762        }
763    }
764
765    fn _fetch_by_str(&mut self, region: &[u8]) -> Result<()> {
766        if let Some(itr) = self.itr {
767            unsafe { htslib::hts_itr_destroy(itr) }
768        }
769        let rstr = ffi::CString::new(region).unwrap();
770        let rptr = rstr.as_ptr();
771        let itr = unsafe {
772            htslib::sam_itr_querys(
773                self.index().inner_ptr(),
774                self.header().inner_ptr() as *mut hts_sys::sam_hdr_t,
775                rptr,
776            )
777        };
778        if itr.is_null() {
779            self.itr = None;
780            Err(Error::Fetch)
781        } else {
782            self.itr = Some(itr);
783            Ok(())
784        }
785    }
786
787    extern "C" fn pileup_read(
788        data: *mut ::std::os::raw::c_void,
789        record: *mut htslib::bam1_t,
790    ) -> i32 {
791        let _self = unsafe { (data as *mut Self).as_mut().unwrap() };
792        match _self.itr {
793            Some(itr) => itr_next(_self.htsfile, itr, record), // read fetched region
794            None => unsafe {
795                htslib::sam_read1(
796                    _self.htsfile,
797                    _self.header().inner_ptr() as *mut hts_sys::sam_hdr_t,
798                    record,
799                )
800            }, // ordinary reading
801        }
802    }
803
804    /// Set the reference path for reading CRAM files.
805    ///
806    /// # Arguments
807    ///
808    /// * `path` - path to the FASTA reference
809    pub fn set_reference<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
810        unsafe { set_fai_filename(self.htsfile, path) }
811    }
812
813    pub fn index(&self) -> &IndexView {
814        &self.idx
815    }
816
817    // Analogous to slow_idxstats in samtools, see
818    // https://github.com/samtools/samtools/blob/556c60fdff977c0e6cadc4c2581661f187098b4d/bam_index.c#L140-L199
819    unsafe fn slow_idxstats(&mut self) -> Result<Vec<(i64, u64, u64, u64)>> {
820        self.set_cram_options(
821            hts_sys::hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS,
822            hts_sys::sam_fields_SAM_RNAME | hts_sys::sam_fields_SAM_FLAG,
823        )?;
824        let header = self.header();
825        let h = header.inner;
826        let mut ret;
827        let mut last_tid = -2;
828        let fp = self.htsfile();
829
830        let nref =
831            usize::try_from(hts_sys::sam_hdr_nref(h)).map_err(|_| Error::NoSequencesInReference)?;
832        if nref == 0 {
833            return Ok(vec![]);
834        }
835        let mut counts = vec![vec![0; 2]; nref + 1];
836        let mut bb: hts_sys::bam1_t = MaybeUninit::zeroed().assume_init();
837        let b = &mut bb as *mut hts_sys::bam1_t;
838        loop {
839            ret = hts_sys::sam_read1(fp, h, b);
840            if ret < 0 {
841                break;
842            }
843            let tid = (*b).core.tid;
844            if tid >= nref as i32 || tid < -1 {
845                return Err(Error::InvalidTid { tid });
846            }
847
848            if tid != last_tid {
849                if (last_tid >= -1) && (counts[tid as usize][0] + counts[tid as usize][1]) > 0 {
850                    return Err(Error::BamUnsorted);
851                }
852                last_tid = tid;
853            }
854
855            let idx = if ((*b).core.flag as u32 & hts_sys::BAM_FUNMAP) > 0 {
856                1
857            } else {
858                0
859            };
860            counts[(*b).core.tid as usize][idx] += 1;
861        }
862
863        if ret == -1 {
864            let res = (0..nref)
865                .map(|i| {
866                    (
867                        i as i64,
868                        header.target_len(i as u32).unwrap(),
869                        counts[i][0],
870                        counts[i][1],
871                    )
872                })
873                .chain([(-1, 0, counts[nref][0], counts[nref][1])])
874                .collect();
875            Ok(res)
876        } else {
877            Err(Error::SlowIdxStats)
878        }
879    }
880
881    /// Similar to samtools idxstats, this returns a vector of tuples
882    /// containing the target id, length, number of mapped reads, and number of unmapped reads.
883    /// The last entry in the vector corresponds to the unmapped reads for the entire file, with
884    /// the tid set to -1.
885    pub fn index_stats(&mut self) -> Result<Vec<(i64, u64, u64, u64)>> {
886        let header = self.header();
887        let index = self.index();
888        if index.inner_ptr().is_null() {
889            panic!("Index is null");
890        }
891        // the quick index stats method only works for BAM files, not SAM or CRAM
892        unsafe {
893            if (*self.htsfile()).format.format != htslib::htsExactFormat_bam {
894                return self.slow_idxstats();
895            }
896        }
897        Ok((0..header.target_count())
898            .map(|tid| {
899                let (mapped, unmapped) = index.number_mapped_unmapped(tid);
900                let tlen = header.target_len(tid).unwrap();
901                (tid as i64, tlen, mapped, unmapped)
902            })
903            .chain([(-1, 0, 0, index.number_unmapped())])
904            .collect::<_>())
905    }
906}
907
908#[derive(Debug)]
909pub struct IndexView {
910    inner: *mut hts_sys::hts_idx_t,
911    owned: bool,
912}
913
914impl IndexView {
915    fn new(hts_idx: *mut hts_sys::hts_idx_t) -> Self {
916        Self {
917            inner: hts_idx,
918            owned: true,
919        }
920    }
921
922    #[inline]
923    pub fn inner(&self) -> &hts_sys::hts_idx_t {
924        unsafe { self.inner.as_ref().unwrap() }
925    }
926
927    #[inline]
928    // Pointer to inner hts_idx_t struct
929    pub fn inner_ptr(&self) -> *const hts_sys::hts_idx_t {
930        self.inner
931    }
932
933    #[inline]
934    pub fn inner_mut(&mut self) -> &mut hts_sys::hts_idx_t {
935        unsafe { self.inner.as_mut().unwrap() }
936    }
937
938    #[inline]
939    // Mutable pointer to hts_idx_t struct
940    pub fn inner_ptr_mut(&mut self) -> *mut hts_sys::hts_idx_t {
941        self.inner
942    }
943
944    /// Get the number of mapped and unmapped reads for a given target id
945    /// FIXME only valid for BAM, not SAM/CRAM
946    fn number_mapped_unmapped(&self, tid: u32) -> (u64, u64) {
947        let (mut mapped, mut unmapped) = (0, 0);
948        unsafe {
949            hts_sys::hts_idx_get_stat(self.inner, tid as i32, &mut mapped, &mut unmapped);
950        }
951        (mapped, unmapped)
952    }
953
954    /// Get the total number of unmapped reads in the file
955    /// FIXME only valid for BAM, not SAM/CRAM
956    fn number_unmapped(&self) -> u64 {
957        unsafe { hts_sys::hts_idx_get_n_no_coor(self.inner) }
958    }
959}
960
961impl Drop for IndexView {
962    fn drop(&mut self) {
963        if self.owned {
964            unsafe {
965                htslib::hts_idx_destroy(self.inner);
966            }
967        }
968    }
969}
970
971impl Read for IndexedReader {
972    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
973        match self.itr {
974            Some(itr) => {
975                match itr_next(self.htsfile, itr, &mut record.inner as *mut htslib::bam1_t) {
976                    -1 => None,
977                    -2 => Some(Err(Error::BamTruncatedRecord)),
978                    -4 => Some(Err(Error::BamInvalidRecord)),
979                    _ => {
980                        record.set_header(Rc::clone(&self.header));
981
982                        Some(Ok(()))
983                    }
984                }
985            }
986            None => None,
987        }
988    }
989
990    /// Iterator over the records of the fetched region.
991    /// Note that, while being convenient, this is less efficient than pre-allocating a
992    /// `Record` and reading into it with the `read` method, since every iteration involves
993    /// the allocation of a new `Record`.
994    fn records(&mut self) -> Records<'_, Self> {
995        Records { reader: self }
996    }
997
998    fn rc_records(&mut self) -> RcRecords<'_, Self> {
999        RcRecords {
1000            reader: self,
1001            record: Rc::new(record::Record::new()),
1002        }
1003    }
1004
1005    fn pileup(&mut self) -> pileup::Pileups<'_, Self> {
1006        let _self = self as *const Self;
1007        let itr = unsafe {
1008            htslib::bam_plp_init(
1009                Some(IndexedReader::pileup_read),
1010                _self as *mut ::std::os::raw::c_void,
1011            )
1012        };
1013        pileup::Pileups::new(self, itr)
1014    }
1015
1016    fn htsfile(&self) -> *mut htslib::htsFile {
1017        self.htsfile
1018    }
1019
1020    fn header(&self) -> &HeaderView {
1021        &self.header
1022    }
1023
1024    fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
1025        unsafe { set_thread_pool(self.htsfile(), tpool)? }
1026        self.tpool = Some(tpool.clone());
1027        Ok(())
1028    }
1029}
1030
1031impl Drop for IndexedReader {
1032    fn drop(&mut self) {
1033        unsafe {
1034            if self.itr.is_some() {
1035                htslib::hts_itr_destroy(self.itr.unwrap());
1036            }
1037            htslib::hts_close(self.htsfile);
1038        }
1039    }
1040}
1041
1042#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1043pub enum Format {
1044    Sam,
1045    Bam,
1046    Cram,
1047}
1048
1049impl Format {
1050    fn write_mode(self) -> &'static [u8] {
1051        match self {
1052            Format::Sam => b"w",
1053            Format::Bam => b"wb",
1054            Format::Cram => b"wc",
1055        }
1056    }
1057}
1058
1059/// A BAM writer.
1060#[derive(Debug)]
1061pub struct Writer {
1062    f: *mut htslib::htsFile,
1063    header: Rc<HeaderView>,
1064    tpool: Option<ThreadPool>,
1065}
1066
1067unsafe impl Send for Writer {}
1068
1069impl Writer {
1070    /// Create a new SAM/BAM/CRAM file.
1071    ///
1072    /// # Arguments
1073    ///
1074    /// * `path` - the path.
1075    /// * `header` - header definition to use
1076    /// * `format` - the format to use (SAM/BAM/CRAM)
1077    pub fn from_path<P: AsRef<Path>>(
1078        path: P,
1079        header: &header::Header,
1080        format: Format,
1081    ) -> Result<Self> {
1082        Self::new(&path_as_bytes(path, false)?, format.write_mode(), header)
1083    }
1084
1085    /// Create a new SAM/BAM/CRAM file at STDOUT.
1086    ///
1087    /// # Arguments
1088    ///
1089    /// * `header` - header definition to use
1090    /// * `format` - the format to use (SAM/BAM/CRAM)
1091    pub fn from_stdout(header: &header::Header, format: Format) -> Result<Self> {
1092        Self::new(b"-", format.write_mode(), header)
1093    }
1094
1095    /// Create a new SAM/BAM/CRAM file.
1096    ///
1097    /// # Arguments
1098    ///
1099    /// * `path` - the path. Use "-" for stdout.
1100    /// * `mode` - write mode, refer to htslib::hts_open()
1101    /// * `header` - header definition to use
1102    fn new(path: &[u8], mode: &[u8], header: &header::Header) -> Result<Self> {
1103        let f = hts_open(path, mode)?;
1104
1105        // sam_hdr_parse does not populate the text and l_text fields of the header_record.
1106        // This causes non-SQ headers to be dropped in the output BAM file.
1107        // To avoid this, we copy the All header to a new C-string that is allocated with malloc,
1108        // and set this into header_record manually.
1109        let header_record = unsafe {
1110            let mut header_string = header.to_bytes();
1111            if !header_string.is_empty() && header_string[header_string.len() - 1] != b'\n' {
1112                header_string.push(b'\n');
1113            }
1114            let l_text = header_string.len();
1115            let text = ::libc::malloc(l_text + 1);
1116            libc::memset(text, 0, l_text + 1);
1117            libc::memcpy(
1118                text,
1119                header_string.as_ptr() as *const ::libc::c_void,
1120                header_string.len(),
1121            );
1122
1123            //println!("{}", str::from_utf8(&header_string).unwrap());
1124            let rec = htslib::sam_hdr_parse(
1125                ((l_text + 1) as usize).try_into().unwrap(),
1126                text as *const c_char,
1127            );
1128
1129            (*rec).text = text as *mut c_char;
1130            (*rec).l_text = l_text;
1131            rec
1132        };
1133
1134        unsafe {
1135            htslib::sam_hdr_write(f, header_record);
1136        }
1137
1138        Ok(Writer {
1139            f,
1140            header: Rc::new(HeaderView::new(header_record)),
1141            tpool: None,
1142        })
1143    }
1144
1145    /// Activate multi-threaded BAM write support in htslib. This should permit faster
1146    /// writing of large BAM files.
1147    ///
1148    /// # Arguments
1149    ///
1150    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
1151    pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
1152        unsafe { set_threads(self.f, n_threads) }
1153    }
1154
1155    /// Use a shared thread-pool for writing. This permits controlling the total
1156    /// thread count when multiple readers and writers are working simultaneously.
1157    /// A thread pool can be created with `crate::tpool::ThreadPool::new(n_threads)`
1158    ///
1159    /// # Arguments
1160    ///
1161    /// * `tpool` - thread pool to use for compression work.
1162    pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
1163        unsafe { set_thread_pool(self.f, tpool)? }
1164        self.tpool = Some(tpool.clone());
1165        Ok(())
1166    }
1167
1168    /// Write record to BAM.
1169    ///
1170    /// # Arguments
1171    ///
1172    /// * `record` - the record to write
1173    pub fn write(&mut self, record: &record::Record) -> Result<()> {
1174        if unsafe { htslib::sam_write1(self.f, self.header.inner(), record.inner_ptr()) } == -1 {
1175            Err(Error::WriteRecord)
1176        } else {
1177            Ok(())
1178        }
1179    }
1180
1181    /// Return the header.
1182    pub fn header(&self) -> &HeaderView {
1183        &self.header
1184    }
1185
1186    /// Set the reference path for reading CRAM files.
1187    ///
1188    /// # Arguments
1189    ///
1190    /// * `path` - path to the FASTA reference
1191    pub fn set_reference<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
1192        unsafe { set_fai_filename(self.f, path) }
1193    }
1194
1195    /// Set the compression level for writing BAM/CRAM files.
1196    ///
1197    /// # Arguments
1198    ///
1199    /// * `compression_level` - `CompressionLevel` enum variant
1200    pub fn set_compression_level(&mut self, compression_level: CompressionLevel) -> Result<()> {
1201        let level = compression_level.convert()?;
1202        match unsafe {
1203            htslib::hts_set_opt(
1204                self.f,
1205                htslib::hts_fmt_option_HTS_OPT_COMPRESSION_LEVEL,
1206                level,
1207            )
1208        } {
1209            0 => Ok(()),
1210            _ => Err(Error::BamInvalidCompressionLevel { level }),
1211        }
1212    }
1213}
1214
1215/// Compression levels in BAM/CRAM files
1216///
1217/// * Uncompressed: No compression, zlib level 0
1218/// * Fastest: Lowest compression level, zlib level 1
1219/// * Maximum: Highest compression level, zlib level 9
1220/// * Level(i): Custom compression level in the range [0, 9]
1221#[derive(Debug, Clone, Copy)]
1222pub enum CompressionLevel {
1223    Uncompressed,
1224    Fastest,
1225    Maximum,
1226    Level(u32),
1227}
1228
1229impl CompressionLevel {
1230    // Convert and check the variants of the `CompressionLevel` enum to a numeric level
1231    fn convert(self) -> Result<u32> {
1232        match self {
1233            CompressionLevel::Uncompressed => Ok(0),
1234            CompressionLevel::Fastest => Ok(1),
1235            CompressionLevel::Maximum => Ok(9),
1236            CompressionLevel::Level(i @ 0..=9) => Ok(i),
1237            CompressionLevel::Level(i) => Err(Error::BamInvalidCompressionLevel { level: i }),
1238        }
1239    }
1240}
1241
1242impl Drop for Writer {
1243    fn drop(&mut self) {
1244        unsafe {
1245            htslib::hts_close(self.f);
1246        }
1247    }
1248}
1249
1250/// Iterator over the records of a BAM.
1251#[derive(Debug)]
1252pub struct Records<'a, R: Read> {
1253    reader: &'a mut R,
1254}
1255
1256impl<'a, R: Read> Iterator for Records<'a, R> {
1257    type Item = Result<record::Record>;
1258
1259    fn next(&mut self) -> Option<Result<record::Record>> {
1260        let mut record = record::Record::new();
1261        match self.reader.read(&mut record) {
1262            None => None,
1263            Some(Ok(_)) => Some(Ok(record)),
1264            Some(Err(err)) => Some(Err(err)),
1265        }
1266    }
1267}
1268
1269/// Iterator over the records of a BAM, using an Rc.
1270///
1271/// See [rc_records](trait.Read.html#tymethod.rc_records).
1272#[derive(Debug)]
1273pub struct RcRecords<'a, R: Read> {
1274    reader: &'a mut R,
1275    record: Rc<record::Record>,
1276}
1277
1278impl<'a, R: Read> Iterator for RcRecords<'a, R> {
1279    type Item = Result<Rc<record::Record>>;
1280
1281    fn next(&mut self) -> Option<Self::Item> {
1282        let mut record = match Rc::get_mut(&mut self.record) {
1283            //not make_mut, we don't need a clone
1284            Some(x) => x,
1285            None => {
1286                self.record = Rc::new(record::Record::new());
1287                Rc::get_mut(&mut self.record).unwrap()
1288            }
1289        };
1290
1291        match self.reader.read(&mut record) {
1292            None => None,
1293            Some(Ok(_)) => Some(Ok(Rc::clone(&self.record))),
1294            Some(Err(err)) => Some(Err(err)),
1295        }
1296    }
1297}
1298
1299/// Iterator over the records of a BAM until the virtual offset is less than `end`
1300pub struct ChunkIterator<'a, R: Read> {
1301    reader: &'a mut R,
1302    end: Option<i64>,
1303}
1304
1305impl<'a, R: Read> Iterator for ChunkIterator<'a, R> {
1306    type Item = Result<record::Record>;
1307    fn next(&mut self) -> Option<Result<record::Record>> {
1308        if let Some(pos) = self.end {
1309            if self.reader.tell() >= pos {
1310                return None;
1311            }
1312        }
1313        let mut record = record::Record::new();
1314        match self.reader.read(&mut record) {
1315            None => None,
1316            Some(Ok(_)) => Some(Ok(record)),
1317            Some(Err(err)) => Some(Err(err)),
1318        }
1319    }
1320}
1321
1322/// Wrapper for opening a BAM file.
1323fn hts_open(path: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
1324    let cpath = ffi::CString::new(path).unwrap();
1325    let path = str::from_utf8(path).unwrap();
1326    let c_str = ffi::CString::new(mode).unwrap();
1327    let ret = unsafe { htslib::hts_open(cpath.as_ptr(), c_str.as_ptr()) };
1328    if ret.is_null() {
1329        Err(Error::BamOpen {
1330            target: path.to_owned(),
1331        })
1332    } else {
1333        if !mode.contains(&b'w') {
1334            unsafe {
1335                // Comparison against 'htsFormatCategory_sequence_data' doesn't handle text files correctly
1336                // hence the explicit checks against all supported exact formats
1337                if (*ret).format.format != htslib::htsExactFormat_sam
1338                    && (*ret).format.format != htslib::htsExactFormat_bam
1339                    && (*ret).format.format != htslib::htsExactFormat_cram
1340                {
1341                    return Err(Error::BamOpen {
1342                        target: path.to_owned(),
1343                    });
1344                }
1345            }
1346        }
1347        Ok(ret)
1348    }
1349}
1350
1351/// Wrapper for iterating an indexed BAM file.
1352fn itr_next(
1353    htsfile: *mut htslib::htsFile,
1354    itr: *mut htslib::hts_itr_t,
1355    record: *mut htslib::bam1_t,
1356) -> i32 {
1357    unsafe {
1358        htslib::hts_itr_next(
1359            (*htsfile).fp.bgzf,
1360            itr,
1361            record as *mut ::std::os::raw::c_void,
1362            htsfile as *mut ::std::os::raw::c_void,
1363        )
1364    }
1365}
1366
1367#[derive(Debug)]
1368pub struct HeaderView {
1369    inner: *mut htslib::bam_hdr_t,
1370    owned: bool,
1371}
1372
1373impl HeaderView {
1374    /// Create a new HeaderView from a pre-populated Header object
1375    pub fn from_header(header: &Header) -> Self {
1376        let mut header_string = header.to_bytes();
1377        if !header_string.is_empty() && header_string[header_string.len() - 1] != b'\n' {
1378            header_string.push(b'\n');
1379        }
1380        Self::from_bytes(&header_string)
1381    }
1382
1383    /// Create a new HeaderView from bytes
1384    pub fn from_bytes(header_string: &[u8]) -> Self {
1385        let header_record = unsafe {
1386            let l_text = header_string.len();
1387            let text = ::libc::malloc(l_text + 1);
1388            ::libc::memset(text, 0, l_text + 1);
1389            ::libc::memcpy(
1390                text,
1391                header_string.as_ptr() as *const ::libc::c_void,
1392                header_string.len(),
1393            );
1394
1395            let rec = htslib::sam_hdr_parse((l_text + 1), text as *const c_char);
1396            (*rec).text = text as *mut c_char;
1397            (*rec).l_text = l_text;
1398            rec
1399        };
1400
1401        HeaderView::new(header_record)
1402    }
1403
1404    /// Create a new HeaderView from the underlying Htslib type, and own it.
1405    pub fn new(inner: *mut htslib::bam_hdr_t) -> Self {
1406        HeaderView { inner, owned: true }
1407    }
1408
1409    #[inline]
1410    pub fn inner(&self) -> &htslib::bam_hdr_t {
1411        unsafe { self.inner.as_ref().unwrap() }
1412    }
1413
1414    #[inline]
1415    // Pointer to inner bam_hdr_t struct
1416    pub fn inner_ptr(&self) -> *const htslib::bam_hdr_t {
1417        self.inner
1418    }
1419
1420    #[inline]
1421    pub fn inner_mut(&mut self) -> &mut htslib::bam_hdr_t {
1422        unsafe { self.inner.as_mut().unwrap() }
1423    }
1424
1425    #[inline]
1426    // Mutable pointer to bam_hdr_t struct
1427    pub fn inner_ptr_mut(&mut self) -> *mut htslib::bam_hdr_t {
1428        self.inner
1429    }
1430
1431    pub fn tid(&self, name: &[u8]) -> Option<u32> {
1432        let c_str = ffi::CString::new(name).expect("Expected valid name.");
1433        let tid = unsafe { htslib::sam_hdr_name2tid(self.inner, c_str.as_ptr()) };
1434        if tid < 0 {
1435            None
1436        } else {
1437            Some(tid as u32)
1438        }
1439    }
1440
1441    pub fn tid2name(&self, tid: u32) -> &[u8] {
1442        unsafe { ffi::CStr::from_ptr(htslib::sam_hdr_tid2name(self.inner, tid as i32)).to_bytes() }
1443    }
1444
1445    pub fn target_count(&self) -> u32 {
1446        self.inner().n_targets as u32
1447    }
1448
1449    pub fn target_names(&self) -> Vec<&[u8]> {
1450        let names = unsafe {
1451            slice::from_raw_parts(self.inner().target_name, self.target_count() as usize)
1452        };
1453        names
1454            .iter()
1455            .map(|name| unsafe { ffi::CStr::from_ptr(*name).to_bytes() })
1456            .collect()
1457    }
1458
1459    pub fn target_len(&self, tid: u32) -> Option<u64> {
1460        let inner = unsafe { *self.inner };
1461        if (tid as i32) < inner.n_targets {
1462            let l: &[u32] =
1463                unsafe { slice::from_raw_parts(inner.target_len, inner.n_targets as usize) };
1464            Some(l[tid as usize] as u64)
1465        } else {
1466            None
1467        }
1468    }
1469
1470    /// Retrieve the textual SAM header as bytes
1471    pub fn as_bytes(&self) -> &[u8] {
1472        unsafe {
1473            let rebuilt_hdr = htslib::sam_hdr_str(self.inner);
1474            if rebuilt_hdr.is_null() {
1475                return b"";
1476            }
1477            ffi::CStr::from_ptr(rebuilt_hdr).to_bytes()
1478        }
1479    }
1480}
1481
1482impl Clone for HeaderView {
1483    fn clone(&self) -> Self {
1484        HeaderView {
1485            inner: unsafe { htslib::sam_hdr_dup(self.inner) },
1486            owned: true,
1487        }
1488    }
1489}
1490
1491impl Drop for HeaderView {
1492    fn drop(&mut self) {
1493        if self.owned {
1494            unsafe {
1495                htslib::sam_hdr_destroy(self.inner);
1496            }
1497        }
1498    }
1499}
1500
1501#[cfg(test)]
1502mod tests {
1503    use super::header::HeaderRecord;
1504    use super::record::{Aux, Cigar, CigarString};
1505    use super::*;
1506    use std::collections::HashMap;
1507    use std::fs;
1508    use std::path::Path;
1509    use std::str;
1510
1511    fn gold() -> (
1512        [&'static [u8]; 6],
1513        [u16; 6],
1514        [&'static [u8]; 6],
1515        [&'static [u8]; 6],
1516        [CigarString; 6],
1517    ) {
1518        let names = [
1519            &b"I"[..],
1520            &b"II.14978392"[..],
1521            &b"III"[..],
1522            &b"IV"[..],
1523            &b"V"[..],
1524            &b"VI"[..],
1525        ];
1526        let flags = [16u16, 16u16, 16u16, 16u16, 16u16, 2048u16];
1527        let seqs = [
1528            &b"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC\
1529TAAGCCTAAGCCTAAGCCTAA"[..],
1530            &b"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC\
1531TAAGCCTAAGCCTAAGCCTAA"[..],
1532            &b"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC\
1533TAAGCCTAAGCCTAAGCCTAA"[..],
1534            &b"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC\
1535TAAGCCTAAGCCTAAGCCTAA"[..],
1536            &b"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC\
1537TAAGCCTAAGCCTAAGCCTAA"[..],
1538            &b"ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTT\
1539TTCATAAAATTATCGATTTTA"[..],
1540        ];
1541        let quals = [
1542            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1543CCCCCCCCCCCCCCCCCCC"[..],
1544            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1545CCCCCCCCCCCCCCCCCCC"[..],
1546            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1547CCCCCCCCCCCCCCCCCCC"[..],
1548            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1549CCCCCCCCCCCCCCCCCCC"[..],
1550            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1551CCCCCCCCCCCCCCCCCCC"[..],
1552            &b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC\
1553CCCCCCCCCCCCCCCCCCC"[..],
1554        ];
1555        let cigars = [
1556            CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
1557            CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
1558            CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
1559            CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
1560            CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
1561            CigarString(vec![Cigar::Match(27), Cigar::Del(100000), Cigar::Match(73)]),
1562        ];
1563        (names, flags, seqs, quals, cigars)
1564    }
1565
1566    fn compare_inner_bam_cram_records(cram_records: &[Record], bam_records: &[Record]) {
1567        // Selectively compares bam1_t struct fields from BAM and CRAM
1568        for (c1, b1) in cram_records.iter().zip(bam_records.iter()) {
1569            // CRAM vs BAM l_data is off by 3, see: https://github.com/rust-bio/rust-htslib/pull/184#issuecomment-590133544
1570            // The rest of the fields should be identical:
1571            assert_eq!(c1.cigar(), b1.cigar());
1572            assert_eq!(c1.inner().core.pos, b1.inner().core.pos);
1573            assert_eq!(c1.inner().core.mpos, b1.inner().core.mpos);
1574            assert_eq!(c1.inner().core.mtid, b1.inner().core.mtid);
1575            assert_eq!(c1.inner().core.tid, b1.inner().core.tid);
1576            assert_eq!(c1.inner().core.bin, b1.inner().core.bin);
1577            assert_eq!(c1.inner().core.qual, b1.inner().core.qual);
1578            assert_eq!(c1.inner().core.l_extranul, b1.inner().core.l_extranul);
1579            assert_eq!(c1.inner().core.flag, b1.inner().core.flag);
1580            assert_eq!(c1.inner().core.l_qname, b1.inner().core.l_qname);
1581            assert_eq!(c1.inner().core.n_cigar, b1.inner().core.n_cigar);
1582            assert_eq!(c1.inner().core.l_qseq, b1.inner().core.l_qseq);
1583            assert_eq!(c1.inner().core.isize_, b1.inner().core.isize_);
1584            //... except m_data
1585        }
1586    }
1587
1588    #[test]
1589    fn test_read() {
1590        let (names, flags, seqs, quals, cigars) = gold();
1591        let mut bam = Reader::from_path(&Path::new("test/test.bam")).expect("Error opening file.");
1592        let del_len = [1, 1, 1, 1, 1, 100000];
1593
1594        for (i, record) in bam.records().enumerate() {
1595            let rec = record.expect("Expected valid record");
1596            assert_eq!(rec.qname(), names[i]);
1597            assert_eq!(rec.flags(), flags[i]);
1598            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1599
1600            let cigar = rec.cigar();
1601            assert_eq!(*cigar, cigars[i]);
1602
1603            let end_pos = cigar.end_pos();
1604            assert_eq!(end_pos, rec.pos() + 100 + del_len[i]);
1605            assert_eq!(
1606                cigar
1607                    .read_pos(end_pos as u32 - 10, false, false)
1608                    .unwrap()
1609                    .unwrap(),
1610                90
1611            );
1612            assert_eq!(
1613                cigar
1614                    .read_pos(rec.pos() as u32 + 20, false, false)
1615                    .unwrap()
1616                    .unwrap(),
1617                20
1618            );
1619            assert_eq!(cigar.read_pos(4000000, false, false).unwrap(), None);
1620            // fix qual offset
1621            let qual: Vec<u8> = quals[i].iter().map(|&q| q - 33).collect();
1622            assert_eq!(rec.qual(), &qual[..]);
1623        }
1624    }
1625
1626    #[test]
1627    fn test_seek() {
1628        let mut bam = Reader::from_path(&Path::new("test/test.bam")).expect("Error opening file.");
1629
1630        let mut names_by_voffset = HashMap::new();
1631
1632        let mut offset = bam.tell();
1633        let mut rec = Record::new();
1634        loop {
1635            match bam.read(&mut rec) {
1636                Some(r) => r.expect("error reading bam"),
1637                None => break,
1638            };
1639            let qname = str::from_utf8(rec.qname()).unwrap().to_string();
1640            println!("{} {}", offset, qname);
1641            names_by_voffset.insert(offset, qname);
1642            offset = bam.tell();
1643        }
1644
1645        for (offset, qname) in names_by_voffset.iter() {
1646            println!("{} {}", offset, qname);
1647            bam.seek(*offset).unwrap();
1648            match bam.read(&mut rec) {
1649                Some(r) => r.unwrap(),
1650                None => {}
1651            };
1652            let rec_qname = str::from_utf8(rec.qname()).unwrap().to_string();
1653            assert_eq!(qname, &rec_qname);
1654        }
1655    }
1656
1657    #[test]
1658    fn test_read_sam_header() {
1659        let bam = Reader::from_path(&"test/test.bam").expect("Error opening file.");
1660
1661        let true_header = "@SQ\tSN:CHROMOSOME_I\tLN:15072423\n@SQ\tSN:CHROMOSOME_II\tLN:15279345\
1662             \n@SQ\tSN:CHROMOSOME_III\tLN:13783700\n@SQ\tSN:CHROMOSOME_IV\tLN:17493793\n@SQ\t\
1663             SN:CHROMOSOME_V\tLN:20924149\n"
1664            .to_string();
1665        let header_text = String::from_utf8(bam.header.as_bytes().to_owned()).unwrap();
1666        assert_eq!(header_text, true_header);
1667    }
1668
1669    #[test]
1670    fn test_read_against_sam() {
1671        let mut bam = Reader::from_path("./test/bam2sam_out.sam").unwrap();
1672        for read in bam.records() {
1673            let _read = read.unwrap();
1674        }
1675    }
1676
1677    fn _test_read_indexed_common(mut bam: IndexedReader) {
1678        let (names, flags, seqs, quals, cigars) = gold();
1679        let sq_1 = b"CHROMOSOME_I";
1680        let sq_2 = b"CHROMOSOME_II";
1681        let tid_1 = bam.header.tid(sq_1).expect("Expected tid.");
1682        let tid_2 = bam.header.tid(sq_2).expect("Expected tid.");
1683        assert!(bam.header.target_len(tid_1).expect("Expected target len.") == 15072423);
1684
1685        // fetch to position containing reads
1686        bam.fetch((tid_1, 0, 2))
1687            .expect("Expected successful fetch.");
1688        assert!(bam.records().count() == 6);
1689
1690        // compare reads
1691        bam.fetch((tid_1, 0, 2))
1692            .expect("Expected successful fetch.");
1693        for (i, record) in bam.records().enumerate() {
1694            let rec = record.expect("Expected valid record");
1695
1696            println!("{}", str::from_utf8(rec.qname()).unwrap());
1697            assert_eq!(rec.qname(), names[i]);
1698            assert_eq!(rec.flags(), flags[i]);
1699            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1700            assert_eq!(*rec.cigar(), cigars[i]);
1701            // fix qual offset
1702            let qual: Vec<u8> = quals[i].iter().map(|&q| q - 33).collect();
1703            assert_eq!(rec.qual(), &qual[..]);
1704            assert_eq!(rec.aux(b"NotAvailableAux"), Err(Error::BamAuxTagNotFound));
1705        }
1706
1707        // fetch to empty position
1708        bam.fetch((tid_2, 1, 1))
1709            .expect("Expected successful fetch.");
1710        assert!(bam.records().count() == 0);
1711
1712        // repeat with byte-string based fetch
1713
1714        // fetch to position containing reads
1715        // using coordinate-string chr:start-stop
1716        bam.fetch(format!("{}:{}-{}", str::from_utf8(sq_1).unwrap(), 0, 2).as_bytes())
1717            .expect("Expected successful fetch.");
1718        assert!(bam.records().count() == 6);
1719        // using &str and exercising some of the coordinate conversion funcs
1720        bam.fetch((str::from_utf8(sq_1).unwrap(), 0 as u32, 2 as u64))
1721            .expect("Expected successful fetch.");
1722        assert!(bam.records().count() == 6);
1723        // using a slice
1724        bam.fetch((&sq_1[..], 0, 2))
1725            .expect("Expected successful fetch.");
1726        assert!(bam.records().count() == 6);
1727        // using a literal
1728        bam.fetch((sq_1, 0, 2)).expect("Expected successful fetch.");
1729        assert!(bam.records().count() == 6);
1730
1731        // using a tid
1732        bam.fetch((0i32, 0u32, 2i64))
1733            .expect("Expected successful fetch.");
1734        assert!(bam.records().count() == 6);
1735        // using a tid:u32
1736        bam.fetch((0u32, 0u32, 2i64))
1737            .expect("Expected successful fetch.");
1738        assert!(bam.records().count() == 6);
1739
1740        // compare reads
1741        bam.fetch(format!("{}:{}-{}", str::from_utf8(sq_1).unwrap(), 0, 2).as_bytes())
1742            .expect("Expected successful fetch.");
1743        for (i, record) in bam.records().enumerate() {
1744            let rec = record.expect("Expected valid record");
1745
1746            println!("{}", str::from_utf8(rec.qname()).unwrap());
1747            assert_eq!(rec.qname(), names[i]);
1748            assert_eq!(rec.flags(), flags[i]);
1749            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1750            assert_eq!(*rec.cigar(), cigars[i]);
1751            // fix qual offset
1752            let qual: Vec<u8> = quals[i].iter().map(|&q| q - 33).collect();
1753            assert_eq!(rec.qual(), &qual[..]);
1754            assert_eq!(rec.aux(b"NotAvailableAux"), Err(Error::BamAuxTagNotFound));
1755        }
1756
1757        // fetch to empty position
1758        bam.fetch(format!("{}:{}-{}", str::from_utf8(sq_2).unwrap(), 1, 1).as_bytes())
1759            .expect("Expected successful fetch.");
1760        assert!(bam.records().count() == 0);
1761
1762        //all on a tid
1763        bam.fetch(0).expect("Expected successful fetch.");
1764        assert!(bam.records().count() == 6);
1765        //all on a tid:u32
1766        bam.fetch(0u32).expect("Expected successful fetch.");
1767        assert!(bam.records().count() == 6);
1768
1769        //all on a tid - by &[u8]
1770        bam.fetch(sq_1).expect("Expected successful fetch.");
1771        assert!(bam.records().count() == 6);
1772        //all on a tid - by str
1773        bam.fetch(str::from_utf8(sq_1).unwrap())
1774            .expect("Expected successful fetch.");
1775        assert!(bam.records().count() == 6);
1776
1777        //all reads
1778        bam.fetch(FetchDefinition::All)
1779            .expect("Expected successful fetch.");
1780        assert!(bam.records().count() == 6);
1781
1782        //all reads
1783        bam.fetch(".").expect("Expected successful fetch.");
1784        assert!(bam.records().count() == 6);
1785
1786        //all unmapped
1787        bam.fetch(FetchDefinition::Unmapped)
1788            .expect("Expected successful fetch.");
1789        assert_eq!(bam.records().count(), 1); // expect one 'truncade record' Record.
1790
1791        bam.fetch("*").expect("Expected successful fetch.");
1792        assert_eq!(bam.records().count(), 1); // expect one 'truncade record' Record.
1793    }
1794
1795    #[test]
1796    fn test_read_indexed() {
1797        let bam = IndexedReader::from_path(&"test/test.bam").expect("Expected valid index.");
1798        _test_read_indexed_common(bam);
1799    }
1800
1801    #[test]
1802    fn test_read_indexed_different_index_name() {
1803        let bam = IndexedReader::from_path_and_index(
1804            &"test/test_different_index_name.bam",
1805            &"test/test.bam.bai",
1806        )
1807        .expect("Expected valid index.");
1808        _test_read_indexed_common(bam);
1809    }
1810
1811    #[test]
1812    fn test_set_record() {
1813        let (names, _, seqs, quals, cigars) = gold();
1814
1815        let mut rec = record::Record::new();
1816        rec.set_reverse();
1817        rec.set(names[0], Some(&cigars[0]), seqs[0], quals[0]);
1818        // note: this segfaults if you push_aux() before set()
1819        //       because set() obliterates aux
1820        rec.push_aux(b"NM", Aux::I32(15)).unwrap();
1821
1822        assert_eq!(rec.qname(), names[0]);
1823        assert_eq!(*rec.cigar(), cigars[0]);
1824        assert_eq!(rec.seq().as_bytes(), seqs[0]);
1825        assert_eq!(rec.qual(), quals[0]);
1826        assert!(rec.is_reverse());
1827        assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1828    }
1829
1830    #[test]
1831    fn test_set_repeated() {
1832        let mut rec = Record::new();
1833        rec.set(
1834            b"123",
1835            Some(&CigarString(vec![Cigar::Match(3)])),
1836            b"AAA",
1837            b"III",
1838        );
1839        rec.push_aux(b"AS", Aux::I32(12345)).unwrap();
1840        assert_eq!(rec.qname(), b"123");
1841        assert_eq!(rec.seq().as_bytes(), b"AAA");
1842        assert_eq!(rec.qual(), b"III");
1843        assert_eq!(rec.aux(b"AS").unwrap(), Aux::I32(12345));
1844
1845        rec.set(
1846            b"1234",
1847            Some(&CigarString(vec![Cigar::SoftClip(1), Cigar::Match(3)])),
1848            b"AAAA",
1849            b"IIII",
1850        );
1851        assert_eq!(rec.qname(), b"1234");
1852        assert_eq!(rec.seq().as_bytes(), b"AAAA");
1853        assert_eq!(rec.qual(), b"IIII");
1854        assert_eq!(rec.aux(b"AS").unwrap(), Aux::I32(12345));
1855
1856        rec.set(
1857            b"12",
1858            Some(&CigarString(vec![Cigar::Match(2)])),
1859            b"AA",
1860            b"II",
1861        );
1862        assert_eq!(rec.qname(), b"12");
1863        assert_eq!(rec.seq().as_bytes(), b"AA");
1864        assert_eq!(rec.qual(), b"II");
1865        assert_eq!(rec.aux(b"AS").unwrap(), Aux::I32(12345));
1866    }
1867
1868    #[test]
1869    fn test_set_qname() {
1870        let (names, _, seqs, quals, cigars) = gold();
1871
1872        assert!(names[0] != names[1]);
1873
1874        for i in 0..names.len() {
1875            let mut rec = record::Record::new();
1876            rec.set(names[i], Some(&cigars[i]), seqs[i], quals[i]);
1877            rec.push_aux(b"NM", Aux::I32(15)).unwrap();
1878
1879            assert_eq!(rec.qname(), names[i]);
1880            assert_eq!(*rec.cigar(), cigars[i]);
1881            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1882            assert_eq!(rec.qual(), quals[i]);
1883            assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1884
1885            // Equal length qname
1886            assert!(rec.qname()[0] != b'X');
1887            rec.set_qname(b"X");
1888            assert_eq!(rec.qname(), b"X");
1889
1890            // Longer qname
1891            let mut longer_name = names[i].to_owned().clone();
1892            let extension = b"BuffaloBUffaloBUFFaloBUFFAloBUFFALoBUFFALO";
1893            longer_name.extend(extension.iter());
1894            rec.set_qname(&longer_name);
1895
1896            assert_eq!(rec.qname(), longer_name.as_slice());
1897            assert_eq!(*rec.cigar(), cigars[i]);
1898            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1899            assert_eq!(rec.qual(), quals[i]);
1900            assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1901
1902            // Shorter qname
1903            let shorter_name = b"42";
1904            rec.set_qname(shorter_name);
1905
1906            assert_eq!(rec.qname(), shorter_name);
1907            assert_eq!(*rec.cigar(), cigars[i]);
1908            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1909            assert_eq!(rec.qual(), quals[i]);
1910            assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1911
1912            // Zero-length qname
1913            rec.set_qname(b"");
1914
1915            assert_eq!(rec.qname(), b"");
1916            assert_eq!(*rec.cigar(), cigars[i]);
1917            assert_eq!(rec.seq().as_bytes(), seqs[i]);
1918            assert_eq!(rec.qual(), quals[i]);
1919            assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1920        }
1921    }
1922
1923    #[test]
1924    fn test_set_qname2() {
1925        let mut _header = Header::new();
1926        _header.push_record(
1927            HeaderRecord::new(b"SQ")
1928                .push_tag(b"SN", &"1")
1929                .push_tag(b"LN", &10000000),
1930        );
1931        let header = HeaderView::from_header(&_header);
1932
1933        let line =
1934            b"blah1	0	1	1	255	1M	*	0	0	A	F	CB:Z:AAAA-1	UR:Z:AAAA	UB:Z:AAAA	GX:Z:G1	xf:i:1	fx:Z:G1\tli:i:0\ttf:Z:cC";
1935
1936        let mut rec = Record::from_sam(&header, line).unwrap();
1937        assert_eq!(rec.qname(), b"blah1");
1938        rec.set_qname(b"r0");
1939        assert_eq!(rec.qname(), b"r0");
1940    }
1941
1942    #[test]
1943    fn test_remove_aux() {
1944        let mut bam = Reader::from_path(&Path::new("test/test.bam")).expect("Error opening file.");
1945
1946        for record in bam.records() {
1947            let mut rec = record.expect("Expected valid record");
1948
1949            if rec.aux(b"XS").is_ok() {
1950                rec.remove_aux(b"XS").unwrap();
1951            }
1952
1953            if rec.aux(b"YT").is_ok() {
1954                rec.remove_aux(b"YT").unwrap();
1955            }
1956
1957            assert!(rec.remove_aux(b"ab").is_err());
1958
1959            assert_eq!(rec.aux(b"XS"), Err(Error::BamAuxTagNotFound));
1960            assert_eq!(rec.aux(b"YT"), Err(Error::BamAuxTagNotFound));
1961        }
1962    }
1963
1964    #[test]
1965    fn test_write() {
1966        let (names, _, seqs, quals, cigars) = gold();
1967
1968        let tmp = tempfile::Builder::new()
1969            .prefix("rust-htslib")
1970            .tempdir()
1971            .expect("Cannot create temp dir");
1972        let bampath = tmp.path().join("test.bam");
1973        println!("{:?}", bampath);
1974        {
1975            let mut bam = Writer::from_path(
1976                &bampath,
1977                Header::new().push_record(
1978                    HeaderRecord::new(b"SQ")
1979                        .push_tag(b"SN", &"chr1")
1980                        .push_tag(b"LN", &15072423),
1981                ),
1982                Format::Bam,
1983            )
1984            .expect("Error opening file.");
1985
1986            for i in 0..names.len() {
1987                let mut rec = record::Record::new();
1988                rec.set(names[i], Some(&cigars[i]), seqs[i], quals[i]);
1989                rec.push_aux(b"NM", Aux::I32(15)).unwrap();
1990
1991                bam.write(&rec).expect("Failed to write record.");
1992            }
1993        }
1994
1995        {
1996            let mut bam = Reader::from_path(&bampath).expect("Error opening file.");
1997
1998            for i in 0..names.len() {
1999                let mut rec = record::Record::new();
2000                match bam.read(&mut rec) {
2001                    Some(r) => r.expect("Failed to read record."),
2002                    None => {}
2003                };
2004
2005                assert_eq!(rec.qname(), names[i]);
2006                assert_eq!(*rec.cigar(), cigars[i]);
2007                assert_eq!(rec.seq().as_bytes(), seqs[i]);
2008                assert_eq!(rec.qual(), quals[i]);
2009                assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
2010            }
2011        }
2012
2013        tmp.close().expect("Failed to delete temp dir");
2014    }
2015
2016    #[test]
2017    fn test_write_threaded() {
2018        let (names, _, seqs, quals, cigars) = gold();
2019
2020        let tmp = tempfile::Builder::new()
2021            .prefix("rust-htslib")
2022            .tempdir()
2023            .expect("Cannot create temp dir");
2024        let bampath = tmp.path().join("test.bam");
2025        println!("{:?}", bampath);
2026        {
2027            let mut bam = Writer::from_path(
2028                &bampath,
2029                Header::new().push_record(
2030                    HeaderRecord::new(b"SQ")
2031                        .push_tag(b"SN", &"chr1")
2032                        .push_tag(b"LN", &15072423),
2033                ),
2034                Format::Bam,
2035            )
2036            .expect("Error opening file.");
2037            bam.set_threads(4).unwrap();
2038
2039            for i in 0..10000 {
2040                let mut rec = record::Record::new();
2041                let idx = i % names.len();
2042                rec.set(names[idx], Some(&cigars[idx]), seqs[idx], quals[idx]);
2043                rec.push_aux(b"NM", Aux::I32(15)).unwrap();
2044                rec.set_pos(i as i64);
2045
2046                bam.write(&rec).expect("Failed to write record.");
2047            }
2048        }
2049
2050        {
2051            let mut bam = Reader::from_path(&bampath).expect("Error opening file.");
2052
2053            for (i, _rec) in bam.records().enumerate() {
2054                let idx = i % names.len();
2055
2056                let rec = _rec.expect("Failed to read record.");
2057
2058                assert_eq!(rec.pos(), i as i64);
2059                assert_eq!(rec.qname(), names[idx]);
2060                assert_eq!(*rec.cigar(), cigars[idx]);
2061                assert_eq!(rec.seq().as_bytes(), seqs[idx]);
2062                assert_eq!(rec.qual(), quals[idx]);
2063                assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
2064            }
2065        }
2066
2067        tmp.close().expect("Failed to delete temp dir");
2068    }
2069
2070    #[test]
2071    fn test_write_shared_tpool() {
2072        let (names, _, seqs, quals, cigars) = gold();
2073
2074        let tmp = tempfile::Builder::new()
2075            .prefix("rust-htslib")
2076            .tempdir()
2077            .expect("Cannot create temp dir");
2078        let bampath1 = tmp.path().join("test1.bam");
2079        let bampath2 = tmp.path().join("test2.bam");
2080
2081        {
2082            let (mut bam1, mut bam2) = {
2083                let pool = crate::tpool::ThreadPool::new(4).unwrap();
2084
2085                let mut bam1 = Writer::from_path(
2086                    &bampath1,
2087                    Header::new().push_record(
2088                        HeaderRecord::new(b"SQ")
2089                            .push_tag(b"SN", &"chr1")
2090                            .push_tag(b"LN", &15072423),
2091                    ),
2092                    Format::Bam,
2093                )
2094                .expect("Error opening file.");
2095
2096                let mut bam2 = Writer::from_path(
2097                    &bampath2,
2098                    Header::new().push_record(
2099                        HeaderRecord::new(b"SQ")
2100                            .push_tag(b"SN", &"chr1")
2101                            .push_tag(b"LN", &15072423),
2102                    ),
2103                    Format::Bam,
2104                )
2105                .expect("Error opening file.");
2106
2107                bam1.set_thread_pool(&pool).unwrap();
2108                bam2.set_thread_pool(&pool).unwrap();
2109                (bam1, bam2)
2110            };
2111
2112            for i in 0..10000 {
2113                let mut rec = record::Record::new();
2114                let idx = i % names.len();
2115                rec.set(names[idx], Some(&cigars[idx]), seqs[idx], quals[idx]);
2116                rec.push_aux(b"NM", Aux::I32(15)).unwrap();
2117                rec.set_pos(i as i64);
2118
2119                bam1.write(&rec).expect("Failed to write record.");
2120                bam2.write(&rec).expect("Failed to write record.");
2121            }
2122        }
2123
2124        {
2125            let pool = crate::tpool::ThreadPool::new(2).unwrap();
2126
2127            for p in vec![bampath1, bampath2] {
2128                let mut bam = Reader::from_path(&p).expect("Error opening file.");
2129                bam.set_thread_pool(&pool).unwrap();
2130
2131                for (i, _rec) in bam.iter_chunk(None, None).enumerate() {
2132                    let idx = i % names.len();
2133
2134                    let rec = _rec.expect("Failed to read record.");
2135
2136                    assert_eq!(rec.pos(), i as i64);
2137                    assert_eq!(rec.qname(), names[idx]);
2138                    assert_eq!(*rec.cigar(), cigars[idx]);
2139                    assert_eq!(rec.seq().as_bytes(), seqs[idx]);
2140                    assert_eq!(rec.qual(), quals[idx]);
2141                    assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
2142                }
2143            }
2144        }
2145
2146        tmp.close().expect("Failed to delete temp dir");
2147    }
2148
2149    #[test]
2150    fn test_copy_template() {
2151        // Verify that BAM headers are transmitted correctly when using an existing BAM as a
2152        // template for headers.
2153
2154        let tmp = tempfile::Builder::new()
2155            .prefix("rust-htslib")
2156            .tempdir()
2157            .expect("Cannot create temp dir");
2158        let bampath = tmp.path().join("test.bam");
2159        println!("{:?}", bampath);
2160
2161        let mut input_bam = Reader::from_path(&"test/test.bam").expect("Error opening file.");
2162
2163        {
2164            let mut bam = Writer::from_path(
2165                &bampath,
2166                &Header::from_template(&input_bam.header()),
2167                Format::Bam,
2168            )
2169            .expect("Error opening file.");
2170
2171            for rec in input_bam.records() {
2172                bam.write(&rec.unwrap()).expect("Failed to write record.");
2173            }
2174        }
2175
2176        {
2177            let copy_bam = Reader::from_path(&bampath).expect("Error opening file.");
2178
2179            // Verify that the header came across correctly
2180            assert_eq!(input_bam.header().as_bytes(), copy_bam.header().as_bytes());
2181        }
2182
2183        tmp.close().expect("Failed to delete temp dir");
2184    }
2185
2186    #[test]
2187    fn test_pileup() {
2188        let (_, _, seqs, quals, _) = gold();
2189
2190        let mut bam = Reader::from_path(&"test/test.bam").expect("Error opening file.");
2191        let pileups = bam.pileup();
2192        for pileup in pileups.take(26) {
2193            let _pileup = pileup.expect("Expected successful pileup.");
2194            let pos = _pileup.pos() as usize;
2195            assert_eq!(_pileup.depth(), 6);
2196            assert!(_pileup.tid() == 0);
2197            for (i, a) in _pileup.alignments().enumerate() {
2198                assert_eq!(a.indel(), pileup::Indel::None);
2199                let qpos = a.qpos().unwrap();
2200                assert_eq!(qpos, pos - 1);
2201                assert_eq!(a.record().seq()[qpos], seqs[i][qpos]);
2202                assert_eq!(a.record().qual()[qpos], quals[i][qpos] - 33);
2203            }
2204        }
2205    }
2206
2207    #[test]
2208    fn test_idx_pileup() {
2209        let mut bam = IndexedReader::from_path(&"test/test.bam").expect("Error opening file.");
2210        // read without fetch
2211        for pileup in bam.pileup() {
2212            pileup.unwrap();
2213        }
2214        // go back again
2215        let tid = bam.header().tid(b"CHROMOSOME_I").unwrap();
2216        bam.fetch((tid, 0, 5)).unwrap();
2217        for p in bam.pileup() {
2218            println!("{}", p.unwrap().pos())
2219        }
2220    }
2221
2222    #[test]
2223    fn parse_from_sam() {
2224        use std::fs::File;
2225        use std::io::Read;
2226
2227        let bamfile = "./test/bam2sam_test.bam";
2228        let samfile = "./test/bam2sam_expected.sam";
2229
2230        // Load BAM file:
2231        let mut rdr = Reader::from_path(bamfile).unwrap();
2232        let bam_recs: Vec<Record> = rdr.records().map(|v| v.unwrap()).collect();
2233
2234        let mut sam = Vec::new();
2235        assert!(File::open(samfile).unwrap().read_to_end(&mut sam).is_ok());
2236
2237        let sam_recs: Vec<Record> = sam
2238            .split(|x| *x == b'\n')
2239            .filter(|x| !x.is_empty() && x[0] != b'@')
2240            .map(|line| Record::from_sam(rdr.header(), line).unwrap())
2241            .collect();
2242
2243        for (b1, s1) in bam_recs.iter().zip(sam_recs.iter()) {
2244            assert!(b1 == s1);
2245        }
2246    }
2247
2248    #[test]
2249    fn test_cigar_modes() {
2250        // test the cached and uncached ways of getting the cigar string.
2251
2252        let (_, _, _, _, cigars) = gold();
2253        let mut bam = Reader::from_path(&Path::new("test/test.bam")).expect("Error opening file.");
2254
2255        for (i, record) in bam.records().enumerate() {
2256            let rec = record.expect("Expected valid record");
2257
2258            let cigar = rec.cigar();
2259            assert_eq!(*cigar, cigars[i]);
2260        }
2261
2262        for (i, record) in bam.records().enumerate() {
2263            let mut rec = record.expect("Expected valid record");
2264            rec.cache_cigar();
2265
2266            let cigar = rec.cigar_cached().unwrap();
2267            assert_eq!(**cigar, cigars[i]);
2268
2269            let cigar = rec.cigar();
2270            assert_eq!(*cigar, cigars[i]);
2271        }
2272    }
2273
2274    #[test]
2275    fn test_read_cram() {
2276        let cram_path = "./test/test_cram.cram";
2277        let bam_path = "./test/test_cram.bam";
2278        let ref_path = "./test/test_cram.fa";
2279
2280        // Load CRAM file, records
2281        let mut cram_reader = Reader::from_path(cram_path).unwrap();
2282        cram_reader.set_reference(ref_path).unwrap();
2283        let cram_records: Vec<Record> = cram_reader.records().map(|v| v.unwrap()).collect();
2284
2285        // Load BAM file, records
2286        let mut bam_reader = Reader::from_path(bam_path).unwrap();
2287        let bam_records: Vec<Record> = bam_reader.records().map(|v| v.unwrap()).collect();
2288
2289        compare_inner_bam_cram_records(&cram_records, &bam_records);
2290    }
2291
2292    #[test]
2293    fn test_write_cram() {
2294        // BAM file, records
2295        let bam_path = "./test/test_cram.bam";
2296        let ref_path = "./test/test_cram.fa";
2297        let mut bam_reader = Reader::from_path(bam_path).unwrap();
2298        let bam_records: Vec<Record> = bam_reader.records().map(|v| v.unwrap()).collect();
2299
2300        // New CRAM file
2301        let tmp = tempfile::Builder::new()
2302            .prefix("rust-htslib")
2303            .tempdir()
2304            .expect("Cannot create temp dir");
2305        let cram_path = tmp.path().join("test.cram");
2306
2307        // Write BAM records to new CRAM file
2308        {
2309            let mut header = Header::new();
2310            header.push_record(
2311                HeaderRecord::new(b"HD")
2312                    .push_tag(b"VN", &"1.5")
2313                    .push_tag(b"SO", &"coordinate"),
2314            );
2315            header.push_record(
2316                HeaderRecord::new(b"SQ")
2317                    .push_tag(b"SN", &"chr1")
2318                    .push_tag(b"LN", &120)
2319                    .push_tag(b"M5", &"20a9a0fb770814e6c5e49946750f9724")
2320                    .push_tag(b"UR", &"test/test_cram.fa"),
2321            );
2322            header.push_record(
2323                HeaderRecord::new(b"SQ")
2324                    .push_tag(b"SN", &"chr2")
2325                    .push_tag(b"LN", &120)
2326                    .push_tag(b"M5", &"7a2006ccca94ea92b6dae5997e1b0d70")
2327                    .push_tag(b"UR", &"test/test_cram.fa"),
2328            );
2329            header.push_record(
2330                HeaderRecord::new(b"SQ")
2331                    .push_tag(b"SN", &"chr3")
2332                    .push_tag(b"LN", &120)
2333                    .push_tag(b"M5", &"a66b336bfe3ee8801c744c9545c87e24")
2334                    .push_tag(b"UR", &"test/test_cram.fa"),
2335            );
2336
2337            let mut cram_writer = Writer::from_path(&cram_path, &header, Format::Cram)
2338                .expect("Error opening CRAM file.");
2339            cram_writer.set_reference(ref_path).unwrap();
2340
2341            // Write BAM records to CRAM file
2342            for rec in bam_records.iter() {
2343                cram_writer
2344                    .write(&rec)
2345                    .expect("Faied to write record to CRAM.");
2346            }
2347        }
2348
2349        // Compare written CRAM records with BAM records
2350        {
2351            // Load written CRAM file
2352            let mut cram_reader = Reader::from_path(cram_path).unwrap();
2353            cram_reader.set_reference(ref_path).unwrap();
2354            let cram_records: Vec<Record> = cram_reader.records().map(|v| v.unwrap()).collect();
2355
2356            // Compare CRAM records to BAM records
2357            compare_inner_bam_cram_records(&cram_records, &bam_records);
2358        }
2359
2360        tmp.close().expect("Failed to delete temp dir");
2361    }
2362
2363    #[test]
2364    fn test_compression_level_conversion() {
2365        // predefined compression levels
2366        assert_eq!(CompressionLevel::Uncompressed.convert().unwrap(), 0);
2367        assert_eq!(CompressionLevel::Fastest.convert().unwrap(), 1);
2368        assert_eq!(CompressionLevel::Maximum.convert().unwrap(), 9);
2369
2370        // numeric compression levels
2371        for level in 0..=9 {
2372            assert_eq!(CompressionLevel::Level(level).convert().unwrap(), level);
2373        }
2374        // invalid levels
2375        assert!(CompressionLevel::Level(10).convert().is_err());
2376    }
2377
2378    #[test]
2379    fn test_write_compression() {
2380        let tmp = tempfile::Builder::new()
2381            .prefix("rust-htslib")
2382            .tempdir()
2383            .expect("Cannot create temp dir");
2384        let input_bam_path = "test/test.bam";
2385
2386        // test levels with decreasing compression factor
2387        let levels_to_test = vec![
2388            CompressionLevel::Maximum,
2389            CompressionLevel::Level(6),
2390            CompressionLevel::Fastest,
2391            CompressionLevel::Uncompressed,
2392        ];
2393        let file_sizes: Vec<_> = levels_to_test
2394            .iter()
2395            .map(|level| {
2396                let output_bam_path = tmp.path().join("test.bam");
2397                {
2398                    let mut reader = Reader::from_path(&input_bam_path).unwrap();
2399                    let header = Header::from_template(reader.header());
2400                    let mut writer =
2401                        Writer::from_path(&output_bam_path, &header, Format::Bam).unwrap();
2402                    writer.set_compression_level(*level).unwrap();
2403                    for record in reader.records() {
2404                        let r = record.unwrap();
2405                        writer.write(&r).unwrap();
2406                    }
2407                }
2408                fs::metadata(output_bam_path).unwrap().len()
2409            })
2410            .collect();
2411
2412        // check that out BAM file sizes are in decreasing order, in line with the expected compression factor
2413        println!("testing compression leves: {:?}", levels_to_test);
2414        println!("got compressed sizes: {:?}", file_sizes);
2415
2416        // libdeflate comes out with a slightly bigger file on Max compression
2417        // than on Level(6), so skip that check
2418        #[cfg(feature = "libdeflate")]
2419        assert!(file_sizes[1..].windows(2).all(|size| size[0] <= size[1]));
2420
2421        #[cfg(not(feature = "libdeflate"))]
2422        assert!(file_sizes.windows(2).all(|size| size[0] <= size[1]));
2423
2424        tmp.close().expect("Failed to delete temp dir");
2425    }
2426
2427    #[test]
2428    fn test_bam_fails_on_vcf() {
2429        let bam_path = "./test/test_left.vcf";
2430        let bam_reader = Reader::from_path(bam_path);
2431        assert!(bam_reader.is_err());
2432    }
2433
2434    #[test]
2435    fn test_indexde_bam_fails_on_vcf() {
2436        let bam_path = "./test/test_left.vcf";
2437        let bam_reader = IndexedReader::from_path(bam_path);
2438        assert!(bam_reader.is_err());
2439    }
2440
2441    #[test]
2442    fn test_bam_fails_on_toml() {
2443        let bam_path = "./Cargo.toml";
2444        let bam_reader = Reader::from_path(bam_path);
2445        assert!(bam_reader.is_err());
2446    }
2447
2448    #[test]
2449    fn test_sam_writer_example() {
2450        fn from_bam_with_filter<F>(bamfile: &str, samfile: &str, f: F) -> bool
2451        where
2452            F: Fn(&record::Record) -> Option<bool>,
2453        {
2454            let mut bam_reader = Reader::from_path(bamfile).unwrap(); // internal functions, just unwrap
2455            let header = header::Header::from_template(bam_reader.header());
2456            let mut sam_writer = Writer::from_path(samfile, &header, Format::Sam).unwrap();
2457            for record in bam_reader.records() {
2458                if record.is_err() {
2459                    return false;
2460                }
2461                let parsed = record.unwrap();
2462                match f(&parsed) {
2463                    None => return true,
2464                    Some(false) => {}
2465                    Some(true) => {
2466                        if sam_writer.write(&parsed).is_err() {
2467                            return false;
2468                        }
2469                    }
2470                }
2471            }
2472            true
2473        }
2474        use std::fs::File;
2475        use std::io::Read;
2476        let bamfile = "./test/bam2sam_test.bam";
2477        let samfile = "./test/bam2sam_out.sam";
2478        let expectedfile = "./test/bam2sam_expected.sam";
2479        let result = from_bam_with_filter(bamfile, samfile, |_| Some(true));
2480        assert!(result);
2481        let mut expected = Vec::new();
2482        let mut written = Vec::new();
2483        assert!(File::open(expectedfile)
2484            .unwrap()
2485            .read_to_end(&mut expected)
2486            .is_ok());
2487        assert!(File::open(samfile)
2488            .unwrap()
2489            .read_to_end(&mut written)
2490            .is_ok());
2491        assert_eq!(expected, written);
2492    }
2493
2494    // #[cfg(feature = "curl")]
2495    // #[test]
2496    // fn test_http_connect() {
2497    //     let url: Url = Url::parse(
2498    //         "https://raw.githubusercontent.com/brainstorm/tiny-test-data/master/wgs/mt.bam",
2499    //     )
2500    //     .unwrap();
2501    //     let r = Reader::from_url(&url);
2502    //     println!("{:#?}", r);
2503    //     let r = r.unwrap();
2504
2505    //     assert_eq!(r.header().target_names()[0], b"chr1");
2506    // }
2507
2508    #[test]
2509    fn test_rc_records() {
2510        let (names, flags, seqs, quals, cigars) = gold();
2511        let mut bam = Reader::from_path(&Path::new("test/test.bam")).expect("Error opening file.");
2512        let del_len = [1, 1, 1, 1, 1, 100000];
2513
2514        for (i, record) in bam.rc_records().enumerate() {
2515            //let rec = record.expect("Expected valid record");
2516            let rec = record.unwrap();
2517            println!("{}", str::from_utf8(rec.qname()).ok().unwrap());
2518            assert_eq!(rec.qname(), names[i]);
2519            assert_eq!(rec.flags(), flags[i]);
2520            assert_eq!(rec.seq().as_bytes(), seqs[i]);
2521
2522            let cigar = rec.cigar();
2523            assert_eq!(*cigar, cigars[i]);
2524
2525            let end_pos = cigar.end_pos();
2526            assert_eq!(end_pos, rec.pos() + 100 + del_len[i]);
2527            assert_eq!(
2528                cigar
2529                    .read_pos(end_pos as u32 - 10, false, false)
2530                    .unwrap()
2531                    .unwrap(),
2532                90
2533            );
2534            assert_eq!(
2535                cigar
2536                    .read_pos(rec.pos() as u32 + 20, false, false)
2537                    .unwrap()
2538                    .unwrap(),
2539                20
2540            );
2541            assert_eq!(cigar.read_pos(4000000, false, false).unwrap(), None);
2542            // fix qual offset
2543            let qual: Vec<u8> = quals[i].iter().map(|&q| q - 33).collect();
2544            assert_eq!(rec.qual(), &qual[..]);
2545        }
2546    }
2547
2548    #[test]
2549    fn test_aux_arrays() {
2550        let bam_header = Header::new();
2551        let mut test_record = Record::from_sam(
2552            &mut HeaderView::from_header(&bam_header),
2553            "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
2554        )
2555        .unwrap();
2556
2557        let array_i8: Vec<i8> = vec![std::i8::MIN, -1, 0, 1, std::i8::MAX];
2558        let array_u8: Vec<u8> = vec![std::u8::MIN, 0, 1, std::u8::MAX];
2559        let array_i16: Vec<i16> = vec![std::i16::MIN, -1, 0, 1, std::i16::MAX];
2560        let array_u16: Vec<u16> = vec![std::u16::MIN, 0, 1, std::u16::MAX];
2561        let array_i32: Vec<i32> = vec![std::i32::MIN, -1, 0, 1, std::i32::MAX];
2562        let array_u32: Vec<u32> = vec![std::u32::MIN, 0, 1, std::u32::MAX];
2563        let array_f32: Vec<f32> = vec![std::f32::MIN, 0.0, -0.0, 0.1, 0.99, std::f32::MAX];
2564
2565        test_record
2566            .push_aux(b"XA", Aux::ArrayI8((&array_i8).into()))
2567            .unwrap();
2568        test_record
2569            .push_aux(b"XB", Aux::ArrayU8((&array_u8).into()))
2570            .unwrap();
2571        test_record
2572            .push_aux(b"XC", Aux::ArrayI16((&array_i16).into()))
2573            .unwrap();
2574        test_record
2575            .push_aux(b"XD", Aux::ArrayU16((&array_u16).into()))
2576            .unwrap();
2577        test_record
2578            .push_aux(b"XE", Aux::ArrayI32((&array_i32).into()))
2579            .unwrap();
2580        test_record
2581            .push_aux(b"XF", Aux::ArrayU32((&array_u32).into()))
2582            .unwrap();
2583        test_record
2584            .push_aux(b"XG", Aux::ArrayFloat((&array_f32).into()))
2585            .unwrap();
2586
2587        {
2588            let tag = b"XA";
2589            if let Ok(Aux::ArrayI8(array)) = test_record.aux(tag) {
2590                // Retrieve aux array
2591                let aux_array_content = array.iter().collect::<Vec<_>>();
2592                assert_eq!(aux_array_content, array_i8);
2593
2594                // Copy the stored aux array to another record
2595                {
2596                    let mut copy_test_record = test_record.clone();
2597
2598                    // Pushing a field with an existing tag should fail
2599                    assert!(copy_test_record.push_aux(tag, Aux::I8(3)).is_err());
2600
2601                    // Remove aux array from target record
2602                    copy_test_record.remove_aux(tag).unwrap();
2603                    assert!(copy_test_record.aux(tag).is_err());
2604
2605                    // Copy array to target record
2606                    let src_aux = test_record.aux(tag).unwrap();
2607                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2608                    if let Ok(Aux::ArrayI8(array)) = copy_test_record.aux(tag) {
2609                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2610                        assert_eq!(aux_array_content_copied, array_i8);
2611                    } else {
2612                        panic!("Aux tag not found");
2613                    }
2614                }
2615            } else {
2616                panic!("Aux tag not found");
2617            }
2618        }
2619
2620        {
2621            let tag = b"XB";
2622            if let Ok(Aux::ArrayU8(array)) = test_record.aux(tag) {
2623                // Retrieve aux array
2624                let aux_array_content = array.iter().collect::<Vec<_>>();
2625                assert_eq!(aux_array_content, array_u8);
2626
2627                // Copy the stored aux array to another record
2628                {
2629                    let mut copy_test_record = test_record.clone();
2630
2631                    // Pushing a field with an existing tag should fail
2632                    assert!(copy_test_record.push_aux(tag, Aux::U8(3)).is_err());
2633
2634                    // Remove aux array from target record
2635                    copy_test_record.remove_aux(tag).unwrap();
2636                    assert!(copy_test_record.aux(tag).is_err());
2637
2638                    // Copy array to target record
2639                    let src_aux = test_record.aux(tag).unwrap();
2640                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2641                    if let Ok(Aux::ArrayU8(array)) = copy_test_record.aux(tag) {
2642                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2643                        assert_eq!(aux_array_content_copied, array_u8);
2644                    } else {
2645                        panic!("Aux tag not found");
2646                    }
2647                }
2648            } else {
2649                panic!("Aux tag not found");
2650            }
2651        }
2652
2653        {
2654            let tag = b"XC";
2655            if let Ok(Aux::ArrayI16(array)) = test_record.aux(tag) {
2656                // Retrieve aux array
2657                let aux_array_content = array.iter().collect::<Vec<_>>();
2658                assert_eq!(aux_array_content, array_i16);
2659
2660                // Copy the stored aux array to another record
2661                {
2662                    let mut copy_test_record = test_record.clone();
2663
2664                    // Pushing a field with an existing tag should fail
2665                    assert!(copy_test_record.push_aux(tag, Aux::I16(3)).is_err());
2666
2667                    // Remove aux array from target record
2668                    copy_test_record.remove_aux(tag).unwrap();
2669                    assert!(copy_test_record.aux(tag).is_err());
2670
2671                    // Copy array to target record
2672                    let src_aux = test_record.aux(tag).unwrap();
2673                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2674                    if let Ok(Aux::ArrayI16(array)) = copy_test_record.aux(tag) {
2675                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2676                        assert_eq!(aux_array_content_copied, array_i16);
2677                    } else {
2678                        panic!("Aux tag not found");
2679                    }
2680                }
2681            } else {
2682                panic!("Aux tag not found");
2683            }
2684        }
2685
2686        {
2687            let tag = b"XD";
2688            if let Ok(Aux::ArrayU16(array)) = test_record.aux(tag) {
2689                // Retrieve aux array
2690                let aux_array_content = array.iter().collect::<Vec<_>>();
2691                assert_eq!(aux_array_content, array_u16);
2692
2693                // Copy the stored aux array to another record
2694                {
2695                    let mut copy_test_record = test_record.clone();
2696
2697                    // Pushing a field with an existing tag should fail
2698                    assert!(copy_test_record.push_aux(tag, Aux::U16(3)).is_err());
2699
2700                    // Remove aux array from target record
2701                    copy_test_record.remove_aux(tag).unwrap();
2702                    assert!(copy_test_record.aux(tag).is_err());
2703
2704                    // Copy array to target record
2705                    let src_aux = test_record.aux(tag).unwrap();
2706                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2707                    if let Ok(Aux::ArrayU16(array)) = copy_test_record.aux(tag) {
2708                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2709                        assert_eq!(aux_array_content_copied, array_u16);
2710                    } else {
2711                        panic!("Aux tag not found");
2712                    }
2713                }
2714            } else {
2715                panic!("Aux tag not found");
2716            }
2717        }
2718
2719        {
2720            let tag = b"XE";
2721            if let Ok(Aux::ArrayI32(array)) = test_record.aux(tag) {
2722                // Retrieve aux array
2723                let aux_array_content = array.iter().collect::<Vec<_>>();
2724                assert_eq!(aux_array_content, array_i32);
2725
2726                // Copy the stored aux array to another record
2727                {
2728                    let mut copy_test_record = test_record.clone();
2729
2730                    // Pushing a field with an existing tag should fail
2731                    assert!(copy_test_record.push_aux(tag, Aux::I32(3)).is_err());
2732
2733                    // Remove aux array from target record
2734                    copy_test_record.remove_aux(tag).unwrap();
2735                    assert!(copy_test_record.aux(tag).is_err());
2736
2737                    // Copy array to target record
2738                    let src_aux = test_record.aux(tag).unwrap();
2739                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2740                    if let Ok(Aux::ArrayI32(array)) = copy_test_record.aux(tag) {
2741                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2742                        assert_eq!(aux_array_content_copied, array_i32);
2743                    } else {
2744                        panic!("Aux tag not found");
2745                    }
2746                }
2747            } else {
2748                panic!("Aux tag not found");
2749            }
2750        }
2751
2752        {
2753            let tag = b"XF";
2754            if let Ok(Aux::ArrayU32(array)) = test_record.aux(tag) {
2755                // Retrieve aux array
2756                let aux_array_content = array.iter().collect::<Vec<_>>();
2757                assert_eq!(aux_array_content, array_u32);
2758
2759                // Copy the stored aux array to another record
2760                {
2761                    let mut copy_test_record = test_record.clone();
2762
2763                    // Pushing a field with an existing tag should fail
2764                    assert!(copy_test_record.push_aux(tag, Aux::U32(3)).is_err());
2765
2766                    // Remove aux array from target record
2767                    copy_test_record.remove_aux(tag).unwrap();
2768                    assert!(copy_test_record.aux(tag).is_err());
2769
2770                    // Copy array to target record
2771                    let src_aux = test_record.aux(tag).unwrap();
2772                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2773                    if let Ok(Aux::ArrayU32(array)) = copy_test_record.aux(tag) {
2774                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2775                        assert_eq!(aux_array_content_copied, array_u32);
2776                    } else {
2777                        panic!("Aux tag not found");
2778                    }
2779                }
2780            } else {
2781                panic!("Aux tag not found");
2782            }
2783        }
2784
2785        {
2786            let tag = b"XG";
2787            if let Ok(Aux::ArrayFloat(array)) = test_record.aux(tag) {
2788                // Retrieve aux array
2789                let aux_array_content = array.iter().collect::<Vec<_>>();
2790                assert_eq!(aux_array_content, array_f32);
2791
2792                // Copy the stored aux array to another record
2793                {
2794                    let mut copy_test_record = test_record.clone();
2795
2796                    // Pushing a field with an existing tag should fail
2797                    assert!(copy_test_record.push_aux(tag, Aux::Float(3.0)).is_err());
2798
2799                    // Remove aux array from target record
2800                    copy_test_record.remove_aux(tag).unwrap();
2801                    assert!(copy_test_record.aux(tag).is_err());
2802
2803                    // Copy array to target record
2804                    let src_aux = test_record.aux(tag).unwrap();
2805                    assert!(copy_test_record.push_aux(tag, src_aux).is_ok());
2806                    if let Ok(Aux::ArrayFloat(array)) = copy_test_record.aux(tag) {
2807                        let aux_array_content_copied = array.iter().collect::<Vec<_>>();
2808                        assert_eq!(aux_array_content_copied, array_f32);
2809                    } else {
2810                        panic!("Aux tag not found");
2811                    }
2812                }
2813            } else {
2814                panic!("Aux tag not found");
2815            }
2816        }
2817
2818        // Test via `Iterator` impl
2819        for item in test_record.aux_iter() {
2820            match item.unwrap() {
2821                (b"XA", Aux::ArrayI8(array)) => {
2822                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_i8);
2823                }
2824                (b"XB", Aux::ArrayU8(array)) => {
2825                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_u8);
2826                }
2827                (b"XC", Aux::ArrayI16(array)) => {
2828                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_i16);
2829                }
2830                (b"XD", Aux::ArrayU16(array)) => {
2831                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_u16);
2832                }
2833                (b"XE", Aux::ArrayI32(array)) => {
2834                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_i32);
2835                }
2836                (b"XF", Aux::ArrayU32(array)) => {
2837                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_u32);
2838                }
2839                (b"XG", Aux::ArrayFloat(array)) => {
2840                    assert_eq!(&array.iter().collect::<Vec<_>>(), &array_f32);
2841                }
2842                _ => {
2843                    panic!();
2844                }
2845            }
2846        }
2847
2848        // Test via `PartialEq` impl
2849        assert_eq!(
2850            test_record.aux(b"XA").unwrap(),
2851            Aux::ArrayI8((&array_i8).into())
2852        );
2853        assert_eq!(
2854            test_record.aux(b"XB").unwrap(),
2855            Aux::ArrayU8((&array_u8).into())
2856        );
2857        assert_eq!(
2858            test_record.aux(b"XC").unwrap(),
2859            Aux::ArrayI16((&array_i16).into())
2860        );
2861        assert_eq!(
2862            test_record.aux(b"XD").unwrap(),
2863            Aux::ArrayU16((&array_u16).into())
2864        );
2865        assert_eq!(
2866            test_record.aux(b"XE").unwrap(),
2867            Aux::ArrayI32((&array_i32).into())
2868        );
2869        assert_eq!(
2870            test_record.aux(b"XF").unwrap(),
2871            Aux::ArrayU32((&array_u32).into())
2872        );
2873        assert_eq!(
2874            test_record.aux(b"XG").unwrap(),
2875            Aux::ArrayFloat((&array_f32).into())
2876        );
2877    }
2878
2879    #[test]
2880    fn test_aux_scalars() {
2881        let bam_header = Header::new();
2882        let mut test_record = Record::from_sam(
2883            &mut HeaderView::from_header(&bam_header),
2884            "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
2885        )
2886        .unwrap();
2887
2888        test_record.push_aux(b"XA", Aux::I8(i8::MIN)).unwrap();
2889        test_record.push_aux(b"XB", Aux::I8(i8::MAX)).unwrap();
2890        test_record.push_aux(b"XC", Aux::U8(u8::MIN)).unwrap();
2891        test_record.push_aux(b"XD", Aux::U8(u8::MAX)).unwrap();
2892        test_record.push_aux(b"XE", Aux::I16(i16::MIN)).unwrap();
2893        test_record.push_aux(b"XF", Aux::I16(i16::MAX)).unwrap();
2894        test_record.push_aux(b"XG", Aux::U16(u16::MIN)).unwrap();
2895        test_record.push_aux(b"XH", Aux::U16(u16::MAX)).unwrap();
2896        test_record.push_aux(b"XI", Aux::I32(i32::MIN)).unwrap();
2897        test_record.push_aux(b"XJ", Aux::I32(i32::MAX)).unwrap();
2898        test_record.push_aux(b"XK", Aux::U32(u32::MIN)).unwrap();
2899        test_record.push_aux(b"XL", Aux::U32(u32::MAX)).unwrap();
2900        test_record
2901            .push_aux(b"XM", Aux::Float(std::f32::consts::PI))
2902            .unwrap();
2903        test_record
2904            .push_aux(b"XN", Aux::Double(std::f64::consts::PI))
2905            .unwrap();
2906        test_record
2907            .push_aux(b"XO", Aux::String("Test str"))
2908            .unwrap();
2909        test_record.push_aux(b"XP", Aux::I8(0)).unwrap();
2910
2911        let collected_aux_fields = test_record.aux_iter().collect::<Result<Vec<_>>>().unwrap();
2912        assert_eq!(
2913            collected_aux_fields,
2914            vec![
2915                (&b"XA"[..], Aux::I8(i8::MIN)),
2916                (&b"XB"[..], Aux::I8(i8::MAX)),
2917                (&b"XC"[..], Aux::U8(u8::MIN)),
2918                (&b"XD"[..], Aux::U8(u8::MAX)),
2919                (&b"XE"[..], Aux::I16(i16::MIN)),
2920                (&b"XF"[..], Aux::I16(i16::MAX)),
2921                (&b"XG"[..], Aux::U16(u16::MIN)),
2922                (&b"XH"[..], Aux::U16(u16::MAX)),
2923                (&b"XI"[..], Aux::I32(i32::MIN)),
2924                (&b"XJ"[..], Aux::I32(i32::MAX)),
2925                (&b"XK"[..], Aux::U32(u32::MIN)),
2926                (&b"XL"[..], Aux::U32(u32::MAX)),
2927                (&b"XM"[..], Aux::Float(std::f32::consts::PI)),
2928                (&b"XN"[..], Aux::Double(std::f64::consts::PI)),
2929                (&b"XO"[..], Aux::String("Test str")),
2930                (&b"XP"[..], Aux::I8(0)),
2931            ]
2932        );
2933    }
2934
2935    #[test]
2936    fn test_aux_array_partial_eq() {
2937        use record::AuxArray;
2938
2939        // Target types
2940        let one_data: Vec<i8> = vec![0, 1, 2, 3, 4, 5, 6];
2941        let one_aux_array = AuxArray::from(&one_data);
2942
2943        let two_data: Vec<i8> = vec![0, 1, 2, 3, 4, 5];
2944        let two_aux_array = AuxArray::from(&two_data);
2945
2946        assert_ne!(&one_data, &two_data);
2947        assert_ne!(&one_aux_array, &two_aux_array);
2948
2949        let one_aux = Aux::ArrayI8(one_aux_array);
2950        let two_aux = Aux::ArrayI8(two_aux_array);
2951        assert_ne!(&one_aux, &two_aux);
2952
2953        // Raw bytes
2954        let bam_header = Header::new();
2955        let mut test_record = Record::from_sam(
2956            &mut HeaderView::from_header(&bam_header),
2957            "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
2958        )
2959        .unwrap();
2960
2961        test_record.push_aux(b"XA", one_aux).unwrap();
2962        test_record.push_aux(b"XB", two_aux).unwrap();
2963
2964        // RawLeBytes == RawLeBytes
2965        assert_eq!(
2966            test_record.aux(b"XA").unwrap(),
2967            test_record.aux(b"XA").unwrap()
2968        );
2969        // RawLeBytes != RawLeBytes
2970        assert_ne!(
2971            test_record.aux(b"XA").unwrap(),
2972            test_record.aux(b"XB").unwrap()
2973        );
2974
2975        // RawLeBytes == TargetType
2976        assert_eq!(
2977            test_record.aux(b"XA").unwrap(),
2978            Aux::ArrayI8((&one_data).into())
2979        );
2980        assert_eq!(
2981            test_record.aux(b"XB").unwrap(),
2982            Aux::ArrayI8((&two_data).into())
2983        );
2984        // RawLeBytes != TargetType
2985        assert_ne!(
2986            test_record.aux(b"XA").unwrap(),
2987            Aux::ArrayI8((&two_data).into())
2988        );
2989        assert_ne!(
2990            test_record.aux(b"XB").unwrap(),
2991            Aux::ArrayI8((&one_data).into())
2992        );
2993    }
2994
2995    /// Test if both text and binary representations of a BAM header are in sync (#156)
2996    #[test]
2997    fn test_bam_header_sync() {
2998        let reader = Reader::from_path("test/test_issue_156_no_text.bam").unwrap();
2999        let header_hashmap = Header::from_template(reader.header()).to_hashmap();
3000        let header_refseqs = header_hashmap.get("SQ").unwrap();
3001        assert_eq!(header_refseqs[0].get("SN").unwrap(), "ref_1",);
3002        assert_eq!(header_refseqs[0].get("LN").unwrap(), "10000000",);
3003    }
3004
3005    #[test]
3006    fn test_bam_new() {
3007        // Create the path to write the tmp test BAM
3008        let tmp = tempfile::Builder::new()
3009            .prefix("rust-htslib")
3010            .tempdir()
3011            .expect("Cannot create temp dir");
3012        let bampath = tmp.path().join("test.bam");
3013
3014        // write an unmapped BAM record (uBAM)
3015        {
3016            // Build the header
3017            let mut header = Header::new();
3018
3019            // Add the version
3020            header.push_record(
3021                HeaderRecord::new(b"HD")
3022                    .push_tag(b"VN", &"1.6")
3023                    .push_tag(b"SO", &"unsorted"),
3024            );
3025
3026            // Build the writer
3027            let mut writer = Writer::from_path(&bampath, &header, Format::Bam).unwrap();
3028
3029            // Build an empty record
3030            let mut record = Record::new();
3031
3032            // Write the record (this previously seg-faulted)
3033            assert!(writer.write(&record).is_ok());
3034        }
3035
3036        // Read the record
3037        {
3038            // Build th reader
3039            let mut reader = Reader::from_path(&bampath).expect("Error opening file.");
3040
3041            // Read the record
3042            let mut rec = Record::new();
3043            match reader.read(&mut rec) {
3044                Some(r) => r.expect("Failed to read record."),
3045                None => panic!("No record read."),
3046            };
3047
3048            // Check a few things
3049            assert!(rec.is_unmapped());
3050            assert_eq!(rec.tid(), -1);
3051            assert_eq!(rec.pos(), -1);
3052            assert_eq!(rec.mtid(), -1);
3053            assert_eq!(rec.mpos(), -1);
3054        }
3055    }
3056
3057    #[test]
3058    fn test_idxstats_bam() {
3059        let mut reader = IndexedReader::from_path("test/test.bam").unwrap();
3060        let expected = vec![
3061            (0, 15072423, 6, 0),
3062            (1, 15279345, 0, 0),
3063            (2, 13783700, 0, 0),
3064            (3, 17493793, 0, 0),
3065            (4, 20924149, 0, 0),
3066            (-1, 0, 0, 0),
3067        ];
3068        let actual = reader.index_stats().unwrap();
3069        assert_eq!(expected, actual);
3070    }
3071
3072    #[test]
3073    fn test_number_mapped_and_unmapped_bam() {
3074        let reader = IndexedReader::from_path("test/test.bam").unwrap();
3075        let expected = (6, 0);
3076        let actual = reader.index().number_mapped_unmapped(0);
3077        assert_eq!(expected, actual);
3078    }
3079
3080    #[test]
3081    fn test_number_unmapped_global_bam() {
3082        let reader = IndexedReader::from_path("test/test_unmapped.bam").unwrap();
3083        let expected = 8;
3084        let actual = reader.index().number_unmapped();
3085        assert_eq!(expected, actual);
3086    }
3087
3088    #[test]
3089    fn test_idxstats_cram() {
3090        let mut reader = IndexedReader::from_path("test/test_cram.cram").unwrap();
3091        reader.set_reference("test/test_cram.fa").unwrap();
3092        let expected = vec![
3093            (0, 120, 2, 0),
3094            (1, 120, 2, 0),
3095            (2, 120, 2, 0),
3096            (-1, 0, 0, 0),
3097        ];
3098        let actual = reader.index_stats().unwrap();
3099        assert_eq!(expected, actual);
3100    }
3101
3102    #[test]
3103    fn test_slow_idxstats_cram() {
3104        let mut reader = IndexedReader::from_path("test/test_cram.cram").unwrap();
3105        reader.set_reference("test/test_cram.fa").unwrap();
3106        let expected = vec![
3107            (0, 120, 2, 0),
3108            (1, 120, 2, 0),
3109            (2, 120, 2, 0),
3110            (-1, 0, 0, 0),
3111        ];
3112        let actual = reader.index_stats().unwrap();
3113        assert_eq!(expected, actual);
3114    }
3115
3116    // #[test]
3117    // fn test_number_mapped_and_unmapped_cram() {
3118    //     let mut reader = IndexedReader::from_path("test/test_cram.cram").unwrap();
3119    //     reader.set_reference("test/test_cram.fa").unwrap();
3120    //     let expected = (2, 0);
3121    //     let actual = reader.index().number_mapped_unmapped(0);
3122    //     assert_eq!(expected, actual);
3123    // }
3124    //
3125    // #[test]
3126    // fn test_number_unmapped_global_cram() {
3127    //     let mut reader = IndexedReader::from_path("test/test_unmapped.cram").unwrap();
3128    //     let expected = 8;
3129    //     let actual = reader.index().number_unmapped();
3130    //     assert_eq!(expected, actual);
3131    // }
3132}