rust_htslib/bcf/
mod.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Module for working with VCF and BCF files.
7//!
8//! # Performance Remarks
9//!
10//! Note that BCF corresponds to the in-memory representation of BCF/VCF records in Htslib
11//! itself. Thus, it comes without a runtime penalty for parsing, in contrast to reading VCF
12//! files.
13//!
14//! # Example (reading)
15//!
16//!   - Obtaining 0-based locus index of the VCF record.
17//!   - Obtaining alleles of the VCF record.
18//!   - calculate alt-allele dosage in a mutli-sample VCF / BCF
19//!
20//! ```
21//! use crate::rust_htslib::bcf::{Reader, Read};
22//! use std::convert::TryFrom;
23//!
24//! let path = &"test/test_string.vcf";
25//! let mut bcf = Reader::from_path(path).expect("Error opening file.");
26//! // iterate through each row of the vcf body.
27//! for (i, record_result) in bcf.records().enumerate() {
28//!     let mut record = record_result.expect("Fail to read record");
29//!     let mut s = String::new();
30//!      for allele in record.alleles() {
31//!          for c in allele {
32//!              s.push(char::from(*c))
33//!          }
34//!          s.push(' ')
35//!      }
36//!     // 0-based position and the list of alleles
37//!     println!("Locus: {}, Alleles: {}", record.pos(), s);
38//!     // number of sample in the vcf
39//!     let sample_count = usize::try_from(record.sample_count()).unwrap();
40//!
41//!     // Counting ref, alt and missing alleles for each sample
42//!     let mut n_ref = vec![0; sample_count];
43//!     let mut n_alt = vec![0; sample_count];
44//!     let mut n_missing = vec![0; sample_count];
45//!     let gts = record.genotypes().expect("Error reading genotypes");
46//!     for sample_index in 0..sample_count {
47//!         // for each sample
48//!         for gta in gts.get(sample_index).iter() {
49//!             // for each allele
50//!             match gta.index() {
51//!                 Some(0) => n_ref[sample_index] += 1,  // reference allele
52//!                 Some(_) => n_alt[sample_index] += 1,  // alt allele
53//!                 None => n_missing[sample_index] += 1, // missing allele
54//!             }
55//!         }
56//!     }
57//! }
58//! ```
59//!
60//! # Example (writing)
61//!
62//!   - Setting up a VCF writer from scratch (including a simple header)
63//!   - Creating a VCF record and writing it to the VCF file
64//!
65//! ```
66//! use rust_htslib::bcf::{Format, Writer};
67//! use rust_htslib::bcf::header::Header;
68//! use rust_htslib::bcf::record::GenotypeAllele;
69//!
70//! // Create minimal VCF header with a single contig and a single sample
71//! let mut header = Header::new();
72//! let header_contig_line = r#"##contig=<ID=1,length=10>"#;
73//! header.push_record(header_contig_line.as_bytes());
74//! let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
75//! header.push_record(header_gt_line.as_bytes());
76//! header.push_sample("test_sample".as_bytes());
77//!
78//! // Write uncompressed VCF to stdout with above header and get an empty record
79//! let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
80//! let mut record = vcf.empty_record();
81//!
82//! // Set chrom and pos to 1 and 7, respectively - note the 0-based positions
83//! let rid = vcf.header().name2rid(b"1").unwrap();
84//! record.set_rid(Some(rid));
85//! record.set_pos(6);
86//!
87//! // Set record genotype to 0|1 - note first allele is always unphased
88//! let alleles = &[GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)];
89//! record.push_genotypes(alleles).unwrap();
90//!
91//! // Write record
92//! vcf.write(&record).unwrap()
93//! ```
94//!
95//! This will print the following VCF to stdout:
96//!
97//! ```lang-none
98//! ##fileformat=VCFv4.2
99//! ##FILTER=<ID=PASS,Description="All filters passed">
100//! ##contig=<ID=1,length=10>
101//! ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
102//! #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  test_sample
103//! 1       7       .       .       .       0       .       .       GT      0|1
104//! ```
105
106use std::ffi;
107use std::path::Path;
108use std::rc::Rc;
109use std::str;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125/// A trait for a BCF reader with a read method.
126pub trait Read: Sized {
127    /// Read the next record.
128    ///
129    /// # Arguments
130    /// * record - an empty record, that can be created with `bcf::Reader::empty_record`.
131    ///
132    /// # Returns
133    /// None if end of file was reached, otherwise Some will contain
134    /// a result with an error in case of failure.
135    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137    /// Return an iterator over all records of the VCF/BCF file.
138    fn records(&mut self) -> Records<'_, Self>;
139
140    /// Return the header.
141    fn header(&self) -> &HeaderView;
142
143    /// Return empty record.  Can be reused multiple times.
144    fn empty_record(&self) -> Record;
145
146    /// Activate multi-threaded BCF/VCF read support in htslib. This should permit faster
147    /// reading of large VCF files.
148    ///
149    /// Setting `nthreads` to `0` does not change the current state.  Note that it is not
150    /// possible to set the number of background threads below `1` once it has been set.
151    ///
152    /// # Arguments
153    ///
154    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
155    fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158/// A VCF/BCF reader.
159#[derive(Debug)]
160pub struct Reader {
161    inner: *mut htslib::htsFile,
162    header: Rc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166/// # Safety
167///
168/// Implementation for `Reader::set_threads()` and `Writer::set_threads`.
169pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
170    assert!(n_threads > 0, "n_threads must be > 0");
171
172    let r = htslib::hts_set_threads(hts_file, n_threads as i32);
173    if r != 0 {
174        Err(Error::SetThreads)
175    } else {
176        Ok(())
177    }
178}
179
180impl Reader {
181    /// Create a new reader from a given path.
182    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
183        match path.as_ref().to_str() {
184            Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
185            Some(p) => Self::new(p.as_bytes()),
186            _ => Err(Error::NonUnicodePath),
187        }
188    }
189
190    /// Create a new reader from a given URL.
191    pub fn from_url(url: &Url) -> Result<Self> {
192        Self::new(url.as_str().as_bytes())
193    }
194
195    /// Create a new reader from standard input.
196    pub fn from_stdin() -> Result<Self> {
197        Self::new(b"-")
198    }
199
200    fn new(path: &[u8]) -> Result<Self> {
201        let htsfile = bcf_open(path, b"r")?;
202        let header = unsafe { htslib::bcf_hdr_read(htsfile) };
203        Ok(Reader {
204            inner: htsfile,
205            header: Rc::new(HeaderView::new(header)),
206        })
207    }
208}
209
210impl Read for Reader {
211    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
212        match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
213            0 => {
214                unsafe {
215                    // Always unpack record.
216                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
217                }
218                record.set_header(Rc::clone(&self.header));
219                Some(Ok(()))
220            }
221            -1 => None,
222            _ => Some(Err(Error::BcfInvalidRecord)),
223        }
224    }
225
226    fn records(&mut self) -> Records<'_, Self> {
227        Records { reader: self }
228    }
229
230    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
231        unsafe { set_threads(self.inner, n_threads) }
232    }
233
234    fn header(&self) -> &HeaderView {
235        &self.header
236    }
237
238    /// Return empty record.  Can be reused multiple times.
239    fn empty_record(&self) -> Record {
240        self.header.empty_record()
241    }
242}
243
244impl Drop for Reader {
245    fn drop(&mut self) {
246        unsafe {
247            htslib::hts_close(self.inner);
248        }
249    }
250}
251
252/// An indexed VCF/BCF reader.
253#[derive(Debug)]
254pub struct IndexedReader {
255    /// The synced VCF/BCF reader to use internally.
256    inner: *mut htslib::bcf_srs_t,
257    /// The header.
258    header: Rc<HeaderView>,
259
260    /// The position of the previous fetch, if any.
261    current_region: Option<(u32, u64, Option<u64>)>,
262}
263
264unsafe impl Send for IndexedReader {}
265
266impl IndexedReader {
267    /// Create a new `IndexedReader` from path.
268    ///
269    /// # Arguments
270    ///
271    /// * `path` - the path to open.
272    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
273        let path = path.as_ref();
274        match path.to_str() {
275            Some(p) if path.exists() => {
276                Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
277            }
278            Some(p) => Err(Error::FileNotFound { path: p.into() }),
279            None => Err(Error::NonUnicodePath),
280        }
281    }
282
283    /// Create a new `IndexedReader` from an URL.
284    pub fn from_url(url: &Url) -> Result<Self> {
285        Self::new(&ffi::CString::new(url.as_str()).unwrap())
286    }
287
288    /// Create a new `IndexedReader`.
289    ///
290    /// # Arguments
291    ///
292    /// * `path` - the path. Use "-" for stdin.
293    fn new(path: &ffi::CStr) -> Result<Self> {
294        // Create reader and require existence of index file.
295        let ser_reader = unsafe { htslib::bcf_sr_init() };
296        unsafe {
297            htslib::bcf_sr_set_opt(ser_reader, 0);
298        } // 0: BCF_SR_REQUIRE_IDX
299          // Attach a file with the path from the arguments.
300        if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
301            let header = Rc::new(HeaderView::new(unsafe {
302                htslib::bcf_hdr_dup((*(*ser_reader).readers.offset(0)).header)
303            }));
304            Ok(IndexedReader {
305                inner: ser_reader,
306                header,
307                current_region: None,
308            })
309        } else {
310            Err(Error::BcfOpen {
311                target: path.to_str().unwrap().to_owned(),
312            })
313        }
314    }
315
316    /// Jump to the given region.
317    ///
318    /// # Arguments
319    ///
320    /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
321    ///           contig name to ID.
322    /// * `start` - `0`-based **inclusive** start coordinate of region on reference.
323    /// * `end` - Optional `0`-based **inclusive** end coordinate of region on reference. If `None`
324    /// is given, records are fetched from `start` until the end of the contig.
325    ///
326    /// # Note
327    /// The entire contig can be fetched by setting `start` to `0` and `end` to `None`.
328    pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
329        let contig = self.header.rid2name(rid)?;
330        let contig = ffi::CString::new(contig).unwrap();
331        if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
332            Err(Error::GenomicSeek {
333                contig: contig.to_str().unwrap().to_owned(),
334                start,
335            })
336        } else {
337            self.current_region = Some((rid, start, end));
338            Ok(())
339        }
340    }
341}
342
343impl Read for IndexedReader {
344    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
345        match unsafe { htslib::bcf_sr_next_line(self.inner) } {
346            0 => {
347                if unsafe { (*self.inner).errnum } != 0 {
348                    Some(Err(Error::BcfInvalidRecord))
349                } else {
350                    None
351                }
352            }
353            i => {
354                assert!(i > 0, "Must not be negative");
355                // Note that the sync BCF reader has a different interface than the others
356                // as it keeps its own buffer already for each record.  An alternative here
357                // would be to replace the `inner` value by an enum that can be a pointer
358                // into a synced reader or an owning popinter to an allocated record.
359                unsafe {
360                    htslib::bcf_copy(
361                        record.inner,
362                        *(*(*self.inner).readers.offset(0)).buffer.offset(0),
363                    );
364                }
365
366                unsafe {
367                    // Always unpack record.
368                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
369                }
370
371                record.set_header(Rc::clone(&self.header));
372
373                match self.current_region {
374                    Some((rid, _start, end)) => {
375                        let endpos = match end {
376                            Some(e) => e,
377                            None => u64::MAX,
378                        };
379                        if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380                            Some(Ok(()))
381                        } else {
382                            None
383                        }
384                    }
385                    None => Some(Ok(())),
386                }
387            }
388        }
389    }
390
391    fn records(&mut self) -> Records<'_, Self> {
392        Records { reader: self }
393    }
394
395    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396        assert!(n_threads > 0, "n_threads must be > 0");
397
398        let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399        if r != 0 {
400            Err(Error::SetThreads)
401        } else {
402            Ok(())
403        }
404    }
405
406    fn header(&self) -> &HeaderView {
407        &self.header
408    }
409
410    fn empty_record(&self) -> Record {
411        Record::new(Rc::clone(&self.header))
412    }
413}
414
415impl Drop for IndexedReader {
416    fn drop(&mut self) {
417        unsafe { htslib::bcf_sr_destroy(self.inner) };
418    }
419}
420
421/// This module contains the `SyncedReader` class and related code.
422pub mod synced {
423
424    use super::*;
425
426    /// This module contains bitmask constants for `SyncedReader`.
427    pub mod pairing {
428        /// Allow different alleles, as long as they all are SNPs.
429        pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430        /// The same as above, but with indels.
431        pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432        /// Any combination of alleles can be returned by `bcf_sr_next_line()`.
433        pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434        /// At least some of multiallelic ALTs must match.  Implied by all the others with the exception of `EXACT`.
435        pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436        /// Allow REF-only records with SNPs.
437        pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438        /// Allow REF-only records with indels.
439        pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440        /// Require the exact same set of alleles in all files.
441        pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442        /// `SNPS | INDELS`.
443        pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444        /// `SNPS | INDELS | SNP_REF | INDEL_REF`.
445        pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446    }
447
448    /// A wrapper for `bcf_srs_t`; allows joint traversal of multiple VCF and/or BCF files.
449    #[derive(Debug)]
450    pub struct SyncedReader {
451        /// Internal handle for the synced reader.
452        inner: *mut crate::htslib::bcf_srs_t,
453
454        /// RC's of `HeaderView`s of the readers.
455        headers: Vec<Rc<HeaderView>>,
456
457        /// The position of the previous fetch, if any.
458        current_region: Option<(u32, u64, u64)>,
459    }
460
461    // TODO: add interface for setting threads, ensure that the pool is freed properly
462    impl SyncedReader {
463        pub fn new() -> Result<Self> {
464            let inner = unsafe { crate::htslib::bcf_sr_init() };
465            if inner.is_null() {
466                return Err(Error::BcfAllocationError);
467            }
468
469            Ok(SyncedReader {
470                inner,
471                headers: Vec::new(),
472                current_region: None,
473            })
474        }
475
476        /// Enable or disable requiring of index
477        pub fn set_require_index(&mut self, do_require: bool) {
478            unsafe {
479                (*self.inner).require_index = if do_require { 1 } else { 0 };
480            }
481        }
482
483        /// Set the given bitmask of values from `sr_pairing` module.
484        pub fn set_pairing(&mut self, bitmask: u32) {
485            unsafe {
486                // TODO: 1 actually is BCF_SR_PAIR_LOGIC but is not available here?
487                crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488            }
489        }
490
491        /// Add new reader with the path to the file.
492        pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493            match path.as_ref().to_str() {
494                Some(p) if path.as_ref().exists() => {
495                    let p_cstring = ffi::CString::new(p).unwrap();
496                    let res =
497                        unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499                    if res == 0 {
500                        return Err(Error::BcfOpen {
501                            target: p.to_owned(),
502                        });
503                    }
504
505                    let i = (self.reader_count() - 1) as isize;
506                    let header = Rc::new(HeaderView::new(unsafe {
507                        crate::htslib::bcf_hdr_dup((*(*self.inner).readers.offset(i)).header)
508                    }));
509                    self.headers.push(header);
510                    Ok(())
511                }
512                _ => Err(Error::NonUnicodePath),
513            }
514        }
515
516        /// Remove reader with the given index.
517        pub fn remove_reader(&mut self, idx: u32) {
518            if idx >= self.reader_count() {
519                panic!("Invalid reader!");
520            } else {
521                unsafe {
522                    crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
523                }
524                self.headers.remove(idx as usize);
525            }
526        }
527
528        /// Return number of open files/readers.
529        pub fn reader_count(&self) -> u32 {
530            unsafe { (*self.inner).nreaders as u32 }
531        }
532
533        /// Read next line and return number of readers that have the given line (0 if end of all files is reached).
534        pub fn read_next(&mut self) -> Result<u32> {
535            let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
536
537            if num == 0 {
538                if unsafe { (*self.inner).errnum } != 0 {
539                    return Err(Error::BcfInvalidRecord);
540                }
541                Ok(0)
542            } else {
543                assert!(num > 0, "num returned by htslib must not be negative");
544                match self.current_region {
545                    Some((rid, _start, end)) => {
546                        for idx in 0..self.reader_count() {
547                            if !self.has_line(idx) {
548                                continue;
549                            }
550                            unsafe {
551                                let record = *(*(*self.inner).readers.offset(idx as isize))
552                                    .buffer
553                                    .offset(0);
554                                if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
555                                    return Ok(0);
556                                }
557                            }
558                        }
559                        Ok(num)
560                    }
561                    None => Ok(num),
562                }
563            }
564        }
565
566        /// Return whether the given reader has the line.
567        pub fn has_line(&self, idx: u32) -> bool {
568            if idx >= self.reader_count() {
569                panic!("Invalid reader!");
570            } else {
571                unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
572            }
573        }
574
575        /// Return record from the given reader, if any.
576        pub fn record(&self, idx: u32) -> Option<Record> {
577            if self.has_line(idx) {
578                let record = Record::new(self.headers[idx as usize].clone());
579                unsafe {
580                    crate::htslib::bcf_copy(
581                        record.inner,
582                        *(*(*self.inner).readers.offset(idx as isize))
583                            .buffer
584                            .offset(0),
585                    );
586                }
587                Some(record)
588            } else {
589                None
590            }
591        }
592
593        /// Return header from the given reader.
594        pub fn header(&self, idx: u32) -> &HeaderView {
595            // TODO: is the mutability here correct?
596            if idx >= self.reader_count() {
597                panic!("Invalid reader!");
598            } else {
599                &self.headers[idx as usize]
600            }
601        }
602
603        /// Jump to the given region.
604        ///
605        /// # Arguments
606        ///
607        /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
608        ///           contig name to ID.
609        /// * `start` - `0`-based start coordinate of region on reference.
610        /// * `end` - `0`-based end coordinate of region on reference.
611        pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
612            let contig = {
613                let contig = self.header(0).rid2name(rid).unwrap(); //.clone();
614                ffi::CString::new(contig).unwrap()
615            };
616            if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
617                Err(Error::GenomicSeek {
618                    contig: contig.to_str().unwrap().to_owned(),
619                    start,
620                })
621            } else {
622                self.current_region = Some((rid, start, end));
623                Ok(())
624            }
625        }
626    }
627
628    impl Drop for SyncedReader {
629        fn drop(&mut self) {
630            unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
631        }
632    }
633}
634
635#[derive(Clone, Copy, Debug, PartialEq, Eq)]
636pub enum Format {
637    Vcf,
638    Bcf,
639}
640
641/// A VCF/BCF writer.
642#[derive(Debug)]
643pub struct Writer {
644    inner: *mut htslib::htsFile,
645    header: Rc<HeaderView>,
646    subset: Option<SampleSubset>,
647}
648
649unsafe impl Send for Writer {}
650
651impl Writer {
652    /// Create a new writer that writes to the given path.
653    ///
654    /// # Arguments
655    ///
656    /// * `path` - the path
657    /// * `header` - header definition to use
658    /// * `uncompressed` - disable compression
659    /// * `vcf` - write VCF instead of BCF
660    pub fn from_path<P: AsRef<Path>>(
661        path: P,
662        header: &Header,
663        uncompressed: bool,
664        format: Format,
665    ) -> Result<Self> {
666        if let Some(p) = path.as_ref().to_str() {
667            Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
668        } else {
669            Err(Error::NonUnicodePath)
670        }
671    }
672
673    /// Create a new writer from a URL.
674    ///
675    /// # Arguments
676    ///
677    /// * `url` - the URL
678    /// * `header` - header definition to use
679    /// * `uncompressed` - disable compression
680    /// * `vcf` - write VCF instead of BCF
681    pub fn from_url(
682        url: &Url,
683        header: &Header,
684        uncompressed: bool,
685        format: Format,
686    ) -> Result<Self> {
687        Self::new(url.as_str().as_bytes(), header, uncompressed, format)
688    }
689
690    /// Create a new writer to stdout.
691    ///
692    /// # Arguments
693    ///
694    /// * `header` - header definition to use
695    /// * `uncompressed` - disable compression
696    /// * `vcf` - write VCF instead of BCF
697    pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
698        Self::new(b"-", header, uncompressed, format)
699    }
700
701    fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
702        let mode: &[u8] = match (uncompressed, format) {
703            (true, Format::Vcf) => b"w",
704            (false, Format::Vcf) => b"wz",
705            (true, Format::Bcf) => b"wbu",
706            (false, Format::Bcf) => b"wb",
707        };
708
709        let htsfile = bcf_open(path, mode)?;
710        unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
711        Ok(Writer {
712            inner: htsfile,
713            header: Rc::new(HeaderView::new(unsafe {
714                htslib::bcf_hdr_dup(header.inner)
715            })),
716            subset: header.subset.clone(),
717        })
718    }
719
720    /// Obtain reference to the lightweight `HeaderView` of the BCF header.
721    pub fn header(&self) -> &HeaderView {
722        &self.header
723    }
724
725    /// Create empty record for writing to this writer.
726    ///
727    /// This record can then be reused multiple times.
728    pub fn empty_record(&self) -> Record {
729        record::Record::new(Rc::clone(&self.header))
730    }
731
732    /// Translate record to header of this writer.
733    ///
734    /// # Arguments
735    ///
736    /// - `record` - The `Record` to translate.
737    pub fn translate(&mut self, record: &mut record::Record) {
738        unsafe {
739            htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740        }
741        record.set_header(Rc::clone(&self.header));
742    }
743
744    /// Subset samples of record to match header of this writer.
745    ///
746    /// # Arguments
747    ///
748    /// - `record` - The `Record` to modify.
749    pub fn subset(&mut self, record: &mut record::Record) {
750        if let Some(ref mut subset) = self.subset {
751            unsafe {
752                htslib::bcf_subset(
753                    self.header.inner,
754                    record.inner,
755                    subset.len() as i32,
756                    subset.as_mut_ptr(),
757                );
758            }
759        }
760    }
761
762    /// Write `record` to the Writer.
763    ///
764    /// # Arguments
765    ///
766    /// - `record` - The `Record` to write.
767    pub fn write(&mut self, record: &record::Record) -> Result<()> {
768        if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769            Err(Error::WriteRecord)
770        } else {
771            Ok(())
772        }
773    }
774
775    /// Activate multi-threaded BCF write support in htslib. This should permit faster
776    /// writing of large BCF files.
777    ///
778    /// # Arguments
779    ///
780    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
781    pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782        unsafe { set_threads(self.inner, n_threads) }
783    }
784}
785
786impl Drop for Writer {
787    fn drop(&mut self) {
788        unsafe {
789            htslib::hts_close(self.inner);
790        }
791    }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796    reader: &'a mut R,
797}
798
799impl<'a, R: Read> Iterator for Records<'a, R> {
800    type Item = Result<record::Record>;
801
802    fn next(&mut self) -> Option<Result<record::Record>> {
803        let mut record = self.reader.empty_record();
804        match self.reader.read(&mut record) {
805            Some(Err(e)) => Some(Err(e)),
806            Some(Ok(_)) => Some(Ok(record)),
807            None => None,
808        }
809    }
810}
811
812/// Wrapper for opening a BCF file.
813fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
814    let p = ffi::CString::new(target).unwrap();
815    let c_str = ffi::CString::new(mode).unwrap();
816    let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
817
818    if ret.is_null() {
819        return Err(Error::BcfOpen {
820            target: str::from_utf8(target).unwrap().to_owned(),
821        });
822    }
823
824    unsafe {
825        if !(mode.contains(&b'w')
826            || (*ret).format.category == htslib::htsFormatCategory_variant_data)
827        {
828            return Err(Error::BcfOpen {
829                target: str::from_utf8(target).unwrap().to_owned(),
830            });
831        }
832    }
833    Ok(ret)
834}
835
836#[cfg(test)]
837mod tests {
838    use super::record::Buffer;
839    use super::*;
840    use crate::bcf::header::Id;
841    use crate::bcf::record::GenotypeAllele;
842    use crate::bcf::record::Numeric;
843    use crate::bcf::Reader;
844    use std::convert::TryFrom;
845    use std::fs::File;
846    use std::io::prelude::Read as IoRead;
847    use std::path::Path;
848    use std::str;
849
850    fn _test_read<P: AsRef<Path>>(path: &P) {
851        let mut bcf = Reader::from_path(path).expect("Error opening file.");
852        assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
853
854        for (i, rec) in bcf.records().enumerate() {
855            let record = rec.expect("Error reading record.");
856            assert_eq!(record.sample_count(), 1);
857
858            assert_eq!(record.rid().expect("Error reading rid."), 0);
859            assert_eq!(record.pos(), 10021 + i as i64);
860            assert!((record.qual() - 0f32).abs() < std::f32::EPSILON);
861            let mut buffer = Buffer::new();
862            assert!(
863                (record
864                    .info_shared_buffer(b"MQ0F", &mut buffer)
865                    .float()
866                    .expect("Error reading info.")
867                    .expect("Missing tag")[0]
868                    - 1.0)
869                    .abs()
870                    < std::f32::EPSILON
871            );
872            if i == 59 {
873                assert!(
874                    (record
875                        .info_shared_buffer(b"SGB", &mut buffer)
876                        .float()
877                        .expect("Error reading info.")
878                        .expect("Missing tag")[0]
879                        - -0.379885)
880                        .abs()
881                        < std::f32::EPSILON
882                );
883            }
884            // the artificial "not observed" allele is present in each record.
885            assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
886
887            let fmt = record.format(b"PL");
888            let pl = fmt.integer().expect("Error reading format.");
889            assert_eq!(pl.len(), 1);
890            if i == 59 {
891                assert_eq!(pl[0].len(), 6);
892            } else {
893                assert_eq!(pl[0].len(), 3);
894            }
895        }
896    }
897
898    #[test]
899    fn test_read() {
900        _test_read(&"test/test.bcf");
901    }
902
903    #[test]
904    fn test_reader_set_threads() {
905        let path = &"test/test.bcf";
906        let mut bcf = Reader::from_path(path).expect("Error opening file.");
907        bcf.set_threads(2).unwrap();
908    }
909
910    #[test]
911    fn test_writer_set_threads() {
912        let path = &"test/test.bcf";
913        let tmp = tempfile::Builder::new()
914            .prefix("rust-htslib")
915            .tempdir()
916            .expect("Cannot create temp dir");
917        let bcfpath = tmp.path().join("test.bcf");
918        let bcf = Reader::from_path(path).expect("Error opening file.");
919        let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
920            .expect("Error subsetting samples.");
921        let mut writer =
922            Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
923        writer.set_threads(2).unwrap();
924    }
925
926    #[test]
927    fn test_fetch() {
928        let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
929        bcf.set_threads(2).unwrap();
930        let rid = bcf
931            .header()
932            .name2rid(b"1")
933            .expect("Translating from contig '1' to ID failed.");
934        bcf.fetch(rid, 10_033, Some(10_060))
935            .expect("Fetching failed");
936        assert_eq!(bcf.records().count(), 28);
937    }
938
939    #[test]
940    fn test_fetch_all() {
941        let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
942        bcf.set_threads(2).unwrap();
943        let rid = bcf
944            .header()
945            .name2rid(b"1")
946            .expect("Translating from contig '1' to ID failed.");
947        bcf.fetch(rid, 0, None).expect("Fetching failed");
948        assert_eq!(bcf.records().count(), 62);
949    }
950
951    #[test]
952    fn test_fetch_open_ended() {
953        let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
954        bcf.set_threads(2).unwrap();
955        let rid = bcf
956            .header()
957            .name2rid(b"1")
958            .expect("Translating from contig '1' to ID failed.");
959        bcf.fetch(rid, 10077, None).expect("Fetching failed");
960        assert_eq!(bcf.records().count(), 6);
961    }
962
963    #[test]
964    fn test_fetch_start_greater_than_last_vcf_pos() {
965        let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
966        bcf.set_threads(2).unwrap();
967        let rid = bcf
968            .header()
969            .name2rid(b"1")
970            .expect("Translating from contig '1' to ID failed.");
971        bcf.fetch(rid, 20077, None).expect("Fetching failed");
972        assert_eq!(bcf.records().count(), 0);
973    }
974
975    #[test]
976    fn test_write() {
977        let mut bcf = Reader::from_path(&"test/test_multi.bcf").expect("Error opening file.");
978        let tmp = tempfile::Builder::new()
979            .prefix("rust-htslib")
980            .tempdir()
981            .expect("Cannot create temp dir");
982        let bcfpath = tmp.path().join("test.bcf");
983        println!("{:?}", bcfpath);
984        {
985            let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
986                .expect("Error subsetting samples.");
987            let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
988                .expect("Error opening file.");
989            for rec in bcf.records() {
990                let mut record = rec.expect("Error reading record.");
991                writer.translate(&mut record);
992                writer.subset(&mut record);
993                record.trim_alleles().expect("Error trimming alleles.");
994                writer.write(&record).expect("Error writing record");
995            }
996        }
997        {
998            _test_read(&bcfpath);
999        }
1000        tmp.close().expect("Failed to delete temp dir");
1001    }
1002
1003    #[test]
1004    fn test_strings() {
1005        let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1006        let fs1 = [
1007            &b"LongString1"[..],
1008            &b"LongString2"[..],
1009            &b"."[..],
1010            &b"LongString4"[..],
1011            &b"evenlength"[..],
1012            &b"ss6"[..],
1013        ];
1014        let mut buffer = Buffer::new();
1015        for (i, rec) in vcf.records().enumerate() {
1016            println!("record {}", i);
1017            let record = rec.expect("Error reading record.");
1018            assert_eq!(
1019                record
1020                    .info_shared_buffer(b"S1", &mut buffer)
1021                    .string()
1022                    .expect("Error reading string.")
1023                    .expect("Missing tag")[0],
1024                format!("string{}", i + 1).as_bytes()
1025            );
1026            let fs1_str_vec = record
1027                .format_shared_buffer(b"FS1", &mut buffer)
1028                .string()
1029                .expect("Error reading string.");
1030            assert_eq!(fs1_str_vec.len(), 2);
1031            println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1032            assert_eq!(
1033                record
1034                    .format(b"FS1")
1035                    .string()
1036                    .expect("Error reading string.")[0],
1037                fs1[i]
1038            );
1039        }
1040    }
1041
1042    #[test]
1043    fn test_missing() {
1044        let mut vcf = Reader::from_path(&"test/test_missing.vcf").expect("Error opening file.");
1045        let fn4 = [
1046            &[
1047                i32::missing(),
1048                i32::missing(),
1049                i32::missing(),
1050                i32::missing(),
1051            ][..],
1052            &[i32::missing()][..],
1053        ];
1054        let f1 = [false, true];
1055        let mut buffer = Buffer::new();
1056        for (i, rec) in vcf.records().enumerate() {
1057            let record = rec.expect("Error reading record.");
1058            assert_eq!(
1059                record
1060                    .info_shared_buffer(b"F1", &mut buffer)
1061                    .float()
1062                    .expect("Error reading float.")
1063                    .expect("Missing tag")[0]
1064                    .is_nan(),
1065                f1[i]
1066            );
1067            assert_eq!(
1068                record
1069                    .format(b"FN4")
1070                    .integer()
1071                    .expect("Error reading integer.")[1],
1072                fn4[i]
1073            );
1074            assert!(
1075                record.format(b"FF4").float().expect("Error reading float.")[1]
1076                    .iter()
1077                    .all(|&v| v.is_missing())
1078            );
1079        }
1080    }
1081
1082    #[test]
1083    fn test_genotypes() {
1084        let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1085        let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1086        for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1087            let rec = rec.expect("Error reading record.");
1088            let genotypes = rec.genotypes().expect("Error reading genotypes");
1089            assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1090        }
1091    }
1092
1093    #[test]
1094    fn test_header_ids() {
1095        let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1096        let header = &vcf.header();
1097        use crate::bcf::header::Id;
1098
1099        assert_eq!(header.id_to_name(Id(4)), b"GT");
1100        assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1101        assert!(header.name_to_id(b"XX").is_err());
1102    }
1103
1104    #[test]
1105    fn test_header_samples() {
1106        let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1107        let header = &vcf.header();
1108
1109        assert_eq!(header.id_to_sample(Id(0)), b"one");
1110        assert_eq!(header.id_to_sample(Id(1)), b"two");
1111        assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1112        assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1113        assert!(header.sample_to_id(b"three").is_err());
1114    }
1115
1116    #[test]
1117    fn test_header_contigs() {
1118        let vcf = Reader::from_path(&"test/test_multi.bcf").expect("Error opening file.");
1119        let header = &vcf.header();
1120
1121        assert_eq!(header.contig_count(), 86);
1122
1123        // test existing contig names and IDs
1124        assert_eq!(header.rid2name(0).unwrap(), b"1");
1125        assert_eq!(header.name2rid(b"1").unwrap(), 0);
1126
1127        assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1128        assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1129
1130        // test nonexistent contig names and IDs
1131        assert!(header.name2rid(b"nonexistent_contig").is_err());
1132        assert!(header.rid2name(100).is_err());
1133    }
1134
1135    #[test]
1136    fn test_header_records() {
1137        let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1138        let records = vcf.header().header_records();
1139        assert_eq!(records.len(), 10);
1140
1141        match records[1] {
1142            HeaderRecord::Filter {
1143                ref key,
1144                ref values,
1145            } => {
1146                assert_eq!(key, "FILTER");
1147                assert_eq!(values["ID"], "PASS");
1148            }
1149            _ => {
1150                panic!("Invalid HeaderRecord");
1151            }
1152        }
1153    }
1154
1155    #[test]
1156    fn test_header_info_types() {
1157        let vcf = Reader::from_path(&"test/test.bcf").unwrap();
1158        let header = vcf.header();
1159        let truth = vec![
1160            (
1161                // INFO=<ID=INDEL,Number=0,Type=Flag>
1162                "INDEL",
1163                header::TagType::Flag,
1164                header::TagLength::Fixed(0),
1165            ),
1166            (
1167                // INFO=<ID=DP,Number=1,Type=Integer>
1168                "DP",
1169                header::TagType::Integer,
1170                header::TagLength::Fixed(1),
1171            ),
1172            (
1173                // INFO=<ID=QS,Number=R,Type=Float>
1174                "QS",
1175                header::TagType::Float,
1176                header::TagLength::Alleles,
1177            ),
1178            (
1179                // INFO=<ID=I16,Number=16,Type=Float>
1180                "I16",
1181                header::TagType::Float,
1182                header::TagLength::Fixed(16),
1183            ),
1184        ];
1185        for (ref_name, ref_type, ref_length) in truth {
1186            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1187            assert_eq!(tag_type, ref_type);
1188            assert_eq!(tag_length, ref_length);
1189        }
1190
1191        let vcf = Reader::from_path(&"test/test_svlen.vcf").unwrap();
1192        let header = vcf.header();
1193        let truth = vec![
1194            (
1195                // INFO=<ID=IMPRECISE,Number=0,Type=Flag>
1196                "IMPRECISE",
1197                header::TagType::Flag,
1198                header::TagLength::Fixed(0),
1199            ),
1200            (
1201                // INFO=<ID=SVTYPE,Number=1,Type=String>
1202                "SVTYPE",
1203                header::TagType::String,
1204                header::TagLength::Fixed(1),
1205            ),
1206            (
1207                // INFO=<ID=SVLEN,Number=.,Type=Integer>
1208                "SVLEN",
1209                header::TagType::Integer,
1210                header::TagLength::Variable,
1211            ),
1212            (
1213                // INFO<ID=CIGAR,Number=A,Type=String>
1214                "CIGAR",
1215                header::TagType::String,
1216                header::TagLength::AltAlleles,
1217            ),
1218        ];
1219        for (ref_name, ref_type, ref_length) in truth {
1220            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1221            assert_eq!(tag_type, ref_type);
1222            assert_eq!(tag_length, ref_length);
1223        }
1224
1225        assert!(header.info_type(b"NOT_THERE").is_err());
1226    }
1227
1228    #[test]
1229    fn test_remove_alleles() {
1230        let mut bcf = Reader::from_path(&"test/test_multi.bcf").unwrap();
1231        for res in bcf.records() {
1232            let mut record = res.unwrap();
1233            if record.pos() == 10080 {
1234                record.remove_alleles(&[false, false, true]).unwrap();
1235                assert_eq!(record.alleles(), [b"A", b"C"]);
1236            }
1237        }
1238    }
1239
1240    // Helper function reading full file into string.
1241    fn read_all<P: AsRef<Path>>(path: P) -> String {
1242        let mut file = File::open(path.as_ref())
1243            .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1244        let mut contents = String::new();
1245        file.read_to_string(&mut contents)
1246            .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1247        contents
1248    }
1249
1250    // Open `test_various.vcf`, add a record from scratch to it and write it out again.
1251    //
1252    // This exercises the full functionality of updating information in a `record::Record`.
1253    #[test]
1254    fn test_write_various() {
1255        // Open reader, then create writer.
1256        let tmp = tempfile::Builder::new()
1257            .prefix("rust-htslib")
1258            .tempdir()
1259            .expect("Cannot create temp dir");
1260        let out_path = tmp.path().join("test_various.out.vcf");
1261
1262        let vcf = Reader::from_path(&"test/test_various.vcf").expect("Error opening file.");
1263        // The writer goes into its own block so we can ensure that the file is closed and
1264        // all data is written below.
1265        {
1266            let mut writer = Writer::from_path(
1267                &out_path,
1268                &Header::from_template(&vcf.header()),
1269                true,
1270                Format::Vcf,
1271            )
1272            .expect("Error opening file.");
1273
1274            // Setup empty record, filled below.
1275            let mut record = writer.empty_record();
1276
1277            record.set_rid(Some(0));
1278            assert_eq!(record.rid().unwrap(), 0);
1279
1280            record.set_pos(12);
1281            assert_eq!(record.pos(), 12);
1282
1283            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1284            record.set_id(b"to_be_cleared").unwrap();
1285            assert_eq!(
1286                str::from_utf8(record.id().as_ref()).unwrap(),
1287                "to_be_cleared"
1288            );
1289            record.clear_id().unwrap();
1290            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1291            record.set_id(b"first_id").unwrap();
1292            record.push_id(b"second_id").unwrap();
1293            record.push_id(b"first_id").unwrap();
1294
1295            assert!(record.filters().next().is_none());
1296            record.set_filters(&["q10".as_bytes()]).unwrap();
1297            record.push_filter("s50".as_bytes()).unwrap();
1298            record.remove_filter("q10".as_bytes(), true).unwrap();
1299            record.push_filter("q10".as_bytes()).unwrap();
1300
1301            record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1302
1303            record.set_qual(10.0);
1304
1305            record.push_info_integer(b"N1", &[32]).unwrap();
1306            record.push_info_float(b"F1", &[33.0]).unwrap();
1307            record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1308            record.push_info_flag(b"X1").unwrap();
1309
1310            record
1311                .push_genotypes(&[
1312                    GenotypeAllele::Unphased(0),
1313                    GenotypeAllele::Unphased(1),
1314                    GenotypeAllele::Unphased(1),
1315                    GenotypeAllele::Phased(1),
1316                ])
1317                .unwrap();
1318
1319            record
1320                .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1321                .unwrap();
1322            record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1323            record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1324            record
1325                .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1326                .unwrap();
1327
1328            // Finally, write out the record.
1329            writer.write(&record).unwrap();
1330        }
1331
1332        // Now, compare expected and real output.
1333        let expected = read_all("test/test_various.out.vcf");
1334        let actual = read_all(&out_path);
1335        assert_eq!(expected, actual);
1336    }
1337
1338    #[test]
1339    fn test_remove_headers() {
1340        let vcf = Reader::from_path(&"test/test_headers.vcf").expect("Error opening file.");
1341        let tmp = tempfile::Builder::new()
1342            .prefix("rust-htslib")
1343            .tempdir()
1344            .expect("Cannot create temp dir");
1345        let vcfpath = tmp.path().join("test.vcf");
1346        let mut header = Header::from_template(&vcf.header);
1347        header
1348            .remove_contig(b"contig2")
1349            .remove_info(b"INFO2")
1350            .remove_format(b"FORMAT2")
1351            .remove_filter(b"FILTER2")
1352            .remove_structured(b"Foo2")
1353            .remove_generic(b"Bar2");
1354        {
1355            let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1356                .expect("Error opening output file.");
1357            // Note that we don't need to write anything, we are just looking at the header.
1358        }
1359
1360        let expected = read_all("test/test_headers.out.vcf");
1361        let actual = read_all(&vcfpath);
1362        assert_eq!(expected, actual);
1363    }
1364
1365    #[test]
1366    fn test_synced_reader() {
1367        let mut reader = synced::SyncedReader::new().unwrap();
1368        reader.set_require_index(true);
1369        reader.set_pairing(synced::pairing::SNPS);
1370
1371        assert_eq!(reader.reader_count(), 0);
1372        reader.add_reader(&"test/test_left.vcf.gz").unwrap();
1373        reader.add_reader(&"test/test_right.vcf.gz").unwrap();
1374        assert_eq!(reader.reader_count(), 2);
1375
1376        let res1 = reader.read_next();
1377        assert_eq!(res1.unwrap(), 2);
1378        assert!(reader.has_line(0));
1379        assert!(reader.has_line(1));
1380
1381        let res2 = reader.read_next();
1382        assert_eq!(res2.unwrap(), 1);
1383        assert!(reader.has_line(0));
1384        assert!(!reader.has_line(1));
1385
1386        let res3 = reader.read_next();
1387        assert_eq!(res3.unwrap(), 1);
1388        assert!(!reader.has_line(0));
1389        assert!(reader.has_line(1));
1390
1391        let res4 = reader.read_next();
1392        assert_eq!(res4.unwrap(), 0);
1393    }
1394
1395    #[test]
1396    fn test_synced_reader_fetch() {
1397        let mut reader = synced::SyncedReader::new().unwrap();
1398        reader.set_require_index(true);
1399        reader.set_pairing(synced::pairing::SNPS);
1400
1401        assert_eq!(reader.reader_count(), 0);
1402        reader.add_reader(&"test/test_left.vcf.gz").unwrap();
1403        reader.add_reader(&"test/test_right.vcf.gz").unwrap();
1404        assert_eq!(reader.reader_count(), 2);
1405
1406        reader.fetch(0, 0, 1000).unwrap();
1407        let res1 = reader.read_next();
1408        assert_eq!(res1.unwrap(), 2);
1409        assert!(reader.has_line(0));
1410        assert!(reader.has_line(1));
1411
1412        let res2 = reader.read_next();
1413        assert_eq!(res2.unwrap(), 1);
1414        assert!(reader.has_line(0));
1415        assert!(!reader.has_line(1));
1416
1417        let res3 = reader.read_next();
1418        assert_eq!(res3.unwrap(), 1);
1419        assert!(!reader.has_line(0));
1420        assert!(reader.has_line(1));
1421
1422        let res4 = reader.read_next();
1423        assert_eq!(res4.unwrap(), 0);
1424    }
1425
1426    #[test]
1427    fn test_svlen() {
1428        let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1429
1430        let mut record = reader.empty_record();
1431        reader.read(&mut record).unwrap().unwrap();
1432
1433        assert_eq!(
1434            *record.info(b"SVLEN").integer().unwrap().unwrap(),
1435            &[-127][..]
1436        );
1437    }
1438
1439    #[test]
1440    fn test_fails_on_bam() {
1441        let reader = Reader::from_path("test/test.bam");
1442        assert!(reader.is_err());
1443    }
1444
1445    #[test]
1446    fn test_fails_on_non_existiant() {
1447        let reader = Reader::from_path("test/no_such_file");
1448        assert!(reader.is_err());
1449    }
1450
1451    #[test]
1452    fn test_multi_string_info_tag() {
1453        let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1454        let mut rec = reader.empty_record();
1455        let _ = reader.read(&mut rec);
1456
1457        assert_eq!(
1458            rec.info_shared_buffer(b"ANN", Buffer::new())
1459                .string()
1460                .unwrap()
1461                .unwrap()
1462                .len(),
1463            14
1464        );
1465    }
1466
1467    #[test]
1468    fn test_multi_string_info_tag_number_a() {
1469        let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1470        let mut rec = reader.empty_record();
1471        let _ = reader.read(&mut rec);
1472
1473        assert_eq!(
1474            rec.info_shared_buffer(b"X", Buffer::new())
1475                .string()
1476                .unwrap()
1477                .unwrap()
1478                .len(),
1479            2
1480        );
1481    }
1482
1483    #[test]
1484    fn test_genotype_allele_conversion() {
1485        let allele = GenotypeAllele::Unphased(1);
1486        let converted: i32 = allele.into();
1487        let expected = 4;
1488        assert_eq!(converted, expected);
1489        let reverse_conversion = GenotypeAllele::from(expected);
1490        assert_eq!(allele, reverse_conversion);
1491    }
1492
1493    #[test]
1494    fn test_genotype_missing_allele_conversion() {
1495        let allele = GenotypeAllele::PhasedMissing;
1496        let converted: i32 = allele.into();
1497        let expected = 1;
1498        assert_eq!(converted, expected);
1499        let reverse_conversion = GenotypeAllele::from(expected);
1500        assert_eq!(allele, reverse_conversion);
1501    }
1502
1503    #[test]
1504    fn test_alt_allele_dosage() {
1505        let path = &"test/test_string.vcf";
1506        let mut bcf = Reader::from_path(path).expect("Error opening file.");
1507        let _header = bcf.header();
1508        // FORMAT fields of first record of the vcf should look like:
1509        // GT:FS1:FN1	./1:LongString1:1	1/1:ss1:2
1510        let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1511        let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1512        assert_eq!(sample_count, 2);
1513        let mut n_ref = vec![0; sample_count];
1514        let mut n_alt = vec![0; sample_count];
1515        let mut n_missing = vec![0; sample_count];
1516        let gts = first_record.genotypes().expect("Error reading genotypes");
1517        for sample_index in 0..sample_count {
1518            // for each sample
1519            for gta in gts.get(sample_index).iter() {
1520                // for each allele
1521                match gta.index() {
1522                    Some(0) => n_ref[sample_index] += 1,  // reference allele
1523                    Some(_) => n_alt[sample_index] += 1,  // alt allele
1524                    None => n_missing[sample_index] += 1, // missing allele
1525                }
1526            }
1527        }
1528        assert_eq!(n_ref, [0, 0]);
1529        assert_eq!(n_alt, [1, 2]);
1530        assert_eq!(n_missing, [1, 0]);
1531    }
1532
1533    #[test]
1534    fn test_obs_cornercase() {
1535        let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1536        let first_record = reader
1537            .records()
1538            .next()
1539            .unwrap()
1540            .expect("Fail to read record");
1541
1542        assert_eq!(
1543            *first_record.info(b"EVENT").string().unwrap().unwrap(),
1544            [b"gridss33fb_1085"]
1545        );
1546        assert_eq!(
1547            *first_record.info(b"MATEID").string().unwrap().unwrap(),
1548            [b"gridss33fb_1085h"]
1549        );
1550    }
1551
1552    #[test]
1553    fn test_trailing_omitted_format_fields() {
1554        let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1555        let first_record = reader
1556            .records()
1557            .next()
1558            .unwrap()
1559            .expect("Fail to read record");
1560
1561        let expected: Vec<&[u8]> = Vec::new();
1562        assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1563        assert_eq!(
1564            *first_record.format(b"INT").integer().unwrap(),
1565            vec![&[i32::missing()]],
1566        );
1567        assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1568    }
1569
1570    // #[test]
1571    // fn test_buffer_lifetime() {
1572    //     let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1573    //     let first_record = reader
1574    //         .records()
1575    //         .next()
1576    //         .unwrap()
1577    //         .expect("Fail to read record");
1578
1579    //     fn get_value<'a, 'b>(record: &'a Record) -> &'b [u8] {
1580    //         // FIXME: this should not be possible, because the slice outlives the buffer.
1581    //         let buffer: BufferBacked<'b, _, _> = record.info(b"EVENT").string().unwrap().unwrap();
1582    //         let value: &'b [u8] = buffer[0];
1583    //         value
1584    //     }
1585
1586    //     let buffered = first_record.info(b"EVENT").string().unwrap().unwrap();
1587    //     assert_eq!(get_value(&first_record), buffered[0]);
1588    // }
1589}