1use std::ffi;
107use std::path::Path;
108use std::rc::Rc;
109use std::str;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125pub trait Read: Sized {
127 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137 fn records(&mut self) -> Records<'_, Self>;
139
140 fn header(&self) -> &HeaderView;
142
143 fn empty_record(&self) -> Record;
145
146 fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158#[derive(Debug)]
160pub struct Reader {
161 inner: *mut htslib::htsFile,
162 header: Rc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
170 assert!(n_threads > 0, "n_threads must be > 0");
171
172 let r = htslib::hts_set_threads(hts_file, n_threads as i32);
173 if r != 0 {
174 Err(Error::SetThreads)
175 } else {
176 Ok(())
177 }
178}
179
180impl Reader {
181 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
183 match path.as_ref().to_str() {
184 Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
185 Some(p) => Self::new(p.as_bytes()),
186 _ => Err(Error::NonUnicodePath),
187 }
188 }
189
190 pub fn from_url(url: &Url) -> Result<Self> {
192 Self::new(url.as_str().as_bytes())
193 }
194
195 pub fn from_stdin() -> Result<Self> {
197 Self::new(b"-")
198 }
199
200 fn new(path: &[u8]) -> Result<Self> {
201 let htsfile = bcf_open(path, b"r")?;
202 let header = unsafe { htslib::bcf_hdr_read(htsfile) };
203 Ok(Reader {
204 inner: htsfile,
205 header: Rc::new(HeaderView::new(header)),
206 })
207 }
208}
209
210impl Read for Reader {
211 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
212 match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
213 0 => {
214 unsafe {
215 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
217 }
218 record.set_header(Rc::clone(&self.header));
219 Some(Ok(()))
220 }
221 -1 => None,
222 _ => Some(Err(Error::BcfInvalidRecord)),
223 }
224 }
225
226 fn records(&mut self) -> Records<'_, Self> {
227 Records { reader: self }
228 }
229
230 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
231 unsafe { set_threads(self.inner, n_threads) }
232 }
233
234 fn header(&self) -> &HeaderView {
235 &self.header
236 }
237
238 fn empty_record(&self) -> Record {
240 self.header.empty_record()
241 }
242}
243
244impl Drop for Reader {
245 fn drop(&mut self) {
246 unsafe {
247 htslib::hts_close(self.inner);
248 }
249 }
250}
251
252#[derive(Debug)]
254pub struct IndexedReader {
255 inner: *mut htslib::bcf_srs_t,
257 header: Rc<HeaderView>,
259
260 current_region: Option<(u32, u64, Option<u64>)>,
262}
263
264unsafe impl Send for IndexedReader {}
265
266impl IndexedReader {
267 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
273 let path = path.as_ref();
274 match path.to_str() {
275 Some(p) if path.exists() => {
276 Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
277 }
278 Some(p) => Err(Error::FileNotFound { path: p.into() }),
279 None => Err(Error::NonUnicodePath),
280 }
281 }
282
283 pub fn from_url(url: &Url) -> Result<Self> {
285 Self::new(&ffi::CString::new(url.as_str()).unwrap())
286 }
287
288 fn new(path: &ffi::CStr) -> Result<Self> {
294 let ser_reader = unsafe { htslib::bcf_sr_init() };
296 unsafe {
297 htslib::bcf_sr_set_opt(ser_reader, 0);
298 } if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
301 let header = Rc::new(HeaderView::new(unsafe {
302 htslib::bcf_hdr_dup((*(*ser_reader).readers.offset(0)).header)
303 }));
304 Ok(IndexedReader {
305 inner: ser_reader,
306 header,
307 current_region: None,
308 })
309 } else {
310 Err(Error::BcfOpen {
311 target: path.to_str().unwrap().to_owned(),
312 })
313 }
314 }
315
316 pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
329 let contig = self.header.rid2name(rid)?;
330 let contig = ffi::CString::new(contig).unwrap();
331 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
332 Err(Error::GenomicSeek {
333 contig: contig.to_str().unwrap().to_owned(),
334 start,
335 })
336 } else {
337 self.current_region = Some((rid, start, end));
338 Ok(())
339 }
340 }
341}
342
343impl Read for IndexedReader {
344 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
345 match unsafe { htslib::bcf_sr_next_line(self.inner) } {
346 0 => {
347 if unsafe { (*self.inner).errnum } != 0 {
348 Some(Err(Error::BcfInvalidRecord))
349 } else {
350 None
351 }
352 }
353 i => {
354 assert!(i > 0, "Must not be negative");
355 unsafe {
360 htslib::bcf_copy(
361 record.inner,
362 *(*(*self.inner).readers.offset(0)).buffer.offset(0),
363 );
364 }
365
366 unsafe {
367 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
369 }
370
371 record.set_header(Rc::clone(&self.header));
372
373 match self.current_region {
374 Some((rid, _start, end)) => {
375 let endpos = match end {
376 Some(e) => e,
377 None => u64::MAX,
378 };
379 if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380 Some(Ok(()))
381 } else {
382 None
383 }
384 }
385 None => Some(Ok(())),
386 }
387 }
388 }
389 }
390
391 fn records(&mut self) -> Records<'_, Self> {
392 Records { reader: self }
393 }
394
395 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396 assert!(n_threads > 0, "n_threads must be > 0");
397
398 let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399 if r != 0 {
400 Err(Error::SetThreads)
401 } else {
402 Ok(())
403 }
404 }
405
406 fn header(&self) -> &HeaderView {
407 &self.header
408 }
409
410 fn empty_record(&self) -> Record {
411 Record::new(Rc::clone(&self.header))
412 }
413}
414
415impl Drop for IndexedReader {
416 fn drop(&mut self) {
417 unsafe { htslib::bcf_sr_destroy(self.inner) };
418 }
419}
420
421pub mod synced {
423
424 use super::*;
425
426 pub mod pairing {
428 pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430 pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432 pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434 pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436 pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438 pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440 pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442 pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444 pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446 }
447
448 #[derive(Debug)]
450 pub struct SyncedReader {
451 inner: *mut crate::htslib::bcf_srs_t,
453
454 headers: Vec<Rc<HeaderView>>,
456
457 current_region: Option<(u32, u64, u64)>,
459 }
460
461 impl SyncedReader {
463 pub fn new() -> Result<Self> {
464 let inner = unsafe { crate::htslib::bcf_sr_init() };
465 if inner.is_null() {
466 return Err(Error::BcfAllocationError);
467 }
468
469 Ok(SyncedReader {
470 inner,
471 headers: Vec::new(),
472 current_region: None,
473 })
474 }
475
476 pub fn set_require_index(&mut self, do_require: bool) {
478 unsafe {
479 (*self.inner).require_index = if do_require { 1 } else { 0 };
480 }
481 }
482
483 pub fn set_pairing(&mut self, bitmask: u32) {
485 unsafe {
486 crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488 }
489 }
490
491 pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493 match path.as_ref().to_str() {
494 Some(p) if path.as_ref().exists() => {
495 let p_cstring = ffi::CString::new(p).unwrap();
496 let res =
497 unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499 if res == 0 {
500 return Err(Error::BcfOpen {
501 target: p.to_owned(),
502 });
503 }
504
505 let i = (self.reader_count() - 1) as isize;
506 let header = Rc::new(HeaderView::new(unsafe {
507 crate::htslib::bcf_hdr_dup((*(*self.inner).readers.offset(i)).header)
508 }));
509 self.headers.push(header);
510 Ok(())
511 }
512 _ => Err(Error::NonUnicodePath),
513 }
514 }
515
516 pub fn remove_reader(&mut self, idx: u32) {
518 if idx >= self.reader_count() {
519 panic!("Invalid reader!");
520 } else {
521 unsafe {
522 crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
523 }
524 self.headers.remove(idx as usize);
525 }
526 }
527
528 pub fn reader_count(&self) -> u32 {
530 unsafe { (*self.inner).nreaders as u32 }
531 }
532
533 pub fn read_next(&mut self) -> Result<u32> {
535 let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
536
537 if num == 0 {
538 if unsafe { (*self.inner).errnum } != 0 {
539 return Err(Error::BcfInvalidRecord);
540 }
541 Ok(0)
542 } else {
543 assert!(num > 0, "num returned by htslib must not be negative");
544 match self.current_region {
545 Some((rid, _start, end)) => {
546 for idx in 0..self.reader_count() {
547 if !self.has_line(idx) {
548 continue;
549 }
550 unsafe {
551 let record = *(*(*self.inner).readers.offset(idx as isize))
552 .buffer
553 .offset(0);
554 if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
555 return Ok(0);
556 }
557 }
558 }
559 Ok(num)
560 }
561 None => Ok(num),
562 }
563 }
564 }
565
566 pub fn has_line(&self, idx: u32) -> bool {
568 if idx >= self.reader_count() {
569 panic!("Invalid reader!");
570 } else {
571 unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
572 }
573 }
574
575 pub fn record(&self, idx: u32) -> Option<Record> {
577 if self.has_line(idx) {
578 let record = Record::new(self.headers[idx as usize].clone());
579 unsafe {
580 crate::htslib::bcf_copy(
581 record.inner,
582 *(*(*self.inner).readers.offset(idx as isize))
583 .buffer
584 .offset(0),
585 );
586 }
587 Some(record)
588 } else {
589 None
590 }
591 }
592
593 pub fn header(&self, idx: u32) -> &HeaderView {
595 if idx >= self.reader_count() {
597 panic!("Invalid reader!");
598 } else {
599 &self.headers[idx as usize]
600 }
601 }
602
603 pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
612 let contig = {
613 let contig = self.header(0).rid2name(rid).unwrap(); ffi::CString::new(contig).unwrap()
615 };
616 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
617 Err(Error::GenomicSeek {
618 contig: contig.to_str().unwrap().to_owned(),
619 start,
620 })
621 } else {
622 self.current_region = Some((rid, start, end));
623 Ok(())
624 }
625 }
626 }
627
628 impl Drop for SyncedReader {
629 fn drop(&mut self) {
630 unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
631 }
632 }
633}
634
635#[derive(Clone, Copy, Debug, PartialEq, Eq)]
636pub enum Format {
637 Vcf,
638 Bcf,
639}
640
641#[derive(Debug)]
643pub struct Writer {
644 inner: *mut htslib::htsFile,
645 header: Rc<HeaderView>,
646 subset: Option<SampleSubset>,
647}
648
649unsafe impl Send for Writer {}
650
651impl Writer {
652 pub fn from_path<P: AsRef<Path>>(
661 path: P,
662 header: &Header,
663 uncompressed: bool,
664 format: Format,
665 ) -> Result<Self> {
666 if let Some(p) = path.as_ref().to_str() {
667 Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
668 } else {
669 Err(Error::NonUnicodePath)
670 }
671 }
672
673 pub fn from_url(
682 url: &Url,
683 header: &Header,
684 uncompressed: bool,
685 format: Format,
686 ) -> Result<Self> {
687 Self::new(url.as_str().as_bytes(), header, uncompressed, format)
688 }
689
690 pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
698 Self::new(b"-", header, uncompressed, format)
699 }
700
701 fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
702 let mode: &[u8] = match (uncompressed, format) {
703 (true, Format::Vcf) => b"w",
704 (false, Format::Vcf) => b"wz",
705 (true, Format::Bcf) => b"wbu",
706 (false, Format::Bcf) => b"wb",
707 };
708
709 let htsfile = bcf_open(path, mode)?;
710 unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
711 Ok(Writer {
712 inner: htsfile,
713 header: Rc::new(HeaderView::new(unsafe {
714 htslib::bcf_hdr_dup(header.inner)
715 })),
716 subset: header.subset.clone(),
717 })
718 }
719
720 pub fn header(&self) -> &HeaderView {
722 &self.header
723 }
724
725 pub fn empty_record(&self) -> Record {
729 record::Record::new(Rc::clone(&self.header))
730 }
731
732 pub fn translate(&mut self, record: &mut record::Record) {
738 unsafe {
739 htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740 }
741 record.set_header(Rc::clone(&self.header));
742 }
743
744 pub fn subset(&mut self, record: &mut record::Record) {
750 if let Some(ref mut subset) = self.subset {
751 unsafe {
752 htslib::bcf_subset(
753 self.header.inner,
754 record.inner,
755 subset.len() as i32,
756 subset.as_mut_ptr(),
757 );
758 }
759 }
760 }
761
762 pub fn write(&mut self, record: &record::Record) -> Result<()> {
768 if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769 Err(Error::WriteRecord)
770 } else {
771 Ok(())
772 }
773 }
774
775 pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782 unsafe { set_threads(self.inner, n_threads) }
783 }
784}
785
786impl Drop for Writer {
787 fn drop(&mut self) {
788 unsafe {
789 htslib::hts_close(self.inner);
790 }
791 }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796 reader: &'a mut R,
797}
798
799impl<'a, R: Read> Iterator for Records<'a, R> {
800 type Item = Result<record::Record>;
801
802 fn next(&mut self) -> Option<Result<record::Record>> {
803 let mut record = self.reader.empty_record();
804 match self.reader.read(&mut record) {
805 Some(Err(e)) => Some(Err(e)),
806 Some(Ok(_)) => Some(Ok(record)),
807 None => None,
808 }
809 }
810}
811
812fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
814 let p = ffi::CString::new(target).unwrap();
815 let c_str = ffi::CString::new(mode).unwrap();
816 let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
817
818 if ret.is_null() {
819 return Err(Error::BcfOpen {
820 target: str::from_utf8(target).unwrap().to_owned(),
821 });
822 }
823
824 unsafe {
825 if !(mode.contains(&b'w')
826 || (*ret).format.category == htslib::htsFormatCategory_variant_data)
827 {
828 return Err(Error::BcfOpen {
829 target: str::from_utf8(target).unwrap().to_owned(),
830 });
831 }
832 }
833 Ok(ret)
834}
835
836#[cfg(test)]
837mod tests {
838 use super::record::Buffer;
839 use super::*;
840 use crate::bcf::header::Id;
841 use crate::bcf::record::GenotypeAllele;
842 use crate::bcf::record::Numeric;
843 use crate::bcf::Reader;
844 use std::convert::TryFrom;
845 use std::fs::File;
846 use std::io::prelude::Read as IoRead;
847 use std::path::Path;
848 use std::str;
849
850 fn _test_read<P: AsRef<Path>>(path: &P) {
851 let mut bcf = Reader::from_path(path).expect("Error opening file.");
852 assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
853
854 for (i, rec) in bcf.records().enumerate() {
855 let record = rec.expect("Error reading record.");
856 assert_eq!(record.sample_count(), 1);
857
858 assert_eq!(record.rid().expect("Error reading rid."), 0);
859 assert_eq!(record.pos(), 10021 + i as i64);
860 assert!((record.qual() - 0f32).abs() < std::f32::EPSILON);
861 let mut buffer = Buffer::new();
862 assert!(
863 (record
864 .info_shared_buffer(b"MQ0F", &mut buffer)
865 .float()
866 .expect("Error reading info.")
867 .expect("Missing tag")[0]
868 - 1.0)
869 .abs()
870 < std::f32::EPSILON
871 );
872 if i == 59 {
873 assert!(
874 (record
875 .info_shared_buffer(b"SGB", &mut buffer)
876 .float()
877 .expect("Error reading info.")
878 .expect("Missing tag")[0]
879 - -0.379885)
880 .abs()
881 < std::f32::EPSILON
882 );
883 }
884 assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
886
887 let fmt = record.format(b"PL");
888 let pl = fmt.integer().expect("Error reading format.");
889 assert_eq!(pl.len(), 1);
890 if i == 59 {
891 assert_eq!(pl[0].len(), 6);
892 } else {
893 assert_eq!(pl[0].len(), 3);
894 }
895 }
896 }
897
898 #[test]
899 fn test_read() {
900 _test_read(&"test/test.bcf");
901 }
902
903 #[test]
904 fn test_reader_set_threads() {
905 let path = &"test/test.bcf";
906 let mut bcf = Reader::from_path(path).expect("Error opening file.");
907 bcf.set_threads(2).unwrap();
908 }
909
910 #[test]
911 fn test_writer_set_threads() {
912 let path = &"test/test.bcf";
913 let tmp = tempfile::Builder::new()
914 .prefix("rust-htslib")
915 .tempdir()
916 .expect("Cannot create temp dir");
917 let bcfpath = tmp.path().join("test.bcf");
918 let bcf = Reader::from_path(path).expect("Error opening file.");
919 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
920 .expect("Error subsetting samples.");
921 let mut writer =
922 Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
923 writer.set_threads(2).unwrap();
924 }
925
926 #[test]
927 fn test_fetch() {
928 let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
929 bcf.set_threads(2).unwrap();
930 let rid = bcf
931 .header()
932 .name2rid(b"1")
933 .expect("Translating from contig '1' to ID failed.");
934 bcf.fetch(rid, 10_033, Some(10_060))
935 .expect("Fetching failed");
936 assert_eq!(bcf.records().count(), 28);
937 }
938
939 #[test]
940 fn test_fetch_all() {
941 let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
942 bcf.set_threads(2).unwrap();
943 let rid = bcf
944 .header()
945 .name2rid(b"1")
946 .expect("Translating from contig '1' to ID failed.");
947 bcf.fetch(rid, 0, None).expect("Fetching failed");
948 assert_eq!(bcf.records().count(), 62);
949 }
950
951 #[test]
952 fn test_fetch_open_ended() {
953 let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
954 bcf.set_threads(2).unwrap();
955 let rid = bcf
956 .header()
957 .name2rid(b"1")
958 .expect("Translating from contig '1' to ID failed.");
959 bcf.fetch(rid, 10077, None).expect("Fetching failed");
960 assert_eq!(bcf.records().count(), 6);
961 }
962
963 #[test]
964 fn test_fetch_start_greater_than_last_vcf_pos() {
965 let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
966 bcf.set_threads(2).unwrap();
967 let rid = bcf
968 .header()
969 .name2rid(b"1")
970 .expect("Translating from contig '1' to ID failed.");
971 bcf.fetch(rid, 20077, None).expect("Fetching failed");
972 assert_eq!(bcf.records().count(), 0);
973 }
974
975 #[test]
976 fn test_write() {
977 let mut bcf = Reader::from_path(&"test/test_multi.bcf").expect("Error opening file.");
978 let tmp = tempfile::Builder::new()
979 .prefix("rust-htslib")
980 .tempdir()
981 .expect("Cannot create temp dir");
982 let bcfpath = tmp.path().join("test.bcf");
983 println!("{:?}", bcfpath);
984 {
985 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
986 .expect("Error subsetting samples.");
987 let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
988 .expect("Error opening file.");
989 for rec in bcf.records() {
990 let mut record = rec.expect("Error reading record.");
991 writer.translate(&mut record);
992 writer.subset(&mut record);
993 record.trim_alleles().expect("Error trimming alleles.");
994 writer.write(&record).expect("Error writing record");
995 }
996 }
997 {
998 _test_read(&bcfpath);
999 }
1000 tmp.close().expect("Failed to delete temp dir");
1001 }
1002
1003 #[test]
1004 fn test_strings() {
1005 let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1006 let fs1 = [
1007 &b"LongString1"[..],
1008 &b"LongString2"[..],
1009 &b"."[..],
1010 &b"LongString4"[..],
1011 &b"evenlength"[..],
1012 &b"ss6"[..],
1013 ];
1014 let mut buffer = Buffer::new();
1015 for (i, rec) in vcf.records().enumerate() {
1016 println!("record {}", i);
1017 let record = rec.expect("Error reading record.");
1018 assert_eq!(
1019 record
1020 .info_shared_buffer(b"S1", &mut buffer)
1021 .string()
1022 .expect("Error reading string.")
1023 .expect("Missing tag")[0],
1024 format!("string{}", i + 1).as_bytes()
1025 );
1026 let fs1_str_vec = record
1027 .format_shared_buffer(b"FS1", &mut buffer)
1028 .string()
1029 .expect("Error reading string.");
1030 assert_eq!(fs1_str_vec.len(), 2);
1031 println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1032 assert_eq!(
1033 record
1034 .format(b"FS1")
1035 .string()
1036 .expect("Error reading string.")[0],
1037 fs1[i]
1038 );
1039 }
1040 }
1041
1042 #[test]
1043 fn test_missing() {
1044 let mut vcf = Reader::from_path(&"test/test_missing.vcf").expect("Error opening file.");
1045 let fn4 = [
1046 &[
1047 i32::missing(),
1048 i32::missing(),
1049 i32::missing(),
1050 i32::missing(),
1051 ][..],
1052 &[i32::missing()][..],
1053 ];
1054 let f1 = [false, true];
1055 let mut buffer = Buffer::new();
1056 for (i, rec) in vcf.records().enumerate() {
1057 let record = rec.expect("Error reading record.");
1058 assert_eq!(
1059 record
1060 .info_shared_buffer(b"F1", &mut buffer)
1061 .float()
1062 .expect("Error reading float.")
1063 .expect("Missing tag")[0]
1064 .is_nan(),
1065 f1[i]
1066 );
1067 assert_eq!(
1068 record
1069 .format(b"FN4")
1070 .integer()
1071 .expect("Error reading integer.")[1],
1072 fn4[i]
1073 );
1074 assert!(
1075 record.format(b"FF4").float().expect("Error reading float.")[1]
1076 .iter()
1077 .all(|&v| v.is_missing())
1078 );
1079 }
1080 }
1081
1082 #[test]
1083 fn test_genotypes() {
1084 let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1085 let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1086 for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1087 let rec = rec.expect("Error reading record.");
1088 let genotypes = rec.genotypes().expect("Error reading genotypes");
1089 assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1090 }
1091 }
1092
1093 #[test]
1094 fn test_header_ids() {
1095 let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1096 let header = &vcf.header();
1097 use crate::bcf::header::Id;
1098
1099 assert_eq!(header.id_to_name(Id(4)), b"GT");
1100 assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1101 assert!(header.name_to_id(b"XX").is_err());
1102 }
1103
1104 #[test]
1105 fn test_header_samples() {
1106 let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1107 let header = &vcf.header();
1108
1109 assert_eq!(header.id_to_sample(Id(0)), b"one");
1110 assert_eq!(header.id_to_sample(Id(1)), b"two");
1111 assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1112 assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1113 assert!(header.sample_to_id(b"three").is_err());
1114 }
1115
1116 #[test]
1117 fn test_header_contigs() {
1118 let vcf = Reader::from_path(&"test/test_multi.bcf").expect("Error opening file.");
1119 let header = &vcf.header();
1120
1121 assert_eq!(header.contig_count(), 86);
1122
1123 assert_eq!(header.rid2name(0).unwrap(), b"1");
1125 assert_eq!(header.name2rid(b"1").unwrap(), 0);
1126
1127 assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1128 assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1129
1130 assert!(header.name2rid(b"nonexistent_contig").is_err());
1132 assert!(header.rid2name(100).is_err());
1133 }
1134
1135 #[test]
1136 fn test_header_records() {
1137 let vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
1138 let records = vcf.header().header_records();
1139 assert_eq!(records.len(), 10);
1140
1141 match records[1] {
1142 HeaderRecord::Filter {
1143 ref key,
1144 ref values,
1145 } => {
1146 assert_eq!(key, "FILTER");
1147 assert_eq!(values["ID"], "PASS");
1148 }
1149 _ => {
1150 panic!("Invalid HeaderRecord");
1151 }
1152 }
1153 }
1154
1155 #[test]
1156 fn test_header_info_types() {
1157 let vcf = Reader::from_path(&"test/test.bcf").unwrap();
1158 let header = vcf.header();
1159 let truth = vec![
1160 (
1161 "INDEL",
1163 header::TagType::Flag,
1164 header::TagLength::Fixed(0),
1165 ),
1166 (
1167 "DP",
1169 header::TagType::Integer,
1170 header::TagLength::Fixed(1),
1171 ),
1172 (
1173 "QS",
1175 header::TagType::Float,
1176 header::TagLength::Alleles,
1177 ),
1178 (
1179 "I16",
1181 header::TagType::Float,
1182 header::TagLength::Fixed(16),
1183 ),
1184 ];
1185 for (ref_name, ref_type, ref_length) in truth {
1186 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1187 assert_eq!(tag_type, ref_type);
1188 assert_eq!(tag_length, ref_length);
1189 }
1190
1191 let vcf = Reader::from_path(&"test/test_svlen.vcf").unwrap();
1192 let header = vcf.header();
1193 let truth = vec![
1194 (
1195 "IMPRECISE",
1197 header::TagType::Flag,
1198 header::TagLength::Fixed(0),
1199 ),
1200 (
1201 "SVTYPE",
1203 header::TagType::String,
1204 header::TagLength::Fixed(1),
1205 ),
1206 (
1207 "SVLEN",
1209 header::TagType::Integer,
1210 header::TagLength::Variable,
1211 ),
1212 (
1213 "CIGAR",
1215 header::TagType::String,
1216 header::TagLength::AltAlleles,
1217 ),
1218 ];
1219 for (ref_name, ref_type, ref_length) in truth {
1220 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1221 assert_eq!(tag_type, ref_type);
1222 assert_eq!(tag_length, ref_length);
1223 }
1224
1225 assert!(header.info_type(b"NOT_THERE").is_err());
1226 }
1227
1228 #[test]
1229 fn test_remove_alleles() {
1230 let mut bcf = Reader::from_path(&"test/test_multi.bcf").unwrap();
1231 for res in bcf.records() {
1232 let mut record = res.unwrap();
1233 if record.pos() == 10080 {
1234 record.remove_alleles(&[false, false, true]).unwrap();
1235 assert_eq!(record.alleles(), [b"A", b"C"]);
1236 }
1237 }
1238 }
1239
1240 fn read_all<P: AsRef<Path>>(path: P) -> String {
1242 let mut file = File::open(path.as_ref())
1243 .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1244 let mut contents = String::new();
1245 file.read_to_string(&mut contents)
1246 .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1247 contents
1248 }
1249
1250 #[test]
1254 fn test_write_various() {
1255 let tmp = tempfile::Builder::new()
1257 .prefix("rust-htslib")
1258 .tempdir()
1259 .expect("Cannot create temp dir");
1260 let out_path = tmp.path().join("test_various.out.vcf");
1261
1262 let vcf = Reader::from_path(&"test/test_various.vcf").expect("Error opening file.");
1263 {
1266 let mut writer = Writer::from_path(
1267 &out_path,
1268 &Header::from_template(&vcf.header()),
1269 true,
1270 Format::Vcf,
1271 )
1272 .expect("Error opening file.");
1273
1274 let mut record = writer.empty_record();
1276
1277 record.set_rid(Some(0));
1278 assert_eq!(record.rid().unwrap(), 0);
1279
1280 record.set_pos(12);
1281 assert_eq!(record.pos(), 12);
1282
1283 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1284 record.set_id(b"to_be_cleared").unwrap();
1285 assert_eq!(
1286 str::from_utf8(record.id().as_ref()).unwrap(),
1287 "to_be_cleared"
1288 );
1289 record.clear_id().unwrap();
1290 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1291 record.set_id(b"first_id").unwrap();
1292 record.push_id(b"second_id").unwrap();
1293 record.push_id(b"first_id").unwrap();
1294
1295 assert!(record.filters().next().is_none());
1296 record.set_filters(&["q10".as_bytes()]).unwrap();
1297 record.push_filter("s50".as_bytes()).unwrap();
1298 record.remove_filter("q10".as_bytes(), true).unwrap();
1299 record.push_filter("q10".as_bytes()).unwrap();
1300
1301 record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1302
1303 record.set_qual(10.0);
1304
1305 record.push_info_integer(b"N1", &[32]).unwrap();
1306 record.push_info_float(b"F1", &[33.0]).unwrap();
1307 record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1308 record.push_info_flag(b"X1").unwrap();
1309
1310 record
1311 .push_genotypes(&[
1312 GenotypeAllele::Unphased(0),
1313 GenotypeAllele::Unphased(1),
1314 GenotypeAllele::Unphased(1),
1315 GenotypeAllele::Phased(1),
1316 ])
1317 .unwrap();
1318
1319 record
1320 .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1321 .unwrap();
1322 record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1323 record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1324 record
1325 .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1326 .unwrap();
1327
1328 writer.write(&record).unwrap();
1330 }
1331
1332 let expected = read_all("test/test_various.out.vcf");
1334 let actual = read_all(&out_path);
1335 assert_eq!(expected, actual);
1336 }
1337
1338 #[test]
1339 fn test_remove_headers() {
1340 let vcf = Reader::from_path(&"test/test_headers.vcf").expect("Error opening file.");
1341 let tmp = tempfile::Builder::new()
1342 .prefix("rust-htslib")
1343 .tempdir()
1344 .expect("Cannot create temp dir");
1345 let vcfpath = tmp.path().join("test.vcf");
1346 let mut header = Header::from_template(&vcf.header);
1347 header
1348 .remove_contig(b"contig2")
1349 .remove_info(b"INFO2")
1350 .remove_format(b"FORMAT2")
1351 .remove_filter(b"FILTER2")
1352 .remove_structured(b"Foo2")
1353 .remove_generic(b"Bar2");
1354 {
1355 let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1356 .expect("Error opening output file.");
1357 }
1359
1360 let expected = read_all("test/test_headers.out.vcf");
1361 let actual = read_all(&vcfpath);
1362 assert_eq!(expected, actual);
1363 }
1364
1365 #[test]
1366 fn test_synced_reader() {
1367 let mut reader = synced::SyncedReader::new().unwrap();
1368 reader.set_require_index(true);
1369 reader.set_pairing(synced::pairing::SNPS);
1370
1371 assert_eq!(reader.reader_count(), 0);
1372 reader.add_reader(&"test/test_left.vcf.gz").unwrap();
1373 reader.add_reader(&"test/test_right.vcf.gz").unwrap();
1374 assert_eq!(reader.reader_count(), 2);
1375
1376 let res1 = reader.read_next();
1377 assert_eq!(res1.unwrap(), 2);
1378 assert!(reader.has_line(0));
1379 assert!(reader.has_line(1));
1380
1381 let res2 = reader.read_next();
1382 assert_eq!(res2.unwrap(), 1);
1383 assert!(reader.has_line(0));
1384 assert!(!reader.has_line(1));
1385
1386 let res3 = reader.read_next();
1387 assert_eq!(res3.unwrap(), 1);
1388 assert!(!reader.has_line(0));
1389 assert!(reader.has_line(1));
1390
1391 let res4 = reader.read_next();
1392 assert_eq!(res4.unwrap(), 0);
1393 }
1394
1395 #[test]
1396 fn test_synced_reader_fetch() {
1397 let mut reader = synced::SyncedReader::new().unwrap();
1398 reader.set_require_index(true);
1399 reader.set_pairing(synced::pairing::SNPS);
1400
1401 assert_eq!(reader.reader_count(), 0);
1402 reader.add_reader(&"test/test_left.vcf.gz").unwrap();
1403 reader.add_reader(&"test/test_right.vcf.gz").unwrap();
1404 assert_eq!(reader.reader_count(), 2);
1405
1406 reader.fetch(0, 0, 1000).unwrap();
1407 let res1 = reader.read_next();
1408 assert_eq!(res1.unwrap(), 2);
1409 assert!(reader.has_line(0));
1410 assert!(reader.has_line(1));
1411
1412 let res2 = reader.read_next();
1413 assert_eq!(res2.unwrap(), 1);
1414 assert!(reader.has_line(0));
1415 assert!(!reader.has_line(1));
1416
1417 let res3 = reader.read_next();
1418 assert_eq!(res3.unwrap(), 1);
1419 assert!(!reader.has_line(0));
1420 assert!(reader.has_line(1));
1421
1422 let res4 = reader.read_next();
1423 assert_eq!(res4.unwrap(), 0);
1424 }
1425
1426 #[test]
1427 fn test_svlen() {
1428 let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1429
1430 let mut record = reader.empty_record();
1431 reader.read(&mut record).unwrap().unwrap();
1432
1433 assert_eq!(
1434 *record.info(b"SVLEN").integer().unwrap().unwrap(),
1435 &[-127][..]
1436 );
1437 }
1438
1439 #[test]
1440 fn test_fails_on_bam() {
1441 let reader = Reader::from_path("test/test.bam");
1442 assert!(reader.is_err());
1443 }
1444
1445 #[test]
1446 fn test_fails_on_non_existiant() {
1447 let reader = Reader::from_path("test/no_such_file");
1448 assert!(reader.is_err());
1449 }
1450
1451 #[test]
1452 fn test_multi_string_info_tag() {
1453 let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1454 let mut rec = reader.empty_record();
1455 let _ = reader.read(&mut rec);
1456
1457 assert_eq!(
1458 rec.info_shared_buffer(b"ANN", Buffer::new())
1459 .string()
1460 .unwrap()
1461 .unwrap()
1462 .len(),
1463 14
1464 );
1465 }
1466
1467 #[test]
1468 fn test_multi_string_info_tag_number_a() {
1469 let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1470 let mut rec = reader.empty_record();
1471 let _ = reader.read(&mut rec);
1472
1473 assert_eq!(
1474 rec.info_shared_buffer(b"X", Buffer::new())
1475 .string()
1476 .unwrap()
1477 .unwrap()
1478 .len(),
1479 2
1480 );
1481 }
1482
1483 #[test]
1484 fn test_genotype_allele_conversion() {
1485 let allele = GenotypeAllele::Unphased(1);
1486 let converted: i32 = allele.into();
1487 let expected = 4;
1488 assert_eq!(converted, expected);
1489 let reverse_conversion = GenotypeAllele::from(expected);
1490 assert_eq!(allele, reverse_conversion);
1491 }
1492
1493 #[test]
1494 fn test_genotype_missing_allele_conversion() {
1495 let allele = GenotypeAllele::PhasedMissing;
1496 let converted: i32 = allele.into();
1497 let expected = 1;
1498 assert_eq!(converted, expected);
1499 let reverse_conversion = GenotypeAllele::from(expected);
1500 assert_eq!(allele, reverse_conversion);
1501 }
1502
1503 #[test]
1504 fn test_alt_allele_dosage() {
1505 let path = &"test/test_string.vcf";
1506 let mut bcf = Reader::from_path(path).expect("Error opening file.");
1507 let _header = bcf.header();
1508 let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1511 let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1512 assert_eq!(sample_count, 2);
1513 let mut n_ref = vec![0; sample_count];
1514 let mut n_alt = vec![0; sample_count];
1515 let mut n_missing = vec![0; sample_count];
1516 let gts = first_record.genotypes().expect("Error reading genotypes");
1517 for sample_index in 0..sample_count {
1518 for gta in gts.get(sample_index).iter() {
1520 match gta.index() {
1522 Some(0) => n_ref[sample_index] += 1, Some(_) => n_alt[sample_index] += 1, None => n_missing[sample_index] += 1, }
1526 }
1527 }
1528 assert_eq!(n_ref, [0, 0]);
1529 assert_eq!(n_alt, [1, 2]);
1530 assert_eq!(n_missing, [1, 0]);
1531 }
1532
1533 #[test]
1534 fn test_obs_cornercase() {
1535 let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1536 let first_record = reader
1537 .records()
1538 .next()
1539 .unwrap()
1540 .expect("Fail to read record");
1541
1542 assert_eq!(
1543 *first_record.info(b"EVENT").string().unwrap().unwrap(),
1544 [b"gridss33fb_1085"]
1545 );
1546 assert_eq!(
1547 *first_record.info(b"MATEID").string().unwrap().unwrap(),
1548 [b"gridss33fb_1085h"]
1549 );
1550 }
1551
1552 #[test]
1553 fn test_trailing_omitted_format_fields() {
1554 let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1555 let first_record = reader
1556 .records()
1557 .next()
1558 .unwrap()
1559 .expect("Fail to read record");
1560
1561 let expected: Vec<&[u8]> = Vec::new();
1562 assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1563 assert_eq!(
1564 *first_record.format(b"INT").integer().unwrap(),
1565 vec![&[i32::missing()]],
1566 );
1567 assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1568 }
1569
1570 }