rust_htslib/bcf/record.rs
1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::f32;
8use std::ffi;
9use std::fmt;
10use std::i32;
11use std::marker::PhantomData;
12use std::ops::Deref;
13use std::os::raw::c_char;
14use std::ptr;
15use std::rc::Rc;
16use std::slice;
17use std::str;
18
19use bio_types::genome;
20use derive_new::new;
21use ieee754::Ieee754;
22use lazy_static::lazy_static;
23
24use crate::bcf::header::{HeaderView, Id};
25use crate::bcf::Error;
26use crate::errors::Result;
27use crate::htslib;
28
/// Sentinel stored for a missing integer entry.
const MISSING_INTEGER: i32 = i32::MIN;
/// Sentinel that marks the end of an integer vector; one above the missing sentinel.
const VECTOR_END_INTEGER: i32 = MISSING_INTEGER + 1;
31
32lazy_static! {
33 static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
34 static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
35}
36
/// Behaviour shared by the numeric value types of INFO and FORMAT entries.
pub trait Numeric {
    /// Produce the sentinel that represents a missing value when stored in a BCF record.
    fn missing() -> Self;

    /// Tell whether this entry is the missing-value sentinel.
    fn is_missing(&self) -> bool;
}
45
46impl Numeric for f32 {
47 fn is_missing(&self) -> bool {
48 self.bits() == MISSING_FLOAT.bits()
49 }
50
51 fn missing() -> f32 {
52 *MISSING_FLOAT
53 }
54}
55
56impl Numeric for i32 {
57 fn is_missing(&self) -> bool {
58 *self == MISSING_INTEGER
59 }
60
61 fn missing() -> i32 {
62 MISSING_INTEGER
63 }
64}
65
/// Module-private helpers for numeric entries.
trait NumericUtils {
    /// Tell whether this entry is the sentinel that marks the end of the record's vector.
    fn is_vector_end(&self) -> bool;
}
70
71impl NumericUtils for f32 {
72 fn is_vector_end(&self) -> bool {
73 self.bits() == VECTOR_END_FLOAT.bits()
74 }
75}
76
77impl NumericUtils for i32 {
78 fn is_vector_end(&self) -> bool {
79 *self == VECTOR_END_INTEGER
80 }
81}
82
83/// A trait to allow for seamless use of bytes or integer identifiers for filters
84pub trait FilterId {
85 fn id_from_header(&self, header: &HeaderView) -> Result<Id>;
86 fn is_pass(&self) -> bool;
87}
88
89impl FilterId for [u8] {
90 fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
91 header.name_to_id(self)
92 }
93 fn is_pass(&self) -> bool {
94 matches!(self, b"PASS" | b".")
95 }
96}
97
98impl FilterId for Id {
99 fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
100 Ok(*self)
101 }
102 fn is_pass(&self) -> bool {
103 *self == Id(0)
104 }
105}
106
/// A buffer for info or format data.
///
/// Wraps a raw allocation; the pointer is handed to `free` when the buffer is dropped.
#[derive(Debug)]
pub struct Buffer {
    /// Raw pointer to the allocation; null for a freshly created buffer.
    inner: *mut ::std::os::raw::c_void,
    /// Size associated with `inner`; starts at zero.
    /// NOTE(review): presumably maintained by the htslib getters that fill the buffer — confirm.
    len: i32,
}
113
114impl Buffer {
115 pub fn new() -> Self {
116 Buffer {
117 inner: ptr::null_mut(),
118 len: 0,
119 }
120 }
121}
122
123impl Drop for Buffer {
124 fn drop(&mut self) {
125 unsafe {
126 ::libc::free(self.inner as *mut ::libc::c_void);
127 }
128 }
129}
130
131#[derive(new, Debug)]
132pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
133 value: T,
134 buffer: B,
135 #[new(default)]
136 phantom: PhantomData<&'a B>,
137}
138
139impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
140 type Target = T;
141
142 fn deref(&self) -> &T {
143 &self.value
144 }
145}
146
147impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
148 for BufferBacked<'a, T, B>
149{
150 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151 fmt::Display::fmt(&self.value, f)
152 }
153}
154
155/// A VCF/BCF record.
156/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
157/// and [`bcf::Writer`](crate::bcf::Writer).
158/// # Example
159/// ```rust
160/// use rust_htslib::bcf::{Format, Writer};
161/// use rust_htslib::bcf::header::Header;
162///
163/// // Create minimal VCF header with a single sample
164/// let mut header = Header::new();
165/// header.push_sample("sample".as_bytes());
166///
167/// // Write uncompressed VCF to stdout with above header and get an empty record
168/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
169/// let mut record = vcf.empty_record();
170/// ```
171#[derive(Debug)]
172pub struct Record {
173 pub inner: *mut htslib::bcf1_t,
174 header: Rc<HeaderView>,
175}
176
177impl Record {
178 /// Construct record with reference to header `HeaderView`, for create-internal use.
179 pub(crate) fn new(header: Rc<HeaderView>) -> Self {
180 let inner = unsafe {
181 let inner = htslib::bcf_init();
182 // Always unpack record.
183 htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
184 inner
185 };
186 Record { inner, header }
187 }
188
189 /// Force unpacking of internal record values.
190 pub fn unpack(&mut self) {
191 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
192 }
193
194 /// Return associated header.
195 pub fn header(&self) -> &HeaderView {
196 self.header.as_ref()
197 }
198
199 /// Set the record header.
200 pub(crate) fn set_header(&mut self, header: Rc<HeaderView>) {
201 self.header = header;
202 }
203
204 /// Return reference to the inner C struct.
205 ///
206 /// # Remarks
207 ///
208 /// Note that this function is only required as long as Rust-Htslib does not provide full
209 /// access to all aspects of Htslib.
210 pub fn inner(&self) -> &htslib::bcf1_t {
211 unsafe { &*self.inner }
212 }
213
214 /// Return mutable reference to inner C struct.
215 ///
216 /// # Remarks
217 ///
218 /// Note that this function is only required as long as Rust-Htslib does not provide full
219 /// access to all aspects of Htslib.
220 pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
221 unsafe { &mut *self.inner }
222 }
223
224 /// Get the reference id of the record.
225 ///
226 /// To look up the contig name,
227 /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
228 ///
229 /// # Returns
230 ///
231 /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
232 /// - `None` if the internal `rid` is set to `-1`
233 pub fn rid(&self) -> Option<u32> {
234 match self.inner().rid {
235 -1 => None,
236 rid => Some(rid as u32),
237 }
238 }
239
240 /// Update the reference id of the record.
241 ///
242 /// To look up reference id for a contig name,
243 /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
244 ///
245 /// # Example
246 ///
247 /// Example assumes we have a Record `record` from a VCF with a header containing region
248 /// named `1`. See [module documentation](../index.html#example-writing) for how to set
249 /// up VCF, header, and record.
250 ///
251 /// ```
252 /// # use rust_htslib::bcf::{Format, Writer};
253 /// # use rust_htslib::bcf::header::Header;
254 /// # let mut header = Header::new();
255 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
256 /// # header.push_record(header_contig_line.as_bytes());
257 /// # header.push_sample("test_sample".as_bytes());
258 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
259 /// # let mut record = vcf.empty_record();
260 /// let rid = record.header().name2rid(b"1").ok();
261 /// record.set_rid(rid);
262 /// assert_eq!(record.rid(), rid);
263 /// let name = record.header().rid2name(record.rid().unwrap()).ok();
264 /// assert_eq!(Some("1".as_bytes()), name);
265 /// ```
266 pub fn set_rid(&mut self, rid: Option<u32>) {
267 match rid {
268 Some(rid) => self.inner_mut().rid = rid as i32,
269 None => self.inner_mut().rid = -1,
270 }
271 }
272
273 /// Return **0-based** position
274 pub fn pos(&self) -> i64 {
275 self.inner().pos
276 }
277
278 /// Set **0-based** position
279 pub fn set_pos(&mut self, pos: i64) {
280 self.inner_mut().pos = pos;
281 }
282
283 /// Return the **0-based, exclusive** end position
284 ///
285 /// # Example
286 /// ```rust
287 /// # use rust_htslib::bcf::{Format, Header, Writer};
288 /// # use tempfile::NamedTempFile;
289 /// # let tmp = NamedTempFile::new().unwrap();
290 /// # let path = tmp.path();
291 /// # let header = Header::new();
292 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
293 /// # let mut record = vcf.empty_record();
294 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
295 /// record.set_alleles(alleles).expect("Failed to set alleles");
296 /// record.set_pos(5);
297 ///
298 /// assert_eq!(record.end(), 8)
299 /// ```
300 pub fn end(&self) -> i64 {
301 self.pos() + self.rlen()
302 }
303
304 /// Return the value of the ID column.
305 ///
306 /// When empty, returns `b".".to_vec()`.
307 pub fn id(&self) -> Vec<u8> {
308 if self.inner().d.id.is_null() {
309 b".".to_vec()
310 } else {
311 let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
312 id.to_bytes().to_vec()
313 }
314 }
315
316 /// Update the ID string to the given value.
317 pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
318 let c_str = ffi::CString::new(id).unwrap();
319 if unsafe {
320 htslib::bcf_update_id(
321 self.header().inner,
322 self.inner,
323 c_str.as_ptr() as *mut c_char,
324 )
325 } == 0
326 {
327 Ok(())
328 } else {
329 Err(Error::BcfSetValues)
330 }
331 }
332
333 /// Clear the ID column (set it to `"."`).
334 pub fn clear_id(&mut self) -> Result<()> {
335 let c_str = ffi::CString::new(&b"."[..]).unwrap();
336 if unsafe {
337 htslib::bcf_update_id(
338 self.header().inner,
339 self.inner,
340 c_str.as_ptr() as *mut c_char,
341 )
342 } == 0
343 {
344 Ok(())
345 } else {
346 Err(Error::BcfSetValues)
347 }
348 }
349
350 /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
351 pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
352 let c_str = ffi::CString::new(id).unwrap();
353 if unsafe {
354 htslib::bcf_add_id(
355 self.header().inner,
356 self.inner,
357 c_str.as_ptr() as *mut c_char,
358 )
359 } == 0
360 {
361 Ok(())
362 } else {
363 Err(Error::BcfSetValues)
364 }
365 }
366
367 /// Return `Filters` iterator for enumerating all filters that have been set.
368 ///
369 /// A record having the `PASS` filter will return an empty `Filter` here.
370 pub fn filters(&self) -> Filters<'_> {
371 Filters::new(self)
372 }
373
374 /// Query whether the filter with the given ID has been set.
375 ///
376 /// This method can be used to check if a record passes filtering by using either `Id(0)`,
377 /// `PASS` or `.`
378 ///
379 /// # Example
380 /// ```rust
381 /// # use rust_htslib::bcf::{Format, Header, Writer};
382 /// # use rust_htslib::bcf::header::Id;
383 /// # use tempfile::NamedTempFile;
384 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
385 /// # let path = tmp.path();
386 /// let mut header = Header::new();
387 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
388 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
389 /// # let mut record = vcf.empty_record();
390 /// assert!(record.has_filter("PASS".as_bytes()));
391 /// assert!(record.has_filter(".".as_bytes()));
392 /// assert!(record.has_filter(&Id(0)));
393 ///
394 /// record.push_filter("foo".as_bytes()).unwrap();
395 /// assert!(record.has_filter("foo".as_bytes()));
396 /// assert!(!record.has_filter("PASS".as_bytes()))
397 /// ```
398 pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
399 if flt_id.is_pass() && self.inner().d.n_flt == 0 {
400 return true;
401 }
402 let id = match flt_id.id_from_header(self.header()) {
403 Ok(i) => *i,
404 Err(_) => return false,
405 };
406 for i in 0..(self.inner().d.n_flt as isize) {
407 if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
408 return true;
409 }
410 }
411 false
412 }
413
414 /// Set the given filter IDs to the FILTER column.
415 ///
416 /// Setting an empty slice removes all filters and sets `PASS`.
417 ///
418 /// # Example
419 /// ```rust
420 /// # use rust_htslib::bcf::{Format, Header, Writer};
421 /// # use rust_htslib::bcf::header::Id;
422 /// # use tempfile::NamedTempFile;
423 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
424 /// # let path = tmp.path();
425 /// let mut header = Header::new();
426 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
427 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
428 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
429 /// # let mut record = vcf.empty_record();
430 /// let foo = record.header().name_to_id(b"foo").unwrap();
431 /// let bar = record.header().name_to_id(b"bar").unwrap();
432 /// assert!(record.has_filter("PASS".as_bytes()));
433 /// let mut filters = vec![&foo, &bar];
434 /// record.set_filters(&filters).unwrap();
435 /// assert!(record.has_filter(&foo));
436 /// assert!(record.has_filter(&bar));
437 /// assert!(!record.has_filter("PASS".as_bytes()));
438 /// filters.clear();
439 /// record.set_filters(&filters).unwrap();
440 /// assert!(record.has_filter("PASS".as_bytes()));
441 /// assert!(!record.has_filter("foo".as_bytes()));
442 /// // 'baz' isn't in the header
443 /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
444 /// ```
445 ///
446 /// # Errors
447 /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
448 ///
449 pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
450 let mut ids: Vec<i32> = flt_ids
451 .iter()
452 .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
453 .collect::<Result<Vec<i32>>>()?;
454 unsafe {
455 htslib::bcf_update_filter(
456 self.header().inner,
457 self.inner,
458 ids.as_mut_ptr(),
459 ids.len() as i32,
460 );
461 };
462 Ok(())
463 }
464
465 /// Add the given filter to the FILTER column.
466 ///
467 /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
468 /// any existing `PASS` filter is removed.
469 ///
470 /// # Example
471 /// ```rust
472 /// # use rust_htslib::bcf::{Format, Header, Writer};
473 /// # use tempfile::NamedTempFile;
474 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
475 /// # let path = tmp.path();
476 /// let mut header = Header::new();
477 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
478 /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
479 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
480 /// # let mut record = vcf.empty_record();
481 /// let foo = "foo".as_bytes();
482 /// let bar = record.header().name_to_id(b"bar").unwrap();
483 /// assert!(record.has_filter("PASS".as_bytes()));
484 ///
485 /// record.push_filter(foo).unwrap();
486 /// record.push_filter(&bar).unwrap();
487 /// assert!(record.has_filter(foo));
488 /// assert!(record.has_filter(&bar));
489 /// // filter must exist in the header
490 /// assert!(record.push_filter("baz".as_bytes()).is_err())
491 /// ```
492 ///
493 /// # Errors
494 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
495 ///
496 pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
497 let id = flt_id.id_from_header(self.header())?;
498 unsafe {
499 htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
500 };
501 Ok(())
502 }
503
504 /// Remove the given filter from the FILTER column.
505 ///
506 /// # Arguments
507 ///
508 /// - `flt_id` - The corresponding filter ID to remove.
509 /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
510 ///
511 /// # Example
512 /// ```rust
513 /// # use rust_htslib::bcf::{Format, Header, Writer};
514 /// # use tempfile::NamedTempFile;
515 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
516 /// # let path = tmp.path();
517 /// let mut header = Header::new();
518 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
519 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
520 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
521 /// # let mut record = vcf.empty_record();
522 /// let foo = "foo".as_bytes();
523 /// let bar = "bar".as_bytes();
524 /// record.set_filters(&[foo, bar]).unwrap();
525 /// assert!(record.has_filter(foo));
526 /// assert!(record.has_filter(bar));
527 ///
528 /// record.remove_filter(foo, true).unwrap();
529 /// assert!(!record.has_filter(foo));
530 /// assert!(record.has_filter(bar));
531 /// // 'baz' is not in the header
532 /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
533 ///
534 /// record.remove_filter(bar, true).unwrap();
535 /// assert!(!record.has_filter(bar));
536 /// assert!(record.has_filter("PASS".as_bytes()));
537 /// ```
538 ///
539 /// # Errors
540 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
541 ///
542 pub fn remove_filter<T: FilterId + ?Sized>(
543 &mut self,
544 flt_id: &T,
545 pass_on_empty: bool,
546 ) -> Result<()> {
547 let id = flt_id.id_from_header(self.header())?;
548 unsafe {
549 htslib::bcf_remove_filter(
550 self.header().inner,
551 self.inner,
552 *id as i32,
553 pass_on_empty as i32,
554 )
555 };
556 Ok(())
557 }
558
559 /// Get alleles strings.
560 ///
561 /// The first allele is the reference allele.
562 pub fn alleles(&self) -> Vec<&[u8]> {
563 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
564 let n = self.inner().n_allele() as usize;
565 let dec = self.inner().d;
566 let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
567 (0..n)
568 .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
569 .collect()
570 }
571
572 /// Set alleles. The first allele is the reference allele.
573 ///
574 /// # Example
575 /// ```rust
576 /// # use rust_htslib::bcf::{Format, Writer};
577 /// # use rust_htslib::bcf::header::Header;
578 /// #
579 /// # // Create minimal VCF header with a single sample
580 /// # let mut header = Header::new();
581 /// # header.push_sample("sample".as_bytes());
582 /// #
583 /// # // Write uncompressed VCF to stdout with above header and get an empty record
584 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
585 /// # let mut record = vcf.empty_record();
586 /// assert_eq!(record.allele_count(), 0);
587 ///
588 /// let alleles: &[&[u8]] = &[b"A", b"TG"];
589 /// record.set_alleles(alleles).expect("Failed to set alleles");
590 /// assert_eq!(record.allele_count(), 2)
591 /// ```
592 pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
593 let cstrings: Vec<ffi::CString> = alleles
594 .iter()
595 .map(|vec| ffi::CString::new(*vec).unwrap())
596 .collect();
597 let mut ptrs: Vec<*const c_char> = cstrings
598 .iter()
599 .map(|cstr| cstr.as_ptr() as *const c_char)
600 .collect();
601 if unsafe {
602 htslib::bcf_update_alleles(
603 self.header().inner,
604 self.inner,
605 ptrs.as_mut_ptr(),
606 alleles.len() as i32,
607 )
608 } == 0
609 {
610 Ok(())
611 } else {
612 Err(Error::BcfSetValues)
613 }
614 }
615
616 /// Get variant quality.
617 pub fn qual(&self) -> f32 {
618 self.inner().qual
619 }
620
621 /// Set variant quality.
622 pub fn set_qual(&mut self, qual: f32) {
623 self.inner_mut().qual = qual;
624 }
625
626 pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
627 self.info_shared_buffer(tag, Buffer::new())
628 }
629
630 /// Get the value of the given info tag.
631 pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
632 &'a self,
633 tag: &'a [u8],
634 buffer: B,
635 ) -> Info<'a, B> {
636 Info {
637 record: self,
638 tag,
639 buffer,
640 }
641 }
642
643 /// Get the number of samples in the record.
644 pub fn sample_count(&self) -> u32 {
645 self.inner().n_sample()
646 }
647
648 /// Get the number of alleles, including reference allele.
649 pub fn allele_count(&self) -> u32 {
650 self.inner().n_allele()
651 }
652
653 /// Add/replace genotypes in FORMAT GT tag.
654 ///
655 /// # Arguments
656 ///
657 /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
658 /// the first dimension contains one array for each sample.
659 ///
660 /// # Errors
661 ///
662 /// Returns error if GT tag is not present in header.
663 ///
664 /// # Example
665 ///
666 /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
667 /// See [module documentation](../index.html#example-writing) for how to set up
668 /// VCF, header, and record.
669 ///
670 /// ```
671 /// # use rust_htslib::bcf::{Format, Writer};
672 /// # use rust_htslib::bcf::header::Header;
673 /// # use rust_htslib::bcf::record::GenotypeAllele;
674 /// # let mut header = Header::new();
675 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
676 /// # header.push_record(header_contig_line.as_bytes());
677 /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
678 /// # header.push_record(header_gt_line.as_bytes());
679 /// # header.push_sample("test_sample".as_bytes());
680 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
681 /// # let mut record = vcf.empty_record();
682 /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
683 /// record.push_genotypes(alleles);
684 /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
685 /// ```
686 pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
687 let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
688 self.push_format_integer(b"GT", &encoded)
689 }
690
691 /// Get genotypes as vector of one `Genotype` per sample.
692 ///
693 /// # Example
694 /// Parsing genotype field (`GT` tag) from a VCF record:
695 /// ```
696 /// use crate::rust_htslib::bcf::{Reader, Read};
697 /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
698 /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
699 /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
700 /// let mut rec = rec.expect("Error reading record.");
701 /// let genotypes = rec.genotypes().expect("Error reading genotypes");
702 /// assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
703 /// }
704 /// ```
705 pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
706 self.genotypes_shared_buffer(Buffer::new())
707 }
708
709 /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
710 /// to avoid unnecessary allocations.
711 pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
712 where
713 B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
714 {
715 Ok(Genotypes {
716 encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
717 })
718 }
719
720 /// Retrieve data for a `FORMAT` field
721 ///
722 /// # Example
723 /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
724 /// for an example of the setup used here.*
725 ///
726 /// ```rust
727 /// # use rust_htslib::bcf::{Format, Writer};
728 /// # use rust_htslib::bcf::header::Header;
729 /// #
730 /// # // Create minimal VCF header with a single sample
731 /// # let mut header = Header::new();
732 /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
733 /// #
734 /// # // Write uncompressed VCF to stdout with above header and get an empty record
735 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
736 /// # let mut record = vcf.empty_record();
737 /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
738 ///
739 /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
740 /// let sample1_depth = read_depths[0];
741 /// assert_eq!(sample1_depth, &[20]);
742 /// let sample2_depth = read_depths[1];
743 /// assert_eq!(sample2_depth, &[12])
744 /// ```
745 ///
746 /// # Errors
747 /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
748 /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
749 /// accessed. If parts of the data are accessed after the `BufferBacked` object has been
750 /// dropped, you will access unallocated memory.
751 pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
752 self.format_shared_buffer(tag, Buffer::new())
753 }
754
755 /// Get the value of the given format tag for each sample.
756 pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
757 &'a self,
758 tag: &'a [u8],
759 buffer: B,
760 ) -> Format<'a, B> {
761 Format::new(self, tag, buffer)
762 }
763
764 /// Add/replace an integer-typed FORMAT tag.
765 ///
766 /// # Arguments
767 ///
768 /// - `tag` - The tag's string.
769 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
770 /// for each sample.
771 ///
772 /// # Errors
773 ///
774 /// Returns error if tag is not present in header.
775 pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
776 self.push_format(tag, data, htslib::BCF_HT_INT)
777 }
778
779 /// Add/replace a float-typed FORMAT tag.
780 ///
781 /// # Arguments
782 ///
783 /// - `tag` - The tag's string.
784 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
785 /// for each sample.
786 ///
787 /// # Errors
788 ///
789 /// Returns error if tag is not present in header.
790 ///
791 /// # Example
792 ///
793 /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
794 /// See [module documentation](../index.html#example-writing) for how to set up
795 /// VCF, header, and record.
796 ///
797 /// ```
798 /// # use rust_htslib::bcf::{Format, Writer};
799 /// # use rust_htslib::bcf::header::Header;
800 /// # use rust_htslib::bcf::record::GenotypeAllele;
801 /// # let mut header = Header::new();
802 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
803 /// # header.push_record(header_contig_line.as_bytes());
804 /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
805 /// # header.push_record(header_af_line.as_bytes());
806 /// # header.push_sample("test_sample".as_bytes());
807 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
808 /// # let mut record = vcf.empty_record();
809 /// record.push_format_float(b"AF", &[0.5]);
810 /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
811 /// ```
812 pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
813 self.push_format(tag, data, htslib::BCF_HT_REAL)
814 }
815
816 /// Add/replace a single-char-typed FORMAT tag.
817 ///
818 /// # Arguments
819 ///
820 /// - `tag` - The tag's string.
821 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
822 /// for each sample.
823 ///
824 /// # Errors
825 ///
826 /// Returns error if tag is not present in header.
827 pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
828 self.push_format(tag, data, htslib::BCF_HT_STR)
829 }
830
831 /// Add a format tag. Data is a flattened two-dimensional array.
832 /// The first dimension contains one array for each sample.
833 fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
834 let tag_c_str = ffi::CString::new(tag).unwrap();
835 unsafe {
836 if htslib::bcf_update_format(
837 self.header().inner,
838 self.inner,
839 tag_c_str.as_ptr() as *mut c_char,
840 data.as_ptr() as *const ::std::os::raw::c_void,
841 data.len() as i32,
842 ht as i32,
843 ) == 0
844 {
845 Ok(())
846 } else {
847 Err(Error::BcfSetTag {
848 tag: str::from_utf8(tag).unwrap().to_owned(),
849 })
850 }
851 }
852 }
853
854 // TODO: should we add convenience methods clear_format_*?
855
856 /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
857 /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
858 ///
859 /// # Arguments
860 ///
861 /// - `tag` - The tag's string.
862 /// - `data` - a two-dimensional array, the first dimension contains one array
863 /// for each sample. Must be non-empty.
864 ///
865 /// # Errors
866 ///
867 /// Returns error if tag is not present in header.
868 pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
869 assert!(
870 !data.is_empty(),
871 "given string data must have at least 1 element"
872 );
873 let c_data = data
874 .iter()
875 .map(|s| ffi::CString::new(s.borrow()).unwrap())
876 .collect::<Vec<ffi::CString>>();
877 let c_ptrs = c_data
878 .iter()
879 .map(|s| s.as_ptr() as *mut i8)
880 .collect::<Vec<*mut i8>>();
881 let tag_c_str = ffi::CString::new(tag).unwrap();
882 unsafe {
883 if htslib::bcf_update_format_string(
884 self.header().inner,
885 self.inner,
886 tag_c_str.as_ptr() as *mut c_char,
887 c_ptrs.as_slice().as_ptr() as *mut *const c_char,
888 data.len() as i32,
889 ) == 0
890 {
891 Ok(())
892 } else {
893 Err(Error::BcfSetTag {
894 tag: str::from_utf8(tag).unwrap().to_owned(),
895 })
896 }
897 }
898 }
899
900 /// Add/replace an integer-typed INFO entry.
901 pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
902 self.push_info(tag, data, htslib::BCF_HT_INT)
903 }
904
905 /// Remove the integer-typed INFO entry.
906 pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
907 self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
908 }
909
910 /// Add/replace a float-typed INFO entry.
911 pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
912 self.push_info(tag, data, htslib::BCF_HT_REAL)
913 }
914
915 /// Remove the float-typed INFO entry.
916 pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
917 self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
918 }
919
920 /// Add/replace an INFO tag.
921 ///
922 /// # Arguments
923 /// * `tag` - the tag to add/replace
924 /// * `data` - the data to set
925 /// * `ht` - the HTSLib type to use
926 fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
927 let tag_c_str = ffi::CString::new(tag).unwrap();
928 unsafe {
929 if htslib::bcf_update_info(
930 self.header().inner,
931 self.inner,
932 tag_c_str.as_ptr() as *mut c_char,
933 data.as_ptr() as *const ::std::os::raw::c_void,
934 data.len() as i32,
935 ht as i32,
936 ) == 0
937 {
938 Ok(())
939 } else {
940 Err(Error::BcfSetTag {
941 tag: str::from_utf8(tag).unwrap().to_owned(),
942 })
943 }
944 }
945 }
946
947 /// Set flag into the INFO column.
948 pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
949 self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
950 }
951
952 /// Remove the flag from the INFO column.
953 pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
954 self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
955 }
956
957 /// Add/replace a string-typed INFO entry.
958 pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
959 self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
960 }
961
962 /// Remove the string field from the INFO column.
963 pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
964 self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
965 }
966
967 /// Add an string-valued INFO tag.
968 fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
969 let mut buf: Vec<u8> = Vec::new();
970 for (i, &s) in data.iter().enumerate() {
971 if i > 0 {
972 buf.extend(b",");
973 }
974 buf.extend(s);
975 }
976 let c_str = ffi::CString::new(buf).unwrap();
977 let len = if ht == htslib::BCF_HT_FLAG {
978 data.len()
979 } else {
980 c_str.to_bytes().len()
981 };
982 let tag_c_str = ffi::CString::new(tag).unwrap();
983 unsafe {
984 if htslib::bcf_update_info(
985 self.header().inner,
986 self.inner,
987 tag_c_str.as_ptr() as *mut c_char,
988 c_str.as_ptr() as *const ::std::os::raw::c_void,
989 len as i32,
990 ht as i32,
991 ) == 0
992 {
993 Ok(())
994 } else {
995 Err(Error::BcfSetTag {
996 tag: str::from_utf8(tag).unwrap().to_owned(),
997 })
998 }
999 }
1000 }
1001
1002 /// Remove unused alleles.
1003 pub fn trim_alleles(&mut self) -> Result<()> {
1004 match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1005 -1 => Err(Error::BcfRemoveAlleles),
1006 _ => Ok(()),
1007 }
1008 }
1009
1010 pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1011 let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1012
1013 for (i, &r) in remove.iter().enumerate() {
1014 if r {
1015 unsafe {
1016 htslib::kbs_insert(rm_set, i as i32);
1017 }
1018 }
1019 }
1020
1021 let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1022
1023 unsafe {
1024 htslib::kbs_destroy(rm_set);
1025 }
1026
1027 match ret {
1028 -1 => Err(Error::BcfRemoveAlleles),
1029 _ => Ok(()),
1030 }
1031 }
1032
1033 /// Get the length of the reference allele. If the record has no reference allele, then the
1034 /// result will be `0`.
1035 ///
1036 /// # Example
1037 /// ```rust
1038 /// # use rust_htslib::bcf::{Format, Writer};
1039 /// # use rust_htslib::bcf::header::Header;
1040 /// #
1041 /// # // Create minimal VCF header with a single sample
1042 /// # let mut header = Header::new();
1043 /// # header.push_sample("sample".as_bytes());
1044 /// #
1045 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1046 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1047 /// # let mut record = vcf.empty_record();
1048 /// # assert_eq!(record.rlen(), 0);
1049 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1050 /// record.set_alleles(alleles).expect("Failed to set alleles");
1051 /// assert_eq!(record.rlen(), 3)
1052 /// ```
1053 pub fn rlen(&self) -> i64 {
1054 self.inner().rlen
1055 }
1056
1057 /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
1058 ///
1059 /// # Example
1060 /// ```rust
1061 /// # use rust_htslib::bcf::{Format, Writer};
1062 /// # use rust_htslib::bcf::header::Header;
1063 /// #
1064 /// # // Create minimal VCF header with a single sample
1065 /// # let mut header = Header::new();
1066 /// # header.push_sample("sample".as_bytes());
1067 /// #
1068 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1069 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1070 /// # let mut record = vcf.empty_record();
1071 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1072 /// record.set_alleles(alleles).expect("Failed to set alleles");
1073 /// record.set_pos(6);
1074 /// record.clear();
1075 /// assert_eq!(record.rlen(), 0);
1076 /// assert_eq!(record.pos(), 0)
1077 /// ```
1078 pub fn clear(&self) {
1079 unsafe { htslib::bcf_clear(self.inner) }
1080 }
1081
1082 /// Provide short description of record for locating it in the BCF/VCF file.
1083 pub fn desc(&self) -> String {
1084 if let Some(rid) = self.rid() {
1085 if let Ok(contig) = self.header.rid2name(rid) {
1086 return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1087 }
1088 }
1089 "".to_owned()
1090 }
1091
1092 /// Convert to VCF String
1093 ///
1094 /// Intended for debug only. Use Writer for efficient VCF output.
1095 ///
1096 pub fn to_vcf_string(&self) -> Result<String> {
1097 let mut buf = htslib::kstring_t {
1098 l: 0,
1099 m: 0,
1100 s: ptr::null_mut(),
1101 };
1102 let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1103
1104 if ret < 0 {
1105 if !buf.s.is_null() {
1106 unsafe {
1107 libc::free(buf.s as *mut libc::c_void);
1108 }
1109 }
1110 return Err(Error::BcfToString);
1111 }
1112
1113 let vcf_str = unsafe {
1114 let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1115 if !buf.s.is_null() {
1116 libc::free(buf.s as *mut libc::c_void);
1117 }
1118 vcf_str
1119 };
1120
1121 Ok(vcf_str)
1122 }
1123}
1124
1125impl Clone for Record {
1126 fn clone(&self) -> Self {
1127 let inner = unsafe {
1128 let inner = htslib::bcf_dup(self.inner);
1129 inner
1130 };
1131 Record {
1132 inner,
1133 header: self.header.clone(),
1134 }
1135 }
1136}
1137
1138impl genome::AbstractLocus for Record {
1139 fn contig(&self) -> &str {
1140 str::from_utf8(
1141 self.header()
1142 .rid2name(self.rid().expect("rid not set"))
1143 .expect("unable to find rid in header"),
1144 )
1145 .expect("unable to interpret contig name as UTF-8")
1146 }
1147
1148 fn pos(&self) -> u64 {
1149 self.pos() as u64
1150 }
1151}
1152
/// Phased or unphased alleles, represented as indices.
///
/// The integer encoding used by the conversions below is `((index + 1) << 1) | phased`,
/// with a missing allele encoded like index `-1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    /// Unphased allele with the given index into the list of alleles.
    Unphased(i32),
    /// Phased allele with the given index into the list of alleles.
    Phased(i32),
    /// Missing allele without the phasing bit.
    UnphasedMissing,
    /// Missing allele with the phasing bit set.
    PhasedMissing,
}

impl GenotypeAllele {
    /// Decode given integer according to BCF standard.
    ///
    /// Delegates to the `From<i32>` conversion so both entry points always agree.
    #[deprecated(
        since = "0.36.0",
        note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
    )]
    pub fn from_encoded(encoded: i32) -> Self {
        Self::from(encoded)
    }

    /// Get the index into the list of alleles.
    ///
    /// Returns `None` for the two missing variants. The stored index is cast to `u32`.
    pub fn index(self) -> Option<u32> {
        match self {
            GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
            GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
        }
    }
}

impl fmt::Display for GenotypeAllele {
    /// Write the allele index, or `.` for a missing allele.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.index() {
            Some(a) => write!(f, "{}", a),
            None => write!(f, "."),
        }
    }
}

impl From<GenotypeAllele> for i32 {
    /// Encode the allele as `((index + 1) << 1) | phased`; missing alleles use index `-1`.
    fn from(allele: GenotypeAllele) -> i32 {
        let (allele, phased) = match allele {
            GenotypeAllele::UnphasedMissing => (-1, 0),
            GenotypeAllele::PhasedMissing => (-1, 1),
            GenotypeAllele::Unphased(a) => (a, 0),
            GenotypeAllele::Phased(a) => (a, 1),
        };
        ((allele + 1) << 1) | phased
    }
}

impl From<i32> for GenotypeAllele {
    /// Decode an allele: the lowest bit is the phasing flag, the remaining bits hold
    /// `index + 1` (so `0` and `1` decode to the missing variants).
    fn from(encoded: i32) -> GenotypeAllele {
        let phased = (encoded & 1) == 1;
        let index = (encoded >> 1) - 1;
        // Matching on a `bool` makes the decoder exhaustive, so no unreachable panic
        // arm is needed. Index `-1` arises exactly for the encoded values 0 and 1.
        match (index, phased) {
            (-1, false) => GenotypeAllele::UnphasedMissing,
            (-1, true) => GenotypeAllele::PhasedMissing,
            (idx, false) => GenotypeAllele::Unphased(idx),
            (idx, true) => GenotypeAllele::Phased(idx),
        }
    }
}
1219
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`.
    ///
    /// NOTE(review): the `NewtypeDeref` derive presumably exposes the wrapped `Vec` via
    /// `Deref` — confirm against the derive macro's documentation.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1225
1226impl fmt::Display for Genotype {
1227 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1228 let &Genotype(ref alleles) = self;
1229 write!(f, "{}", alleles[0])?;
1230 for a in &alleles[1..] {
1231 let sep = match a {
1232 GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1233 GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1234 };
1235 write!(f, "{}{}", sep, a)?;
1236 }
1237 Ok(())
1238 }
1239}
1240
/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
///
/// Holds the still-encoded integer values, one slice per sample; decoding into
/// `GenotypeAllele`s happens in `get`.
#[derive(Debug)]
pub struct Genotypes<'a, B>
where
    B: Borrow<Buffer> + 'a,
{
    /// Encoded genotype values per sample, backed by the buffer `B`.
    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
}
1249
1250impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1251 /// Get genotype of ith sample. So far, only supports diploid genotypes.
1252 ///
1253 /// Note that the result complies with the BCF spec. This means that the
1254 /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1255 /// this method will return `[Unphased(1), Phased(1)]`.
1256 pub fn get(&self, i: usize) -> Genotype {
1257 let igt = self.encoded[i];
1258 Genotype(igt.iter().map(|&e| GenotypeAllele::from(e)).collect())
1259 }
1260}
1261
1262impl Drop for Record {
1263 fn drop(&mut self) {
1264 unsafe { htslib::bcf_destroy(self.inner) };
1265 }
1266}
1267
// `Record` holds a raw htslib pointer (`inner`), so `Send` and `Sync` are asserted manually
// here rather than derived by the compiler.
// NOTE(review): soundness depends on how the header handle and the raw record are shared
// between threads — confirm.
unsafe impl Send for Record {}

unsafe impl Sync for Record {}
1271
/// Info tag representation.
///
/// Ties a record and a tag name to the buffer that receives the values read from htslib.
#[derive(Debug)]
pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the tag is read from.
    record: &'a Record,
    /// Name of the INFO tag as raw bytes.
    tag: &'a [u8],
    /// Buffer handed to htslib to receive the tag's values.
    buffer: B,
}
1279
1280impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'a, B> {
1281 /// Short description of info tag.
1282 pub fn desc(&self) -> String {
1283 str::from_utf8(self.tag).unwrap().to_owned()
1284 }
1285
1286 fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1287 let mut n: i32 = self.buffer.borrow().len;
1288 let c_str = ffi::CString::new(self.tag).unwrap();
1289 let ret = unsafe {
1290 htslib::bcf_get_info_values(
1291 self.record.header().inner,
1292 self.record.inner,
1293 c_str.as_ptr() as *mut c_char,
1294 &mut self.buffer.borrow_mut().inner,
1295 &mut n,
1296 data_type as i32,
1297 )
1298 };
1299 self.buffer.borrow_mut().len = n;
1300
1301 match ret {
1302 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1303 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1304 -3 => Ok(None),
1305 ret => Ok(Some(ret)),
1306 }
1307 }
1308
1309 /// Get integers from tag. `None` if tag not present in record.
1310 ///
1311 /// Import `bcf::record::Numeric` for missing value handling.
1312 ///
1313 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1314 /// as along as the data is accessed. If parts of the data are accessed while
1315 /// the BufferBacked object is already dropped, you will access unallocated
1316 /// memory.
1317 pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1318 self.data(htslib::BCF_HT_INT).map(|data| {
1319 data.map(|ret| {
1320 let values = unsafe {
1321 slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1322 };
1323 BufferBacked::new(&values[..ret as usize], self.buffer)
1324 })
1325 })
1326 }
1327
1328 /// Get floats from tag. `None` if tag not present in record.
1329 ///
1330 /// Import `bcf::record::Numeric` for missing value handling.
1331 ///
1332 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1333 /// as along as the data is accessed. If parts of the data are accessed while
1334 /// the BufferBacked object is already dropped, you will access unallocated
1335 /// memory.
1336 pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1337 self.data(htslib::BCF_HT_REAL).map(|data| {
1338 data.map(|ret| {
1339 let values = unsafe {
1340 slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1341 };
1342 BufferBacked::new(&values[..ret as usize], self.buffer)
1343 })
1344 })
1345 }
1346
1347 /// Get flags from tag. `false` if not set.
1348 pub fn flag(&mut self) -> Result<bool> {
1349 self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1350 Some(ret) => ret == 1,
1351 None => false,
1352 })
1353 }
1354
1355 /// Get strings from tag. `None` if tag not present in record.
1356 ///
1357 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1358 /// as along as the data is accessed. If parts of the data are accessed while
1359 /// the BufferBacked object is already dropped, you will access unallocated
1360 /// memory.
1361 pub fn string(mut self) -> Result<Option<BufferBacked<'b, Vec<&'b [u8]>, B>>> {
1362 self.data(htslib::BCF_HT_STR).map(|data| {
1363 data.map(|ret| {
1364 BufferBacked::new(
1365 unsafe {
1366 slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1367 }
1368 .split(|c| *c == b',')
1369 .map(|s| {
1370 // stop at zero character
1371 s.split(|c| *c == 0u8)
1372 .next()
1373 .expect("Bug: returned string should not be empty.")
1374 })
1375 .collect(),
1376 self.buffer,
1377 )
1378 })
1379 })
1380 }
1381}
1382
// `Send` and `Sync` are asserted manually for `Info`.
// NOTE(review): `Info` borrows a `Record` and owns or borrows a `Buffer`; thread-safety of
// both is assumed here rather than checked by the compiler — confirm.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Send for Info<'a, B> {}

unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Sync for Info<'a, B> {}
1386
1387fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1388 s.split(|v| v.is_vector_end())
1389 .next()
1390 .expect("Bug: returned slice should not be empty.")
1391}
1392
/// Representation of per-sample data.
///
/// Ties a record and a FORMAT tag name to the htslib field handle and to the buffer that
/// receives the values read from htslib.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the tag is read from.
    record: &'a Record,
    /// Name of the FORMAT tag as raw bytes.
    tag: &'a [u8],
    /// Field handle obtained from htslib's `bcf_get_fmt` when the value is created.
    inner: *mut htslib::bcf_fmt_t,
    /// Buffer handed to htslib to receive the tag's values.
    buffer: B,
}
1401
1402impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1403 /// Create new format data in a given record.
1404 fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1405 let c_str = ffi::CString::new(tag).unwrap();
1406 let inner = unsafe {
1407 htslib::bcf_get_fmt(
1408 record.header().inner,
1409 record.inner,
1410 c_str.as_ptr() as *mut c_char,
1411 )
1412 };
1413 Format {
1414 record,
1415 tag,
1416 inner,
1417 buffer,
1418 }
1419 }
1420
1421 /// Provide short description of format entry (just the tag name).
1422 pub fn desc(&self) -> String {
1423 str::from_utf8(self.tag).unwrap().to_owned()
1424 }
1425
1426 pub fn inner(&self) -> &htslib::bcf_fmt_t {
1427 unsafe { &*self.inner }
1428 }
1429
1430 pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1431 unsafe { &mut *self.inner }
1432 }
1433
1434 fn values_per_sample(&self) -> usize {
1435 self.inner().n as usize
1436 }
1437
1438 /// Read and decode format data into a given type.
1439 fn data(&mut self, data_type: u32) -> Result<i32> {
1440 let mut n: i32 = self.buffer.borrow().len;
1441 let c_str = ffi::CString::new(self.tag).unwrap();
1442 let ret = unsafe {
1443 htslib::bcf_get_format_values(
1444 self.record.header().inner,
1445 self.record.inner,
1446 c_str.as_ptr() as *mut c_char,
1447 &mut self.buffer.borrow_mut().inner,
1448 &mut n,
1449 data_type as i32,
1450 )
1451 };
1452 self.buffer.borrow_mut().len = n;
1453 match ret {
1454 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1455 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1456 -3 => Err(Error::BcfMissingTag {
1457 tag: self.desc(),
1458 record: self.record.desc(),
1459 }),
1460 ret => Ok(ret),
1461 }
1462 }
1463
1464 /// Get format data as integers.
1465 ///
1466 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1467 /// as long as the data is accessed. If parts of the data are accessed while
1468 /// the BufferBacked object is already dropped, you will access unallocated
1469 /// memory.
1470 pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1471 self.data(htslib::BCF_HT_INT).map(|ret| {
1472 BufferBacked::new(
1473 unsafe {
1474 slice::from_raw_parts(
1475 self.buffer.borrow_mut().inner as *const i32,
1476 ret as usize,
1477 )
1478 }
1479 .chunks(self.values_per_sample())
1480 .map(|s| trim_slice(s))
1481 .collect(),
1482 self.buffer,
1483 )
1484 })
1485 }
1486
1487 /// Get format data as floats.
1488 ///
1489 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1490 /// as along as the data is accessed. If parts of the data are accessed while
1491 /// the BufferBacked object is already dropped, you will access unallocated
1492 /// memory.
1493 pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1494 self.data(htslib::BCF_HT_REAL).map(|ret| {
1495 BufferBacked::new(
1496 unsafe {
1497 slice::from_raw_parts(
1498 self.buffer.borrow_mut().inner as *const f32,
1499 ret as usize,
1500 )
1501 }
1502 .chunks(self.values_per_sample())
1503 .map(|s| trim_slice(s))
1504 .collect(),
1505 self.buffer,
1506 )
1507 })
1508 }
1509
1510 /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1511 ///
1512 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1513 /// as along as the data is accessed. If parts of the data are accessed while
1514 /// the BufferBacked object is already dropped, you will access unallocated
1515 /// memory.
1516 pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1517 self.data(htslib::BCF_HT_STR).map(|ret| {
1518 if ret == 0 {
1519 return BufferBacked::new(Vec::new(), self.buffer);
1520 }
1521 BufferBacked::new(
1522 unsafe {
1523 slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1524 }
1525 .chunks(self.values_per_sample())
1526 .map(|s| {
1527 // stop at zero character
1528 s.split(|c| *c == 0u8)
1529 .next()
1530 .expect("Bug: returned string should not be empty.")
1531 })
1532 .collect(),
1533 self.buffer,
1534 )
1535 })
1536 }
1537}
1538
// `Format` stores a raw htslib pointer (`inner`), so `Send` and `Sync` are asserted manually
// here rather than derived by the compiler.
// NOTE(review): thread-safety of the borrowed `Record` and of the buffer is assumed, not
// checked — confirm.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Send for Format<'a, B> {}

unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Sync for Format<'a, B> {}
1542
/// Iterator over the filter ids stored in a record.
#[derive(Debug)]
pub struct Filters<'a> {
    /// Reference to the `Record` to enumerate filters for.
    record: &'a Record,
    /// Index of the next filter to return, if not at end.
    idx: i32,
}
1550
1551impl<'a> Filters<'a> {
1552 pub fn new(record: &'a Record) -> Self {
1553 Filters { record, idx: 0 }
1554 }
1555}
1556
1557impl<'a> Iterator for Filters<'a> {
1558 type Item = Id;
1559
1560 fn next(&mut self) -> Option<Id> {
1561 if self.record.inner().d.n_flt <= self.idx {
1562 None
1563 } else {
1564 let i = self.idx as isize;
1565 self.idx += 1;
1566 Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1567 }
1568 }
1569}
1570
// Unit tests for `Record` and the float sentinel constants. Most tests create a writer on
// a temporary file only to obtain an empty record bound to a header.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bcf::{Format, Header, Writer};
    use tempfile::NamedTempFile;

    // The missing-float sentinel must have exactly this bit pattern.
    #[test]
    fn test_missing_float() {
        let expected: u32 = 0x7F80_0001;
        assert_eq!(MISSING_FLOAT.bits(), expected);
    }

    // The vector-end float sentinel must have exactly this bit pattern.
    #[test]
    fn test_vector_end_float() {
        let expected: u32 = 0x7F80_0002;
        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
    }

    // `rlen` is 0 for an empty record and the reference allele length after `set_alleles`.
    #[test]
    fn test_record_rlen() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert_eq!(record.rlen(), 0);
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        assert_eq!(record.rlen(), 3)
    }

    // With position 5 and a 3-base reference allele, `end` is expected to be 8.
    #[test]
    fn test_record_end() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(5);

        assert_eq!(record.end(), 8)
    }

    // `clear` resets reference length, sample count and position to 0.
    #[test]
    fn test_record_clear() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_sample("sample".as_bytes());
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);
        record.clear();

        assert_eq!(record.rlen(), 0);
        assert_eq!(record.sample_count(), 0);
        assert_eq!(record.pos(), 0)
    }

    // A clone is independent: changing its position leaves the original untouched.
    #[test]
    fn test_record_clone() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);

        let mut cloned_record = record.clone();
        cloned_record.set_pos(5);

        assert_eq!(record.pos(), 6);
        assert_eq!(record.allele_count(), 2);
        assert_eq!(cloned_record.pos(), 5);
        assert_eq!(cloned_record.allele_count(), 2);
    }

    // An empty record reports PASS (also addressed as "." or id 0) and no other filter.
    #[test]
    fn test_record_has_filter_pass_is_default() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();

        assert!(record.has_filter("PASS".as_bytes()));
        assert!(record.has_filter(".".as_bytes()));
        assert!(record.has_filter(&Id(0)));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter(&Id(2)));
    }

    // After pushing a custom filter, that filter is reported and PASS no longer is.
    #[test]
    fn test_record_has_filter_custom() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();

        assert!(record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()))
    }

    // Filters can be pushed by name or by id; pushing a filter unknown to the header fails.
    #[test]
    fn test_record_push_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record.push_filter("foo".as_bytes()).unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.push_filter(&bar).unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter(&bar));
        assert!(!record.has_filter("PASS".as_bytes()));
        assert!(record.push_filter("baz".as_bytes()).is_err())
    }

    // `set_filters` replaces the filter list; an empty list restores PASS, and a list
    // containing a filter unknown to the header fails.
    #[test]
    fn test_record_set_filters() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record
            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
            .unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter("bar".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()));
        let filters: &[&Id] = &[];
        record.set_filters(filters).unwrap();
        assert!(record.has_filter("PASS".as_bytes()));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(record
            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
            .is_err())
    }

    // Removing filters one by one (second argument `true`) ends with PASS being reported;
    // removing a filter unknown to the header fails.
    #[test]
    fn test_record_remove_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let foo = record.header().name_to_id(b"foo").unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.set_filters(&[&foo, &bar]).unwrap();
        assert!(record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        record.remove_filter(&foo, true).unwrap();
        assert!(!record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
        record.remove_filter(&bar, true).unwrap();
        assert!(!record.has_filter(&bar));
        assert!(record.has_filter("PASS".as_bytes()));
    }

    // Formatting an empty record created against an empty header is expected to fail.
    #[test]
    fn test_record_to_vcf_string_err() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();
        assert!(record.to_vcf_string().is_err());
    }

    // With a contig and a filter declared, the record formats to a complete VCF line.
    #[test]
    fn test_record_to_vcf_string() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(b"##contig=<ID=chr1,length=1000>");
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();
        assert_eq!(
            record.to_vcf_string().unwrap(),
            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
        );
    }
}